<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
  <channel>
    <title>DZone Snippets: spellcheck code</title>
    <link>http://snippets.dzone.com/posts</link>
    <pubDate>Thu, 24 Jul 2008 04:21:42 GMT</pubDate>
    <description>DZone Snippets: spellcheck code</description>
    <item>
      <title>BloominSimple: Ultra-easy, pure Ruby Bloom filter library</title>
      <link>http://snippets.dzone.com/posts/show/4235</link>
      <description>Requires &lt;a href="http://snippets.dzone.com/posts/show/4234"&gt;BitField.&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;&lt;code&gt;&lt;br /&gt;#        NAME: BloominSimple&lt;br /&gt;#      AUTHOR: Peter Cooper&lt;br /&gt;#     LICENSE: MIT ( http://www.opensource.org/licenses/mit-license.php )&lt;br /&gt;#   COPYRIGHT: (c) 2007 Peter Cooper&lt;br /&gt;# DESCRIPTION: Very basic, pure Ruby Bloom filter. Uses my BitField, pure Ruby&lt;br /&gt;#              bit field library (http://snippets.dzone.com/posts/show/4234).&lt;br /&gt;#              Supports custom hashing (default is 3).&lt;br /&gt;#&lt;br /&gt;#              Create a Bloom filter that uses default hashing with 1Mbit wide bitfield&lt;br /&gt;#                bf = BloominSimple.new(1_000_000)&lt;br /&gt;#              &lt;br /&gt;#              Add items to it&lt;br /&gt;#                File.open('/usr/share/dict/words').each { |a| bf.add(a) }&lt;br /&gt;#&lt;br /&gt;#              Check for existence of items in the filter&lt;br /&gt;#                bf.includes?("people")     # =&gt; true&lt;br /&gt;#                bf.includes?("kwyjibo")    # =&gt; false&lt;br /&gt;#&lt;br /&gt;#              Add better hashing (c'est easy!)
&lt;br /&gt;#                require 'digest/sha1'&lt;br /&gt;#                b = BloominSimple.new(1_000_000) do |item|&lt;br /&gt;#                  Digest::SHA1.digest(item.downcase.strip).unpack("VVVV")&lt;br /&gt;#                end&lt;br /&gt;#&lt;br /&gt;#              More&lt;br /&gt;#                %w{wonderful ball stereo jester flag shshshshsh nooooooo newyorkcity}.each do |a|&lt;br /&gt;#                  puts "#{sprintf("%15s", a)}: #{b.includes?(a)}"&lt;br /&gt;#                end&lt;br /&gt;#&lt;br /&gt;#                 #  =&gt;   wonderful: true&lt;br /&gt;#                 #  =&gt;        ball: true&lt;br /&gt;#                 #  =&gt;      stereo: true&lt;br /&gt;#                 #  =&gt;      jester: true&lt;br /&gt;#                 #  =&gt;        flag: true&lt;br /&gt;#                 #  =&gt;  shshshshsh: false&lt;br /&gt;#                 #  =&gt;    nooooooo: false&lt;br /&gt;#                 #  =&gt; newyorkcity: false&lt;br /&gt;&lt;br /&gt;require 'benchmark'&lt;br /&gt;require 'bitfield'&lt;br /&gt;&lt;br /&gt;class BloominSimple&lt;br /&gt;  attr_reader :bitfield, :hasher&lt;br /&gt;  &lt;br /&gt;  def initialize(bitsize, &amp;block)&lt;br /&gt;    @bitfield = BitField.new(bitsize)&lt;br /&gt;    @size = bitsize&lt;br /&gt;    @hasher = block || lambda do |word|&lt;br /&gt;      word = word.downcase.strip&lt;br /&gt;      [h1 = word.sum, h2 = word.hash, h2 + h1 ** 3]&lt;br /&gt;    end&lt;br /&gt;  end&lt;br /&gt;  &lt;br /&gt;  def add(item)&lt;br /&gt;    @hasher[item].each { |hi| @bitfield[hi % @size] = 1 }&lt;br /&gt;  end&lt;br /&gt;  &lt;br /&gt;  def includes?(item)&lt;br /&gt;    @hasher[item].each { |hi| return false unless @bitfield[hi % @size] == 1 } and true&lt;br /&gt;  end&lt;br /&gt;end&lt;br /&gt;&lt;/code&gt;</description>
      <pubDate>Mon, 02 Jul 2007 02:14:09 GMT</pubDate>
      <guid>http://snippets.dzone.com/posts/show/4235</guid>
      <author>peter (Peter Cooper)</author>
    </item>
  </channel>
</rss>
