<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
  <channel>
    <title>DZone Snippets: bloom code</title>
    <link>http://snippets.dzone.com/posts</link>
    <pubDate>Fri, 25 Jul 2008 06:47:35 GMT</pubDate>
    <description>DZone Snippets: bloom code</description>
    <item>
      <title>BloominSimple: Ultra-easy, pure Ruby Bloom filter library</title>
      <link>http://snippets.dzone.com/posts/show/4235</link>
      <description>Requires &lt;a href="http://snippets.dzone.com/posts/show/4234"&gt;BitField.&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;&lt;code&gt;&lt;br /&gt;#        NAME: BloominSimple&lt;br /&gt;#      AUTHOR: Peter Cooper&lt;br /&gt;#     LICENSE: MIT ( http://www.opensource.org/licenses/mit-license.php )&lt;br /&gt;#   COPYRIGHT: (c) 2007 Peter Cooper&lt;br /&gt;# DESCRIPTION: Very basic, pure Ruby Bloom filter. Uses my BitField, pure Ruby&lt;br /&gt;#              bit field library (http://snippets.dzone.com/posts/show/4234).&lt;br /&gt;#              Supports custom hashing (default is 3).&lt;br /&gt;#&lt;br /&gt;#              Create a Bloom filter that uses default hashing with 1Mbit wide bitfield&lt;br /&gt;#                bf = BloominSimple.new(1_000_000)&lt;br /&gt;#              &lt;br /&gt;#              Add items to it&lt;br /&gt;#                File.open('/usr/share/dict/words').each { |a| bf.add(a) }&lt;br /&gt;#&lt;br /&gt;#              Check for existence of items in the filter&lt;br /&gt;#                bf.includes?("people")     # =&gt; true&lt;br /&gt;#                bf.includes?("kwyjibo")    # =&gt; false&lt;br /&gt;#&lt;br /&gt;#              Add better hashing (c'est easy!)&lt;br /&gt;#                require 'digest/sha1'&lt;br /&gt;#                b = BloominSimple.new(1_000_000) do |item|&lt;br /&gt;#                  Digest::SHA1.digest(item.downcase.strip).unpack("VVVV")&lt;br /&gt;#                end&lt;br /&gt;#&lt;br /&gt;#              More&lt;br /&gt;#                %w{wonderful ball stereo jester flag shshshshsh nooooooo newyorkcity}.each do |a|&lt;br /&gt;#                  puts "#{sprintf("%15s", a)}: #{b.includes?(a)}"&lt;br /&gt;#                end&lt;br /&gt;#&lt;br /&gt;#                 #  =&gt;   wonderful: true&lt;br /&gt;#                 #  =&gt;        ball: true&lt;br /&gt;#                 #  =&gt;      stereo: true&lt;br /&gt;#                 #  =&gt;      jester: true&lt;br /&gt;#                 #  =&gt;        flag: true&lt;br /&gt;#                 #  =&gt;  shshshshsh: false&lt;br /&gt;#                 #  =&gt;    nooooooo: false&lt;br /&gt;#                 #  =&gt; newyorkcity: false&lt;br /&gt;&lt;br /&gt;require 'benchmark'&lt;br /&gt;require 'bitfield'&lt;br /&gt;&lt;br /&gt;class BloominSimple&lt;br /&gt;  attr_reader :bitfield, :hasher&lt;br /&gt;  &lt;br /&gt;  def initialize(bitsize, &amp;block)&lt;br /&gt;    @bitfield = BitField.new(bitsize)&lt;br /&gt;    @size = bitsize&lt;br /&gt;    @hasher = block || lambda do |word|&lt;br /&gt;      word = word.downcase.strip&lt;br /&gt;      [h1 = word.sum, h2 = word.hash, h2 + h1 ** 3]&lt;br /&gt;    end&lt;br /&gt;  end&lt;br /&gt;  &lt;br /&gt;  def add(item)&lt;br /&gt;    @hasher[item].each { |hi| @bitfield[hi % @size] = 1 }&lt;br /&gt;  end&lt;br /&gt;  &lt;br /&gt;  def includes?(item)&lt;br /&gt;    @hasher[item].each { |hi| return false unless @bitfield[hi % @size] == 1 } and true&lt;br /&gt;  end&lt;br /&gt;end&lt;br /&gt;&lt;/code&gt;</description>
      <pubDate>Mon, 02 Jul 2007 02:14:09 GMT</pubDate>
      <guid>http://snippets.dzone.com/posts/show/4235</guid>
      <author>peter (Peter Cooperx)</author>
    </item>
    <item>
      <title>BitField: A fast(ish), pure Ruby bit field "type"</title>
      <link>http://snippets.dzone.com/posts/show/4234</link>
      <description>&lt;code&gt;#        NAME: BitField&lt;br /&gt;#      AUTHOR: Peter Cooper&lt;br /&gt;#     LICENSE: MIT ( http://www.opensource.org/licenses/mit-license.php )&lt;br /&gt;#   COPYRIGHT: (c) 2007 Peter Cooper (http://www.petercooper.co.uk/)&lt;br /&gt;#     VERSION: v4&lt;br /&gt;#     HISTORY: v4 (fixed bug where setting 0 bits to 0 caused a set to 1)&lt;br /&gt;#              v3 (supports dynamic bitwidths for array elements.. now doing 32 bit widths default)&lt;br /&gt;#              v2 (now uses 1 &lt;&lt; y, rather than 2 ** y .. it's 21.8 times faster!)&lt;br /&gt;#              v1 (first release)&lt;br /&gt;#&lt;br /&gt;# DESCRIPTION: Basic, pure Ruby bit field. Pretty fast (for what it is) and memory efficient.&lt;br /&gt;#              I've written a pretty intensive test suite for it and it passes great. &lt;br /&gt;#              Works well for Bloom filters (the reason I wrote it).&lt;br /&gt;#&lt;br /&gt;#              Create a bit field 1000 bits wide&lt;br /&gt;#                bf = BitField.new(1000)&lt;br /&gt;#&lt;br /&gt;#              Setting and reading bits&lt;br /&gt;#                bf[100] = 1&lt;br /&gt;#                bf[100]    .. =&gt; 1&lt;br /&gt;#                bf[100] = 0&lt;br /&gt;#&lt;br /&gt;#              More&lt;br /&gt;#                bf.to_s = "10101000101010101"  (example)&lt;br /&gt;#                bf.total_set         .. =&gt; 10  (example - 10 bits are set to "1")&lt;br /&gt;&lt;br /&gt;class BitField&lt;br /&gt;  attr_reader :size&lt;br /&gt;  include Enumerable&lt;br /&gt;  &lt;br /&gt;  ELEMENT_WIDTH = 32&lt;br /&gt;  &lt;br /&gt;  def initialize(size)&lt;br /&gt;    @size = size&lt;br /&gt;    @field = Array.new(((size - 1) / ELEMENT_WIDTH) + 1, 0)&lt;br /&gt;  end&lt;br /&gt;  &lt;br /&gt;  # Set a bit (1/0)&lt;br /&gt;  def []=(position, value)&lt;br /&gt;    if value == 1&lt;br /&gt;      @field[position / ELEMENT_WIDTH] |= 1 &lt;&lt; (position % ELEMENT_WIDTH)&lt;br /&gt;    elsif (@field[position / ELEMENT_WIDTH]) &amp; (1 &lt;&lt; (position % ELEMENT_WIDTH)) != 0&lt;br /&gt;      @field[position / ELEMENT_WIDTH] ^= 1 &lt;&lt; (position % ELEMENT_WIDTH)&lt;br /&gt;    end&lt;br /&gt;  end&lt;br /&gt;  &lt;br /&gt;  # Read a bit (1/0)&lt;br /&gt;  def [](position)&lt;br /&gt;    @field[position / ELEMENT_WIDTH] &amp; 1 &lt;&lt; (position % ELEMENT_WIDTH) &gt; 0 ? 1 : 0&lt;br /&gt;  end&lt;br /&gt;  &lt;br /&gt;  # Iterate over each bit&lt;br /&gt;  def each(&amp;block)&lt;br /&gt;    @size.times { |position| yield self[position] }&lt;br /&gt;  end&lt;br /&gt;  &lt;br /&gt;  # Returns the field as a string like "0101010100111100," etc.&lt;br /&gt;  def to_s&lt;br /&gt;    inject("") { |a, b| a + b.to_s }&lt;br /&gt;  end&lt;br /&gt;  &lt;br /&gt;  # Returns the total number of bits that are set&lt;br /&gt;  # (The technique used here is about 6 times faster than using each or inject direct on the bitfield)&lt;br /&gt;  def total_set&lt;br /&gt;    @field.inject(0) { |a, byte| a += byte &amp; 1 and byte &gt;&gt;= 1 until byte == 0; a }&lt;br /&gt;  end&lt;br /&gt;end&lt;/code&gt;&lt;br /&gt;&lt;br /&gt;Here's the tests if you want to run:&lt;br /&gt;&lt;br /&gt;&lt;code&gt;&lt;br /&gt;require "test/unit"&lt;br /&gt;require "bitfield"&lt;br /&gt;&lt;br /&gt;class TestLibraryFileName &lt; Test::Unit::TestCase&lt;br /&gt;  def setup&lt;br /&gt;    @public_bf = BitField.new(1000)&lt;br /&gt;  end&lt;br /&gt;  &lt;br /&gt;  def test_basic&lt;br /&gt;    assert_equal 0, BitField.new(100)[0]&lt;br /&gt;    assert_equal 0, BitField.new(100)[1]&lt;br /&gt;  end&lt;br /&gt;  &lt;br /&gt;  def test_setting_and_unsetting&lt;br /&gt;    @public_bf[100] = 1&lt;br /&gt;    assert_equal 1, @public_bf[100]&lt;br /&gt;    @public_bf[100] = 0&lt;br /&gt;    assert_equal 0, @public_bf[100]&lt;br /&gt;  end&lt;br /&gt;&lt;br /&gt;  def test_random_setting_and_unsetting&lt;br /&gt;    100.times do&lt;br /&gt;      index = rand(1000)&lt;br /&gt;      @public_bf[index] = 1&lt;br /&gt;      assert_equal 1, @public_bf[index]&lt;br /&gt;      @public_bf[index] = 0&lt;br /&gt;      assert_equal 0, @public_bf[index]&lt;br /&gt;    end&lt;br /&gt;  end&lt;br /&gt;  &lt;br /&gt;  def test_multiple_setting&lt;br /&gt;    1.upto(999) do |pos|&lt;br /&gt;      2.times { @public_bf[pos] = 1 }&lt;br /&gt;      assert_equal 1, @public_bf[pos]&lt;br /&gt;    end&lt;br /&gt;  end&lt;br /&gt;&lt;br /&gt;  def test_multiple_unsetting&lt;br /&gt;    1.upto(999) do |pos|&lt;br /&gt;      2.times { @public_bf[pos] = 0 }&lt;br /&gt;      assert_equal 0, @public_bf[pos]&lt;br /&gt;    end&lt;br /&gt;  end&lt;br /&gt;  &lt;br /&gt;  def test_size&lt;br /&gt;    assert_equal 1000, @public_bf.size&lt;br /&gt;  end&lt;br /&gt;  &lt;br /&gt;  def test_to_s&lt;br /&gt;    bf = BitField.new(10)&lt;br /&gt;    bf[1] = 1&lt;br /&gt;    bf[5] = 1&lt;br /&gt;    assert_equal "0100010000", bf.to_s&lt;br /&gt;  end&lt;br /&gt;  &lt;br /&gt;  def test_total_set&lt;br /&gt;    bf = BitField.new(10)&lt;br /&gt;    bf[1] = 1&lt;br /&gt;    bf[5] = 1&lt;br /&gt;    assert_equal 2, bf.total_set&lt;br /&gt;  end&lt;br /&gt;end&lt;br /&gt;&lt;/code&gt;</description>
      <pubDate>Mon, 02 Jul 2007 01:16:55 GMT</pubDate>
      <guid>http://snippets.dzone.com/posts/show/4234</guid>
      <author>peter (Peter Cooperx)</author>
    </item>
  </channel>
</rss>
