Never been to DZone Snippets before?

Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world

About this user

« Newer Snippets
Older Snippets »
Showing 1-10 of 11 total  RSS 

Basic Trie data structure in Ruby

# Inspired by http://www.rubyquiz.com/quiz103.html
# cf. also http://en.wikipedia.org/wiki/Trie

#!/usr/local/bin/ruby -w

require 'pp'

class Hash

   def deep_merge!(second)
      # cf. http://snippets.dzone.com/posts/show/4146
      merger = proc { |key,v1,v2| Hash === v1 && Hash === v2 ? v1.merge(v2, &merger) : v2 }
      self.merge!(second, &merger)
   end

   def nested_hash(array)
      node = self
      array.each do |i|
         node[i]=Hash.new if node[i].nil?
         node = node[i]
      end 
      self
   end

   def merge_nested_hash!(nested_hash)
      deep_merge!(nested_hash)
   end

   # code basis taken from: "Find every path and it's value in a Hash" by Florian Aßmann,
   # http://snippets.dzone.com/posts/show/3565

   def each_trie_path
      raise ArgumentError unless block_given?
      self.class.each_trie_path(self) { |path, object| yield(path, object) }
   end

   protected
   def self.each_trie_path(object, path = [], &block)  

      if object.is_a?(Hash)
         object.each do |key, value|

            if key == true && value == {}
               if path == [:root]  # special case for empty string: [[:root], {true=>{}}]
                  yield(path, {true=>{}})
                  next
               end
               yield(path, object)
            end

            self.each_trie_path(value, [path , key].flatten, &block)
         end
      else 
         yield(path, object)
      end
   end 

end


class Trie

   @hash = Hash.new.merge_nested_hash!(Hash.new)
   #@hash = Hash.new.merge_nested_hash!({:root=>{}})
   #@hash = Hash.new.merge_nested_hash!(Hash.new.nested_hash([:root]))
   class << self; attr_accessor :hash; end    # Trie.hash

   def initialize
      Trie.hash.merge_nested_hash!({:root=>{}})
   end


   def add_int(int)   # for int >= 0
      ia = int.to_s.scan(/[[:digit:]]/).map { |i| i.to_i }  # integer array; ex: [4,6,2]
      ia.unshift(:root).push(true)
      Trie.hash.merge_nested_hash!(Hash.new.nested_hash(ia))
   end

   def matchi(int)  # match integer
      ia = int.to_s.scan(/[[:digit:]]/).map { |i| i.to_i }  
      node = Trie.hash.fetch(:root,nil)
      ia.each do |digit|
         node = node[digit]
         #node = node.fetch(digit,nil)
         return false unless node
      end
      node.fetch(true,nil) ? true : false
   end

   def mfpi(int)   # match first part of integer
      ia = int.to_s.scan(/[[:digit:]]/).map { |i| i.to_i }
      node = Trie.hash.fetch(:root,nil)
      ia.each do |digit|
         node = node[digit]
         return false unless node
      end
      return true
   end


   def add_word(word)
      ca = word.split(//u)    # UTF-8 aware character array
      ca.unshift(:root).push(true)
      Trie.hash.merge_nested_hash!(Hash.new.nested_hash(ca))
   end

   def match(word)   # match entire word
      ca = word.split(//u)    
      node = Trie.hash.fetch(:root,nil)
      ca.each do |char|
         node = node[char]
         #node = node.fetch(char,nil)
         return false unless node
      end
      node.fetch(true,nil) ? true : false
   end

   def mfpw(word)   # match first part of word
      ca = word.split(//u)    
      node = Trie.hash.fetch(:root,nil)
      ca.each do |char|
         node = node[char]
         return false unless node
      end
      return true
   end

end


trie = Trie.new
p Trie.hash

trie.add_word("word")
pp Trie.hash            
p trie.match("word")      
p trie.match("word2")   
trie.add_word("word2")
p trie.match("word2")   
pp Trie.hash            # {:root=>{"w"=>{"o"=>{"r"=>{"d"=>{true=>{}, "2"=>{true=>{}}}}}}}}

trie.add_word("")
p trie.match("")
pp Trie.hash

puts

p trie.match("word")      
p trie.mfpw("wor")     # match first part of word

puts

trie.add_int(12345)
p Trie.hash

trie.add_int(12980345)
pp Trie.hash

trie.add_int(8512)
pp Trie.hash
p trie.matchi(8512)
p trie.matchi(85)
p trie.mfpi(85)     # match first part of integer
p trie.matchi(51)

puts

puts
pp Trie.hash
paths = []
Trie.hash.each_trie_path { |path, value| paths.push([ path ]) }
#Trie.hash.each_trie_path { |path, value| paths.push([ path, value ]) }
#pp paths
puts
paths.each { |x| p x }
puts


#------------------------------------------------------


require 'pp'

class Hash
   def Hash.new_nested_hash
     Hash.new{|h,k| h[k]=Hash.new(&h.default_proc) }
   end
end


class Trie

   @hash = Hash.new_nested_hash
   #@hash = Hash.new_nested_hash.update(true=>{})  # add empty string by default
   class << self; attr_accessor :hash; end    # Trie.hash

   def <<(word)
      ca = word.split(//u)    # UTF-8 aware character array
      wl = ca.size            # word length
      str = ""
      wl.times { |x| str << "[ca.at(#{x})]" }
      str = "Trie.hash" << str << "[true]"
      #p str     # example: "Trie.hash[ca.at(0)][ca.at(1)][ca.at(2)][ca.at(3)][true]"
      eval(str)
   end


   def match(word)
      ca = word.split(//u)    # UTF-8 aware character array
      wl = ca.size            # word length
      str = ""

      wl.times { |x| str << ".fetch(ca.at(#{x}),nil)" }
      str = "Trie.hash" << str << ".fetch(true,nil)"
      #p str   # example: "Trie.hash.fetch(ca.at(0),nil).fetch(ca.at(1),nil).fetch(ca.at(2),nil).fetch(true,nil)"
      ret = eval(str) rescue nil

=begin
      # alternative
      wl.times { |x| str << ".fetch(ca.at(#{x}),{})" }
      str = "Trie.hash" << str << ".fetch(true,nil)"
      #p str   # example: "Trie.hash.fetch(ca.at(0),{}).fetch(ca.at(1),{}).fetch(ca.at(2),{}).fetch(true,nil)"
      ret = eval(str)
=end

      ret == {} ? true : false   # {} is the default value created by Trie.hash or Hash.new_nested_hash respectively
   end

end


trie = Trie.new
trie << "word"
pp Trie.hash            
p trie.match("word")      
p trie.match("word2")   
trie << "word2"
p trie.match("word2")   
pp Trie.hash            # {"w"=>{"o"=>{"r"=>{"d"=>{true=>{}, "2"=>{true=>{}}}}}}}

trie << ""
p trie.match("")
pp Trie.hash


#------------------------------------------


# alternative with default :root element

require 'pp'

class Hash
   def Hash.new_nested_hash
     Hash.new{|h,k| h[k]=Hash.new(&h.default_proc) }
   end
end


class Hash

   # code basis taken from: "Find every path and it's value in a Hash" by Florian Aßmann,
   # http://snippets.dzone.com/posts/show/3565

   def each_trie_path
      raise ArgumentError unless block_given?
      self.class.each_trie_path(self) { |path, object| yield(path, object) }
   end

   protected
   def self.each_trie_path(object, path = [], &block)  

      if object.is_a?(Hash)
         object.each do |key, value|

            if key == true && value == {}
               if path == [:root]  # special case for empty string: [[:root], {true=>{}}]
                  yield(path, {true=>{}})
                  next
               end
               yield(path, object)
            end

            self.each_trie_path(value, [path , key].flatten, &block)
         end
      else 
         yield(path, object)
      end
   end 

end


class Trie

   @hash = Hash.new_nested_hash
   class << self; attr_accessor :hash; end    # Trie.hash

   def initialize
      Trie.hash[:root]
   end


   def add_int(int)   # for int >= 0
      ia = int.to_s.scan(/[[:digit:]]/).map { |i| i.to_i }  # integer array; ex: [4,6,2]
      ia.unshift(:root)
      str = ""
      ia.size.times { |x| str << "[ia.at(#{x})]" }
      str = "Trie.hash" << str << "[true]"
      eval(str)
   end

   def matchi(int)  # match integer
      ia = int.to_s.scan(/[[:digit:]]/).map { |i| i.to_i }  
      node = Trie.hash.fetch(:root,nil)
      ia.each do |digit|
         node = node.fetch(digit,nil)
         return false unless node
      end
      node.fetch(true,nil) ? true : false
   end

   def mfpi(int)   # match first part of integer
      ia = int.to_s.scan(/[[:digit:]]/).map { |i| i.to_i }
      node = Trie.hash.fetch(:root,nil)
      ia.each do |digit|
         node = node.fetch(digit,nil)
         return false unless node
      end
      return true
   end


   def <<(word)
      ca = word.split(//u)    # UTF-8 aware character array
      ca.unshift(:root)
      #ca = [:root, *word.split(//u)]
      #ca = [:root].concat(word.split(//u))
      str = ""
      ca.size.times { |x| str << "[ca.at(#{x})]" }
      str = "Trie.hash" << str << "[true]"
      #p str     # example: "Trie.hash[ca.at(0)][ca.at(1)][ca.at(2)][ca.at(3)][true]"
      eval(str)
   end

   def match(word)
      ca = word.split(//u)    
      ca.unshift(:root)
      #ca = [:root, *word.split(//u)]
      #ca = [:root].concat(word.split(//u))
      str = ""
      ca.size.times { |x| str << ".fetch(ca.at(#{x}),nil)" }
      str = "Trie.hash" << str << ".fetch(true,nil)"
      #p str   # example: "Trie.hash.fetch(ca.at(0),nil).fetch(ca.at(1),nil).fetch(ca.at(2),nil).fetch(true,nil)"
      ret = eval(str) rescue nil

=begin
      # alternative
      ca.size.times { |x| str << ".fetch(ca.at(#{x}),{})" }
      str = "Trie.hash" << str << ".fetch(true,nil)"
      #p str   # example: "Trie.hash.fetch(ca.at(0),{}).fetch(ca.at(1),{}).fetch(ca.at(2),{}).fetch(true,nil)"
      ret = eval(str)
=end

      ret == {} ? true : false   # {} is the default value created by Trie.hash or Hash.new_nested_hash respectively
   end

   def match2(word)   # match entire word
      ca = word.split(//u)    
      node = Trie.hash.fetch(:root,nil)
      ca.each do |char|
         node = node.fetch(char,nil)
         return false unless node
      end
      node.fetch(true,nil) ? true : false
   end

   def mfpw(word)   # match first part of word
      ca = word.split(//u)    
      node = Trie.hash.fetch(:root,nil)
      ca.each do |char|
         node = node.fetch(char,nil)
         return false unless node
      end
      return true
   end

end


trie = Trie.new
trie << "word"
pp Trie.hash            
p trie.match("word")      
p trie.match("word2")   
trie << "word2"
p trie.match("word2")   
pp Trie.hash            # {:root=>{"w"=>{"o"=>{"r"=>{"d"=>{true=>{}, "2"=>{true=>{}}}}}}}}

trie << ""
p trie.match("")
pp Trie.hash

puts

p trie.match2("word")      
p trie.mfpw("wor")     # match first part of word

puts

trie.add_int(8512)
pp Trie.hash
p trie.matchi(8512)
p trie.matchi(85)
p trie.mfpi(85)     # match first part of integer
p trie.matchi(51)

puts

puts
pp Trie.hash
paths = []
Trie.hash.each_trie_path { |path, value| paths.push([ path ]) }
#Trie.hash.each_trie_path { |path, value| paths.push([ path, value ]) }
#pp paths
puts
paths.each { |x| p x }
puts

Matching quoted strings in Ruby

An exercise in string processing and regexp matching, inspired by Parsing Quoted Strings in Ruby and Stupid Ruby Quoting Tricks.

#!/usr/local/bin/ruby -w

# some input examples
str = 'foo "bar baz" qux'
str = 'foo "bar baz " "bar baz" " bar baz" "bar "klr mre" " " \' "abc" \' baz " qux'
str = '" \' \'    " \n "   " \' \' "" foo \'ttt sss\' "bar "qqq zzz" baz" "added term" qux  " \' \'    "  yyy xxx'
str = '"""frickin \'#{bar}\'"""'
str = '""    "frickin chicken "    #{bar}""""'
str = '"""frickin "#{bar}""""'
str = '"a "b c" "d "e" f g" """h""""'       # cf. http://snippets.dzone.com/posts/show/4852

# escaped quotes
str = '\"'
str = "\\\""
str = '\\\''
str = "\\'"

# special cases
str = '"G","H I"'
str = '"G","H I""G","H I"'

str = '"abc""def"'
str = '"""a""b"'
str = '"abc""def""abc""def""abc""def"'
str = '"a"\'\'"b"'

str = "\"abc'vv'tt\"'klt'"

str = "abc,def,\"efg,hij\",klm,nop,\"qrstuv\",wxyz"
str = "abc,def,\"efg,hij\",klm, 'nop, \"qrstuv\",wxyz,mmm '"
str = "abc,def,\"efg,hij\",klm, \"nop, 'qrstuv',wxyz,mmm \""


puts
puts "input string:  #{str}" 
puts "str.inspect :  #{str.inspect}" 
puts

num_of_chars1 = str.count('a-zA-Z_0-9', "^\000ds")

error_code = 0      # in case of a parsing error Shellwords will be used instead of regex1 & regex2
str2 = str.clone

# encode escaped quotes
str = str.gsub(/\\"|\\'/) { |m| m =~ /^\\"$/ ? "\000d\000" : "\000s\000" }

dq_count = str.count('"')
sq_count = str.count("'")

if dq_count % 2 != 0 && sq_count % 2 != 0
   raise ArgumentError, "\e[1modd number of single & double quotes\e[m in: #{str}\nsq_count: #{sq_count}\ndq_count: #{dq_count}\n"
elsif dq_count % 2 != 0
   raise ArgumentError, "\e[1modd number of double quotes\e[m in: #{str}\ndq_count: #{dq_count}\n"
elsif sq_count % 2 != 0
   raise ArgumentError, "\e[1modd number of single quotes\e[m in: #{str}\nsq_count: #{sq_count}\n"
end

# regex1 separates substrings that contain quotes from substrings that do not contain quotes
regex1 = %r{[^"']+|["'].*?["'](?!.*["'])}m  

# example
#"abc 'quote1' pjk 'quote2' xyz".scan(regex1) { |m| puts m } 


regex2 = %r{
# experimental: special cases
\s*["'][^"']+["'][[:punct:]]["'][^"']+["']|  # special case:  xxx "ab c","def g" yyy
\s*["'][^"']+["']{2,}[^"']+["']|             # special case:  xxx "abc""def" yyy
\s"[^"]+"|                                   # special case: xxx "abc 'def' ghi"
\s'[^']+'|                                   # special case: xxx 'abc "def" ghi'
\s*["']\S+["']|                              # special case: "abc'vv'tt"'klt'

\s'\s|                       # xxx ' yyy
\s"\s|                       # xxx " yyy
\s''\s|                      # xxx '' yyy
\s""\s|                      # xxx "" yyy
\s'\s+'\s|                   # xxx '   ' yyy
\s"\s+"\s|                   # xxx "   " yyy
\s"\s[^"]+\s"\s|             # xxx " abc " yyy
\s'\s[^']+\s'\s|             # xxx ' abc ' yyy
\s["']["']+(?=[^"'\s])|      # :qoblock:  xxx "'""'abc yyy
[^"'\s]["']["']+(?=\s)|      # :qcblock:  xxx abc"'""' yyy
\s""+|                       # :dqoblock:  xxx """abc yyy
\s''+|                       # :sqoblock:  xxx '''abc yyy
[^"]""+|                     # :dqcblock:  xxx abc"" yyy
[^']''+|                     # :sqcblock:  xxx abc'' yyy
\s["'](?=[^"'\s])|           # :dqo or :sqo:  xxx "abc yyy  or  xxx 'abc yyy
[^"'\s]["'](?=\s)|           # :dqc or :sqc:  xxx abc" yyy  or  xxx abc' yyy
[^"']+[^"'\s](?=\s)          # no quotes at all
}mx


=begin

There are different kinds of quotes matched by regex2 below. They include:

- :sqo (single quote open)
- :sqc (single quote close)
- :sqoblock (single quote open block)
- :sqcblock (single quote close block)

- :dqo (double quote open)
- :dqc (double quote close)
- :dqoblock (double quote open block)
- :dqcblock (double quote close block)

- :qoblock (quote open block)
- :qcblock (quote close block)

=end


ret = []

str.scan(regex1) do |s| 

   if s !~ /\A["']/

      #puts "s1: #{s}"
      #puts "s1.inspect: #{s.inspect}"

      s.split(/\s+/m).each { |t| ret << t unless t.empty? }

   else

      #puts "s2: #{s}"
      #puts "s2.inspect: #{s.inspect}"

      open_quotes = 0
      close_quotes = 0
      ar = []

      # add spaces to simplify regex2 matching
      s = "\x20" << s << "\x20"    
      s.gsub!(/\x20/, "\x20\x20")  


      s.scan(regex2) do |m|

         # get the index of the quote
         # + 1 for leading space or non-space
         # $` is the prematch string

         index = $`.length + 1 

         post_match = $'  

         #puts
         #puts "index: #{index}"
         #puts "m: #{m.inspect}"
         #puts "m.length: #{m.length}"
         #puts "open_quotes:  #{open_quotes}\nclose_quotes: #{close_quotes}"
         #puts "ret: #{ret.inspect}"
         #puts "ar: #{ar.inspect}"
         #puts


         if m =~ /\A\s''\s\z/

            next unless open_quotes == 0 && close_quotes == 0
            ret << ''
            next

         elsif m =~ /\A\s""\s\z/

            next unless open_quotes == 0 && close_quotes == 0
            ret << ""
            next

         # example: xxx "ab c","def g" yyy
         elsif open_quotes.zero? && close_quotes.zero? && m =~ /\A\s*["'][^"']+["'][[:punct:]]["'][^"']+["']\z/ && m.count('"') % 2 == 0 && m.count("'") % 2 == 0           

            m = m.gsub(/\x20\x20/, "\x20")
            # cf. http://henrik.nyh.se/2008/03/flickr-style-tag-splitting-in-ruby
            m = m.split(/"(.+?)"|\s+/).reject {|sm| sm.empty? }
            #m = m.split(/"(.+?)"|'(.+?)'|\s+/).reject {|sm| sm.empty? }
            #m = m.split(/"(.+?)"|'(.+?)'|([[:punct:]])|\s+/).reject {|sm| sm.empty? }
            ret.concat(m)
            next

         # example: xxx "abc""def" yyy
         elsif open_quotes.zero? && close_quotes.zero? && m =~ /\A\s*["'][^"']+["']{2,}[^"']+["']\z/ && m.count('"') % 2 == 0 && m.count("'") % 2 == 0           
            
            m = m.gsub(/\x20\x20/, "\x20")
            m = m.split(/"(.+?)"|\s+/).reject {|sm| sm.empty? }
            #m = m.split(/"(.+?)"|'(.+?)'|\s+/).reject {|sm| sm.empty? }
            ret.concat(m)
            next


         elsif open_quotes.zero? && close_quotes.zero? && m =~ /\A\s"[^"]+"\z/ && m.count('"') % 2 == 0 && m.count("'") % 2 == 0
            ret.concat(m.split(/"(.+?)"|\s+/).reject {|sm| sm.empty? })
            next

         elsif open_quotes.zero? && close_quotes.zero? && m =~ /\A\s'[^']+'\z/ && m.count('"') % 2 == 0 && m.count("'") % 2 == 0
            ret.concat(m.split(/'(.+?)'|\s+/).reject {|sm| sm.empty? })
            next

         elsif open_quotes.zero? && close_quotes.zero? && m =~ /\A\s*["']\S+["']\z/ && m.count('"') % 2 == 0 && m.count("'") % 2 == 0
            ret.concat(m.split(/"(.+?)"|\s+/).reject {|sm| sm.empty? })
            next


         elsif m =~ /\A\s"\s[^"]+\s"\s\z/

            next unless open_quotes == 0 && close_quotes == 0
            ret << m.gsub(/\x20\x20/, "\x20").strip[1..-2]
            next

         elsif m =~ /\A\s'\s[^']+\s"\s\z/

            next unless open_quotes == 0 && close_quotes == 0
            ret << m.gsub(/\x20\x20/, "\x20").strip[1..-2]
            next

         elsif m =~ /\A\s'\s+'\s\z/

            next unless open_quotes == 0 && close_quotes == 0
            ret << m.gsub(/\x20\x20/, "\x20").strip[1..-2]
            next

         elsif m =~ /\A\s"\s+"\s\z/

           next unless open_quotes == 0 && close_quotes == 0
           ret << m.gsub(/\x20\x20/, "\x20").strip[1..-2]
           next


         elsif m =~ /\A\s""+\z/

            l = m.strip.length
            ar << [:dqoblock, index, l]
            old_open_quotes = open_quotes
            open_quotes += l

            if close_quotes == 0 && old_open_quotes == 0 && open_quotes % 2 == 0 && post_match !~ /"/
               ret << m[2..-2] 
               open_quotes = 0
               ar.pop
               next
            end


         elsif m =~ /\A\s''+\z/

            l = m.strip.length
            ar << [:sqoblock, index, l]
            old_open_quotes = open_quotes
            open_quotes += l

            if close_quotes == 0 && old_open_quotes == 0 && open_quotes % 2 == 0 && post_match !~ /'/
               ret << m[2..-2] 
               open_quotes = 0
               ar.pop
               next
            end


         elsif m =~ /\A[^"]""+\z/

            l = m[1..-1].strip.length
            ar << [:dqcblock, index+l-1, l]      #  index+l-1 is the index of the last closing quote: ''"'[']
            old_close_quotes = close_quotes
            close_quotes += l

            if open_quotes == 0 && old_close_quotes == 0 && close_quotes % 2 == 0 && post_match !~ /"/
               ret << m[2..-2] 
               close_quotes = 0
               ar.pop
               next
            end

         elsif m =~ /\A[^']''+\z/

            l = m[1..-1].strip.length
            ar << [:sqcblock, index+l-1, l]
            old_close_quotes = close_quotes
            close_quotes += l

            if open_quotes == 0 && old_close_quotes == 0 && close_quotes % 2 == 0 && post_match !~ /'/
               ret << m[2..-2] 
               close_quotes = 0
               ar.pop
               next
            end


         elsif m =~ /\A\s'\z/

            ar << [:sqo, index, 1]
            open_quotes += 1

         elsif m =~ /\A\S'\z/

            ar << [:sqc, index, 1]
            close_quotes += 1

         elsif m =~ /\A\s"\z/

            ar << [:dqo, index, 1]
            open_quotes += 1

         elsif m =~ /\A\S"\z/

            ar << [:dqc, index,