Never been to DZone Snippets before?

Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world

« Newer Snippets
Older Snippets »
Showing 1-4 of 4 total  RSS 

Basic Trie data structure in Ruby

   1  
   2  # Inspired by http://www.rubyquiz.com/quiz103.html
   3  # cf. also http://en.wikipedia.org/wiki/Trie
   4  
   5  #!/usr/local/bin/ruby -w
   6  
   7  require 'pp'
   8  
   9  class Hash
  10  
  11     def deep_merge!(second)
  12        # cf. http://snippets.dzone.com/posts/show/4146
  13        merger = proc { |key,v1,v2| Hash === v1 && Hash === v2 ? v1.merge(v2, &merger) : v2 }
  14        self.merge!(second, &merger)
  15     end
  16  
  17     def nested_hash(array)
  18        node = self
  19        array.each do |i|
  20           node[i]=Hash.new if node[i].nil?
  21           node = node[i]
  22        end 
  23        self
  24     end
  25  
  26     def merge_nested_hash!(nested_hash)
  27        deep_merge!(nested_hash)
  28     end
  29  
  30     # code basis taken from: "Find every path and it's value in a Hash" by Florian Aßmann,
  31     # http://snippets.dzone.com/posts/show/3565
  32  
  33     def each_trie_path
  34        raise ArgumentError unless block_given?
  35        self.class.each_trie_path(self) { |path, object| yield(path, object) }
  36     end
  37  
  38     protected
  39     def self.each_trie_path(object, path = [], &block)  
  40  
  41        if object.is_a?(Hash)
  42           object.each do |key, value|
  43  
  44              if key == true && value == {}
  45                 if path == [:root]  # special case for empty string: [[:root], {true=>{}}]
  46                    yield(path, {true=>{}})
  47                    next
  48                 end
  49                 yield(path, object)
  50              end
  51  
  52              self.each_trie_path(value, [path , key].flatten, &block)
  53           end
  54        else 
  55           yield(path, object)
  56        end
  57     end 
  58  
  59  end
  60  
  61  
  62  class Trie
  63  
  64     @hash = Hash.new.merge_nested_hash!(Hash.new)
  65     #@hash = Hash.new.merge_nested_hash!({:root=>{}})
  66     #@hash = Hash.new.merge_nested_hash!(Hash.new.nested_hash([:root]))
  67     class << self; attr_accessor :hash; end    # Trie.hash
  68  
  69     def initialize
  70        Trie.hash.merge_nested_hash!({:root=>{}})
  71     end
  72  
  73  
  74     def add_int(int)   # for int >= 0
  75        ia = int.to_s.scan(/[[:digit:]]/).map { |i| i.to_i }  # integer array; ex: [4,6,2]
  76        ia.unshift(:root).push(true)
  77        Trie.hash.merge_nested_hash!(Hash.new.nested_hash(ia))
  78     end
  79  
  80     def matchi(int)  # match integer
  81        ia = int.to_s.scan(/[[:digit:]]/).map { |i| i.to_i }  
  82        node = Trie.hash.fetch(:root,nil)
  83        ia.each do |digit|
  84           node = node[digit]
  85           #node = node.fetch(digit,nil)
  86           return false unless node
  87        end
  88        node.fetch(true,nil) ? true : false
  89     end
  90  
  91     def mfpi(int)   # match first part of integer
  92        ia = int.to_s.scan(/[[:digit:]]/).map { |i| i.to_i }
  93        node = Trie.hash.fetch(:root,nil)
  94        ia.each do |digit|
  95           node = node[digit]
  96           return false unless node
  97        end
  98        return true
  99     end
 100  
 101  
 102     def add_word(word)
 103        ca = word.split(//u)    # UTF-8 aware character array
 104        ca.unshift(:root).push(true)
 105        Trie.hash.merge_nested_hash!(Hash.new.nested_hash(ca))
 106     end
 107  
 108     def match(word)   # match entire word
 109        ca = word.split(//u)    
 110        node = Trie.hash.fetch(:root,nil)
 111        ca.each do |char|
 112           node = node[char]
 113           #node = node.fetch(char,nil)
 114           return false unless node
 115        end
 116        node.fetch(true,nil) ? true : false
 117     end
 118  
 119     def mfpw(word)   # match first part of word
 120        ca = word.split(//u)    
 121        node = Trie.hash.fetch(:root,nil)
 122        ca.each do |char|
 123           node = node[char]
 124           return false unless node
 125        end
 126        return true
 127     end
 128  
 129  end
 130  
 131  
 132  trie = Trie.new
 133  p Trie.hash
 134  
 135  trie.add_word("word")
 136  pp Trie.hash            
 137  p trie.match("word")      
 138  p trie.match("word2")   
 139  trie.add_word("word2")
 140  p trie.match("word2")   
 141  pp Trie.hash            # {:root=>{"w"=>{"o"=>{"r"=>{"d"=>{true=>{}, "2"=>{true=>{}}}}}}}}
 142  
 143  trie.add_word("")
 144  p trie.match("")
 145  pp Trie.hash
 146  
 147  puts
 148  
 149  p trie.match("word")      
 150  p trie.mfpw("wor")     # match first part of word
 151  
 152  puts
 153  
 154  trie.add_int(12345)
 155  p Trie.hash
 156  
 157  trie.add_int(12980345)
 158  pp Trie.hash
 159  
 160  trie.add_int(8512)
 161  pp Trie.hash
 162  p trie.matchi(8512)
 163  p trie.matchi(85)
 164  p trie.mfpi(85)     # match first part of integer
 165  p trie.matchi(51)
 166  
 167  puts
 168  
 169  puts
 170  pp Trie.hash
 171  paths = []
 172  Trie.hash.each_trie_path { |path, value| paths.push([ path ]) }
 173  #Trie.hash.each_trie_path { |path, value| paths.push([ path, value ]) }
 174  #pp paths
 175  puts
 176  paths.each { |x| p x }
 177  puts
 178  
 179  
 180  #------------------------------------------------------
 181  
 182  
 183  require 'pp'
 184  
 185  class Hash
 186     def Hash.new_nested_hash
 187       Hash.new{|h,k| h[k]=Hash.new(&h.default_proc) }
 188     end
 189  end
 190  
 191  
 192  class Trie
 193  
 194     @hash = Hash.new_nested_hash
 195     #@hash = Hash.new_nested_hash.update(true=>{})  # add empty string by default
 196     class << self; attr_accessor :hash; end    # Trie.hash
 197  
 198     def <<(word)
 199        ca = word.split(//u)    # UTF-8 aware character array
 200        wl = ca.size            # word length
 201        str = ""
 202        wl.times { |x| str << "[ca.at(#{x})]" }
 203        str = "Trie.hash" << str << "[true]"
 204        #p str     # example: "Trie.hash[ca.at(0)][ca.at(1)][ca.at(2)][ca.at(3)][true]"
 205        eval(str)
 206     end
 207  
 208  
 209     def match(word)
 210        ca = word.split(//u)    # UTF-8 aware character array
 211        wl = ca.size            # word length
 212        str = ""
 213  
 214        wl.times { |x| str << ".fetch(ca.at(#{x}),nil)" }
 215        str = "Trie.hash" << str << ".fetch(true,nil)"
 216        #p str   # example: "Trie.hash.fetch(ca.at(0),nil).fetch(ca.at(1),nil).fetch(ca.at(2),nil).fetch(true,nil)"
 217        ret = eval(str) rescue nil
 218  
 219  =begin
 220        # alternative
 221        wl.times { |x| str << ".fetch(ca.at(#{x}),{})" }
 222        str = "Trie.hash" << str << ".fetch(true,nil)"
 223        #p str   # example: "Trie.hash.fetch(ca.at(0),{}).fetch(ca.at(1),{}).fetch(ca.at(2),{}).fetch(true,nil)"
 224        ret = eval(str)
 225  =end
 226  
 227        ret == {} ? true : false   # {} is the default value created by Trie.hash or Hash.new_nested_hash respectively
 228     end
 229  
 230  end
 231  
 232  
 233  trie = Trie.new
 234  trie << "word"
 235  pp Trie.hash            
 236  p trie.match("word")      
 237  p trie.match("word2")   
 238  trie << "word2"
 239  p trie.match("word2")   
 240  pp Trie.hash            # {"w"=>{"o"=>{"r"=>{"d"=>{true=>{}, "2"=>{true=>{}}}}}}}
 241  
 242  trie << ""
 243  p trie.match("")
 244  pp Trie.hash
 245  
 246  
 247  #------------------------------------------
 248  
 249  
 250  # alternative with default :root element
 251  
 252  require 'pp'
 253  
 254  class Hash
 255     def Hash.new_nested_hash
 256       Hash.new{|h,k| h[k]=Hash.new(&h.default_proc) }
 257     end
 258  end
 259  
 260  
 261  class Hash
 262  
 263     # code basis taken from: "Find every path and it's value in a Hash" by Florian Aßmann,
 264     # http://snippets.dzone.com/posts/show/3565
 265  
 266     def each_trie_path
 267        raise ArgumentError unless block_given?
 268        self.class.each_trie_path(self) { |path, object| yield(path, object) }
 269     end
 270  
 271     protected
 272     def self.each_trie_path(object, path = [], &block)  
 273  
 274        if object.is_a?(Hash)
 275           object.each do |key, value|
 276  
 277              if key == true && value == {}
 278                 if path == [:root]  # special case for empty string: [[:root], {true=>{}}]
 279                    yield(path, {true=>{}})
 280                    next
 281                 end
 282                 yield(path, object)
 283              end
 284  
 285              self.each_trie_path(value, [path , key].flatten, &block)
 286           end
 287        else 
 288           yield(path, object)
 289        end
 290     end 
 291  
 292  end
 293  
 294  
 295  class Trie
 296  
 297     @hash = Hash.new_nested_hash
 298     class << self; attr_accessor :hash; end    # Trie.hash
 299  
 300     def initialize
 301        Trie.hash[:root]
 302     end
 303  
 304  
 305     def add_int(int)   # for int >= 0
 306        ia = int.to_s.scan(/[[:digit:]]/).map { |i| i.to_i }  # integer array; ex: [4,6,2]
 307        ia.unshift(:root)
 308        str = ""
 309        ia.size.times { |x| str << "[ia.at(#{x})]" }
 310        str = "Trie.hash" << str << "[true]"
 311        eval(str)
 312     end
 313  
 314     def matchi(int)  # match integer
 315        ia = int.to_s.scan(/[[:digit:]]/).map { |i| i.to_i }  
 316        node = Trie.hash.fetch(:root,nil)
 317        ia.each do |digit|
 318           node = node.fetch(digit,nil)
 319           return false unless node
 320        end
 321        node.fetch(true,nil) ? true : false
 322     end
 323  
 324     def mfpi(int)   # match first part of integer
 325        ia = int.to_s.scan(/[[:digit:]]/).map { |i| i.to_i }
 326        node = Trie.hash.fetch(:root,nil)
 327        ia.each do |digit|
 328           node = node.fetch(digit,nil)
 329           return false unless node
 330        end
 331        return true
 332     end
 333  
 334  
 335     def <<(word)
 336        ca = word.split(//u)    # UTF-8 aware character array
 337        ca.unshift(:root)
 338        #ca = [:root, *word.split(//u)]
 339        #ca = [:root].concat(word.split(//u))
 340        str = ""
 341        ca.size.times { |x| str << "[ca.at(#{x})]" }
 342        str = "Trie.hash" << str << "[true]"
 343        #p str     # example: "Trie.hash[ca.at(0)][ca.at(1)][ca.at(2)][ca.at(3)][true]"
 344        eval(str)
 345     end
 346  
 347     def match(word)
 348        ca = word.split(//u)    
 349        ca.unshift(:root)
 350        #ca = [:root, *word.split(//u)]
 351        #ca = [:root].concat(word.split(//u))
 352        str = ""
 353        ca.size.times { |x| str << ".fetch(ca.at(#{x}),nil)" }
 354        str = "Trie.hash" << str << ".fetch(true,nil)"
 355        #p str   # example: "Trie.hash.fetch(ca.at(0),nil).fetch(ca.at(1),nil).fetch(ca.at(2),nil).fetch(true,nil)"
 356        ret = eval(str) rescue nil
 357  
 358  =begin
 359        # alternative
 360        ca.size.times { |x| str << ".fetch(ca.at(#{x}),{})" }
 361        str = "Trie.hash" << str << ".fetch(true,nil)"
 362        #p str   # example: "Trie.hash.fetch(ca.at(0),{}).fetch(ca.at(1),{}).fetch(ca.at(2),{}).fetch(true,nil)"
 363        ret = eval(str)
 364  =end
 365  
 366        ret == {} ? true : false   # {} is the default value created by Trie.hash or Hash.new_nested_hash respectively
 367     end
 368  
 369     def match2(word)   # match entire word
 370        ca = word.split(//u)    
 371        node = Trie.hash.fetch(:root,nil)
 372        ca.each do |char|
 373           node = node.fetch(char,nil)
 374           return false unless node
 375        end
 376        node.fetch(true,nil) ? true : false
 377     end
 378  
 379     def mfpw(word)   # match first part of word
 380        ca = word.split(//u)    
 381        node = Trie.hash.fetch(:root,nil)
 382        ca.each do |char|
 383           node = node.fetch(char,nil)
 384           return false unless node
 385        end
 386        return true
 387     end
 388  
 389  end
 390  
 391  
 392  trie = Trie.new
 393  trie << "word"
 394  pp Trie.hash            
 395  p trie.match("word")      
 396  p trie.match("word2")   
 397  trie << "word2"
 398  p trie.match("word2")   
 399  pp Trie.hash            # {:root=>{"w"=>{"o"=>{"r"=>{"d"=>{true=>{}, "2"=>{true=>{}}}}}}}}
 400  
 401  trie << ""
 402  p trie.match("")
 403  pp Trie.hash
 404  
 405  puts
 406  
 407  p trie.match2("word")      
 408  p trie.mfpw("wor")     # match first part of word
 409  
 410  puts
 411  
 412  trie.add_int(8512)
 413  pp Trie.hash
 414  p trie.matchi(8512)
 415  p trie.matchi(85)
 416  p trie.mfpi(85)     # match first part of integer
 417  p trie.matchi(51)
 418  
 419  puts
 420  
 421  puts
 422  pp Trie.hash
 423  paths = []
 424  Trie.hash.each_trie_path { |path, value| paths.push([ path ]) }
 425  #Trie.hash.each_trie_path { |path, value| paths.push([ path, value ]) }
 426  #pp paths
 427  puts
 428  paths.each { |x| p x }
 429  puts

Matching quoted strings in Ruby

An exercise in string processing and regexp matching, inspired by Parsing Quoted Strings in Ruby and Stupid Ruby Quoting Tricks.

   1  
   2  #!/usr/local/bin/ruby -w
   3  
   4  # some input examples
   5  str = 'foo "bar baz" qux'
   6  str = 'foo "bar baz " "bar baz" " bar baz" "bar "klr mre" " " \' "abc" \' baz " qux'
   7  str = '" \' \'    " \n "   " \' \' "" foo \'ttt sss\' "bar "qqq zzz" baz" "added term" qux  " \' \'    "  yyy xxx'
   8  str = '"""frickin \'#{bar}\'"""'
   9  str = '""    "frickin chicken "    #{bar}""""'
  10  str = '"""frickin "#{bar}""""'
  11  str = '"a "b c" "d "e" f g" """h""""'       # cf. http://snippets.dzone.com/posts/show/4852
  12  
  13  # escaped quotes
  14  str = '\"'
  15  str = "\\\""
  16  str = '\\\''
  17  str = "\\'"
  18  
  19  # special cases
  20  str = '"G","H I"'
  21  str = '"G","H I""G","H I"'
  22  
  23  str = '"abc""def"'
  24  str = '"""a""b"'
  25  str = '"abc""def""abc""def""abc""def"'
  26  str = '"a"\'\'"b"'
  27  
  28  str = "\"abc'vv'tt\"'klt'"
  29  
  30  str = "abc,def,\"efg,hij\",klm,nop,\"qrstuv\",wxyz"
  31  str = "abc,def,\"efg,hij\",klm, 'nop, \"qrstuv\",wxyz,mmm '"
  32  str = "abc,def,\"efg,hij\",klm, \"nop, 'qrstuv',wxyz,mmm \""
  33  
  34  
  35  puts
  36  puts "input string:  #{str}" 
  37  puts "str.inspect :  #{str.inspect}" 
  38  puts
  39  
  40  num_of_chars1 = str.count('a-zA-Z_0-9', "^\000ds")
  41  
  42  error_code = 0      # in case of a parsing error Shellwords will be used instead of regex1 & regex2
  43  str2 = str.clone
  44  
  45  # encode escaped quotes
  46  str = str.gsub(/\\"|\\'/) { |m| m =~ /^\\"$/ ? "\000d\000" : "\000s\000" }
  47  
  48  dq_count = str.count('"')
  49  sq_count = str.count("'")
  50  
  51  if dq_count % 2 != 0 && sq_count % 2 != 0
  52     raise ArgumentError, "\e[1modd number of single & double quotes\e[m in: #{str}\nsq_count: #{sq_count}\ndq_count: #{dq_count}\n"
  53  elsif dq_count % 2 != 0
  54     raise ArgumentError, "\e[1modd number of double quotes\e[m in: #{str}\ndq_count: #{dq_count}\n"
  55  elsif sq_count % 2 != 0
  56     raise ArgumentError, "\e[1modd number of single quotes\e[m in: #{str}\nsq_count: #{sq_count}\n"
  57  end
  58  
  59  # regex1 separates substrings that contain quotes from substrings that do not contain quotes
  60  regex1 = %r{[^"']+|["'].*?["'](?!.*["'])}m  
  61  
  62  # example
  63  #"abc 'quote1' pjk 'quote2' xyz".scan(regex1) { |m| puts m } 
  64  
  65  
  66  regex2 = %r{
  67  # experimental: special cases
  68  \s*["'][^"']+["'][[:punct:]]["'][^"']+["']|  # special case:  xxx "ab c","def g" yyy
  69  \s*["'][^"']+["']{2,}[^"']+["']|             # special case:  xxx "abc""def" yyy
  70  \s"[^"]+"|                                   # special case: xxx "abc 'def' ghi"
  71  \s'[^']+'|                                   # special case: xxx 'abc "def" ghi'
  72  \s*["']\S+["']|                              # special case: "abc'vv'tt"'klt'
  73  
  74  \s'\s|                       # xxx ' yyy
  75  \s"\s|                       # xxx " yyy
  76  \s''\s|                      # xxx '' yyy
  77  \s""\s|                      # xxx "" yyy
  78  \s'\s+'\s|                   # xxx '   ' yyy
  79  \s"\s+"\s|                   # xxx "   " yyy
  80  \s"\s[^"]+\s"\s|             # xxx " abc " yyy
  81  \s'\s[^']+\s'\s|             # xxx ' abc ' yyy
  82  \s["']["']+(?=[^"'\s])|      # :qoblock:  xxx "'""'abc yyy
  83  [^"'\s]["']["']+(?=\s)|      # :qcblock:  xxx abc"'""' yyy
  84  \s""+|                       # :dqoblock:  xxx """abc yyy
  85  \s''+|                       # :sqoblock:  xxx '''abc yyy
  86  [^"]""+|                     # :dqcblock:  xxx abc"" yyy
  87  [^']''+|                     # :sqcblock:  xxx abc'' yyy
  88  \s["'](?=[^"'\s])|           # :dqo or :sqo:  xxx "abc yyy  or  xxx 'abc yyy
  89  [^"'\s]["'](?=\s)|           # :dqc or :sqc:  xxx abc" yyy  or  xxx abc' yyy
  90  [^"']+[^"'\s](?=\s)          # no quotes at all
  91  }mx
  92  
  93  
  94  =begin
  95  
  96  There are different kinds of quotes matched by regex2 below. They include:
  97  
  98  - :sqo (single quote open)
  99  - :sqc (single quote close)
 100  - :sqoblock (single quote open block)
 101  - :sqcblock (single quote close block)
 102  
 103  - :dqo (double quote open)
 104  - :dqc (double quote close)
 105  - :dqoblock (double quote open block)
 106  - :dqcblock (double quote close block)
 107  
 108  - :qoblock (quote open block)
 109  - :qcblock (quote close block)
 110  
 111  =end
 112  
 113  
 114  ret = []
 115  
 116  str.scan(regex1) do |s| 
 117  
 118     if s !~ /\A["']/
 119  
 120        #puts "s1: #{s}"
 121        #puts "s1.inspect: #{s.inspect}"
 122  
 123        s.split(/\s+/m).each { |t| ret << t unless t.empty? }
 124  
 125     else
 126  
 127        #puts "s2: #{s}"
 128        #puts "s2.inspect: #{s.inspect}"
 129  
 130        open_quotes = 0
 131        close_quotes = 0
 132        ar = []
 133  
 134        # add spaces to simplify regex2 matching
 135        s = "\x20" << s << "\x20"    
 136        s.gsub!(/\x20/, "\x20\x20")  
 137  
 138  
 139        s.scan(regex2) do |m|
 140  
 141           # get the index of the quote
 142           # + 1 for leading space or non-space
 143           # $` is the prematch string
 144  
 145           index = $`.length + 1 
 146  
 147           post_match = $'  
 148  
 149           #puts
 150           #puts "index: #{index}"
 151           #puts "m: #{m.inspect}"
 152           #puts "m.length: #{m.length}"
 153           #puts "open_quotes:  #{open_quotes}\nclose_quotes: #{close_quotes}"
 154           #puts "ret: #{ret.inspect}"
 155           #puts "ar: #{ar.inspect}"
 156           #puts
 157  
 158  
 159           if m =~ /\A\s''\s\z/
 160  
 161              next unless open_quotes == 0 && close_quotes == 0
 162              ret << ''
 163              next
 164  
 165           elsif m =~ /\A\s""\s\z/
 166  
 167              next unless open_quotes == 0 && close_quotes == 0
 168              ret << ""
 169              next
 170  
 171           # example: xxx "ab c","def g" yyy
 172           elsif open_quotes.zero? && close_quotes.zero? && m =~ /\A\s*["'][^"']+["'][[:punct:]]["'][^"']+["']\z/ && m.count('"') % 2 == 0 && m.count("'") % 2 == 0           
 173  
 174              m = m.gsub(/\x20\x20/, "\x20")
 175              # cf. http://henrik.nyh.se/2008/03/flickr-style-tag-splitting-in-ruby
 176              m = m.split(/"(.+?)"|\s+/).reject {|sm| sm.empty? }
 177              #m = m.split(/"(.+?)"|'(.+?)'|\s+/).reject {|sm| sm.empty? }
 178              #m = m.split(/"(.+?)"|'(.+?)'|([[:punct:]])|\s+/).reject {|sm| sm.empty? }
 179              ret.concat(m)
 180              next
 181  
 182           # example: xxx "abc""def" yyy
 183           elsif open_quotes.zero? && close_quotes.zero? && m =~ /\A\s*["'][^"']+["']{2,}[^"']+["']\z/ && m.count('"') % 2 == 0 && m.count("'") % 2 == 0           
 184              
 185              m = m.gsub(/\x20\x20/, "\x20")
 186              m = m.split(/"(.+?)"|\s+/).reject {|sm| sm.empty? }
 187              #m = m.split(/"(.+?)"|'(.+?)'|\s+/).reject {|sm| sm.empty? }
 188              ret.concat(m)
 189              next
 190  
 191  
 192           elsif open_quotes.zero? && close_quotes.zero? && m =~ /\A\s"[^"]+"\z/ && m.count('"') % 2 == 0 && m.count("'") % 2 == 0
 193              ret.concat(m.split(/"(.+?)"|\s+/).reject {|sm| sm.empty? })
 194              next
 195  
 196           elsif open_quotes.zero? && close_quotes.zero? && m =~ /\A\s'[^']+'\z/ && m.count('"') % 2 == 0 && m.count("'") % 2 == 0
 197              ret.concat(m.split(/'(.+?)'|\s+/).reject {|sm| sm.empty? })
 198              next
 199  
 200           elsif open_quotes.zero? && close_quotes.zero? && m =~ /\A\s*["']\S+["']\z/ && m.count('"') % 2 == 0 && m.count("'") % 2 == 0
 201              ret.concat(m.split(/"(.+?)"|\s+/).reject {|sm| sm.empty? })
 202              next
 203  
 204  
 205           elsif m =~ /\A\s"\s[^"]+\s"\s\z/
 206  
 207              next unless open_quotes == 0 && close_quotes == 0
 208              ret << m.gsub(