Never been to DZone Snippets before?

Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world.

About this user

« Newer Snippets
Older Snippets »
Showing 1-8 of 8 total  RSS 

Matching quoted strings in Ruby

An exercise in string processing and regexp matching, inspired by Parsing Quoted Strings in Ruby and Stupid Ruby Quoting Tricks.

#!/usr/local/bin/ruby -w

# some input examples
# Every assignment below overwrites the previous one, so only the LAST
# `str = ...` line is actually parsed by the script. To try another example,
# move it to the end of this list (or comment out the lines after it).
# Note that the single-quoted literals keep `#{bar}` and `\n` as literal text;
# nothing is interpolated.
str = 'foo "bar baz" qux'
str = 'foo "bar baz " "bar baz" " bar baz" "bar "klr mre" " " \' "abc" \' baz " qux'
str = '" \' \'    " \n "   " \' \' "" foo \'ttt sss\' "bar "qqq zzz" baz" "added term" qux  " \' \'    "  yyy xxx'
str = '"""frickin \'#{bar}\'"""'
str = '""    "frickin chicken "    #{bar}""""'
str = '"""frickin "#{bar}""""'
str = '"a "b c" "d "e" f g" """h""""'       # cf. http://snippets.dzone.com/posts/show/4852

# escaped quotes
# (each of these is a backslash followed by a single quote character)
str = '\"'
str = "\\\""
str = '\\\''
str = "\\'"

# special cases
# quoted terms that touch each other or are separated only by punctuation
str = '"G","H I"'
str = '"G","H I""G","H I"'

str = '"abc""def"'
str = '"""a""b"'
str = '"abc""def""abc""def""abc""def"'
str = '"a"\'\'"b"'

# one kind of quote nested inside the other
str = "\"abc'vv'tt\"'klt'"

# comma separated input with quoted fields
str = "abc,def,\"efg,hij\",klm,nop,\"qrstuv\",wxyz"
str = "abc,def,\"efg,hij\",klm, 'nop, \"qrstuv\",wxyz,mmm '"
str = "abc,def,\"efg,hij\",klm, \"nop, 'qrstuv',wxyz,mmm \""


puts
puts "input string:  #{str}" 
puts "str.inspect :  #{str.inspect}" 
puts

# Reference count of word characters in the raw input. The letters "d" and "s"
# (and NUL) are excluded because the escaped-quote placeholders introduced
# below are built from exactly those characters.
num_of_chars1 = str.count('a-zA-Z_0-9', "^\000ds")

# Set to 1 whenever the regex based parser cannot be trusted; Shellwords is
# then used instead of regex1 & regex2.
error_code = 0
# Untouched copy of the input for the Shellwords comparison at the end.
str2 = str.clone

# Hide escaped quotes (\" and \') behind NUL-delimited placeholders so they are
# neither counted nor matched as real quotes.
str = str.gsub(/\\"|\\'/, '\\"' => "\000d\000", "\\'" => "\000s\000")

dq_count = str.count('"')
sq_count = str.count("'")

# Quotes can only be paired when each kind occurs an even number of times.
odd_double = dq_count.odd?
odd_single = sq_count.odd?

if odd_double && odd_single
   raise ArgumentError, "\e[1modd number of single & double quotes\e[m in: #{str}\nsq_count: #{sq_count}\ndq_count: #{dq_count}\n"
end
if odd_double
   raise ArgumentError, "\e[1modd number of double quotes\e[m in: #{str}\ndq_count: #{dq_count}\n"
end
if odd_single
   raise ArgumentError, "\e[1modd number of single quotes\e[m in: #{str}\nsq_count: #{sq_count}\n"
end

# regex1 separates substrings that contain quotes from substrings that do not contain quotes
#   [^"']+                  a run of characters that contains no quote at all
#   ["'].*?["'](?!.*["'])   from a quote up to a quote that is not followed by
#                           any further quote, i.e. up to the last quote of the input
# The m flag lets "." match newlines as well.
regex1 = %r{[^"']+|["'].*?["'](?!.*["'])}m  

# example
#"abc 'quote1' pjk 'quote2' xyz".scan(regex1) { |m| puts m } 


regex2 = %r{
# experimental: special cases
\s*["'][^"']+["'][[:punct:]]["'][^"']+["']|  # special case:  xxx "ab c","def g" yyy
\s*["'][^"']+["']{2,}[^"']+["']|             # special case:  xxx "abc""def" yyy
\s"[^"]+"|                                   # special case: xxx "abc 'def' ghi"
\s'[^']+'|                                   # special case: xxx 'abc "def" ghi'
\s*["']\S+["']|                              # special case: "abc'vv'tt"'klt'

\s'\s|                       # xxx ' yyy
\s"\s|                       # xxx " yyy
\s''\s|                      # xxx '' yyy
\s""\s|                      # xxx "" yyy
\s'\s+'\s|                   # xxx '   ' yyy
\s"\s+"\s|                   # xxx "   " yyy
\s"\s[^"]+\s"\s|             # xxx " abc " yyy
\s'\s[^']+\s'\s|             # xxx ' abc ' yyy
\s["']["']+(?=[^"'\s])|      # :qoblock:  xxx "'""'abc yyy
[^"'\s]["']["']+(?=\s)|      # :qcblock:  xxx abc"'""' yyy
\s""+|                       # :dqoblock:  xxx """abc yyy
\s''+|                       # :sqoblock:  xxx '''abc yyy
[^"]""+|                     # :dqcblock:  xxx abc"" yyy
[^']''+|                     # :sqcblock:  xxx abc'' yyy
\s["'](?=[^"'\s])|           # :dqo or :sqo:  xxx "abc yyy  or  xxx 'abc yyy
[^"'\s]["'](?=\s)|           # :dqc or :sqc:  xxx abc" yyy  or  xxx abc' yyy
[^"']+[^"'\s](?=\s)          # no quotes at all
}mx


=begin

There are different kinds of quotes matched by regex2 above. They include:

- :sqo (single quote open)
- :sqc (single quote close)
- :sqoblock (single quote open block)
- :sqcblock (single quote close block)

- :dqo (double quote open)
- :dqc (double quote close)
- :dqoblock (double quote open block)
- :dqcblock (double quote close block)

- :qoblock (quote open block)
- :qcblock (quote close block)

=end


# Main tokenizer loop.
#
# regex1 cuts the (escaped-quote encoded) input into pieces that contain no
# quote and pieces that start with a quote. Quote-free pieces are split on
# whitespace. Quoted pieces are scanned with regex2; every match is classified
# as a complete term (pushed to ret right away) or as an opening / closing
# quote marker (recorded in ar). As soon as the number of opening and closing
# quotes is equal, the text between the first and the last recorded marker is
# pushed to ret as one term.
#
# Reads:  str, regex1, regex2
# Writes: ret (resulting terms), error_code (set to 1 on unbalanced markers)
ret = []

str.scan(regex1) do |s| 

   if s !~ /\A["']/

      #puts "s1: #{s}"
      #puts "s1.inspect: #{s.inspect}"

      # piece without any quote: plain whitespace separated words
      s.split(/\s+/m).each { |t| ret << t unless t.empty? }

   else

      #puts "s2: #{s}"
      #puts "s2.inspect: #{s.inspect}"

      open_quotes = 0     # opening quote characters seen and not yet flushed
      close_quotes = 0    # closing quote characters seen and not yet flushed
      ar = []             # pending quote markers: [kind, index into s, number of quote chars]

      # add spaces to simplify regex2 matching
      # (one space of padding on both ends, then every space is doubled so that
      # two neighbouring regex2 matches can each consume their own \s)
      s = "\x20" << s << "\x20"    
      s.gsub!(/\x20/, "\x20\x20")  


      s.scan(regex2) do |m|

         # get the index of the quote
         # + 1 for leading space or non-space
         # $` is the prematch string

         index = $`.length + 1 

         post_match = $'  

         #puts
         #puts "index: #{index}"
         #puts "m: #{m.inspect}"
         #puts "m.length: #{m.length}"
         #puts "open_quotes:  #{open_quotes}\nclose_quotes: #{close_quotes}"
         #puts "ret: #{ret.inspect}"
         #puts "ar: #{ar.inspect}"
         #puts


         # --- complete terms: only accepted while no quote is pending ---

         if m =~ /\A\s''\s\z/

            next unless open_quotes == 0 && close_quotes == 0
            ret << ''
            next

         elsif m =~ /\A\s""\s\z/

            next unless open_quotes == 0 && close_quotes == 0
            ret << ""
            next

         # example: xxx "ab c","def g" yyy
         elsif open_quotes.zero? && close_quotes.zero? && m =~ /\A\s*["'][^"']+["'][[:punct:]]["'][^"']+["']\z/ && m.count('"') % 2 == 0 && m.count("'") % 2 == 0           

            m = m.gsub(/\x20\x20/, "\x20")
            # cf. http://henrik.nyh.se/2008/03/flickr-style-tag-splitting-in-ruby
            m = m.split(/"(.+?)"|\s+/).reject {|sm| sm.empty? }
            #m = m.split(/"(.+?)"|'(.+?)'|\s+/).reject {|sm| sm.empty? }
            #m = m.split(/"(.+?)"|'(.+?)'|([[:punct:]])|\s+/).reject {|sm| sm.empty? }
            ret.concat(m)
            next

         # example: xxx "abc""def" yyy
         elsif open_quotes.zero? && close_quotes.zero? && m =~ /\A\s*["'][^"']+["']{2,}[^"']+["']\z/ && m.count('"') % 2 == 0 && m.count("'") % 2 == 0           
            
            m = m.gsub(/\x20\x20/, "\x20")
            m = m.split(/"(.+?)"|\s+/).reject {|sm| sm.empty? }
            #m = m.split(/"(.+?)"|'(.+?)'|\s+/).reject {|sm| sm.empty? }
            ret.concat(m)
            next


         elsif open_quotes.zero? && close_quotes.zero? && m =~ /\A\s"[^"]+"\z/ && m.count('"') % 2 == 0 && m.count("'") % 2 == 0
            ret.concat(m.split(/"(.+?)"|\s+/).reject {|sm| sm.empty? })
            next

         elsif open_quotes.zero? && close_quotes.zero? && m =~ /\A\s'[^']+'\z/ && m.count('"') % 2 == 0 && m.count("'") % 2 == 0
            ret.concat(m.split(/'(.+?)'|\s+/).reject {|sm| sm.empty? })
            next

         elsif open_quotes.zero? && close_quotes.zero? && m =~ /\A\s*["']\S+["']\z/ && m.count('"') % 2 == 0 && m.count("'") % 2 == 0
            ret.concat(m.split(/"(.+?)"|\s+/).reject {|sm| sm.empty? })
            next


         elsif m =~ /\A\s"\s[^"]+\s"\s\z/

            next unless open_quotes == 0 && close_quotes == 0
            ret << m.gsub(/\x20\x20/, "\x20").strip[1..-2]
            next

         # xxx ' abc ' yyy -- the closing quote has to be a single quote as
         # well (counterpart of the \s'\s[^']+\s'\s alternative in regex2)
         elsif m =~ /\A\s'\s[^']+\s'\s\z/

            next unless open_quotes == 0 && close_quotes == 0
            ret << m.gsub(/\x20\x20/, "\x20").strip[1..-2]
            next

         elsif m =~ /\A\s'\s+'\s\z/

            next unless open_quotes == 0 && close_quotes == 0
            ret << m.gsub(/\x20\x20/, "\x20").strip[1..-2]
            next

         elsif m =~ /\A\s"\s+"\s\z/

           next unless open_quotes == 0 && close_quotes == 0
           ret << m.gsub(/\x20\x20/, "\x20").strip[1..-2]
           next


         # --- blocks of several identical quotes ---

         elsif m =~ /\A\s""+\z/

            l = m.strip.length
            ar << [:dqoblock, index, l]
            old_open_quotes = open_quotes
            open_quotes += l

            # an even-sized block with nothing pending and no further double
            # quote behind it is a term of its own
            if close_quotes == 0 && old_open_quotes == 0 && open_quotes % 2 == 0 && post_match !~ /"/
               ret << m[2..-2] 
               open_quotes = 0
               ar.pop
               next
            end


         elsif m =~ /\A\s''+\z/

            l = m.strip.length
            ar << [:sqoblock, index, l]
            old_open_quotes = open_quotes
            open_quotes += l

            if close_quotes == 0 && old_open_quotes == 0 && open_quotes % 2 == 0 && post_match !~ /'/
               ret << m[2..-2] 
               open_quotes = 0
               ar.pop
               next
            end


         elsif m =~ /\A[^"]""+\z/

            l = m[1..-1].strip.length
            ar << [:dqcblock, index+l-1, l]      #  index+l-1 is the index of the last closing quote: ''"'[']
            old_close_quotes = close_quotes
            close_quotes += l

            if open_quotes == 0 && old_close_quotes == 0 && close_quotes % 2 == 0 && post_match !~ /"/
               ret << m[2..-2] 
               close_quotes = 0
               ar.pop
               next
            end

         elsif m =~ /\A[^']''+\z/

            l = m[1..-1].strip.length
            ar << [:sqcblock, index+l-1, l]
            old_close_quotes = close_quotes
            close_quotes += l

            if open_quotes == 0 && old_close_quotes == 0 && close_quotes % 2 == 0 && post_match !~ /'/
               ret << m[2..-2] 
               close_quotes = 0
               ar.pop
               next
            end


         # --- single opening / closing quotes ---

         elsif m =~ /\A\s'\z/

            ar << [:sqo, index, 1]
            open_quotes += 1

         elsif m =~ /\A\S'\z/

            ar << [:sqc, index, 1]
            close_quotes += 1

         elsif m =~ /\A\s"\z/

            ar << [:dqo, index, 1]
            open_quotes += 1

         elsif m =~ /\A\S"\z/

            ar << [:dqc, index, 1]
            close_quotes += 1


         else


            if m =~ /\A\s"\s\z/              # " surrounded by whitespace

               # a free-standing quote closes when something is open,
               # otherwise it opens
               if open_quotes > close_quotes

                  ar << [:dqc, index, 1]
                  close_quotes += 1

                  # avoid :sqo followed by :dqc or :sqc followed by :dqc
                  if post_match =~ /"/ && open_quotes == close_quotes && (ar.at(-2).first == :sqo || ar.at(-2).first == :sqc)
                     ar.pop
                     ar << [:dqo, index, 1]
                     close_quotes -= 1
                     open_quotes += 1
                  end

               else 

                  ar << [:dqo, index, 1]
                  open_quotes += 1

               end


            elsif m =~ /\A\s'\s\z/          # ' surrounded by whitespace

               if open_quotes > close_quotes

                  ar << [:sqc, index, 1]
                  close_quotes += 1

                  # avoid :dqo followed by :sqc or :dqc followed by :sqc
                  if post_match =~ /'/ && open_quotes == close_quotes && (ar.at(-2).first == :dqo || ar.at(-2).first == :dqc)
                     ar.pop
                     ar << [:sqo, index, 1]
                     close_quotes -= 1
                     open_quotes += 1
                  end

               else 

                  ar << [:sqo, index, 1]
                  open_quotes += 1

               end


            elsif m =~ /\A\s["']["']+\z/              # :qoblock: xxx "'""'abc yyy

               l = m[1..-1].strip.length
               ar << [:qoblock, index, l]
               old_open_quotes = open_quotes
               open_quotes += l

               if close_quotes == 0 && old_open_quotes == 0 && open_quotes % 2 == 0 && post_match !~ /["']/
                  ret << m[2..-2] 
                  open_quotes = 0
                  ar.pop
                  next
               end


            elsif m =~ /\A[^"'\s]["']["']+\z/          # :qcblock: xxx abc"'""' yyy

               l = m[1..-1].strip.length
               ar << [:qcblock, index+l-1, l]
               old_close_quotes = close_quotes
               close_quotes += l

               if open_quotes == 0 && old_close_quotes == 0 && close_quotes % 2 == 0 && post_match !~ /["']/
                  ret << m[2..-2] 
                  close_quotes = 0
                  ar.pop
                  next
               end


            elsif m =~ /\A\s*["'].*?["']\s*\z/       # last try  (experimental)

               ret.concat(m.split(/"(.+?)"|\s+/).reject {|sm| sm.empty? })
               next


            elsif m =~ /\A[^"']+[^"'\s]\z/          # part of quoted substring contains neither " nor '
               next unless open_quotes == 0 && close_quotes == 0
               next if m.strip.empty?
               ret << m.gsub(/\x20\x20/, "\x20").strip; next
            end

         end

         # progress output for every match that was recorded as a quote marker
         puts
         puts "open_quotes:  #{open_quotes}\nclose_quotes: #{close_quotes}\n"
         #puts "ar: #{ar.inspect}"

         if open_quotes == close_quotes

            #puts "open_quotes & close_quotes: #{close_quotes}"
            puts "ar: #{ar.inspect}"

            # balanced: everything between the first and the last marker is one
            # term; undo the space doubling and drop the two outermost quote chars
            ret << s[ar.first[1]..ar.last[1]].gsub(/\x20\x20/, "\x20")[1..-2] unless ar.empty?

            ar.clear
            open_quotes = 0
            close_quotes = 0

         end

      end   # scan 2

      # markers left over after the whole piece was scanned: give up on the
      # regex result and let the Shellwords fallback take over
      unless open_quotes.zero? && close_quotes.zero?
        error_code = 1
        puts "\e[1mparsing error\e[m for the quoted string: #{str.strip.squeeze[0..20]}"
        #raise "\e[1mparsing error\e[m for the quoted string: #{str.strip.squeeze[0..20]}"
      end

   end   # if

end   # scan 1



# Sanity check: the parsed terms must contain exactly as many word characters
# as the input did (NUL, "d" and "s" are ignored, see num_of_chars1).
num_of_chars2 = ret.join.count('a-zA-Z_0-9', "^\000ds")

if num_of_chars1 != num_of_chars2
   error_code = 1
   puts "\n\e[1mparsing error due to wrong number of characters a-zA-Z_0-9\e[m: \n#{num_of_chars2} instead of #{num_of_chars1}\n"
   #raise "\e[1mparsing error due to wrong number of characters a-zA-Z_0-9\e[m: \n#{num_of_chars2} instead of #{num_of_chars1}\n in #{str.strip.squeeze[0..20]}"
end


# Fall back to Shellwords whenever the quote matching above failed; the terms
# collected so far are thrown away.
if error_code == 1
#if error_code == 1 || ret.join =~ /\A["']+\z/
   require 'shellwords'
   ret.replace(Shellwords.shellwords(str))
   #str =~ /\A\S+\z/ ? ret.concat(str.split(/"(.+?)"|\s+/).reject {|sm| sm.empty? }) : ret.concat(Shellwords::shellwords(str))
end



puts "\n\e[1mResult\e[m:\n\n"
ret.each_with_index do |term, pos|
   # turn the NUL-delimited placeholders back into escaped quotes
   decoded = term.gsub(/\000d\000|\000s\000/, "\000d\000" => '\"', "\000s\000" => "\\'")
   puts "#{pos+1}:  #{decoded.inspect}"
end

# For comparison: what Shellwords makes of the untouched input.
puts "\n\e[1mShellwords\e[m:\n\n"
require 'shellwords'
Shellwords.shellwords(str2).each_with_index do |word, pos|
   puts "#{pos+1}:  #{word.inspect}"
end


#----------------------


# matching quoted strings using backreferences
# See: Regexes in Depth: Advanced Quoted String Matching,
# http://blog.stevenlevithan.com/archives/match-quoted-string

str = '"abc"'

# Simple variant: the body may contain neither kind of quote.
#regex = %r{(["'])([^"']*)(\1)}

# General variant: the body may contain the *other* kind of quote.
# A backreference cannot be used inside a character class -- there, \1 is an
# octal escape for the character U+0001, so [^\1]* would greedily run up to
# the last matching quote of the input. The negative lookahead (?!\1) checks
# at every position that the opening quote does not follow. The m flag lets
# the body span newlines, as a negated character class would.
regex = %r{(["'])((?:(?!\1).)*)(\1)}m
p regex

# m is [opening quote, body, closing quote]; joining it gives the whole match
str.scan(regex) { |m| p m; p m.join }

String#stripped!


#!/usr/local/bin/ruby -w

# In-place strip variants that, unlike String#strip! and friends, never return
# nil: every method returns the receiver, whether or not anything was removed.
#
# All patterns are anchored with \A and \z (start / end of the string). The
# line anchors ^ and $ would also match around every embedded newline, which
# made the whitespace of inner lines disappear and made the "r" variants trim
# the end of the first line instead of the end of the string.
class String

   # Removes leading and trailing whitespace.
   def stripped!                                 
      gsub!(/\A[[:space:]]*|[[:space:]]*\z/, '')   # whitespace characters: [ \t\r\n\v\f]
   end                                             # cf. http://en.wikipedia.org/wiki/Regular_expression

   # Removes leading whitespace.
   def lstripped!
      sub!(/\A[[:space:]]*/, '')
   end

   # Removes trailing whitespace.
   def rstripped!
      sub!(/[[:space:]]*\z/, '')
   end

   # Removes leading and trailing control characters and spaces.
   def stripped_all!
      gsub!(/\A[[:cntrl:]\x20]*|[[:cntrl:]\x20]*\z/, '')   # control characters: [\x00-\x1F\x7F] and space character: \x20
   end                                                     # cf. http://en.wikipedia.org/wiki/ASCII#ASCII_control_characters

   # Removes leading control characters and spaces.
   def lstripped_all!
      sub!(/\A[[:cntrl:]\x20]*/, '')
   end

   # Removes trailing control characters and spaces.
   def rstripped_all!
      sub!(/[[:cntrl:]\x20]*\z/, '')
   end

   # Removes every control character, wherever it occurs.
   def delete_cntrl!
      gsub!(/[[:cntrl:]]/, '')   # nil when nothing was deleted ...
      self                       # ... so hand back the receiver explicitly
   end

end


p "".strip!       #=> nil
p "".stripped!    #=> ""

p "abc".strip!       #=> nil
p "abc".stripped!    #=> "abc"

p "abc\000".strip!           #=> "abc" (!)
p "abc\000".stripped_all!    #=> "abc"

p "abc\000\001".strip!           #=> nil
p "abc\000\001".stripped_all!    #=> "abc"

puts

p "".gsub!(/[[:cntrl:]]/, '')   #=> nil
p "a".gsub!(/[[:cntrl:]]/, '')  #=> nil

p "".delete_cntrl!    #=> ""
p "a".delete_cntrl!   #=> "a"



text = <<-EOS
 \r \x00 this is an example \t\x11 text  caf\303\251 \x20\x20\x20\x20 \r \f

  \011  \x10 \x07  \t\r\v\f abc \v\000 def \000 \x20\x20 \r  \v \r
EOS


# Show the sample text line by line, first untouched and then once per
# String extension. each_line hands out fresh strings, so `text` itself is
# never modified and every method starts from the original lines.
puts "\n\n\e[1mOriginal text:\e[m\n"
text.each_line { |line| p line }

puts

%w[
   stripped! lstripped! rstripped!
   stripped_all! lstripped_all! rstripped_all!
   delete_cntrl!
].each do |name|
   puts "\n\e[1mString##{name}\e[m\n"
   text.each_line do |line|
      line.send(name)
      p line
   end
end

Regex-based parsing technique


#!/usr/local/bin/ruby -w

# Regular expressions and strings with embedded objects
# From: http://t-a-w.blogspot.com/2007/06/regular-expressions-and-strings-with.html
# Author: Tomasz Węgrzanowski
# License: 
# Creative Commons License, http://creativecommons.org/licenses/by-sa/3.0/
# GNU Free Documentation License, http://en.wikipedia.org/wiki/GNU_Free_Documentation_License


# Returns a hash holding the pairs of +kw+ that raises
# "Unknown key: ..." (RuntimeError) on lookup of any other key
# instead of returning nil.
def hash_or_die(kw)
  strict = Hash.new { |_hash, key| raise "Unknown key: #{key}" }
  strict.merge!(kw)
end

# Splits the VALUES part of an SQL INSERT statement -- "(a,b,'c'), (d,e,'f')" --
# into an array of records, each record being an array of field strings.
# String fields keep their surrounding single quotes.
#
# Works in three masking steps so that plain regexes are sufficient:
# 1. every backslash escape is replaced by NUL plus a code letter
#    (only \\ \" \n \' are known, anything else raises "Unknown key: ..."),
# 2. every '...' literal is replaced by a numbered placeholder, so commas and
#    parentheses inside strings cannot confuse the splitting,
# 3. the records are cut out and split, then both maskings are undone.
def parse(data)
  encode = hash_or_die "\\" => "A", "\"" => "B", "n" => "C", "'" => "D"
  decode = hash_or_die "A" => "\\", "B" => "\"", "C" => "n", "D" => "'"

  masked = data.gsub(/\\(.)/) { "\x00" + encode[Regexp.last_match(1)] }

  literals = []
  masked = masked.gsub(/('[^']*')/) do
    literals << Regexp.last_match(1)
    "\x01<#{literals.size - 1}>"
  end

  masked.scan(/\((.*?)\)/).map do |(tuple)|
    tuple.split(/,/).map do |field|
      restored = field.gsub(/\x01<(\d+)>/) { literals[Regexp.last_match(1).to_i] }
      restored.gsub(/\x00(.)/) { decode[Regexp.last_match(1)] }
    end
  end
end

# Strips the surrounding single quotes from an SQL string literal and drops
# the backslash of every backslash escape. Raises RuntimeError when +str+ is
# not wrapped in single quotes (one trailing newline is tolerated).
def sql_str_unquote(str)
  unless str =~ /\A'(.*)'\Z/
    raise "SQL string format is wrong: #{str}"
  end

  body = Regexp.last_match(1)
  body.gsub(/\\(.)/) { Regexp.last_match(1) }
end


=begin

page_fn = Dir["plwiki-*-page.sql"].sort[-1]
externallinks_fn = Dir["plwiki-*-externallinks.sql"].sort[-1]

pages = {}

File.open(page_fn).each{|line|
  next unless line =~ /\AINSERT INTO `page` VALUES (.*)\Z/
  parse($1).each{|id,ns,title,*stuff|
    next unless ns == "0"
    title = sql_str_unquote(title)
    pages[id] = title
  }
}

File.open(externallinks_fn).each{|line|
  next unless line =~ /\AINSERT INTO `externallinks` VALUES (.*)\Z/
  parse($1).each{|from,to,index|
    title = pages[from]
    next unless title
    to = sql_str_unquote(to)
    next unless to =~ /\Ahttp:\/\//
    puts "#{title}\t#{to}"
  }
}

=end


sql_dump = <<-EOS

INSERT INTO `page` VALUES (1,0,'Astronomia','',1800,0,0,0.600461925007833,'20070601091320',8076762,8584,0), (2,0,'AWK','',329,0,0,0.487812640599732,'20070530195555',8058046,4265,0), (4,0,'Alergologia','',108,0,0,0.580574716050713,'20070520093413',7912844,292,0), ...
INSERT INTO `page` VALUES (14880,0,'Dźwignica_linotorowa','',26,0,0,0.597327036408081,'20060814072401',4282357,727,0), (14881,0,'Urządzenia_transportowe','',91,0,0,0.176666489966834,'20070527090143',2976610,1041,0), ...

EOS


# Build a page id => title map from the INSERT statements of the dump.
pages = {}

# String#each no longer exists since Ruby 1.9; each_line is the portable
# way to walk over the lines of a string (it is available in 1.8 as well).
sql_dump.each_line do |line|
  # only the VALUES part of `page` inserts is of interest
  next unless line =~ /\AINSERT INTO `page` VALUES (.*)\Z/
  parse($1).each do |id, ns, title, *stuff|
    next unless ns == "0"          # keep pages whose second field is "0"
    title = sql_str_unquote(title)
    pages[id] = title
  end
end


p pages

=begin

sql_dump.each{|line|
  next unless line =~ /\AINSERT INTO `externallinks` VALUES (.*)\Z/
  parse($1).each{|from,to,index|
    title = pages[from]
    next unless title
    to = sql_str_unquote(to)
    next unless to =~ /\Ahttp:\/\//
    puts "#{title}\t#{to}"
  }
}

=end


#-----------------


require 'pp'

# Parse an S-expression without recursion: every atom and every innermost
# list is moved into `nodes` and replaced in the text by a "<index>" token,
# until the whole expression has collapsed into a single token.
lisp_code = '(a (b c) (d (e) f g) (((h))))'
nodes = []

# Step 1: atoms become [:atom, name] nodes.
lisp_code.gsub!(/([a-z]+)/) do
  nodes.push([:atom, Regexp.last_match(1)])
  "<#{nodes.length - 1}>"
end

# Step 2: whitespace carries no information any more.
lisp_code.gsub!(/\s/, "")

# Step 3: fold lists that consist of tokens only, innermost first, for as
# long as gsub! still finds one (it returns nil when nothing matched).
loop do
  folded = lisp_code.gsub!(/\(((?:<\d+>)*)\)/) do
    children = Regexp.last_match(1).scan(/<(\d+)>/).map { |(ref)| nodes[ref.to_i] }
    nodes.push([:app, *children])
    "<#{nodes.length - 1}>"
  end
  break unless folded
end

# The first remaining token refers to the root of the tree.
root_ref = lisp_code[/<(\d+)>/, 1]

pp nodes[root_ref.to_i]

# Output:
#  [:app,
#  [:atom, "a"],
#  [:app, [:atom, "b"], [:atom, "c"]],
#  [:app, [:atom, "d"], [:app, [:atom, "e"]], [:atom, "f"], [:atom, "g"]],
#  [:app, [:app, [:app, [:atom, "h"]]]]]


#------------------


# Replaces "<l>OP<r>" token triples matched by +pattern+ (captures: left
# index, operator, right index) by a new [op, left, right] node in +nodes+.
# Returns nil when +pattern+ did not match anywhere in +code+.
def fold_binary_ops(code, nodes, pattern)
  code.gsub!(pattern) do
    hit = Regexp.last_match
    nodes << [hit[2], nodes[hit[1].to_i], nodes[hit[3].to_i]]
    "<#{nodes.size - 1}>"
  end
end

# Parses an infix expression made of non-negative integers, + - * / and
# parentheses into nested [operator, left, right] arrays.
#
# Numbers and already parsed sub-expressions are kept in a node list and are
# represented in the text by "<index>" tokens; the text is rewritten until a
# single token is left.
#
# Raises ArgumentError when the text cannot be reduced any further (malformed
# input) instead of looping forever.
def parse_math(source)
  nodes = []
  code = source.gsub(/\d+/) do |digits|
    nodes << digits.to_i
    "<#{nodes.size - 1}>"
  end
  code = code.gsub(/\s/, "")

  loop do
    root = /\A<(\d+)>\z/.match(code)
    return nodes[root[1].to_i] if root

    # (<n>) -> <n>
    next if code.gsub!(/\((<\d+>)\)/) { Regexp.last_match(1) }

    # * and /: the left operand must not itself be the right operand of a
    # preceding * or /, otherwise "a / b * c" would turn into a / (b * c).
    next if fold_binary_ops(code, nodes, %r{(?<![*/])<(\d+)>([*/])<(\d+)>})

    # + and -: same left-associativity guard, and the right operand must not
    # be followed by * or / (e.g. "1 + 2 * (3 + 4)": the product is not
    # reducible yet, so the sum has to wait for it).
    next if fold_binary_ops(code, nodes, %r{(?<![+\-*/])<(\d+)>([+\-])<(\d+)>(?![*/])})

    raise ArgumentError, "cannot parse arithmetic expression: #{source.inspect}"
  end
end

math_code = '(2 + 2 * 2) / ((2 + 2) * 2)'

pp parse_math(math_code)

# Output:
# ["/", ["+", 2, ["*", 2, 2]], ["*", ["+", 2, 2], 2]]



CSV parsing regex

The regular expression is taken from Raimond Brookman, Regex fun with CSV.
For a good general CSV overview see The Comma Separated Value (CSV) File Format.
A complete Ruby CSV parsing library is FasterCSV (sudo gem install fastercsv).



csv_data = <<-EOS

fname,lname,age,salary
nancy,davolio,33,$30000
erin,borakova,28,$25250
tony,raphael,35,$28700

"Date","Pupil","Grade"
"25 May","Bloggs, Fred","C"
"25 May","Doe, Jane","B"
"15 July","Bloggs, Fred","D"

123456789,"Carr, Lisa",100000.00
444556666,"Barr, Clark",87000.00
777227878,"Parr, Jack",123000.00
998877665,"Charr, Lee",123000.00

Conference room 1, "John,  
Please bring the M. Mathers file for review  
-J.L.
"
10/18/2002,...

John,Doe,120 jefferson st.,Riverside, NJ, 08075
Jack,McGinnis,220 hobo Av.,Phila, PA,09119
"John ""Da Man""",Repici,120 Jefferson St.,Riverside, NJ,08075
Stephen,Tyler,"7452 Terrace ""At the Plaza"" road",SomeTown,SD, 91234
,Blankman,,SomeTown, SD, 00298
"Joan ""the bone"", Anne",Jet,"9th, at Terrace plc",Desert City,CO,00123

XXXX,D,3-May-02,83.01,83.58,71.13,78.04,9645300
XXXX,D,2-May-02,82.47,85.76,82.05,83.84,7210000,
XXXX,D,1-May-02,86.80,90.83,81.74,85.50,14253300

"1997",car model,E350
1997,car model,E350,"  Super luxurious truck    "
1997,car model,E350,"Go get one now
they are going fast"
1997,car model,E350,"Super ""luxurious"" truck"
1997,car model,E350,"Super, luxurious truck"

1997,car model,E350,"ac, abs, moon",3000.00
1999, car model,"Venture ""Extended Edition""",,4900.00,
1996, car model,Old Car,"BEYOND REPAIR!
air, moon roof, loaded",4799.00

This,is,a test,CSV, file," from ""http://lorance.freeshell.org/csv/test.csv""."
It contains,"quoted text",and,numbers 1234,5678
It also has,"quoted text with an embedded quote""<- right there"
Then there are a few,,blank fields like these here ->,,,
A quoted blank field,"",<- there.
A quoted blank field with newline,"\n",<- there.
This next one causes an error if newline handling is turned off.
"There is a newline here ->
<- and it should be processed correctly."
ABCD
"And here,,, is an""Error - no"
"And here,,, is an"Error - yes
"And here,,, is an",Error - no

1,2,3
ab,"c,d","e""f", "g"",""","h
jk",kl

"aaa","bbb","ccc"
zzz,yyy,xxx
"aaa","b
bb","ccc"
zzz,yyy,xxx

"aaa","b""bb","ccc"

EOS



csv_data.split(/(,|\r\n|\n|\r)(?=(?:[^\"]*\"[^\"]*\")*(?![^\"]*\"))/m).each do<