#!/usr/local/bin/ruby -w
# Regular expressions and strings with embedded objects
# From: http://t-a-w.blogspot.com/2007/06/regular-expressions-and-strings-with.html
# Author: Tomasz Węgrzanowski
# License:
#   Creative Commons License, http://creativecommons.org/licenses/by-sa/3.0/
#   GNU Free Documentation License, http://en.wikipedia.org/wiki/GNU_Free_Documentation_License
#
# The technique shown three times below: replace every already-understood piece
# of the input with a short placeholder ("<index>"), so that the remaining
# structure becomes simple enough for plain regular expressions.

require 'pp'

# Builds a lookup table from +kw+ that fails loudly on a missing key.
#
# @param kw [Hash] the known key/value pairs
# @return [Hash] a copy of +kw+ whose default block raises
# @raise [RuntimeError] "Unknown key: ..." when a missing key is looked up
def hash_or_die(kw)
  Hash.new { |_table, key| raise "Unknown key: #{key}" }.merge(kw)
end

# Escaped character => one-letter code used while the escape is hidden.
# Any other backslash escape in the input raises "Unknown key".
SQL_ESCAPE_CODES = hash_or_die("\\" => "A", "\"" => "B", "n" => "C", "'" => "D").freeze

# One-letter code => the original two-character escape sequence.
SQL_CODE_ESCAPES = hash_or_die("A" => "\\\\", "B" => "\\\"", "C" => "\\n", "D" => "\\'").freeze

# Control characters produced by backslash escapes inside SQL string literals.
SQL_SPECIAL_ESCAPES = {
  "0" => "\0", "b" => "\b", "n" => "\n", "r" => "\r", "t" => "\t", "Z" => "\x1A"
}.freeze

# Splits the VALUES part of an SQL INSERT statement into records.
#
# Backslash escapes are hidden first (as "\x00" + code) so that an escaped
# quote cannot end a string literal; string literals are hidden next (as
# "\x01<index>") so that commas and parentheses inside them cannot confuse the
# record/field split. Both are put back verbatim afterwards, so every field is
# returned as raw SQL text (string literals keep their quotes and escapes) and
# sql_str_unquote is the only place where escapes are decoded.
#
# @param data [String] text such as "(1,0,'Foo',''), (2,0,'Bar','')"
# @return [Array<Array<String>>] one array of raw field strings per record
# @raise [RuntimeError] "Unknown key: ..." for an escape not in SQL_ESCAPE_CODES
def parse(data)
  hidden = data.gsub(/\\(.)/) { "\x00" + SQL_ESCAPE_CODES[Regexp.last_match(1)] }

  strs = []
  hidden = hidden.gsub(/'[^']*'/) do |literal|
    strs << literal
    "\x01<#{strs.size - 1}>"
  end

  hidden.scan(/\((.*?)\)/).map do |(body)|
    body.split(",").map do |field|
      field
        .gsub(/\x01<(\d+)>/) { strs[Regexp.last_match(1).to_i] }
        .gsub(/\x00(.)/) { SQL_CODE_ESCAPES[Regexp.last_match(1)] }
    end
  end
end

# Decodes a single-quoted SQL string literal.
#
# @param str [String] raw literal including the surrounding quotes
# @return [String] the contents with backslash escapes decoded: \n, \r, \t,
#   \b, \0 and \Z become the matching control character, any other escaped
#   character stands for itself
# @raise [RuntimeError] when +str+ is not wrapped in single quotes
def sql_str_unquote(str)
  quoted = /\A'(.*)'\Z/.match(str)
  raise "SQL string format is wrong: #{str}" unless quoted

  quoted[1].gsub(/\\(.)/) do
    char = Regexp.last_match(1)
    SQL_SPECIAL_ESCAPES.fetch(char, char)
  end
end

# Original driver, reading real Wikipedia dumps instead of the sample below.
=begin
page_fn = Dir["plwiki-*-page.sql"].sort[-1]
externallinks_fn = Dir["plwiki-*-externallinks.sql"].sort[-1]

pages = {}
File.foreach(page_fn) {|line|
  next unless line =~ /\AINSERT INTO `page` VALUES (.*)\Z/
  parse($1).each{|id,ns,title,*stuff|
    next unless ns == "0"
    title = sql_str_unquote(title)
    pages[id] = title
  }
}
File.foreach(externallinks_fn) {|line|
  next unless line =~ /\AINSERT INTO `externallinks` VALUES (.*)\Z/
  parse($1).each{|from,to,index|
    title = pages[from]
    next unless title
    to = sql_str_unquote(to)
    next unless to =~ /\Ahttp:\/\//
    puts "#{title}\t#{to}"
  }
}
=end

sql_dump = <<-EOS
INSERT INTO `page` VALUES (1,0,'Astronomia','',1800,0,0,0.600461925007833,'20070601091320',8076762,8584,0), (2,0,'AWK','',329,0,0,0.487812640599732,'20070530195555',8058046,4265,0), (4,0,'Alergologia','',108,0,0,0.580574716050713,'20070520093413',7912844,292,0), ...
INSERT INTO `page` VALUES (14880,0,'Dźwignica_linotorowa','',26,0,0,0.597327036408081,'20060814072401',4282357,727,0), (14881,0,'Urządzenia_transportowe','',91,0,0,0.176666489966834,'20070527090143',2976610,1041,0), ...
EOS

# Page id => title for every page in namespace 0.
pages = {}
# String#each was removed in Ruby 1.9; each_line is the line iterator.
sql_dump.each_line do |line|
  # \Z (not \z) so the trailing newline of each line is tolerated.
  next unless line =~ /\AINSERT INTO `page` VALUES (.*)\Z/

  parse(Regexp.last_match(1)).each do |id, ns, title, *_rest|
    next unless ns == "0"

    pages[id] = sql_str_unquote(title)
  end
end
p pages

=begin
sql_dump.each_line {|line|
  next unless line =~ /\AINSERT INTO `externallinks` VALUES (.*)\Z/
  parse($1).each{|from,to,index|
    title = pages[from]
    next unless title
    to = sql_str_unquote(to)
    next unless to =~ /\Ahttp:\/\//
    puts "#{title}\t#{to}"
  }
}
=end

#-----------------

# Parses a tiny Lisp (lowercase atoms and parenthesised lists) into nested
# arrays: [:atom, name] for atoms, [:app, *children] for lists.
#
# Atoms are replaced by "<index>" placeholders first; after that every
# innermost list is just "(" placeholders ")" and can be folded into a new
# placeholder, repeatedly, until nothing is left to fold.
#
# @param source [String] e.g. "(a (b c))"; it is not modified
# @return [Array, nil] the node named by the first remaining placeholder
def parse_lisp(source)
  nodes = []
  code = source.gsub(/[a-z]+/) do |name|
    nodes << [:atom, name]
    "<#{nodes.size - 1}>"
  end
  code.gsub!(/\s/, "")

  loop do
    folded = code.gsub!(/\(((?:<\d+>)*)\)/) do
      children = Regexp.last_match(1).scan(/<(\d+)>/).map { |(index)| nodes[index.to_i] }
      nodes << [:app, *children]
      "<#{nodes.size - 1}>"
    end
    break unless folded # gsub! returns nil once no list is left
  end

  nodes[code[/<(\d+)>/, 1].to_i]
end

lisp_code = '(a (b c) (d (e) f g) (((h))))'
pp parse_lisp(lisp_code)
# Output:
# [:app,
#  [:atom, "a"],
#  [:app, [:atom, "b"], [:atom, "c"]],
#  [:app, [:atom, "d"], [:app, [:atom, "e"]], [:atom, "f"], [:atom, "g"]],
#  [:app, [:app, [:app, [:atom, "h"]]]]]

#------------------

# "<a>*<b>" or "<a>/<b>". The lookbehind skips a pair whose left operand still
# belongs to an earlier "*" or "/" (keeps these operators left-associative).
MATH_MUL_DIV = %r{(?<![*/])<(\d+)>([*/])<(\d+)>}

# "<a>+<b>" or "<a>-<b>". The lookbehind skips a pair whose left operand
# belongs to an operator on its left; the lookahead skips a pair whose right
# operand belongs to a "*" or "/" that has not been folded yet.
MATH_ADD_SUB = %r{(?<![*/+\-])<(\d+)>([+\-])<(\d+)>(?![*/])}

# Parses an arithmetic expression (non-negative integers, + - * /,
# parentheses) into nested [operator, left, right] arrays.
#
# Numbers become "<index>" placeholders; each pass then unwraps parenthesised
# single placeholders, or folds "*" and "/", or folds "+" and "-", until only
# one placeholder is left.
#
# @param source [String] e.g. "(2 + 2 * 2) / ((2 + 2) * 2)"; it is not modified
# @return [Integer, Array] the expression tree
# @raise [RuntimeError] when the expression cannot be reduced any further
def parse_math(source)
  nodes = []
  code = source.gsub(/\d+/) do |number|
    nodes << number.to_i
    "<#{nodes.size - 1}>"
  end
  code.gsub!(/\s/, "")

  # Folds every match of +pattern+ into a new node; nil when nothing matched.
  fold = lambda do |pattern|
    code.gsub!(pattern) do
      match = Regexp.last_match
      nodes << [match[2], nodes[match[1].to_i], nodes[match[3].to_i]]
      "<#{nodes.size - 1}>"
    end
  end

  loop do
    root = code[/\A<(\d+)>\z/, 1]
    return nodes[root.to_i] if root

    next if code.gsub!(/\((<\d+>)\)/, '\1')
    next if fold.call(MATH_MUL_DIV)
    next if fold.call(MATH_ADD_SUB)

    # No rule applied: without this the loop would never terminate.
    raise "Cannot parse expression: #{source}"
  end
end

math_code = '(2 + 2 * 2) / ((2 + 2) * 2)'
pp parse_math(math_code)
# Output:
# ["/", ["+", 2, ["*", 2, 2]], ["*", ["+", 2, 2], 2]]