Never been to DZone Snippets before?

Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world

« Newer Snippets
Older Snippets »
Showing 1-10 of 14 total  RSS 

ARGV Parser

This function parses ARGV and returns a string.
See the examples for more information:

// Go : http://blackh.badfile.net/wordz/

Function :

  # Looks up a single-dash command-line switch in ARGV and returns its value.
  #
  # param - switch name without the leading dash (anything that responds to
  #         to_s, e.g. 'o' or :o).
  #
  # Returns the argument that follows the first "-<param>" in ARGV, or false
  # when the switch is absent or is the last argument (no value follows it).
  def options(param)
    flag = "-#{param}"
    position = ARGV.index(flag)

    # Previously a missing switch fell off the end of ARGV.each and the method
    # returned the ARGV array itself; callers compare the result with false.
    return false unless position

    # A switch given as the very last argument has no value; report that as
    # false too instead of nil so `out.empty?` style checks do not blow up.
    ARGV.fetch(position + 1, false)
  end


Usage :

// cmd> ruby test.rb -o foo

	# options is a private top-level method; calling it with an explicit
	# `self.` receiver raises NoMethodError before Ruby 2.7, so call it bare.
	out = options('o')

	# Only print when a value was found and it is not blank.
	if out && !out.empty?
		puts out # print -> foo
	end

Regex-based parsing technique


#!/usr/local/bin/ruby -w

# Regular expressions and strings with embedded objects
# From: http://t-a-w.blogspot.com/2007/06/regular-expressions-and-strings-with.html
# Author: Tomasz Węgrzanowski
# License: 
# Creative Commons License, http://creativecommons.org/licenses/by-sa/3.0/
# GNU Free Documentation License, http://en.wikipedia.org/wiki/GNU_Free_Documentation_License


# Builds a strict copy of +kw+: reading a key that is not present raises
# "Unknown key: <key>" instead of returning nil.
def hash_or_die(kw)
  strict = Hash.new { |_hash, missing| raise "Unknown key: #{missing}" }
  strict.merge(kw)
end

# Splits the VALUES part of an SQL INSERT statement into records.
#
# Works in three passes so that commas and parentheses inside quoted strings
# never confuse the simple regexes:
#   1. every backslash escape is replaced by "\x00" plus a one-letter code
#      (unknown escapes raise via hash_or_die);
#   2. every '...' literal is stashed away and replaced by "\x01<index>";
#   3. each "(...)" tuple is split on commas and the placeholders are
#      expanded again, escapes being replaced by the bare escaped character.
#
# Returns an array of records, each an array of field strings (string
# fields keep their surrounding single quotes).
def parse(data)
  to_code = hash_or_die "\\" => "A", "\"" => "B", "n" => "C", "'" => "D"
  from_code = hash_or_die "A" => "\\", 'B' => "\"", "C" => "n", "D" => "'"

  masked = data.gsub(/\\(.)/) { "\x00" + to_code[$1] }

  literals = []
  masked = masked.gsub(/('[^']*')/) do
    literals.push($1)
    "\x01<#{literals.size-1}>"
  end

  masked.scan(/\((.*?)\)/).map do |(tuple)|
    tuple.split(/,/).map do |field|
      with_literals = field.gsub(/\x01<(\d+)>/) { literals[$1.to_i] }
      with_literals.gsub(/\x00(.)/) { from_code[$1] }
    end
  end
end

# Strips the surrounding single quotes from an SQL string literal and
# removes one level of backslash escaping from its contents.
#
# Raises RuntimeError when +str+ is not wrapped in single quotes.
def sql_str_unquote(str)
  unless str =~ /\A'(.*)'\Z/
    raise "SQL string format is wrong: #{str}"
  end

  inner = $1
  # '\1' keeps only the character that followed each backslash.
  inner.gsub(/\\(.)/, '\1')
end


=begin

page_fn = Dir["plwiki-*-page.sql"].sort[-1]
externallinks_fn = Dir["plwiki-*-externallinks.sql"].sort[-1]

pages = {}

File.open(page_fn).each{|line|
  next unless line =~ /\AINSERT INTO `page` VALUES (.*)\Z/
  parse($1).each{|id,ns,title,*stuff|
    next unless ns == "0"
    title = sql_str_unquote(title)
    pages[id] = title
  }
}

File.open(externallinks_fn).each{|line|
  next unless line =~ /\AINSERT INTO `externallinks` VALUES (.*)\Z/
  parse($1).each{|from,to,index|
    title = pages[from]
    next unless title
    to = sql_str_unquote(to)
    next unless to =~ /\Ahttp:\/\//
    puts "#{title}\t#{to}"
  }
}

=end


sql_dump = <<-EOS

INSERT INTO `page` VALUES (1,0,'Astronomia','',1800,0,0,0.600461925007833,'20070601091320',8076762,8584,0), (2,0,'AWK','',329,0,0,0.487812640599732,'20070530195555',8058046,4265,0), (4,0,'Alergologia','',108,0,0,0.580574716050713,'20070520093413',7912844,292,0), ...
INSERT INTO `page` VALUES (14880,0,'Dźwignica_linotorowa','',26,0,0,0.597327036408081,'20060814072401',4282357,727,0), (14881,0,'Urządzenia_transportowe','',91,0,0,0.176666489966834,'20070527090143',2976610,1041,0), ...

EOS


# Map of page id => unquoted title for every namespace-0 page in the dump.
pages = {}

# String#each was removed in Ruby 1.9; each_line exists in every Ruby version
# and walks the dump one INSERT statement at a time.
sql_dump.each_line do |line|
  next unless line =~ /\AINSERT INTO `page` VALUES (.*)\Z/

  parse($1).each do |id, ns, title, *_rest|
    # Only namespace "0" rows are kept.
    next unless ns == "0"

    pages[id] = sql_str_unquote(title)
  end
end


p pages

=begin

sql_dump.each{|line|
  next unless line =~ /\AINSERT INTO `externallinks` VALUES (.*)\Z/
  parse($1).each{|from,to,index|
    title = pages[from]
    next unless title
    to = sql_str_unquote(to)
    next unless to =~ /\Ahttp:\/\//
    puts "#{title}\t#{to}"
  }
}

=end


#-----------------


require 'pp'

lisp_code = '(a (b c) (d (e) f g) (((h))))'
nodes = []

# Step 1: move every atom into +nodes+ and leave "<index>" in its place.
lisp_code.gsub!(/([a-z]+)/) do
  nodes.push([:atom, $1])
  "<#{nodes.size-1}>"
end

# Step 2: whitespace carries no meaning once atoms are placeholders.
lisp_code.gsub!(/\s/,"")

# Step 3: repeatedly collapse every parenthesised run of placeholders into a
# single :app node until no such group is left.
loop do
  collapsed = lisp_code.gsub!(/\(((?:<\d+>)*)\)/) do
    children = $1.scan(/<(\d+)>/).map { |(index)| nodes[index.to_i] }
    nodes.push([:app] + children)
    "<#{nodes.size-1}>"
  end
  break unless collapsed
end

# The remaining placeholder names the root of the tree.
lisp_code =~ /<(\d+)>/

pp nodes[$1.to_i]

# Output:
#  [:app,
#  [:atom, "a"],
#  [:app, [:atom, "b"], [:atom, "c"]],
#  [:app, [:atom, "d"], [:app, [:atom, "e"]], [:atom, "f"], [:atom, "g"]],
#  [:app, [:app, [:app, [:atom, "h"]]]]]


#------------------


# Parses an infix arithmetic expression into a nested prefix tree.
#
# Integers are first replaced by "<index>" placeholders into a node table;
# the string is then reduced one step at a time until a single placeholder
# is left.
#
# source - String with non-negative integers, + - * / and parentheses.
#
# Returns an Integer for a bare number, otherwise [operator, left, right]
# where left/right are again Integers or such arrays.
# Raises ArgumentError when no reduction applies (malformed expression);
# the original loop never terminated in that case.
def parse_math(source)
  nodes = []

  code = source.gsub(/(\d+)/) {
    nodes << $1.to_i
    "<#{nodes.size-1}>"
  }
  code.gsub!(/\s/, "")

  loop do
    done = /\A<(\d+)>\Z/.match(code)
    return nodes[done[1].to_i] if done

    # Parentheses around a single placeholder carry no information.
    next if code.gsub!(/\((<\d+>)\)/) { $1 }

    # Reduce ONE operator per pass (sub!, leftmost first) so chains stay
    # left-associative. The lookbehind keeps a placeholder that is still the
    # right operand of an unreduced "*" or "/" from being stolen.
    next if code.sub!(/(?<![\*\/])<(\d+)>([\*\/])<(\d+)>/) {
      nodes << [$2, nodes[$1.to_i], nodes[$3.to_i]]
      "<#{nodes.size-1}>"
    }

    # Same for + and -; additionally the right operand must not be followed
    # by a pending "*" or "/" (e.g. the "1+2" in "1+2*(3+4)").
    next if code.sub!(/(?<![\*\/\+\-])<(\d+)>([\+\-])<(\d+)>(?![\*\/])/) {
      nodes << [$2, nodes[$1.to_i], nodes[$3.to_i]]
      "<#{nodes.size-1}>"
    }

    raise ArgumentError, "cannot parse expression: #{source.inspect}"
  end
end

math_code = '(2 + 2 * 2) / ((2 + 2) * 2)'

pp parse_math(math_code)

# Output:
# ["/", ["+", 2, ["*", 2, 2]], ["*", ["+", 2, 2], 2]]



basic parser

This is a very basic parser, reads each line and creates a string of ':'-separated id nos for mass-mailing a set of accounts in an online game

def joinIds(fin, fout, batch_size=50):
    """Write the ids found in fin to fout as ':'-joined batches.

    The id is the third whitespace-separated field of every non-blank line
    of fin. Each batch holds at most batch_size ids and is written as one
    line followed by a blank line.

    fin        -- iterable of text lines (e.g. an open file)
    fout       -- object with a write() method
    batch_size -- maximum number of ids per output line (default 50)

    Raises IndexError if a non-blank line has fewer than three fields.
    """
    batch = []
    for line in fin:
        if line.strip() == '':
            continue
        batch.append(line.split()[2])
        if len(batch) == batch_size:
            fout.write(":".join(batch) + "\n\n")
            batch = []
    # Flush the remainder; skip it when empty so that an empty input or an
    # exact multiple of batch_size does not produce a blank id line.
    if batch:
        fout.write(":".join(batch) + "\n\n")

import sys

# raw_input only exists on Python 2; fall back to input on Python 3.
try:
    read_line = raw_input
except NameError:
    read_line = input

SIA = read_line('Enter a file to read: ')
try:
    pages = open(SIA, 'r')
except IOError:
    print('Cannot open file %s for reading. Check file exists and try again.' % SIA)
    # Non-zero status so a calling shell can tell that nothing was written.
    sys.exit(1)

# An empty answer keeps the default output file name.
default = 'idFile.txt'
out = read_line('File name for output: ')
if out != '':
    default = out

# Close both files even if joinIds raises (e.g. on a malformed line).
try:
    oFile = open(default, 'a')
    try:
        joinIds(pages, oFile)
    finally:
        oFile.close()
finally:
    pages.close()

print('Id strings have been output to %s. Happy mass-mailing! ;)' % default)

Python: NMEA GPGGA parser

Not tested extensively, use with caution! I used an LD-1W and it seems to work all right with this parser.

class GPGGAParser(object):
	"""Splits one NMEA GPGGA sentence into attributes.

	After construction, latitude and longitude hold signed decimal degrees
	(negative for 'S' / 'W'), altitude is a float, timeOfFix is "HH:MM:SS"
	and every other field keeps the raw string from the sentence.
	"""
	import logging

	def __init__(self, sentance):
		"""Parse sentance, a comma-separated GPGGA line with exactly 15 fields.

		Raises ValueError if the field count is wrong or a numeric field
		cannot be converted.
		"""
		import time, logging

		def to_decimal_degrees(value):
			# value is (d)ddmm.mmmm: whole degrees in the hundreds and above,
			# minutes below.
			whole_degrees = int(value/100)
			minutes = value - whole_degrees*100
			return whole_degrees + (minutes/60)

		logging.debug("GPPGAParser started")
		logging.debug("Trying to parse: "+sentance)

		fields = sentance.split(",")
		(self.format,
		 self.utc,
		 self.latitude,
		 self.northsouth,
		 self.longitude,
		 self.eastwest,
		 self.quality,
		 self.number_of_satellites_in_use,
		 self.horizontal_dilution,
		 self.altitude,
		 self.above_sea_unit,
		 self.geoidal_separation,
		 self.geoidal_separation_unit,
		 self.data_age,
		 self.diff_ref_stationID) = fields

		raw_latitude = float(self.latitude)
		raw_longitude = float(self.longitude)
		# Southern and western hemispheres are expressed as negative values.
		if self.northsouth == 'S':
			raw_latitude = -raw_latitude
		if self.eastwest == 'W':
			raw_longitude = -raw_longitude

		self.latitude = to_decimal_degrees(raw_latitude)
		self.longitude = to_decimal_degrees(raw_longitude)

		# Fractions of a second are dropped before the time is reformatted.
		whole_seconds = self.utc.split(".")[0]
		self.timeOfFix = time.strftime("%H:%M:%S", time.strptime(whole_seconds,"%H%M%S"))
		self.altitude = float(self.altitude)
		logging.debug("GPPGAParser finished")

CSV parsing regex

The regular expression is taken from Raimond Brookman, Regex fun with CSV.
For a good general CSV overview see The Comma Separated Value (CSV) File Format.
A complete Ruby CSV parsing library is FasterCSV (sudo gem install fastercsv).



csv_data = <<-EOS

fname,lname,age,salary
nancy,davolio,33,$30000
erin,borakova,28,$25250
tony,raphael,35,$28700

"Date","Pupil","Grade"
"25 May","Bloggs, Fred","C"
"25 May","Doe, Jane","B"
"15 July","Bloggs, Fred","D"

123456789,"Carr, Lisa",100000.00
444556666,"Barr, Clark",87000.00
777227878,"Parr, Jack",123000.00
998877665,"Charr, Lee",123000.00

Conference room 1, "John,  
Please bring the M. Mathers file for review  
-J.L.
"
10/18/2002,...

John,Doe,120 jefferson st.,Riverside, NJ, 08075
Jack,McGinnis,220 hobo Av.,Phila, PA,09119
"John ""Da Man""",Repici,120 Jefferson St.,Riverside, NJ,08075
Stephen,Tyler,"7452 Terrace ""At the Plaza"" road",SomeTown,SD, 91234
,Blankman,,SomeTown, SD, 00298
"Joan ""the bone"", Anne",Jet,"9th, at Terrace plc",Desert City,CO,00123

XXXX,D,3-May-02,83.01,83.58,71.13,78.04,9645300
XXXX,D,2-May-02,82.47,85.76,82.05,83.84,7210000,
XXXX,D,1-May-02,86.80,90.83,81.74,85.50,14253300

"1997",car model,E350
1997,car model,E350,"  Super luxurious truck    "
1997,car model,E350,"Go get one now
they are going fast"
1997,car model,E350,"Super ""luxurious"" truck"
1997,car model,E350,"Super, luxurious truck"

1997,car model,E350,"ac, abs, moon",3000.00
1999, car model,"Venture ""Extended Edition""",,4900.00,
1996, car model,Old Car,"BEYOND REPAIR!
air, moon roof, loaded",4799.00

This,is,a test,CSV, file," from ""http://lorance.freeshell.org/csv/test.csv""."
It contains,"quoted text",and,numbers 1234,5678
It also has,"quoted text with an embedded quote""<- right there"
Then there are a few,,blank fields like these here ->,,,
A quoted blank field,"",<- there.
A quoted blank field with newline,"\n",<- there.
This next one causes an error if newline handling is turned off.
"There is a newline here ->
<- and it should be processed correctly."
ABCD
"And here,,, is an""Error - no"
"And here,,, is an"Error - yes
"And here,,, is an",Error - no

1,2,3
ab,"c,d","e""f", "g"",""","h
jk",kl

"aaa","bbb","ccc"
zzz,yyy,xxx
"aaa","b
bb","ccc"
zzz,yyy,xxx

"aaa","b""bb","ccc"

EOS



# Split on commas and line breaks that are followed by an even number of
# double quotes, i.e. separators that sit outside quoted fields. Because the
# separator is captured, the separators themselves are part of the result.
# Alternative without captured separators:
#   csv_data.split(/[,\n\r]+(?=(?:[^\"]*\"[^\"]*\")*(?![^\"]*\"))/m)
pieces = csv_data.split(/(,|\r\n|\n|\r)(?=(?:[^\"]*\"[^\"]*\")*(?![^\"]*\"))/m)

pieces.each do |csv|
  next if csv.empty?

  csv = csv.strip

  # A field that starts or ends with a double quote, but not both, is
  # reported as an error.
  # examples: csv => "ab\nc"def  or  abc"de\nf"
  if /\A(".*[^"]|[^"].*")\z/m =~ csv
    puts
    puts "Error:"
    p csv
    puts csv[/\A./mu], csv[/.\z/mu]
    puts
    next
  end

  # gsub! leaves the string untouched when the pattern does not match.
  csv.gsub!(/\A"(.*)"\z/m, '\1')   # remove double-quotes at string beginning & end
  csv.gsub!(/""/m, '"')            # remove a double-quote from double double-quotes

  p csv
end



PHP : Parsea HTML inyectado / Parser HTML inject

Parsea HTML Inyectado. Esto es útil cuando nos escriben código HTML o Javascript maligno en un textarea. Para más información www.php.net / Parses injected HTML. This is useful when someone types HTML code or malicious JavaScript into a textarea. For more info see www.php.net (sorry for my English)
Código fuente / Source code :

/**
 * Makes user-supplied text safe to embed in an HTML page.
 *
 * All HTML special characters are converted to entities and every line
 * break gets a <br /> in front of it.
 *
 * @param string $texto Raw text, e.g. the content of a textarea.
 * @return string Escaped text.
 */
function parsearHTMLInjectado($texto)
{
	// ENT_QUOTES also escapes single quotes, which the flag-less call left
	// untouched on PHP < 8.1; the explicit charset makes the result
	// independent of the default_charset setting, and ENT_SUBSTITUTE keeps
	// invalid byte sequences from turning the whole result into ''.
	return nl2br( htmlentities($texto, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') );
}

Java - Dom Parser Example

// Simple Parser XML with DOM

package parser;

import java.io.File;
import java.io.IOException;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

public class DOMParser
{
	private Document doc = null;
	
	public DOMParser()
	{
		try
		{
			doc = parserXML(new File("parser/file.xml"));
			
			visit(doc, 0);
		}
		catch(Exception error)
		{
			error.printStackTrace();
		}
	}
	
	public void visit(Node node, int level)
	{
		NodeList nl = node.getChildNodes();
		
		for(int i=0, cnt=nl.getLength(); i<cnt; i++)
		{
			System.out.println("["+nl.item(i)+"]");
			
			visit(nl.item(i), level+1);
		}
	}
	
	public Document parserXML(File file) throws SAXException, IOException, ParserConfigurationException
	{
		return DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(file);
	}
	
	public static void main(String[] args)
	{
		new DOMParser();
	}
}

Python - Very Simple Parser

// Very Simple Parser

from sgmllib import SGMLParser

import urllib

class ParserHTML(SGMLParser):
	"""Re-emits the parsed markup to /tmp/fileOUT.html, one token per line."""

	def scrivi(self):
		"""Open the output file; must be called before feeding any data."""
		self.f = open('/tmp/fileOUT.html', 'w')

	def unknown_starttag(self, tag, attrs):
		"""Write the start tag with its attributes rebuilt as name="value"."""
		pieces = '<' + tag
		needs_closing_quote = False

		for name, content in attrs:
			same_ignoring_case = name.lower() == content.lower()
			if same_ignoring_case and name != content:
				# Name and value differ only in letter case: drop the last
				# character written so far and append the value; the closing
				# quote is added after the loop if this was the last attribute.
				pieces = pieces[:-1] + ' ' + str(content)
				needs_closing_quote = True
			else:
				pieces += ' ' + str(name) + '="' + str(content) + '"'
				needs_closing_quote = False

		if needs_closing_quote:
			pieces += '"'

		pieces += '>'
		self.f.write(pieces + "\n")

	def handle_data(self, data):
		"""Write text content followed by a newline."""
		self.f.write(data + "\n")

	def unknown_endtag(self, tag):
		"""Write the end tag on its own line."""
		self.f.write('</' + tag + '>' + "\n")

if __name__ == '__main__':

	p = ParserHTML()
	p.scrivi()
	source = open('/tmp/fileIN.html', 'r')
	try:
		p.feed(source.read())
		# SGMLParser.close() forces processing of data still buffered by the
		# parser; without it the tail of the document can be lost.
		p.close()
	finally:
		# Release both file handles so the output is flushed to disk.
		source.close()
		p.f.close()

CSV Parser / Writer for PHP

CSV Parser / Writer

Example A:

//cell separator, row separator, value enclosure
$csv = new CSV(';', "\r\n", '"');

//parse the string content
$csv->setContent(file_get_contents('data.csv'));

//returns an array with the CSV data
print_r($csv->getArray());



Example B:

$csv = new CSV(';', "\r\n", '"');
//sets up the content through an array
$csv->setArray(
	array(
		array('col"una1', "colu\r\nna2"),
		array('col;una3', 'coluna4')
	)
);
//returns a string with the CSV representation
print $csv->getContent();



<?php
//+ Jonas Raoni Soares Silva
//@ http://jsfromhell.com
/**
 * Converts between CSV text and an array of rows (each row an array of
 * cell strings), with configurable cell delimiter, row delimiter and value
 * enclosure. All three may be longer than one character.
 */
class CSV{
	var $cellDelimiter;
	var $valueEnclosure;
	var $rowDelimiter;
	// Current data: array of rows, each an array of cell strings. Declared
	// explicitly because dynamic properties are deprecated since PHP 8.2.
	var $o = array();

	/**
	 * @param string $cellDelimiter  separator between cells, e.g. ';'
	 * @param string $rowDelimiter   separator between rows, e.g. "\r\n"
	 * @param string $valueEnclosure quote string around cells, e.g. '"'
	 */
	function __construct($cellDelimiter, $rowDelimiter, $valueEnclosure){
		$this->cellDelimiter = $cellDelimiter;
		$this->valueEnclosure = $valueEnclosure;
		$this->rowDelimiter = $rowDelimiter;
		$this->o = array();
	}
	// PHP 4 style constructor. PHP 8 no longer treats it as a constructor,
	// so the real work lives in __construct; it is kept as a plain method
	// for code that calls CSV() / parent::CSV() explicitly.
	function CSV($cellDelimiter, $rowDelimiter, $valueEnclosure){
		$this->__construct($cellDelimiter, $rowDelimiter, $valueEnclosure);
	}
	// Returns the current rows.
	function getArray(){
		return $this->o;
	}
	// Replaces the current rows with $o (array of arrays of strings).
	function setArray($o){
		$this->o = $o;
	}
	// Serializes the current rows. A cell is wrapped in the enclosure when
	// it contains the enclosure, the row delimiter or the cell delimiter;
	// enclosures inside such a cell are doubled. Returns '' when any of the
	// three delimiters is an empty string.
	function getContent(){
		if(!(($bl = strlen($b = $this->rowDelimiter)) && ($dl = strlen($d = $this->cellDelimiter)) && ($ql = strlen($q = $this->valueEnclosure))))
			return '';
		for($o = $this->o, $i = -1; ++$i < count($o);){
			for($e = 0, $j = -1; ++$j < count($o[$i]);)
				// $e becomes true when the cell contains the enclosure; it
				// decides below whether the enclosures have to be doubled.
				(($e = strpos($o[$i][$j], $q) !== false) || strpos($o[$i][$j], $b) !== false || strpos($o[$i][$j], $d) !== false)
				&& $o[$i][$j] = $q . ($e ? str_replace($q, $q . $q, $o[$i][$j]) : $o[$i][$j]) . $q;
			$o[$i] = implode($d, $o[$i]);
		}
		return implode($b, $o);
	}
	// Parses the CSV text $s into rows. Returns true on success (an empty
	// string yields no rows) and false when any delimiter is an empty string.
	function setContent($s){
		$this->o = array();
		if(!strlen($s))
			return true;
		if(!(($bl = strlen($b = $this->rowDelimiter)) && ($dl = strlen($d = $this->cellDelimiter)) && ($ql = strlen($q = $this->valueEnclosure))))
			return false;
		// $e: inside an enclosed value, $r/$c: current row/cell index,
		// $i: byte offset into $s. $this->o is bound by reference to $o.
		for($o = array(array('')), $this->o = &$o, $e = $r = $c = 0, $i = -1, $l = strlen($s); ++$i < $l;){
			if(!$e && substr($s, $i, $bl) == $b){
				// Row delimiter outside an enclosure: start a new row.
				$o[++$r][$c = 0] = '';
				$i += $bl - 1;
			}
			elseif(substr($s, $i, $ql) == $q){
				// Enclosure: inside a value a doubled enclosure is a literal
				// one and a single one ends the value; outside it opens a
				// value if the cell is still empty, otherwise it is literal.
				$e ? (substr($s, $i + $ql, $ql) == $q ?
				$o[$r][$c] .= substr($s, $i += $ql, $ql) : $e = 0)
				: (strlen($o[$r][$c]) == 0 ? $e = 1 : $o[$r][$c] .= substr($s, $i, $ql));
				$i += $ql - 1;
			}
			elseif(!$e && substr($s, $i, $dl) == $d){
				// Cell delimiter outside an enclosure: start a new cell.
				$o[$r][++$c] = '';
				$i += $dl - 1;
			}
			else
				$o[$r][$c] .= $s[$i];
		}
		return true;
	}
}
?>

Math Parser //JavaScript Class


This class is able to parse math expressions and also run user defined functions.

JavaScript already has the "eval" function, which does this kind of thing well, but the objective of this code was just to give me some fun and a new challenge =)~

[UPDATED CODE AND HELP CAN BE FOUND HERE]

Usage:

x = new MathProcessor;
try{alert(x.parse("1+2-(3*4) + medium(2,3) - frac( 2.2231)"));}
catch(e){alert(e);}


It's possible to add more functions to the class, just add them into the "methods" property ;]

Well, that's it :)

//+ Jonas Raoni Soares Silva
//@ http://jsfromhell.com/classes/math-processor [v1.0]

MathProcessor = function(){ //v1.0
    var o = this;
    o.o = {
        "+": function(a, b){ return +a + b; },
        "-": function(a, b){ return a - b; },
        "%": function(a, b){ return a % b; },
        "/": function(a, b){ return a / b; },
        "*": function(a, b){ return a * b; },
        "^": function(a, b){ return Math.pow(a, b); },
        "~": function(a, b){ return Math.sqrt(a, b); }
    };
    o.s = { "^": 3, "~": 3, "*": 2, "/": 2, "%": 1, "+": 0, "-": 0 };
    o.u = {"+": 1, "-": -1}, o.p = {"(": 1, ")": -1};
};

// Evaluates the expression string e and returns the result as a number.
// Spaces are stripped, the string is split into single characters and handed
// to RPN, which returns the root node x of an expression tree. The code
// below reads a node as [value-or-operator, <link>, left, right].
MathProcessor.prototype.parse = function(e){
    // Pass 1: walk the tree with an explicit stack s, collecting into o every
    // node that has a left child (n[2]) and queuing its right child (n[3]).
    for(var n, x, o = [], s = [x = this.RPN(e.replace(/ /g, "").split(""))]; s.length;)
        for((n = s[s.length-1], --s.length); n[2]; o[o.length] = n, s[s.length] = n[3], n = n[2]);
    // Pass 2: pop the collected nodes (last collected first) and overwrite
    // each node's slot 0 with the result of its operator applied to slot 0
    // of both children. Operands that are NaN are passed through this.f
    // first (this.f is defined elsewhere -- presumably it evaluates function
    // calls such as "medium(2,3)"; TODO confirm).
    for(; (n = o.pop()) != undefined; n[0] = this.o[n[0]](isNaN(n[2][0]) ? this.f(n[2][0]) : n[2][0], isNaN(n[3][0]) ? this.f(n[3][0]) : n[3][0]));
    // The root's slot 0 now holds the final value.
    return +x[0];
};

// User-callable functions; add further entries to make more functions
// available.
MathProcessor.prototype.methods = (function(){
    // Truncates x toward zero. parseInt(x) was used for this before, but it
    // works on the string form of x and therefore breaks on values printed
    // in exponent notation (parseInt(1e-7) === 1, parseInt(1e21) === 1).
    // Non-finite input still yields NaN, as parseInt did.
    function truncate(x){
        return isFinite(x) ? (x < 0 ? Math.ceil(x) : Math.floor(x)) : NaN;
    }

    return {
        // Integer part of a / b.
        "div": function(a, b){ return truncate(a / b); },
        // Fractional part of a (keeps the sign of a).
        "frac": function(a){ return a - truncate(a); },
        // Sum of all arguments, added from the last to the first.
        "sum": function(n1, n2, n3, n){ for(var r = 0, a, l = (a = arguments).length; l; r += a[--l]); return r; },
        // Arithmetic mean of a and b.
        "medium": function(a, b){ return (a + b) / 2; }
    };
})();

// Throws an Error prefixed with "MathProcessor: "; when s is empty or
// missing, the default message is used.
MathProcessor.prototype.error = function(s){
    var detail = s ? s : "Erro na expressão";
    throw new Error("MathProcessor: " + detail);
};

// Builds an expression tree from e, an array of single characters, and
// returns its root node. e is consumed destructively (shift/unshift/splice
// via this.v).
// Nodes are 4-slot arrays. As used here and in parse: slot 0 holds an
// operator or a value, slot 2 the left operand, slot 3 the right operand;
// slot 1 is set to the node a child was attached to.
// NOTE(review): despite the name, no postfix token list is produced here --
// the result is a linked tree; confirm the naming with the author.
MathProcessor.prototype.RPN = function(e){
    var _, r, c = r = [, , , 0];
    // Make sure the expression starts with a sign (prepend "+" if it does
    // not), then collapse a run of leading signs into a single one: the
    // product of two sign values is +1 or -1, and "+ 1" turns -1 into a
    // falsy 0.
    if(e[0] in this.u || !e.unshift("+"))
        for(; e[1] in this.u; e[0] = this.u[e.shift()] * this.u[e[0]] + 1 ? "+" : "-");
    // The leading sign becomes a "*" node whose right operand is +1 or -1 ...
    (c[3] = [this.u[e.shift()], c, , 0])[1][0] = "*", (r = [, , c, 0])[2][1] = r;
    // ... and whose left operand is the first value read by this.v.
    (c[2] = this.v(e))[1] = c;
    // Nothing left: the sign node is the whole tree. Otherwise, if an
    // operator follows, it is stored in the root; an operator without a
    // right-hand side is an error.
    (!e.length && (r = c)) || (e[0] in this.s && ((c = r)[0] = e.shift(), !e.length && this.error()));
     while(e.length){
        if(e[0] in this.u){
            // Signs in front of the next operand: collapse them, then attach
            // a "*" node whose left operand is the literal -1.
            for(; e[1] in this.u; e[0] = this.u[e.shift()] * this.u[e[0]] + 1 ? "+" : "-");
            (c = c[3] = ["*", c, , 0])[2] = [-1, c, , 0];
        }
        // Read the next operand as the right child of the current node.
        (c[3] = this.v(e))[1] = c;
        // If another operator follows, re-link the tree according to the
        // precedence table this.s: a higher-precedence operator is inserted
        // below the current node, otherwise a new node is created at or
        // directly under the root.
        e[0] in this.s && (c = this.s[e[0]] > this.s[c[0]] ?
            ((c[3] = (_ = c[3], c[2]))[1][2] = [e.shift(), c, _, 0])[2][1] = c[2]
            : r == c ? (r = [e.shift(), , c, 0])[2][1] = r
            : ((r[2] = (_ = r[2], [e.shift(), r, ,0]))[2] = _)[1] = r[2]);
    }
    return r;
};

MathProcessor.prototype.v = function(e){
    if("0123456789.".indexOf(e[0]) + 1){
        for(var i = -1, l = e.length; ++i < l && "0123456789.".indexOf(e[i]) + 1;);
        return [+e.splice(0,i).join(""), , , 0];
    }
    else if(e[0] == "("){
        for(var i = 0, l = e.length, j = 1; ++i < l && (e[i] in this.p && (j += this.p[e[i]]), j););
        return this.RPN(l = e.splice(0,i), l.shift(), !j && e.shift());
    }
    else{