<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
  <channel>
    <title>DZone Snippets: split code</title>
    <link>http://snippets.dzone.com/posts</link>
    <pubDate>Sat, 17 May 2008 16:02:07 GMT</pubDate>
    <description>DZone Snippets: split code</description>
    <item>
      <title>Split Apache logs according to GeoIP country</title>
      <link>http://snippets.dzone.com/posts/show/5255</link>
      <description>// Split Apache logs according to GeoIP country&lt;br /&gt;&lt;br /&gt;&lt;code&gt;&lt;br /&gt;#!/usr/bin/perl&lt;br /&gt;&lt;br /&gt;# $Id$&lt;br /&gt;&lt;br /&gt;# Split Apache logs according to GeoIP country&lt;br /&gt;&lt;br /&gt;use strict;&lt;br /&gt;use warnings;&lt;br /&gt;&lt;br /&gt;## no critic (ValuesAndExpressions::RequireInterpolationOfMetachars)&lt;br /&gt;our ($VERSION) = '$Revision$' =~ m{ \$Revision: \s+ (\S+) }xms;&lt;br /&gt;## use critic&lt;br /&gt;&lt;br /&gt;use Geo::IP;&lt;br /&gt;&lt;br /&gt;my $gi = Geo::IP-&gt;open('/usr/local/share/GeoIP/GeoIPCity.dat', GEOIP_STANDARD);&lt;br /&gt;&lt;br /&gt;my @logs = @ARGV;&lt;br /&gt;&lt;br /&gt;my %record_for;&lt;br /&gt;&lt;br /&gt;foreach my $log (@logs) {&lt;br /&gt;    die "Can't read $log\n" if !-r $log;&lt;br /&gt;    &lt;br /&gt;    my %fh_for;&lt;br /&gt;    my $num_lines_parsed = 0;&lt;br /&gt;    &lt;br /&gt;    my $log_fh;&lt;br /&gt;    if ($log =~ m/ \.gz \z /xms) {&lt;br /&gt;        open $log_fh, "gzip -cd $log |" or die "Can't open gzip pipe\n";&lt;br /&gt;    }&lt;br /&gt;    else {&lt;br /&gt;        open $log_fh, '&lt;', $log or die "Can't open $log\n";&lt;br /&gt;    }&lt;br /&gt;    &lt;br /&gt;    my $log_base = $log;&lt;br /&gt;    $log_base =~ s/ \.gz \z //xms;&lt;br /&gt;    &lt;br /&gt;    while (my $line = &lt;$log_fh&gt;) {&lt;br /&gt;        $num_lines_parsed++;&lt;br /&gt;        if (!($num_lines_parsed % 1000)) {&lt;br /&gt;            print STDERR "Parsed $num_lines_parsed lines of $log\n";&lt;br /&gt;        }&lt;br /&gt;        &lt;br /&gt;        my ($host) = $line =~ m/ \A (\S+) \s /xms;&lt;br /&gt;        &lt;br /&gt;        if (!exists $record_for{$host}) {&lt;br /&gt;            my $record = $gi-&gt;record_by_name($host);&lt;br /&gt;            $record_for{$host} = $record || 0;&lt;br /&gt;        }&lt;br /&gt;        &lt;br /&gt;        my $country = 'unknown';&lt;br /&gt;        if 
(exists $record_for{$host} &amp;&amp; $record_for{$host}) {&lt;br /&gt;            $country = lc($record_for{$host}-&gt;country_name());&lt;br /&gt;            $country =~ s/\W+/_/gxms;&lt;br /&gt;        }&lt;br /&gt;        &lt;br /&gt;        if (!exists $fh_for{$country}) {&lt;br /&gt;            open $fh_for{$country}, '&gt;', "$log_base.$country.out"&lt;br /&gt;                or die "Can't write to $log_base.$country.out\n";&lt;br /&gt;        }&lt;br /&gt;        &lt;br /&gt;        print {$fh_for{$country}} $line;&lt;br /&gt;    }&lt;br /&gt;    &lt;br /&gt;    foreach my $fh (values %fh_for) {&lt;br /&gt;        close $fh;&lt;br /&gt;    }&lt;br /&gt;    &lt;br /&gt;    close $log_fh;&lt;br /&gt;}&lt;br /&gt;&lt;/code&gt;</description>
      <pubDate>Wed, 19 Mar 2008 15:02:26 GMT</pubDate>
      <guid>http://snippets.dzone.com/posts/show/5255</guid>
      <author>iansealy (Ian Sealy)</author>
    </item>
    <item>
      <title>Splitting large Scriptella ETL files</title>
      <link>http://snippets.dzone.com/posts/show/4216</link>
      <description>The following example demonstrates how to split a large &lt;a href="http://scriptella.javaforge.com"&gt;Scriptella ETL&lt;/a&gt; file into several parts. This example is based on a traditional XML parsed entities approach:&lt;br /&gt;&lt;br /&gt;&lt;code&gt;&lt;br /&gt;&lt;!DOCTYPE etl SYSTEM "http://scriptella.javaforge.com/dtd/etl.dtd"&lt;br /&gt;[&lt;br /&gt;    &lt;!-- Declaring the first external parsed entity to include --&gt;&lt;br /&gt;    &lt;!ENTITY part1 SYSTEM "part1.xml"&gt;&lt;br /&gt;    &lt;br /&gt;    &lt;!-- Declaring the second external parsed entity to include --&gt;&lt;br /&gt;    &lt;!ENTITY part2 SYSTEM "part2.xml"&gt;&lt;br /&gt;]&gt;&lt;br /&gt;&lt;etl&gt;&lt;br /&gt;    &lt;connection driver="text"/&gt;&lt;br /&gt;&lt;br /&gt;    &lt;!-- Including file #1 --&gt;&lt;br /&gt;    &amp;part1;&lt;br /&gt;&lt;br /&gt;    &lt;script&gt;&lt;br /&gt;        content of the script&lt;br /&gt;    &lt;/script&gt;&lt;br /&gt;    &lt;br /&gt;    &lt;!-- Including file #2 --&gt;&lt;br /&gt;    &amp;part2;&lt;br /&gt;&lt;br /&gt;&lt;/etl&gt;&lt;br /&gt;&lt;/code&gt;</description>
      <pubDate>Wed, 27 Jun 2007 20:33:23 GMT</pubDate>
      <guid>http://snippets.dzone.com/posts/show/4216</guid>
      <author>ejboy (Fyodor Kupolov)</author>
    </item>
    <item>
      <title>Java - Splitta una stringa</title>
      <link>http://snippets.dzone.com/posts/show/3589</link>
      <description>&lt;code&gt;&lt;br /&gt;	// Splitta una stringa&lt;br /&gt;	private String[] splitString(String str, String delims)&lt;br /&gt;	{&lt;br /&gt;		if(str == null)&lt;br /&gt;			return null;&lt;br /&gt;		else if(str.equals("") || delims == null || delims.length() == 0)&lt;br /&gt;			return new String[]{ str };&lt;br /&gt;		&lt;br /&gt;		String[] s;&lt;br /&gt;	  	Vector v = new Vector();&lt;br /&gt;		&lt;br /&gt;	  	int pos = 0;&lt;br /&gt;		int newpos = str.indexOf(delims, pos);;&lt;br /&gt;&lt;br /&gt;		while(newpos != -1)&lt;br /&gt;		{&lt;br /&gt;			v.addElement(str.substring(pos, newpos));&lt;br /&gt;			pos = newpos + delims.length();&lt;br /&gt;			newpos = str.indexOf(delims, pos);&lt;br /&gt;		}&lt;br /&gt;		v.addElement(str.substring(pos));&lt;br /&gt;		&lt;br /&gt;		s = new String[v.size()];&lt;br /&gt;		for(int i=0, cnt=s.length; i&lt;cnt; i++)&lt;br /&gt;			s[i] = (String) v.elementAt(i);&lt;br /&gt;		&lt;br /&gt;		return s;&lt;br /&gt;	}&lt;br /&gt;&lt;/code&gt;</description>
      <pubDate>Mon, 26 Feb 2007 23:09:02 GMT</pubDate>
      <guid>http://snippets.dzone.com/posts/show/3589</guid>
      <author>whitetiger ()</author>
    </item>
    <item>
      <title>fractionfiles.py</title>
      <link>http://snippets.dzone.com/posts/show/3541</link>
      <description>// Splits a file into smaller ones, and joins them together.&lt;br /&gt;&lt;br /&gt;&lt;code&gt;&lt;br /&gt;#!/usr/bin/env python&lt;br /&gt;&lt;br /&gt;"""Splits and joins files. Helpful when media can't fit a file.&lt;br /&gt;Be prepared for a lot of output files!"""&lt;br /&gt;&lt;br /&gt;__author__="Andrew Pennebaker (andrew.pennebaker@gmail.com)"&lt;br /&gt;__date__="6 Jan 3006 - 12 Feb 2006"&lt;br /&gt;__copyright__="Copyright 2006 Andrew Pennebaker"&lt;br /&gt;__license__="GPL"&lt;br /&gt;__version__="0.3"&lt;br /&gt;__URL__="http://snippets.dzone.com/posts/show/3541"&lt;br /&gt;&lt;br /&gt;import sys, os&lt;br /&gt;from getopt import getopt&lt;br /&gt;&lt;br /&gt;SPLIT_MODE="SPLIT"&lt;br /&gt;JOIN_MODE="JOIN"&lt;br /&gt;&lt;br /&gt;def splitFile(name, length, number):&lt;br /&gt;	if length==None:&lt;br /&gt;		infile=open(name, "rb")&lt;br /&gt;		size=0&lt;br /&gt;		while infile.read(1)!="":&lt;br /&gt;			size+=1&lt;br /&gt;&lt;br /&gt;		infile.close()&lt;br /&gt;&lt;br /&gt;		maxlength=size/number&lt;br /&gt;		if number*maxlength&lt;size:&lt;br /&gt;			maxlength+=1&lt;br /&gt;&lt;br /&gt;	else:&lt;br /&gt;		if length&lt;1:&lt;br /&gt;			raise Exception&lt;br /&gt;&lt;br /&gt;	infile=None&lt;br /&gt;	try:&lt;br /&gt;		infile=open(name, "rb")&lt;br /&gt;	except Exception, e:&lt;br /&gt;		raise e&lt;br /&gt;&lt;br /&gt;	i=0&lt;br /&gt;	j=0&lt;br /&gt;	c=infile.read(1)&lt;br /&gt;	while c!="":&lt;br /&gt;		outfile=None&lt;br /&gt;		try:&lt;br /&gt;			outfile=open("%s.%d" % (name, j), "wb")&lt;br /&gt;		except Exception, e:&lt;br /&gt;			raise e&lt;br /&gt;&lt;br /&gt;		while i&lt;length and c!="":&lt;br /&gt;			outfile.write(c)&lt;br /&gt;			c=infile.read(1)&lt;br /&gt;			i+=1&lt;br /&gt;&lt;br /&gt;		outfile.close()&lt;br /&gt;		i=0&lt;br /&gt;		j+=1&lt;br /&gt;&lt;br /&gt;	infile.close()&lt;br /&gt;&lt;br /&gt;def joinFiles(filenames):&lt;br /&gt;	if len(filenames)&lt;1:
&lt;br /&gt;		raise Exception&lt;br /&gt;&lt;br /&gt;	filenames.sort() # ...0 must be first&lt;br /&gt;&lt;br /&gt;	origFilename=filenames[0][0:-2] # take ".0" off the first file name&lt;br /&gt;	origFile=None&lt;br /&gt;&lt;br /&gt;	try:&lt;br /&gt;		origFile=open(origFilename, "wb")&lt;br /&gt;	except Exception, e:&lt;br /&gt;		raise e&lt;br /&gt;&lt;br /&gt;	c="&amp;" # dummy&lt;br /&gt;&lt;br /&gt;	for filename in filenames:&lt;br /&gt;		smallFile=None&lt;br /&gt;		try:&lt;br /&gt;			smallFile=open(filename, "rb")&lt;br /&gt;		except Exception, e:&lt;br /&gt;			raise e&lt;br /&gt;&lt;br /&gt;		c=smallFile.read(1)&lt;br /&gt;		while c!="":&lt;br /&gt;			origFile.write(c)&lt;br /&gt;			c=smallFile.read(1)&lt;br /&gt;&lt;br /&gt;		smallFile.close()&lt;br /&gt;&lt;br /&gt;	origFile.close()&lt;br /&gt;&lt;br /&gt;def usage():&lt;br /&gt;	print "Usage: %s [options] [files]" % (sys.argv[0])&lt;br /&gt;	print "\n--split &lt;file1 file 2 file 3...&gt;"&lt;br /&gt;	print "--join &lt;dir1 dir2 dir3 ...&gt;"&lt;br /&gt;	print "--maxlength &lt;bytes&gt;"&lt;br /&gt;	print "--maxfiles &lt;number&gt;"&lt;br /&gt;	print "--help (usage)"&lt;br /&gt;&lt;br /&gt;	sys.exit()&lt;br /&gt;&lt;br /&gt;def main():&lt;br /&gt;	global SPLIT_MODE&lt;br /&gt;	global JOIN_MODE&lt;br /&gt;&lt;br /&gt;	mode=SPLIT_MODE&lt;br /&gt;	filenames=[]&lt;br /&gt;	maxlength=1024&lt;br /&gt;	maxfiles=None&lt;br /&gt;&lt;br /&gt;	systemArgs=sys.argv[1:] # ignore program name&lt;br /&gt;&lt;br /&gt;	optlist=[]&lt;br /&gt;	args=[]&lt;br /&gt;&lt;br /&gt;	try:&lt;br /&gt;		optlist, args=getopt(systemArgs, None, ["split", "join", "maxlength=", "maxfiles=", "help"])&lt;br /&gt;	except Exception, e:&lt;br /&gt;		usage()&lt;br /&gt;&lt;br /&gt;	if len(optlist)&lt;1 or len(args)&lt;1:&lt;br /&gt;		usage()&lt;br /&gt;&lt;br /&gt;	for option, value in optlist:&lt;br /&gt;		if option=="--help":&lt;br /&gt;			usage()&lt;br /&gt;&lt;br /&gt;		elif 
option=="--split":&lt;br /&gt;			mode=SPLIT_MODE&lt;br /&gt;		elif option=="--join":&lt;br /&gt;			mode=JOIN_MODE&lt;br /&gt;		elif option=="--maxlength":&lt;br /&gt;			try:&lt;br /&gt;				maxlength=int(value)&lt;br /&gt;				if maxlength&lt;1:&lt;br /&gt;					raise Exception&lt;br /&gt;				maxfiles=None&lt;br /&gt;			except Exception, e:&lt;br /&gt;				raise "Length must be at least one"&lt;br /&gt;		elif option=="--maxfiles":&lt;br /&gt;			try:&lt;br /&gt;				maxfiles=int(value)&lt;br /&gt;				if maxfiles&lt;1:&lt;br /&gt;					raise Exception&lt;br /&gt;				maxlength=None&lt;br /&gt;			except Exception, e:&lt;br /&gt;				raise "Number must be at least one"&lt;br /&gt;&lt;br /&gt;	filenames=args&lt;br /&gt;&lt;br /&gt;	if mode==SPLIT_MODE:&lt;br /&gt;		for filename in filenames:&lt;br /&gt;			try:&lt;br /&gt;				splitFile(filename, maxlength, maxfiles)&lt;br /&gt;			except Exception, e:&lt;br /&gt;				raise e&lt;br /&gt;&lt;br /&gt;	elif mode==JOIN_MODE:&lt;br /&gt;		for directory in filenames:&lt;br /&gt;			files=["%s%s%s" % (directory, os.sep, file) for file in os.listdir(directory)]&lt;br /&gt;&lt;br /&gt;			try:&lt;br /&gt;				joinFiles(files)&lt;br /&gt;			except Exception, e:&lt;br /&gt;				raise e&lt;br /&gt;&lt;br /&gt;if __name__=="__main__":&lt;br /&gt;	main()&lt;br /&gt;&lt;/code&gt;</description>
      <pubDate>Mon, 19 Feb 2007 01:01:02 GMT</pubDate>
      <guid>http://snippets.dzone.com/posts/show/3541</guid>
      <author>mcandre (Andrew Pennebaker)</author>
    </item>
    <item>
      <title>Split array into smaller arrays of equal size</title>
      <link>http://snippets.dzone.com/posts/show/3486</link>
      <description>Split an array of elements into a set of smaller arrays of equal size. Extra elements are preferentially assigned to earlier arrays. If there are no elements in a given returned array it will be [] (empty array)&lt;br /&gt;&lt;br /&gt;&lt;code&gt;&lt;br /&gt;# use as standalone function&lt;br /&gt;def chunk_array(array, pieces=2)&lt;br /&gt;  len = array.length;&lt;br /&gt;  mid = (len/pieces)&lt;br /&gt;  chunks = []&lt;br /&gt;  start = 0&lt;br /&gt;  1.upto(pieces) do |i|&lt;br /&gt;    last = start+mid&lt;br /&gt;    last = last-1 unless len%pieces &gt;= i&lt;br /&gt;    chunks &lt;&lt; array[start..last] || []&lt;br /&gt;    start = last+1&lt;br /&gt;  end&lt;br /&gt;  chunks&lt;br /&gt;end&lt;br /&gt;&lt;br /&gt;# use as array.chunk&lt;br /&gt;class Array&lt;br /&gt;  def chunk(pieces=2)&lt;br /&gt;    len = self.length;&lt;br /&gt;    mid = (len/pieces)&lt;br /&gt;    chunks = []&lt;br /&gt;    start = 0&lt;br /&gt;    1.upto(pieces) do |i|&lt;br /&gt;      last = start+mid&lt;br /&gt;      last = last-1 unless len%pieces &gt;= i&lt;br /&gt;      chunks &lt;&lt; self[start..last] || []&lt;br /&gt;      start = last+1&lt;br /&gt;    end&lt;br /&gt;    chunks&lt;br /&gt;  end&lt;br /&gt;end&lt;br /&gt;&lt;br /&gt;&lt;/code&gt;&lt;br /&gt;&lt;br /&gt;Examples of use:&lt;br /&gt;&lt;br /&gt;&lt;code&gt;&lt;br /&gt;&gt;&gt; chunk_array [1,2,3,4,5,6], 2&lt;br /&gt;=&gt; [[1, 2, 3], [4, 5, 6]]&lt;br /&gt;&lt;br /&gt;&gt;&gt; chunk_array [1,2,3,4,5,6], 3&lt;br /&gt;=&gt; [[1, 2], [3, 4], [5, 6]]&lt;br /&gt;&lt;br /&gt;&gt;&gt; chunk_array [1,2,3,4,5,6], 4&lt;br /&gt;=&gt; [[1, 2], [3, 4], [5], [6]]&lt;br /&gt;&lt;br /&gt;&gt;&gt; chunk_array [1,2,3,4,5,6,7,8,9,10], 4&lt;br /&gt;=&gt; [[1, 2, 3], [4, 5, 6], [7, 8], [9, 10]]&lt;br /&gt;&lt;br /&gt;&gt;&gt; chunk_array [1,2,3], 4&lt;br /&gt;=&gt; [[1], [2], [3], []]&lt;br /&gt;&lt;br /&gt;&gt;&gt; chunk_array [], 2&lt;br /&gt;=&gt; [[], []]
&lt;br /&gt;&lt;/code&gt;&lt;br /&gt;&lt;br /&gt;if you prefer the second form (more ruby-ish, but not always appropriate)&lt;br /&gt;&lt;br /&gt;&lt;code&gt;&lt;br /&gt;&gt;&gt; [1,2,3,4,5,6,7,8,9,10].chunk&lt;br /&gt;=&gt; [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]&lt;br /&gt;&lt;br /&gt;&gt;&gt; [1,2,3,4,5,6,7,8,9,10].chunk 3&lt;br /&gt;=&gt; [[1, 2, 3, 4], [5, 6, 7], [8, 9, 10]]&lt;br /&gt;&lt;/code&gt;&lt;br /&gt;&lt;br /&gt;This is handy when used with a splat because you can do things like:&lt;br /&gt;&lt;br /&gt;&lt;code&gt;&lt;br /&gt;left, right = *chunk_array(all,2)&lt;br /&gt;&lt;/code&gt;</description>
      <pubDate>Fri, 09 Feb 2007 22:52:07 GMT</pubDate>
      <guid>http://snippets.dzone.com/posts/show/3486</guid>
      <author>mattsa (Matt Sanders)</author>
    </item>
    <item>
      <title>SPLIT-UNIQUE - split a block into unique and duplicate values</title>
      <link>http://snippets.dzone.com/posts/show/2948</link>
      <description>&lt;code&gt;&lt;br /&gt;    split-unique: func [block [any-block!] /local uniq dupe dest] [&lt;br /&gt;        uniq: copy []&lt;br /&gt;        dupe: copy []&lt;br /&gt;        foreach item block [&lt;br /&gt;            dest: either find/only uniq item [dupe] [uniq]&lt;br /&gt;            append/only dest item&lt;br /&gt;        ]&lt;br /&gt;        reduce [uniq dupe]&lt;br /&gt;    ]&lt;br /&gt;&lt;/code&gt;</description>
      <pubDate>Wed, 01 Nov 2006 21:35:34 GMT</pubDate>
      <guid>http://snippets.dzone.com/posts/show/2948</guid>
      <author>gregg.irwin (Gregg Irwin)</author>
    </item>
    <item>
      <title>GROUP - group like elements in a block</title>
      <link>http://snippets.dzone.com/posts/show/2947</link>
      <description>&lt;code&gt;&lt;br /&gt;    group: func [&lt;br /&gt;        {Returns a block of sub-blocks with items partitioned by value.}&lt;br /&gt;        block  [any-block!]&lt;br /&gt;        /local result&lt;br /&gt;    ][&lt;br /&gt;        result: copy []&lt;br /&gt;        ; First, build up a list of keys, with a place for values&lt;br /&gt;        ; to go with each key.&lt;br /&gt;        foreach item block [&lt;br /&gt;            if not find/only/skip result item 2 [&lt;br /&gt;                repend result [item copy []]&lt;br /&gt;            ]&lt;br /&gt;        ]&lt;br /&gt;        ; Add items to the block associated with each key.&lt;br /&gt;        foreach item block [append/only select result item item]&lt;br /&gt;        result&lt;br /&gt;    ]&lt;br /&gt;&lt;/code&gt;</description>
      <pubDate>Wed, 01 Nov 2006 21:30:12 GMT</pubDate>
      <guid>http://snippets.dzone.com/posts/show/2947</guid>
      <author>gregg.irwin (Gregg Irwin)</author>
    </item>
    <item>
      <title>Split String into roughly equal-sized chunks.</title>
      <link>http://snippets.dzone.com/posts/show/2631</link>
      <description>Split a string into an array of roughly equal sized chunks based on a string or regular expression delimiter.&lt;br /&gt;Delimiter is preserved in output.&lt;br /&gt;&lt;br /&gt;&lt;code&gt;&lt;br /&gt;class String&lt;br /&gt;  def chunk_string(average_segment_size = 40, sclice_on = /\s+/)&lt;br /&gt;    out = []&lt;br /&gt;    slices_estimate = self.size.divmod(average_segment_size)&lt;br /&gt;    slice_count = (slices_estimate[1] &gt; 0 ? slices_estimate[0] + 1 : slices_estimate[0])&lt;br /&gt;    slice_guess = self.size / slice_count&lt;br /&gt;    previous_slice_location = 0&lt;br /&gt;    (1..slice_count - 1).each do&lt;br /&gt;      |i|&lt;br /&gt;      slice_location = self.nearest_split(slice_guess * i, sclice_on)&lt;br /&gt;      out &lt;&lt; self.slice(previous_slice_location..slice_location)&lt;br /&gt;      previous_slice_location = slice_location + 1&lt;br /&gt;    end&lt;br /&gt;    out &lt;&lt; self.slice(previous_slice_location..self.size)&lt;br /&gt;    out&lt;br /&gt;  end&lt;br /&gt;&lt;br /&gt;  def nearest_split(slice_start, slice_on)&lt;br /&gt;    left_scan_location  = (self.slice(0..slice_start).rindex(slice_on)).to_i&lt;br /&gt;    right_scan_location = (self.slice((slice_start+1)..self.size).index(slice_on)).to_i + slice_start&lt;br /&gt;    ((slice_start - left_scan_location) &lt; (right_scan_location - slice_start) ? left_scan_location : right_scan_location)&lt;br /&gt;  end&lt;br /&gt;end&lt;br /&gt;&lt;/code&gt;</description>
      <pubDate>Thu, 21 Sep 2006 00:29:28 GMT</pubDate>
      <guid>http://snippets.dzone.com/posts/show/2631</guid>
      <author>duncanbeevers (Duncan Beevers)</author>
    </item>
    <item>
      <title>Space-Separated Tag Parser</title>
      <link>http://snippets.dzone.com/posts/show/1625</link>
      <description>Here is a function that accepts a string containing tags and returns an array of extracted tags. (Updated to ignore duplicates)&lt;br /&gt;&lt;code&gt;&lt;br /&gt;/**&lt;br /&gt; * Parses a String of Tags&lt;br /&gt; *&lt;br /&gt; * Tags are space delimited. Either single or double quotes mark a phrase.&lt;br /&gt; * Odd quotes will cause everything on their right to reflect as one single&lt;br /&gt; * tag or phrase. All white-space within a phrase is converted to single&lt;br /&gt; * space characters. Quotes buried within tags are ignored! Duplicate tags&lt;br /&gt; * are ignored, even duplicate phrases that are equivalent.&lt;br /&gt; *&lt;br /&gt; * Returns an array of tags.&lt;br /&gt; */&lt;br /&gt;function ParseTagString($sTagString)&lt;br /&gt;{&lt;br /&gt;	$arTags = array();		// Array of Output&lt;br /&gt;	$cPhraseQuote = null;	// Record of the quote that opened the current phrase&lt;br /&gt;	$sPhrase = null;		// Temp storage for the current phrase we are building&lt;br /&gt;	&lt;br /&gt;	// Define some constants&lt;br /&gt;	static $sTokens = " \r\n\t";	// Space, Return, Newline, Tab&lt;br /&gt;	static $sQuotes = "'\"";		// Single and Double Quotes&lt;br /&gt;	&lt;br /&gt;	// Start the State Machine&lt;br /&gt;	do&lt;br /&gt;	{&lt;br /&gt;		// Get the next token, which may be the first&lt;br /&gt;		$sToken = isset($sToken)? strtok($sTokens) : strtok($sTagString, $sTokens);&lt;br /&gt;		&lt;br /&gt;		// Are there more tokens?&lt;br /&gt;		if ($sToken === false)&lt;br /&gt;		{&lt;br /&gt;			// Ensure that the last phrase is marked as ended&lt;br /&gt;			$cPhraseQuote = null;&lt;br /&gt;		}&lt;br /&gt;		else&lt;br /&gt;		{		&lt;br /&gt;			// Are we within a phrase or not?&lt;br /&gt;			if ($cPhraseQuote !== null)&lt;br /&gt;			{&lt;br /&gt;				// Will the current token end the phrase?
&lt;br /&gt;				if (substr($sToken, -1, 1) === $cPhraseQuote)&lt;br /&gt;				{&lt;br /&gt;					// Trim the last character and add to the current phrase, with a single leading space if necessary&lt;br /&gt;					if (strlen($sToken) &gt; 1) $sPhrase .= ((strlen($sPhrase) &gt; 0)? ' ' : null) . substr($sToken, 0, -1);&lt;br /&gt;					$cPhraseQuote = null;&lt;br /&gt;				}&lt;br /&gt;				else&lt;br /&gt;				{&lt;br /&gt;					// If not, add the token to the phrase, with a single leading space if necessary&lt;br /&gt;					$sPhrase .= ((strlen($sPhrase) &gt; 0)? ' ' : null) . $sToken;&lt;br /&gt;				}&lt;br /&gt;			}&lt;br /&gt;			else&lt;br /&gt;			{&lt;br /&gt;				// Will the current token start a phrase?&lt;br /&gt;				if (strpos($sQuotes, $sToken[0]) !== false)&lt;br /&gt;				{&lt;br /&gt;					// Will the current token end the phrase?&lt;br /&gt;					if ((strlen($sToken) &gt; 1) &amp;&amp; ($sToken[0] === substr($sToken, -1, 1)))&lt;br /&gt;					{&lt;br /&gt;						// The current token begins AND ends the phrase, trim the quotes&lt;br /&gt;						$sPhrase = substr($sToken, 1, -1);&lt;br /&gt;					}&lt;br /&gt;					else&lt;br /&gt;					{&lt;br /&gt;						// Remove the leading quote&lt;br /&gt;						$sPhrase = substr($sToken, 1);&lt;br /&gt;						$cPhraseQuote = $sToken[0];&lt;br /&gt;					}&lt;br /&gt;				}&lt;br /&gt;				else&lt;br /&gt;					$sPhrase = $sToken;&lt;br /&gt;			}&lt;br /&gt;		}&lt;br /&gt;		&lt;br /&gt;		// If, at this point, we are not within a phrase, the prepared phrase is complete and can be added to the array&lt;br /&gt;		if (($cPhraseQuote === null) &amp;&amp; ($sPhrase != null))&lt;br /&gt;		{&lt;br /&gt;			$sPhrase = strtolower($sPhrase);&lt;br /&gt;			if (!in_array($sPhrase, $arTags)) $arTags[] = $sPhrase;&lt;br /&gt;			$sPhrase = null;&lt;br /&gt;		}&lt;br /&gt;	}&lt;br /&gt;	while ($sToken !== false);	// Stop when we receive FALSE from strtok()&lt;br /&gt;	return $arTags;&lt;br /&gt;}&lt;br 
/&gt;&lt;/code&gt;&lt;br /&gt;&lt;br /&gt;The string can be recreated from the array with the use of this reverse function:&lt;br /&gt;&lt;code&gt;&lt;br /&gt;/**&lt;br /&gt; * Reverses ParseTagString()&lt;br /&gt; */&lt;br /&gt;function CreateTagString($arTags)&lt;br /&gt;{&lt;br /&gt;	// Prepare each tag to be imploded&lt;br /&gt;	for ($i = 0; $i &lt; sizeof($arTags); $i++)&lt;br /&gt;	{&lt;br /&gt;		// Record findings&lt;br /&gt;		$bContainsWhitespace = false;	// Was whitespace found?&lt;br /&gt;		$cRequiredQuote = '"';			// Use double-quote by default&lt;br /&gt;		$cLastChar = null;&lt;br /&gt;	&lt;br /&gt;		// Search the tag&lt;br /&gt;		for ($j = 0; $j &lt; strlen($arTags[$i]); $j++)&lt;br /&gt;		{&lt;br /&gt;			$c = $arTags[$i][$j];&lt;br /&gt;			&lt;br /&gt;			// If the current character is a space&lt;br /&gt;			if ($c === ' ')&lt;br /&gt;			{&lt;br /&gt;				$bContainsWhitespace = true;&lt;br /&gt;				&lt;br /&gt;				// If the previous char was a double quote, we require single quotes round our phrase&lt;br /&gt;				if ($cLastChar === '"')&lt;br /&gt;				{&lt;br /&gt;					$cRequiredQuote = "'";&lt;br /&gt;					break;	// There is no more point in continuing our search, we cant handle double-mixed quotes&lt;br /&gt;				}&lt;br /&gt;			}&lt;br /&gt;			&lt;br /&gt;			// Record this char as the last char&lt;br /&gt;			$cLastChar = $c;&lt;br /&gt;		}&lt;br /&gt;		&lt;br /&gt;		// Quote if necessary&lt;br /&gt;		if ($bContainsWhitespace) $arTags[$i] = $cRequiredQuote . $arTags[$i] . 
$cRequiredQuote;&lt;br /&gt;	}&lt;br /&gt;	return implode(' ', $arTags);&lt;br /&gt;}&lt;br /&gt;&lt;/code&gt;&lt;br /&gt;&lt;br /&gt;To test the whole system, use the following array of test cases:&lt;br /&gt;&lt;code&gt;&lt;br /&gt;$arTestInputs = array(&lt;br /&gt;	"this test ensures that words are correctly split",&lt;br /&gt;	"in this test \"phrases\" and \"multi-word phrases\" are tested",&lt;br /&gt;	"this test shows the behaviour if an \"odd quote is detected",&lt;br /&gt;	"this test shows that 'different quotes' work too",&lt;br /&gt;	"but mixed quotes fail: \"test phrase' does not stop on the quote",&lt;br /&gt;	"which can be usefull in some cases where \"the systems' requirements\" state that it is necessary",&lt;br /&gt;	"quotes need not be attached to \" their phrase \"",&lt;br /&gt;	"embedded\"quotes are ignored!",&lt;br /&gt;	"this is also usefull and demonstrates the system's coolness",&lt;br /&gt;	"redundant   white-space is   removed from \"  tags    and phrases\"",&lt;br /&gt;	"\"\"double quotes\"\" will result in single quotes!",&lt;br /&gt;	"remember that 'double-quotes\" may be nested within single quotes'",&lt;br /&gt;	"TaGs ArE NOT case SENsITiVE!",&lt;br /&gt;	"a duplicate tag will be removed from the tag list",&lt;br /&gt;	"even a \" complex phrase\" that is equivalent to another 'compleX   PHrASe   '"&lt;br /&gt;);&lt;br /&gt;&lt;br /&gt;foreach ($arTestInputs as $sTest)&lt;br /&gt;{&lt;br /&gt;	print ("&lt;pre&gt;$sTest&lt;/pre&gt;");&lt;br /&gt;	print "&lt;pre&gt;";&lt;br /&gt;	print_r (ParseTagString($sTest));&lt;br /&gt;	print "&lt;/pre&gt;";&lt;br /&gt;	print "&lt;pre&gt;";&lt;br /&gt;	print CreateTagString(ParseTagString($sTest));&lt;br /&gt;	print "&lt;/pre&gt;";&lt;br /&gt;	print "&lt;hr /&gt;";&lt;br /&gt;}&lt;br /&gt;&lt;/code&gt;&lt;br /&gt;&lt;br /&gt;2006-03-09 0.1.0 - 0.2.0 Duplicate phrases are now ignored.
&lt;br /&gt;&lt;br /&gt;-- &lt;br /&gt;Version 0.2.0 - 2006-03-09&lt;br /&gt;STEM: The STEM Cells of PHP&lt;br /&gt;This work is licensed under a Creative Commons Attribution-ShareAlike 2.5 License&lt;br /&gt;http://creativecommons.org/licenses/by-sa/2.5/</description>
      <pubDate>Fri, 03 Mar 2006 16:58:01 GMT</pubDate>
      <guid>http://snippets.dzone.com/posts/show/1625</guid>
      <author>Charlie (Stephen Martindale)</author>
    </item>
    <item>
      <title>splitting a text list in sql</title>
      <link>http://snippets.dzone.com/posts/show/1563</link>
      <description>This goes along with my integer split procedure.&lt;br /&gt;http://www.bigbold.com/snippets/posts/show/774&lt;br /&gt;&lt;br /&gt;Oftentimes I have a list of integers I need to pass to the database to get worked on.  Such as checkboxes on a web page or some other list.  I needed some TSQL that would take a text string and split it by a separator, in this case a comma.  The following is the result of that need.&lt;br /&gt;The way I normally use it is in a stored procedure like the one below with several text type arguments.  This is a variation designed to split a list of strings separated by a special character sequence.  Imagine two lists, one of the ids and one of the data.  You parse the first list to get a table of the ids and you parse the second list to get the data and insert/update as appropriate.&lt;br /&gt;http://www.bigbold.com/snippets/posts/show/774&lt;br /&gt;&lt;br /&gt;&lt;code&gt;&lt;br /&gt;IF EXISTS (SELECT * FROM dbo.sysobjects WHERE id = object_id(N'[dbo].[uspSplitTextList]') AND OBJECTPROPERTY(id, N'IsProcedure') = 1)&lt;br /&gt;   DROP PROCEDURE [dbo].[uspSplitTextList]&lt;br /&gt;GO&lt;br /&gt;                                      &lt;br /&gt;SET QUOTED_IDENTIFIER ON &lt;br /&gt;GO&lt;br /&gt;SET ANSI_NULLS ON &lt;br /&gt;GO&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */&lt;br /&gt;-- uspSplitTextList&lt;br /&gt;--&lt;br /&gt;-- Description:&lt;br /&gt;--		splits a separated list of text items and returns the text items&lt;br /&gt;--&lt;br /&gt;-- Arguments:&lt;br /&gt;--		@list_text				- list of text items&lt;br /&gt;--		@Delimiter				- delimiter&lt;br /&gt;--&lt;br /&gt;-- Notes:&lt;br /&gt;-- 02/22/2006 - WSR : use DATALENGTH instead of LEN throughout because LEN doesn't count trailing blanks&lt;br /&gt;--&lt;br /&gt;-- History:&lt;br /&gt;-- 02/22/2006 - WSR : revised algorithm to account for items crossing 8000 character 
boundary&lt;br /&gt;--&lt;br /&gt;CREATE PROCEDURE uspSplitTextList&lt;br /&gt;	@list_text				text,&lt;br /&gt;   @Delimiter				varchar(3)&lt;br /&gt;AS&lt;br /&gt;&lt;br /&gt;SET NOCOUNT ON&lt;br /&gt;&lt;br /&gt;DECLARE @InputLen			integer			-- input text length&lt;br /&gt;DECLARE @TextPos			integer			-- current position within input text&lt;br /&gt;DECLARE @Chunk				varchar(8000)	-- chunk within input text&lt;br /&gt;DECLARE @ChunkPos			integer			-- current position within chunk&lt;br /&gt;DECLARE @DelimPos			integer			-- position of delimiter&lt;br /&gt;DECLARE @ChunkLen			integer			-- chunk length&lt;br /&gt;DECLARE @DelimLen			integer			-- delimiter length&lt;br /&gt;DECLARE @ItemBegPos		integer			-- item starting position in text&lt;br /&gt;DECLARE @ItemOrder		integer			-- item order in list&lt;br /&gt;DECLARE @DelimChar		varchar(1)		-- first character of delimiter (simple delimiter)&lt;br /&gt;&lt;br /&gt;-- create table to hold list items&lt;br /&gt;-- actually their positions because we may want to scrub this list eliminating bad entries before substring is applied&lt;br /&gt;CREATE TABLE #list_items ( item_order integer, item_begpos integer, item_endpos integer )&lt;br /&gt;&lt;br /&gt;-- process list&lt;br /&gt;IF @list_text IS NOT NULL&lt;br /&gt;   BEGIN&lt;br /&gt;&lt;br /&gt;	-- initialize&lt;br /&gt;   SET @InputLen = DATALENGTH(@list_text)&lt;br /&gt;   SET @TextPos = 1&lt;br /&gt;	SET @DelimChar = SUBSTRING(@Delimiter, 1, 1)&lt;br /&gt;	SET @DelimLen = DATALENGTH(@Delimiter)&lt;br /&gt;   SET @ItemBegPos = 1&lt;br /&gt;   SET @ItemOrder = 1&lt;br /&gt;   SET @ChunkLen = 1&lt;br /&gt;&lt;br /&gt;   -- cycle through input processing chunks&lt;br /&gt;   WHILE @TextPos &lt;= @InputLen AND @ChunkLen &lt;&gt; 0&lt;br /&gt;      BEGIN&lt;br /&gt;&lt;br /&gt;      -- get current chunk&lt;br /&gt;      SET @Chunk = SUBSTRING(@list_text, @TextPos, 8000)&lt;br /&gt;&lt;br /&gt;      -- setup initial variable 
values&lt;br /&gt;      SET @ChunkPos = 1&lt;br /&gt;      SET @ChunkLen = DATALENGTH(@Chunk)&lt;br /&gt;      SET @DelimPos = CHARINDEX(@DelimChar, @Chunk, @ChunkPos)&lt;br /&gt;&lt;br /&gt;      -- loop over the chunk, until the last delimiter&lt;br /&gt;      WHILE @ChunkPos &lt;= @ChunkLen AND @DelimPos &lt;&gt; 0&lt;br /&gt;         BEGIN&lt;br /&gt;&lt;br /&gt;			-- see if this is a full delimiter&lt;br /&gt;         IF SUBSTRING(@list_text, (@TextPos + @DelimPos - 1), @DelimLen) = @Delimiter&lt;br /&gt;            BEGIN&lt;br /&gt;&lt;br /&gt;				-- insert position&lt;br /&gt;	         INSERT INTO #list_items (item_order, item_begpos, item_endpos)&lt;br /&gt;	         VALUES (@ItemOrder, @ItemBegPos, (@TextPos + @DelimPos - 1) - 1)&lt;br /&gt;	         &lt;br /&gt;	         -- adjust positions&lt;br /&gt;	         SET @ItemOrder = @ItemOrder + 1&lt;br /&gt;	         SET @ItemBegPos = (@TextPos + @DelimPos - 1) + @DelimLen&lt;br /&gt;	         SET @ChunkPos = @DelimPos + @DelimLen&lt;br /&gt;&lt;br /&gt;				END&lt;br /&gt;         ELSE&lt;br /&gt;            BEGIN&lt;br /&gt;&lt;br /&gt;            -- adjust positions&lt;br /&gt;            SET @ChunkPos = @DelimPos + 1&lt;br /&gt;&lt;br /&gt;            END&lt;br /&gt;      &lt;br /&gt;         -- find next delimiter      &lt;br /&gt;         SET @DelimPos = CHARINDEX(@DelimChar, @Chunk, @ChunkPos)&lt;br /&gt;&lt;br /&gt;         END&lt;br /&gt;&lt;br /&gt;      -- adjust positions&lt;br /&gt;      SET @TextPos = @TextPos + @ChunkLen&lt;br /&gt;&lt;br /&gt;      END&lt;br /&gt;&lt;br /&gt;	-- handle last item&lt;br /&gt;   IF @ItemBegPos &lt;= @InputLen&lt;br /&gt;      BEGIN&lt;br /&gt;&lt;br /&gt;      -- insert position&lt;br /&gt;      INSERT INTO #list_items (item_order, item_begpos, item_endpos)&lt;br /&gt;      VALUES (@ItemOrder, @ItemBegPos, @InputLen)&lt;br /&gt;&lt;br /&gt;      END&lt;br /&gt;&lt;br /&gt;	-- delete the bad items&lt;br /&gt;   
DELETE FROM #list_items&lt;br /&gt;   WHERE item_endpos &lt; item_begpos&lt;br /&gt;&lt;br /&gt;   -- return list items&lt;br /&gt;	SELECT SUBSTRING(@list_text, item_begpos, (item_endpos - item_begpos + 1)) AS item_text, item_order, item_begpos, item_endpos&lt;br /&gt;   FROM #list_items&lt;br /&gt;   ORDER BY item_order&lt;br /&gt;&lt;br /&gt;   END&lt;br /&gt;&lt;br /&gt;DROP TABLE #list_items&lt;br /&gt;&lt;br /&gt;RETURN&lt;br /&gt;&lt;br /&gt;/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */&lt;br /&gt;&lt;br /&gt;GO&lt;br /&gt;SET QUOTED_IDENTIFIER OFF &lt;br /&gt;GO&lt;br /&gt;SET ANSI_NULLS ON &lt;br /&gt;GO&lt;br /&gt;&lt;/code&gt;</description>
      <pubDate>Fri, 24 Feb 2006 04:12:02 GMT</pubDate>
      <guid>http://snippets.dzone.com/posts/show/1563</guid>
      <author>Will_Rickards (Will Rickards)</author>
    </item>
  </channel>
</rss>
