Never been to DZone Snippets before?

Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world.

« Newer Snippets
Older Snippets »
Showing 1-4 of 4 total  RSS 

decompressing various archive types

// decompressing various archive types with this python script
// usage unpack <archive filename>

#!/usr/bin/env python
#
# simple python script for extracting mostly used types of archives
# this script extracts .tar, .tar.gz, .tar.bz2, .gz and .zip archives 
#

import sys	# required for fetching command line arguments 
import os	# required for calling commands for archive extracting

def unpack(s):
	"""Extract the archive named by ``s`` with the matching system tool.

	The tool is chosen from the file name's extension:
	.tar.gz and .tar.bz2 and .tar go to ``tar``, .gz to ``gunzip`` and
	.zip to ``unzip``.  Any other name prints an error message and nothing
	is executed.

	s -- path of the archive to extract.
	Returns None.
	"""
	# Imported here so the function does not depend on a new file-level import.
	import subprocess

	# Longer extensions are tested first: '.tar.gz' also ends with '.gz'.
	if s.endswith('.tar.gz'):
		command = ['tar', '-xvvzf', s]
	elif s.endswith('.tar.bz2'):
		command = ['tar', '-xvvjf', s]
	elif s.endswith('.tar'):
		command = ['tar', '-xvvf', s]
	elif s.endswith('.gz'):
		command = ['gunzip', s]
	elif s.endswith('.zip'):
		command = ['unzip', s]
	else:
		print("Wrong archive or filename")		# other types not supported
		return

	# An argument list is run without a shell, so spaces or shell
	# metacharacters in the file name cannot change the command.
	subprocess.call(command)

# Main program: the archive to extract is the first command-line argument.
# Reading sys.argv[1] raises IndexError when no argument was passed; that case
# is reported to the user instead of ending in a traceback.
try:
	filename = sys.argv[1]
except IndexError:
	print("Filename is invalid!")
else:
	unpack(filename)

convert apache http combined logs into sql (and import it into a mysql database eventually)

If you need to extract the data from your HTTP server log files and put it in a database so that you can query it with your usual SQL tools, this Perl script does just that.

It was hard to find, which is why I put it here.

#!/usr/bin/perl -w
# Written by Aaron Jenson.
# Original source: http://www.visualprose.com/software.php
# Updated to work under Perl 5.6.1 by Edward Rudd
# Updated 24 march 2007 by Slim Amamou <slim.amamou@alpha-studios.com>
#  - output SQL with the option '--sql'
#  - added SQL create table script to the HELP
#
#  NOTE : you need the TimeDate library (http://search.cpan.org/dist/TimeDate/)
#
use strict;
use Getopt::Long qw(:config bundling);
use DBI;
use Date::Parse;

# Command-line options; filled in by GetOptions below.
my %options = ();

# File-level scratch variables used by the import loop.
my ($i, $sql, $valuesSql, $line) = (0, '', '', '');
my ($dbh, $sth) = (0, 0);
my @parts = ();
my $part;
my $col = '';

# Positions (counted in log fields) of the two fields that SplitLogLine
# expands into more than one value.
my ($TIMESTAMP, $REQUEST_LINE) = (3, 4);

# Table columns, in the order SplitLogLine returns the matching values.
# The number after each name is its index in this list.
my @cols = (
	'remote_host',			##  0
	'remote_logname',		##  1
	'remote_user',			##  2
	'request_time',			##  3  timestamp as text, in brackets
	'time_stamp',			##  4  same timestamp as returned by str2time
	'request_line',			##  5
	'request_method',		##  6
	'request_uri',			##  7
	'request_args',			##  8
	'request_protocol',		##  9
	'status',				## 10
	'bytes_sent',			## 11
	'referer',				## 12
	'agent'					## 13
);

# Parse the command line into %options.  --version and --help print their
# message and exit at once.  GetOptions returns false when it meets an unknown
# or malformed option (it has already warned about it), so stop with the usage
# text rather than run with a command line that was only partly understood.
GetOptions (\%options,
		"version" => sub { VERSION_MESSAGE(); exit 0; },
		"help|?" => sub { HELP_MESSAGE(); exit 0; },
		"host|h=s",
		"database|d=s",
		"table|t=s",
		"username|u=s",
		"password|p=s",
		"logfile|f=s",
		"sql")
	or do { HELP_MESSAGE(); exit 1; };

# Defaults for everything that was not given.
$options{host} ||= 'localhost';
$options{database} ||= '';
$options{table} ||= '';
$options{username} ||= '';
$options{password} ||= '';
$options{logfile} ||= '';
$options{sql} ||= '';

# At least one output is needed: a database to insert into, or --sql to
# print the statements.
if( ! ($options{database} || $options{sql}))
{
	HELP_MESSAGE();
	print "Must supply a database to connect to (or --sql to print the statements instead).\n";
	exit 1;
}

# The table name is part of every generated statement.
if( ! $options{table} )
{
	HELP_MESSAGE();
	print "Must supply table name.\n";
	exit 1;
}

# Read from the named log file by reopening STDIN on it; without --logfile the
# script reads whatever is already on STDIN.
if( $options{logfile} )
{
	if( ! -e $options{logfile} )
	{
		print  "File '$options{logfile}' doesn't exist.\n";
		exit 1;
	}
	# Three-argument open: the file name is never parsed for a mode or a
	# pipe character, whatever it contains.
	open(STDIN, '<', $options{logfile})
		or die "Can't open $options{logfile} for reading: $!\n";
}

# Connect only when a database was requested (with --sql alone there is
# nothing to connect to).
if( $options{database} )
{
	$dbh = Connect();
	if (! $dbh) {
		exit 1;
	}
}

# Build the statement prefix once.  Entries of @cols that are empty are left
# out, together with the value at the same position.
my @activeColumns = grep { $cols[$_] } 0 .. $#cols;
my $columnList = join(',', map { $cols[$_] } @activeColumns);
$sql = "INSERT INTO $options{table} ($columnList) VALUES (";

# For the database the statement is prepared a single time with one
# placeholder per column; the driver then handles quoting of the log text.
if( $options{database} )
{
	my $placeholders = join(',', ('?') x @activeColumns);
	$sth = $dbh->prepare("$sql$placeholders)")
		or die "Unable to prepare insert statement: " . $dbh->errstr() . "\n";
}

my ($linecount,$insertcount) = (0,0);
while( defined($line = <STDIN>) )
{
	$linecount++;
	@parts = SplitLogLine( $line );

	# The numeric timestamp sits right after the textual one.  It is missing
	# or zero when the line is too short or its date could not be parsed;
	# such lines are skipped.
	next if( ! $parts[$TIMESTAMP+1] );

	# Lines with fewer fields than columns get empty strings for the rest.
	my @values = map { defined $parts[$_] ? $parts[$_] : '' } @activeColumns;

	# Literal form of the values, used for --sql output and error reports:
	# backslashes and single quotes are escaped, each value is quoted.
	my @quoted = ();
	foreach my $value (@values)
	{
		my $escaped = $value;
		$escaped =~ s/\\/\\\\/g;
		$escaped =~ s/'/\\'/g;
		push(@quoted, "'$escaped'");
	}
	$valuesSql = join(',', @quoted);

	if( $options{database} )
	{
		if( ! $sth->execute(@values) )
		{
			# Reported on STDERR so it cannot end up inside --sql output.
			print STDERR "Unable to perform specified query.\n$sql$valuesSql)\n" . $sth->errstr() . "\n";
		} else {
			$insertcount++;
		}
	}
	if( $options{sql} )
	{
		print "$sql$valuesSql);\n";
	}
}
if( ! $options{sql} )
{
	print "Parsed $linecount Log lines\n";
	print "Inserted $insertcount records\n";
	print "to table '$options{table}' in database '$options{database}' on '$options{host}'\n";
}

# Close the connection explicitly once all lines are processed.
if( $options{database} )
{
	$dbh->disconnect();
}

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
# Opens the MySQL connection described by the host, database, username and
# password entries of %options and returns whatever DBI->connect returns.
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
sub Connect
{
	return DBI->connect(
		"DBI:mysql:$options{database};hostname=$options{host}",
		$options{username},
		$options{password}
	);
}


# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
# Splits up a log line into its parts.
#
# Fields are separated by single spaces.  A field that starts with a double
# quote runs to the next unescaped double quote, a field that starts with '['
# runs to the next ']'; the delimiters themselves are not part of the value.
# Inside a double-quoted field a backslash protects the next character:
# \" gives a literal quote, any other pair is kept as written.
#
# Two fields are expanded while splitting:
#  - field $TIMESTAMP yields the text in brackets followed by the result of
#    str2time on it (0 when str2time cannot parse the text);
#  - field $REQUEST_LINE yields the full request line followed by its method,
#    uri, query arguments and protocol.
#
# Returns the list of values; an undefined argument gives an empty list.
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
sub SplitLogLine
{
	my $line = shift;
	my @parts = ();
	my $part = '';
	my $count = 0;		# number of the log field being collected
	my $closer = '';	# character that ends the open quoted field, '' outside one

	return @parts if( ! defined $line );
	chomp($line);

	my $length = length($line);
	for( my $i = 0; $i < $length; ++$i )
	{
		my $char = substr($line, $i, 1);

		if( $closer ne '' )
		{
			if( $closer eq '"' && $char eq '\\' && $i + 1 < $length )
			{
				# Escaped character: it must not end the field.
				my $next = substr($line, $i + 1, 1);
				$part .= ( $next eq '"' ) ? '"' : $char . $next;
				++$i;
			}
			elsif( $char eq $closer )
			{
				$closer = '';
			}
			else
			{
				$part .= $char;
			}
		}
		elsif( $char eq ' ' )
		{
			## print "Found part $part.\n";
			if( $count == $TIMESTAMP )
			{
				push(@parts, "[".$part."]");
				# Callers compare this value numerically, so an
				# unparseable date becomes 0 instead of undef.
				my $epoch = str2time($part);
				$part = defined $epoch ? $epoch : 0;
			}
			push(@parts, $part);
			if( $count == $REQUEST_LINE )
			{
				push(@parts, _SplitRequestLine($part));
			}
			++$count;
			$part = '';
		}
		elsif( $part eq '' && ( $char eq '"' || $char eq '[' ) )
		{
			# A delimiter only opens a quoted field at the start of a field;
			# anywhere else it is ordinary text.
			$closer = ( $char eq '[' ) ? ']' : '"';
		}
		else
		{
			$part .= $char;
		}
	}
	# length() rather than truth, so a final field of "0" is kept.
	push(@parts,$part) if( length($part) );

	return @parts;
}

# Splits a request line into (method, uri, arguments, protocol).
# The first word is the method, the last of three or more words the protocol,
# and what lies between is the target, cut at its first '?' into uri and
# arguments.  Pieces that are absent come back as empty strings.
sub _SplitRequestLine
{
	my $request = shift;
	my @words = split(/ /, $request);

	my $method = @words ? shift(@words) : '';
	my $protocol = ( @words > 1 ) ? pop(@words) : '';
	my $target = join(' ', @words);

	my ($uri, $args) = split(/\?/, $target, 2);
	$uri = '' if( ! defined $uri );
	$args = '' if( ! defined $args );

	return ($method, $uri, $args, $protocol);
}


# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
# Prints the usage/help message for this program on standard output,
# followed by a create-table statement matching the columns that are written.
# The text is a non-interpolating here-document, so it is printed verbatim.
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
sub HELP_MESSAGE
{
	print <<'EOF';
Imports an Apache combined log into a MySQL database.
Usage: mysql_import_combined_log.pl -d <database name> -t <table name> [-h <hostname>] [-u <username>] [-p <password>] [-f <filename>] [--sql]
 --host|-h <host name>         The host to connect to.  Default is localhost.
 --database|-d <database name> The database to use.  Required unless --sql is given.
 --username|-u <username>      The user to connect as.
 --password|-p <password>      The user's password.
 --table|-t <table name>       The name of the table in which to insert data.
 --logfile|-f <file name>      The file to read from.  If not given, data is read from stdin.
 --sql                         Output SQL
 --help|-?                     Print out this help message.
 --version                     Print out the version of this software.

----------------------------------
-- SQL create statements for the table
--

create table <TABLE_NAME> (
    remote_host varchar(50) ,
    remote_logname varchar(50) ,
    remote_user varchar(50) ,
    request_time char(28),
    time_stamp varchar(10) ,
    request_line varchar(255),
    request_method varchar(10) ,
    request_uri varchar(255),
    request_args varchar(255),
    request_protocol varchar(10) ,
    status varchar(10) ,
    bytes_sent varchar(10) ,
    referer varchar(255) ,
    agent varchar(255)
);

EOF
}



# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
# Prints the program name, its version and the author credits, one per line,
# on standard output.
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
sub VERSION_MESSAGE
{
	my @credits = (
		"mysql_import_combined_log.pl version 1.2",
		"Version 1.0 Written by Aaron Jenson.",
		"Update to work with perl 5.6.1 by Edward Rudd",
	);
	print join("\n", @credits), "\n";
}

1;

Extracting all subarray indices from a multi-dimensional array

Note the use of Array#fetch to define return values for missing indices!
Example: ar = []; x = ar.fetch(5, []) returns an empty array for non-existing index 5.



class Array

   # Legacy helper, kept unchanged for existing callers.
   #
   # Walks the array and its nested arrays and collects index lists in +ar+.
   # The same +temp+ list is appended to for every sub-array found at one
   # level, so an entry accumulates the indices of all sub-arrays of that
   # level (after the prefix it was started with) instead of describing a
   # single path.  Use extract_subar_indices for real index paths.
   def subar_indices(ar=[], temp=[])
      temp = temp.dup
      self.each_with_index { |item, index| 
         if item.class == Array
            temp << index
            ar << temp
            ar = item.subar_indices(ar, temp)
         end
      }
      ar.uniq
   end


   # Returns the index path of every array nested inside this one.
   #
   # A path is the list of indices that leads from the receiver to the
   # sub-array: for [1, [2, [3]]] the paths are [1] and [1, 1].  Paths of
   # top-level sub-arrays come first, followed by the deeper ones in the
   # order a depth-first walk finds them.  An array without nested arrays
   # gives [].
   def extract_subar_indices
      found = []

      # Depth-first walk; +prefix+ is the path of the array being scanned.
      walk = lambda do |list, prefix|
         list.each_with_index do |item, index|
            next unless item.is_a?(Array)
            path = prefix + [index]
            found << path
            walk.call(item, path)
         end
      end
      walk.call(self, [])

      top_level, deeper = found.partition { |path| path.size == 1 }
      top_level + deeper
   end

end


# Demo: print the index paths of all arrays nested in a sample array.
sample = [1, 2, [3, 4, [5, 6]], 7, 8, [9]]

p sample.extract_subar_indices  # [[2], [5], [2, 2]]


Extracting all keys from a multi-dimensional hash

Extract all complete key sequences from a multi-dimensional hash (with the last key not pointing to another hash; cf. h[1][2][3] vs h[1][2][3][4] below).


class Hash

   # Returns every complete key sequence of a nested hash, i.e. each list of
   # keys that leads from the receiver to a value that is not itself a Hash.
   # Works for any nesting depth.  Sequences appear in the order the hashes
   # are iterated, depth first.  Empty nested hashes contribute nothing.
   #
   #   {"a"=>"b", 1=>{2=>"c"}}.extract_keys   #=> [["a"], [1, 2]]
   def extract_keys
      sequences = []
      each_path { |path, _value| sequences << path }
      sequences
   end


   # Returns the values found at the end of every complete key sequence, in
   # the same order as extract_keys.
   #
   #   {"a"=>"b", 1=>{2=>"c"}}.all_values   #=> ["b", "c"]
   def all_values
      leaves = []
      each_path { |_path, value| leaves << value }
      leaves
   end


#-------------------------


   # Find every path and its value in a Hash, http://snippets.dzone.com/posts/show/3565
   # Author: Florian Aßmann
   #
   # Yields (path, value) for every value that is not a Hash, where path is
   # the array of keys leading to it.  Raises ArgumentError without a block.
   def each_path
      raise ArgumentError unless block_given?
      self.class.each_path(self) { |path, object| yield(path, object) }
   end

   # Recursive worker behind the instance method: descends into +object+
   # while it is a Hash and yields (path, object) once it is not.
   # (The string-path variant of the original snippet built "key/key/"
   # strings instead of arrays.)
   def self.each_path(object, path = [], &block)
      if object.is_a?(Hash)
         object.each do |key, value|
            # path + [key] keeps a key that is itself an Array intact.
            self.each_path(value, path + [key], &block)
         end
      else 
         yield(path, object)
      end
   end 

end


# Demo 1: a hash nested four levels deep.
# The expected results below assume the hash is iterated in insertion order;
# with another iteration order the same entries appear in another sequence.
h = {"a"=>"b", "c"=>"d", 1=>{2=>{"e"=>"f", 3=>{4=>"value"}}}} 

puts h[1][2].class          # Hash
puts h[1][2]["e"].class     # String

key_paths = h.extract_keys
p key_paths                         # [["a"], ["c"], [1, 2, "e"], [1, 2, 3, 4]]

puts h[1][2].has_key?("e")                 # true
puts key_paths.include?([1, 2, "e"])       # true


# Demo 2: several nested branches next to plain entries.
h = {700=>{4=>"value"}, "a"=>"b", 3=>{4=>"value"}, "c"=>"d", 1=>{2=>{"e"=>"f", 3=>{4=>"value"}, 300=>{4=>"value"}}}} 
p h
p h.extract_keys    #=> [[700, 4], ["a"], [3, 4], ["c"], [1, 2, "e"], [1, 2, 3, 4], [1, 2, 300, 4]]
p h.all_values      #=> ["value", "b", "value", "d", "f", "value", "value"]


#-----------------


# Demo 3: each_path on a two-level hash, collecting [path, value] pairs.
paths = []
complex_hash = {
  :a => { :aa => '1', :ab => '2' },
  :b => { :ba => '3', :bb => '4' }
}
complex_hash.each_path do |path, value|
  paths << [ path, value ]
end

# With insertion-ordered iteration:
# [[[:a, :aa], "1"], [[:a, :ab], "2"], [[:b, :ba], "3"], [[:b, :bb], "4"]]
p paths
puts


# Demo 4: three alternative inputs; only the last assignment is used below.
h = {"a"=>"b", "c"=>"d", 1=>{2=>{"e"=>"f", 3=>{4=>"value"}}}} 
h = {"a"=>"b", "l" => lambda { |x| x+1 }, 1=>{2=>{"e"=>"f", 3=>{4=>"value"}}}} 
h = {700=>{4=>"value"}, "a"=>"b", nil => "NILVALUE", 3=>{4=>"value"}, "c"=>"d", 1=>{2=>{"e"=>"f", 3=>{4=>"value"}, 300=>{4=>"value"}}}} 

p h
p h.extract_keys

# complete key sequences (with last key not pointing to another hash)
keys = []
h.each_path do |path, value|
  keys << path
end
p keys

# complete key sequences plus values
paths = []
h.each_path do |path, value|
  paths << [ path, value ]
end
p paths

# all values of complete key sequences
vals = []
h.each_path do |path, value|
  vals << value
end
p vals

p h.all_values   # same

« Newer Snippets
Older Snippets »
Showing 1-4 of 4 total  RSS