Never been to DZone Snippets before?

Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world

About this user

Slim Amamou

« Newer Snippets
Older Snippets »
Showing 1-1 of 1 total  RSS 

convert apache http combined logs into sql (and import it into a mysql database eventually)

you need to extract the data in your http server log files and put it in a database to query it with your usual tools using SQL. this perl script does just this.

it was hard to find it, that's why i put it here.

   1  
   2  #!/usr/bin/perl -w
   3  # Written by Aaron Jenson.
   4  # Original source: http://www.visualprose.com/software.php
   5  # Updated to work under Perl 5.6.1 by Edward Rudd
   6  # Updated 24 march 2007 by Slim Amamou <slim.amamou@alpha-studios.com>
   7  #  - output SQL with the option '--sql'
   8  #  - added SQL create table script to the HELP
   9  #
  10  #  NOTE : you need the TimeDate library (http://search.cpan.org/dist/TimeDate/)
  11  #
  12  use strict;
  13  use Getopt::Long qw(:config bundling);
  14  use DBI;
  15  use Date::Parse;
  16  
  17  my %options = ();
  18  my $i = 0;
  19  my $sql = '';
  20  my $valuesSql = '';
  21  my $line = '';
  22  my $dbh = 0;
  23  my $sth = 0;
  24  my @parts = ();
  25  my $part;
  26  my $TIMESTAMP = 3;
  27  my $REQUEST_LINE = 4;
  28  my @cols = (
  29  	'remote_host',			## 0
  30  	'remote_logname',		## 1
  31  	'remote_user',			## 2
  32  	'request_time',			## 3.string
  33  	'time_stamp',			## 3.posix
  34  	'request_line',			## 5
  35  	'request_method',		## 6
  36  	'request_uri',			## 7
  37  	'request_args',			## 8
  38  	'request_protocol',		## 9
  39  	'status',				## 10
  40  	'bytes_sent',			## 11
  41  	'referer',				## 12
  42  	'agent'					## 13
  43  );
  44  my $col = '';
  45  
  46  GetOptions (\%options,
  47  		"version" => sub { VERSION_MESSAGE(); exit 0; },
  48  		"help|?" => sub { HELP_MESSAGE(); exit 0; },
  49  		"host|h=s",
  50  		"database|d=s",
  51  		"table|t=s",
  52  		"username|u=s",
  53  		"password|p=s",
  54  		"logfile|f=s",
  55  		"sql");
  56  
  57  $options{host} ||= 'localhost';
  58  $options{database} ||= '';
  59  $options{username} ||= '';
  60  $options{password} ||= '';
  61  $options{logfile} ||= '';
  62  $options{sql} ||= '';
  63  
  64  if( ! ($options{database} || $options{sql}))
  65  {
  66  	HELP_MESSAGE();
  67  	print "Must supply a database to connect to.\n";
  68  	exit 1;
  69  }
  70  
  71  if( ! $options{table} )
  72  {
  73  	HELP_MESSAGE();
  74  	print "Must supply table name.\n";
  75  	exit 1;
  76  }
  77  
  78  if( $options{logfile} )
  79  {
  80  	if( ! -e $options{logfile} )
  81  	{
  82  		print  "File '$options{logfile}' doesn't exist.\n";
  83  		exit 1;
  84  	}
  85  	open(STDIN, "<$options{logfile}") || die "Can't open $options{logfile} for reading.";
  86  }
  87  
  88  if( $options{database} )
  89  {
  90  	$dbh = Connect();
  91  	if (! $dbh) {
  92  		exit 1;
  93  	}
  94  }
  95  
  96  $sql = "INSERT INTO $options{table} (";
  97  foreach $col (@cols)
  98  {
  99  	$sql .= "$col," if( $col );
 100  }
 101  chop($sql);
 102  $sql .= ') VALUES (';
 103  my ($linecount,$insertcount) = (0,0);
 104  while($line = <STDIN>)
 105  {
 106  	$linecount++;
 107  	@parts = SplitLogLine( $line );
 108  	next if( $parts[$TIMESTAMP+1] == 0 );
 109  	$valuesSql = '';
 110  	for( $i = 0; $i < @cols; ++$i )
 111  	{
 112  		$parts[$i] =~ s/\\/\\\\/g;
 113  		$parts[$i] =~ s/'/\\'/g;
 114  		$valuesSql .= "'$parts[$i]'," if( $cols[$i] );
 115  	}
 116  	chop($valuesSql);
 117  
 118  	if( $options{database} )
 119  	{
 120  		$sth  = $dbh->prepare("$sql$valuesSql)");
 121  		if( ! $sth->execute() )
 122  		{
 123  			print "Unable to perform specified query.\n$sql$valuesSql\n" . $sth->errstr() . "\n";
 124  		} else {
 125  			$insertcount++;
 126  		}
 127  		$sth->finish();
 128  	}
 129  	if( $options{sql} )
 130  	{
 131  		print "$sql$valuesSql);\n";
 132  	}
 133  }
 134  if( ! $options{sql} )
 135  {
 136  	print "Parsed $linecount Log lines\n";
 137  	print "Inserted $insertcount records\n";
 138  	print "to table '$options{table}' in database '$options{database}' on '$options{host}'\n";
 139  }
 140  
 141  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 142  # Connects to a MySQL database and returns the connection.
 143  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 144  sub Connect
 145  {
 146  	my $dsn = "DBI:mysql:$options{database};hostname=$options{host}";
 147  	return DBI->connect( $dsn, $options{username}, $options{password} );
 148  }
 149  
 150  
 151  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 152  # Splits up a log line into its parts.
 153  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 154  sub SplitLogLine
 155  {
 156  	my $line = shift;
 157  	my $i = 0;
 158  	my $inQuote = 0;
 159  	my $char = '';
 160  	my $part = '';
 161  	my @parts = ();
 162  	my $count = 0;
 163  	chomp($line);
 164  	for( $i = 0; $i < length($line); ++$i )
 165  	{
 166  		$char = substr($line, $i, 1);
 167  		if( $char eq ' ' && ! $inQuote )
 168  		{
 169  			## print "Found part $part.\n";
 170  			if( $count == $TIMESTAMP )
 171  			{
 172  				push(@parts, "[".$part."]");
 173  				$part = str2time($part);
 174  			}
 175  			push(@parts, $part);
 176  			if( $count == $REQUEST_LINE )
 177  			{
 178  				my @request = split(/[ ?]/, $part);
 179  				push(@parts, $request[0]);
 180  				push(@parts, $request[1]);
 181  				if( $request[3] )
 182  				{
 183  					push(@parts, $request[2]);
 184  					push(@parts, $request[3]);
 185  				}
 186  				else
 187  				{
 188  					push(@parts, '');
 189  					push(@parts, $request[2]);
 190  				}
 191  				$count += 5;
 192  			}
 193  			else
 194  			{
 195  				++$count;
 196  			}
 197  			$part = '';
 198  		}
 199  		elsif( $char eq '"' || $char eq '[' || $char eq ']' )
 200  		{
 201  			$inQuote = !$inQuote;
 202  		}
 203  		else
 204  		{
 205  			$part .= $char;
 206  		}
 207  	}
 208  	push(@parts,$part) if $part;
 209  
 210  	return @parts;
 211  }
 212  
 213  
 214  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 215  # Prints the usage/help message for this program.
 216  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 217  sub HELP_MESSAGE
 218  {
 219  	print<<EOF;
 220  Imports an Apache combined log into a MySQL database.
 221  Usage: mysql_import_combined_log.pl -d <database name> -t <table name> [-h <hostname>] [-u <username>] [-p <password>] [-f <filename]
 222   --host|-h <host name>         The host to connect to.  Default is localhost.
 223   --database|-d <database name> The database to use.  Required.
 224   --username|-u <username>      The user to connect as.
 225   --password|-p <password>      The user's password.
 226   --table|-t <table name>       The name of the table in which to insert data.
 227   --logfile|-f <file name>      The file to read from.  If not given, data is read from stdin.
 228   --sql                         Output SQL
 229   --help|-?                     Print out this help message.
 230   --version                     Print out the version of this software.
 231  
 232  ----------------------------------
 233  -- SQL create statements for the table
 234  --
 235  
 236  create table <TABLE_NAME> (
 237      remote_host varchar(50) ,
 238      remote_logname varchar(50) ,
 239      remote_user varchar(50) ,
 240      request_time char(28),
 241      time_stamp varchar(10) ,
 242      request_line varchar(255),
 243      request_method varchar(10) ,
 244      request_uri varchar(255),
 245      request_args varchar(255),
 246      request_protocol varchar(10) ,
 247      status varchar(10) ,
 248      bytes_sent varchar(10) ,
 249      referer varchar(255) ,
 250      agent varchar(255)
 251  );
 252  
 253  EOF
 254  }
 255  
 256  
 257  
 258  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 259  # Prints the version information for this program
 260  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 261  sub VERSION_MESSAGE
 262  {
 263  	print "mysql_import_combined_log.pl version 1.2\n";
 264  	print "Version 1.0 Written by Aaron Jenson.\n";
 265  	print "Update to work with perl 5.6.1 by Edward Rudd\n";
 266  }
 267  
 268  1;
 269  
« Newer Snippets
Older Snippets »
Showing 1-1 of 1 total  RSS