<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
  <channel>
    <title>DZone Snippets: load code</title>
    <link>http://snippets.dzone.com/posts</link>
    <pubDate>Mon, 13 Oct 2008 22:04:15 GMT</pubDate>
    <description>DZone Snippets: load code</description>
    <item>
      <title>convert apache http combined logs into sql (and import it into a mysql database eventually)</title>
      <link>http://snippets.dzone.com/posts/show/3721</link>
      <description>you need to extract the data in your http server log files and put it in a database to query it with your usual tools using SQL. this perl script does just this.&lt;br /&gt;&lt;br /&gt;it was hard to find it, that's why i put it here.&lt;br /&gt;&lt;br /&gt;&lt;code&gt;&lt;br /&gt;#!/usr/bin/perl -w&lt;br /&gt;# Written by Aaron Jenson.&lt;br /&gt;# Original source: http://www.visualprose.com/software.php&lt;br /&gt;# Updated to work under Perl 5.6.1 by Edward Rudd&lt;br /&gt;# Updated 24 march 2007 by Slim Amamou &lt;slim.amamou@alpha-studios.com&gt;&lt;br /&gt;#  - output SQL with the option '--sql'&lt;br /&gt;#  - added SQL create table script to the HELP&lt;br /&gt;#&lt;br /&gt;#  NOTE : you need the TimeDate library (http://search.cpan.org/dist/TimeDate/)&lt;br /&gt;#&lt;br /&gt;use strict;&lt;br /&gt;use Getopt::Long qw(:config bundling);&lt;br /&gt;use DBI;&lt;br /&gt;use Date::Parse;&lt;br /&gt;&lt;br /&gt;my %options = ();&lt;br /&gt;my $i = 0;&lt;br /&gt;my $sql = '';&lt;br /&gt;my $valuesSql = '';&lt;br /&gt;my $line = '';&lt;br /&gt;my $dbh = 0;&lt;br /&gt;my $sth = 0;&lt;br /&gt;my @parts = ();&lt;br /&gt;my $part;&lt;br /&gt;my $TIMESTAMP = 3;&lt;br /&gt;my $REQUEST_LINE = 4;&lt;br /&gt;my @cols = (&lt;br /&gt;	'remote_host',			## 0&lt;br /&gt;	'remote_logname',		## 1&lt;br /&gt;	'remote_user',			## 2&lt;br /&gt;	'request_time',			## 3.string&lt;br /&gt;	'time_stamp',			## 3.posix&lt;br /&gt;	'request_line',			## 5&lt;br /&gt;	'request_method',		## 6&lt;br /&gt;	'request_uri',			## 7&lt;br /&gt;	'request_args',			## 8&lt;br /&gt;	'request_protocol',		## 9&lt;br /&gt;	'status',				## 10&lt;br /&gt;	'bytes_sent',			## 11&lt;br /&gt;	'referer',				## 12&lt;br /&gt;	'agent'					## 13&lt;br /&gt;);&lt;br /&gt;my $col = '';&lt;br /&gt;&lt;br /&gt;GetOptions (\%options,&lt;br /&gt;		"version" =&gt; sub { VERSION_MESSAGE(); exit 0; },&lt;br /&gt;		"help|?" =&gt; sub { HELP_MESSAGE(); exit 0; },&lt;br /&gt;		"host|h=s",&lt;br /&gt;		"database|d=s",&lt;br /&gt;		"table|t=s",&lt;br /&gt;		"username|u=s",&lt;br /&gt;		"password|p=s",&lt;br /&gt;		"logfile|f=s",&lt;br /&gt;		"sql");&lt;br /&gt;&lt;br /&gt;$options{host} ||= 'localhost';&lt;br /&gt;$options{database} ||= '';&lt;br /&gt;$options{username} ||= '';&lt;br /&gt;$options{password} ||= '';&lt;br /&gt;$options{logfile} ||= '';&lt;br /&gt;$options{sql} ||= '';&lt;br /&gt;&lt;br /&gt;if( ! ($options{database} || $options{sql}))&lt;br /&gt;{&lt;br /&gt;	HELP_MESSAGE();&lt;br /&gt;	print "Must supply a database to connect to.\n";&lt;br /&gt;	exit 1;&lt;br /&gt;}&lt;br /&gt;&lt;br /&gt;if( ! $options{table} )&lt;br /&gt;{&lt;br /&gt;	HELP_MESSAGE();&lt;br /&gt;	print "Must supply table name.\n";&lt;br /&gt;	exit 1;&lt;br /&gt;}&lt;br /&gt;&lt;br /&gt;if( $options{logfile} )&lt;br /&gt;{&lt;br /&gt;	if( ! -e $options{logfile} )&lt;br /&gt;	{&lt;br /&gt;		print  "File '$options{logfile}' doesn't exist.\n";&lt;br /&gt;		exit 1;&lt;br /&gt;	}&lt;br /&gt;	open(STDIN, "&lt;$options{logfile}") || die "Can't open $options{logfile} for reading.";&lt;br /&gt;}&lt;br /&gt;&lt;br /&gt;if( $options{database} )&lt;br /&gt;{&lt;br /&gt;	$dbh = Connect();&lt;br /&gt;	if (! $dbh) {&lt;br /&gt;		exit 1;&lt;br /&gt;	}&lt;br /&gt;}&lt;br /&gt;&lt;br /&gt;$sql = "INSERT INTO $options{table} (";&lt;br /&gt;foreach $col (@cols)&lt;br /&gt;{&lt;br /&gt;	$sql .= "$col," if( $col );&lt;br /&gt;}&lt;br /&gt;chop($sql);&lt;br /&gt;$sql .= ') VALUES (';&lt;br /&gt;my ($linecount,$insertcount) = (0,0);&lt;br /&gt;while($line = &lt;STDIN&gt;)&lt;br /&gt;{&lt;br /&gt;	$linecount++;&lt;br /&gt;	@parts = SplitLogLine( $line );&lt;br /&gt;	next if( $parts[$TIMESTAMP+1] == 0 );&lt;br /&gt;	$valuesSql = '';&lt;br /&gt;	for( $i = 0; $i &lt; @cols; ++$i )&lt;br /&gt;	{&lt;br /&gt;		$parts[$i] =~ s/\\/\\\\/g;&lt;br /&gt;		$parts[$i] =~ s/'/\\'/g;&lt;br /&gt;		$valuesSql .= "'$parts[$i]'," if( $cols[$i] );&lt;br /&gt;	}&lt;br /&gt;	chop($valuesSql);&lt;br /&gt;&lt;br /&gt;	if( $options{database} )&lt;br /&gt;	{&lt;br /&gt;		$sth  = $dbh-&gt;prepare("$sql$valuesSql)");&lt;br /&gt;		if( ! $sth-&gt;execute() )&lt;br /&gt;		{&lt;br /&gt;			print "Unable to perform specified query.\n$sql$valuesSql\n" . $sth-&gt;errstr() . "\n";&lt;br /&gt;		} else {&lt;br /&gt;			$insertcount++;&lt;br /&gt;		}&lt;br /&gt;		$sth-&gt;finish();&lt;br /&gt;	}&lt;br /&gt;	if( $options{sql} )&lt;br /&gt;	{&lt;br /&gt;		print "$sql$valuesSql);\n";&lt;br /&gt;	}&lt;br /&gt;}&lt;br /&gt;if( ! $options{sql} )&lt;br /&gt;{&lt;br /&gt;	print "Parsed $linecount Log lines\n";&lt;br /&gt;	print "Inserted $insertcount records\n";&lt;br /&gt;	print "to table '$options{table}' in database '$options{database}' on '$options{host}'\n";&lt;br /&gt;}&lt;br /&gt;&lt;br /&gt;# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # &lt;br /&gt;# Connects to a MySQL database and returns the connection.&lt;br /&gt;# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # &lt;br /&gt;sub Connect&lt;br /&gt;{&lt;br /&gt;	my $dsn = "DBI:mysql:$options{database};hostname=$options{host}";&lt;br /&gt;	return DBI-&gt;connect( $dsn, $options{username}, $options{password} );&lt;br /&gt;}&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # &lt;br /&gt;# Splits up a log line into its parts.&lt;br /&gt;# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # &lt;br /&gt;sub SplitLogLine&lt;br /&gt;{&lt;br /&gt;	my $line = shift;&lt;br /&gt;	my $i = 0;&lt;br /&gt;	my $inQuote = 0;&lt;br /&gt;	my $char = '';&lt;br /&gt;	my $part = '';&lt;br /&gt;	my @parts = ();&lt;br /&gt;	my $count = 0;&lt;br /&gt;	chomp($line);&lt;br /&gt;	for( $i = 0; $i &lt; length($line); ++$i )&lt;br /&gt;	{&lt;br /&gt;		$char = substr($line, $i, 1);&lt;br /&gt;		if( $char eq ' ' &amp;&amp; ! $inQuote )&lt;br /&gt;		{&lt;br /&gt;			## print "Found part $part.\n";&lt;br /&gt;			if( $count == $TIMESTAMP )&lt;br /&gt;			{&lt;br /&gt;				push(@parts, "[".$part."]");&lt;br /&gt;				$part = str2time($part);&lt;br /&gt;			}&lt;br /&gt;			push(@parts, $part);&lt;br /&gt;			if( $count == $REQUEST_LINE )&lt;br /&gt;			{&lt;br /&gt;				my @request = split(/[ ?]/, $part);&lt;br /&gt;				push(@parts, $request[0]);&lt;br /&gt;				push(@parts, $request[1]);&lt;br /&gt;				if( $request[3] )&lt;br /&gt;				{&lt;br /&gt;					push(@parts, $request[2]);&lt;br /&gt;					push(@parts, $request[3]);&lt;br /&gt;				}&lt;br /&gt;				else&lt;br /&gt;				{&lt;br /&gt;					push(@parts, '');&lt;br /&gt;					push(@parts, $request[2]);&lt;br /&gt;				}&lt;br /&gt;				$count += 5;&lt;br /&gt;			}&lt;br /&gt;			else&lt;br /&gt;			{&lt;br /&gt;				++$count;&lt;br /&gt;			}&lt;br /&gt;			$part = '';&lt;br /&gt;		}&lt;br /&gt;		elsif( $char eq '"' || $char eq '[' || $char eq ']' )&lt;br /&gt;		{&lt;br /&gt;			$inQuote = !$inQuote;&lt;br /&gt;		}&lt;br /&gt;		else&lt;br /&gt;		{&lt;br /&gt;			$part .= $char;&lt;br /&gt;		}&lt;br /&gt;	}&lt;br /&gt;	push(@parts,$part) if $part;&lt;br /&gt;&lt;br /&gt;	return @parts;&lt;br /&gt;}&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # &lt;br /&gt;# Prints the usage/help message for this program.&lt;br /&gt;# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # &lt;br /&gt;sub HELP_MESSAGE&lt;br /&gt;{&lt;br /&gt;	print&lt;&lt;EOF;&lt;br /&gt;Imports an Apache combined log into a MySQL database.&lt;br /&gt;Usage: mysql_import_combined_log.pl -d &lt;database name&gt; -t &lt;table name&gt; [-h &lt;hostname&gt;] [-u &lt;username&gt;] [-p &lt;password&gt;] [-f &lt;filename]&lt;br /&gt; --host|-h &lt;host name&gt;         The host to connect to.  Default is localhost.&lt;br /&gt; --database|-d &lt;database name&gt; The database to use.  Required.&lt;br /&gt; --username|-u &lt;username&gt;      The user to connect as.&lt;br /&gt; --password|-p &lt;password&gt;      The user's password.&lt;br /&gt; --table|-t &lt;table name&gt;       The name of the table in which to insert data.&lt;br /&gt; --logfile|-f &lt;file name&gt;      The file to read from.  If not given, data is read from stdin.&lt;br /&gt; --sql                         Output SQL&lt;br /&gt; --help|-?                     Print out this help message.&lt;br /&gt; --version                     Print out the version of this software.&lt;br /&gt;&lt;br /&gt;----------------------------------&lt;br /&gt;-- SQL create statements for the table&lt;br /&gt;--&lt;br /&gt;&lt;br /&gt;create table &lt;TABLE_NAME&gt; (&lt;br /&gt;    remote_host varchar(50) ,&lt;br /&gt;    remote_logname varchar(50) ,&lt;br /&gt;    remote_user varchar(50) ,&lt;br /&gt;    request_time char(28),&lt;br /&gt;    time_stamp varchar(10) ,&lt;br /&gt;    request_line varchar(255),&lt;br /&gt;    request_method varchar(10) ,&lt;br /&gt;    request_uri varchar(255),&lt;br /&gt;    request_args varchar(255),&lt;br /&gt;    request_protocol varchar(10) ,&lt;br /&gt;    status varchar(10) ,&lt;br /&gt;    bytes_sent varchar(10) ,&lt;br /&gt;    referer varchar(255) ,&lt;br /&gt;    agent varchar(255)&lt;br /&gt;);&lt;br /&gt;&lt;br /&gt;EOF&lt;br /&gt;}&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # &lt;br /&gt;# Prints the version information for this program&lt;br /&gt;# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # &lt;br /&gt;sub VERSION_MESSAGE&lt;br /&gt;{&lt;br /&gt;	print "mysql_import_combined_log.pl version 1.2\n";&lt;br /&gt;	print "Version 1.0 Written by Aaron Jenson.\n";&lt;br /&gt;	print "Update to work with perl 5.6.1 by Edward Rudd\n";&lt;br /&gt;}&lt;br /&gt;&lt;br /&gt;1;&lt;br /&gt;&lt;br /&gt;&lt;/code&gt;</description>
      <pubDate>Sat, 24 Mar 2007 10:40:53 GMT</pubDate>
      <guid>http://snippets.dzone.com/posts/show/3721</guid>
      <author>slim (Slim Amamou)</author>
    </item>
  </channel>
</rss>
