convert apache http combined logs into sql (and import it into a mysql database eventually)
it was hard to find it, that's why i put it here.
1 2 #!/usr/bin/perl -w 3 # Written by Aaron Jenson. 4 # Original source: http://www.visualprose.com/software.php 5 # Updated to work under Perl 5.6.1 by Edward Rudd 6 # Updated 24 march 2007 by Slim Amamou <slim.amamou@alpha-studios.com> 7 # - output SQL with the option '--sql' 8 # - added SQL create table script to the HELP 9 # 10 # NOTE : you need the TimeDate library (http://search.cpan.org/dist/TimeDate/) 11 # 12 use strict; 13 use Getopt::Long qw(:config bundling); 14 use DBI; 15 use Date::Parse; 16 17 my %options = (); 18 my $i = 0; 19 my $sql = ''; 20 my $valuesSql = ''; 21 my $line = ''; 22 my $dbh = 0; 23 my $sth = 0; 24 my @parts = (); 25 my $part; 26 my $TIMESTAMP = 3; 27 my $REQUEST_LINE = 4; 28 my @cols = ( 29 'remote_host', ## 0 30 'remote_logname', ## 1 31 'remote_user', ## 2 32 'request_time', ## 3.string 33 'time_stamp', ## 3.posix 34 'request_line', ## 5 35 'request_method', ## 6 36 'request_uri', ## 7 37 'request_args', ## 8 38 'request_protocol', ## 9 39 'status', ## 10 40 'bytes_sent', ## 11 41 'referer', ## 12 42 'agent' ## 13 43 ); 44 my $col = ''; 45 46 GetOptions (\%options, 47 "version" => sub { VERSION_MESSAGE(); exit 0; }, 48 "help|?" => sub { HELP_MESSAGE(); exit 0; }, 49 "host|h=s", 50 "database|d=s", 51 "table|t=s", 52 "username|u=s", 53 "password|p=s", 54 "logfile|f=s", 55 "sql"); 56 57 $options{host} ||= 'localhost'; 58 $options{database} ||= ''; 59 $options{username} ||= ''; 60 $options{password} ||= ''; 61 $options{logfile} ||= ''; 62 $options{sql} ||= ''; 63 64 if( ! ($options{database} || $options{sql})) 65 { 66 HELP_MESSAGE(); 67 print "Must supply a database to connect to.\n"; 68 exit 1; 69 } 70 71 if( ! $options{table} ) 72 { 73 HELP_MESSAGE(); 74 print "Must supply table name.\n"; 75 exit 1; 76 } 77 78 if( $options{logfile} ) 79 { 80 if( ! -e $options{logfile} ) 81 { 82 print "File '$options{logfile}' doesn't exist.\n"; 83 exit 1; 84 } 85 open(STDIN, "<$options{logfile}") || die "Can't open $options{logfile} for reading."; 86 } 87 88 if( $options{database} ) 89 { 90 $dbh = Connect(); 91 if (! $dbh) { 92 exit 1; 93 } 94 } 95 96 $sql = "INSERT INTO $options{table} ("; 97 foreach $col (@cols) 98 { 99 $sql .= "$col," if( $col ); 100 } 101 chop($sql); 102 $sql .= ') VALUES ('; 103 my ($linecount,$insertcount) = (0,0); 104 while($line = <STDIN>) 105 { 106 $linecount++; 107 @parts = SplitLogLine( $line ); 108 next if( $parts[$TIMESTAMP+1] == 0 ); 109 $valuesSql = ''; 110 for( $i = 0; $i < @cols; ++$i ) 111 { 112 $parts[$i] =~ s/\\/\\\\/g; 113 $parts[$i] =~ s/'/\\'/g; 114 $valuesSql .= "'$parts[$i]'," if( $cols[$i] ); 115 } 116 chop($valuesSql); 117 118 if( $options{database} ) 119 { 120 $sth = $dbh->prepare("$sql$valuesSql)"); 121 if( ! $sth->execute() ) 122 { 123 print "Unable to perform specified query.\n$sql$valuesSql\n" . $sth->errstr() . "\n"; 124 } else { 125 $insertcount++; 126 } 127 $sth->finish(); 128 } 129 if( $options{sql} ) 130 { 131 print "$sql$valuesSql);\n"; 132 } 133 } 134 if( ! $options{sql} ) 135 { 136 print "Parsed $linecount Log lines\n"; 137 print "Inserted $insertcount records\n"; 138 print "to table '$options{table}' in database '$options{database}' on '$options{host}'\n"; 139 } 140 141 # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 142 # Connects to a MySQL database and returns the connection. 143 # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 144 sub Connect 145 { 146 my $dsn = "DBI:mysql:$options{database};hostname=$options{host}"; 147 return DBI->connect( $dsn, $options{username}, $options{password} ); 148 } 149 150 151 # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 152 # Splits up a log line into its parts. 153 # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 154 sub SplitLogLine 155 { 156 my $line = shift; 157 my $i = 0; 158 my $inQuote = 0; 159 my $char = ''; 160 my $part = ''; 161 my @parts = (); 162 my $count = 0; 163 chomp($line); 164 for( $i = 0; $i < length($line); ++$i ) 165 { 166 $char = substr($line, $i, 1); 167 if( $char eq ' ' && ! $inQuote ) 168 { 169 ## print "Found part $part.\n"; 170 if( $count == $TIMESTAMP ) 171 { 172 push(@parts, "[".$part."]"); 173 $part = str2time($part); 174 } 175 push(@parts, $part); 176 if( $count == $REQUEST_LINE ) 177 { 178 my @request = split(/[ ?]/, $part); 179 push(@parts, $request[0]); 180 push(@parts, $request[1]); 181 if( $request[3] ) 182 { 183 push(@parts, $request[2]); 184 push(@parts, $request[3]); 185 } 186 else 187 { 188 push(@parts, ''); 189 push(@parts, $request[2]); 190 } 191 $count += 5; 192 } 193 else 194 { 195 ++$count; 196 } 197 $part = ''; 198 } 199 elsif( $char eq '"' || $char eq '[' || $char eq ']' ) 200 { 201 $inQuote = !$inQuote; 202 } 203 else 204 { 205 $part .= $char; 206 } 207 } 208 push(@parts,$part) if $part; 209 210 return @parts; 211 } 212 213 214 # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 215 # Prints the usage/help message for this program. 216 # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 217 sub HELP_MESSAGE 218 { 219 print<<EOF; 220 Imports an Apache combined log into a MySQL database. 221 Usage: mysql_import_combined_log.pl -d <database name> -t <table name> [-h <hostname>] [-u <username>] [-p <password>] [-f <filename] 222 --host|-h <host name> The host to connect to. Default is localhost. 223 --database|-d <database name> The database to use. Required. 224 --username|-u <username> The user to connect as. 225 --password|-p <password> The user's password. 226 --table|-t <table name> The name of the table in which to insert data. 227 --logfile|-f <file name> The file to read from. If not given, data is read from stdin. 228 --sql Output SQL 229 --help|-? Print out this help message. 230 --version Print out the version of this software. 231 232 ---------------------------------- 233 -- SQL create statements for the table 234 -- 235 236 create table <TABLE_NAME> ( 237 remote_host varchar(50) , 238 remote_logname varchar(50) , 239 remote_user varchar(50) , 240 request_time char(28), 241 time_stamp varchar(10) , 242 request_line varchar(255), 243 request_method varchar(10) , 244 request_uri varchar(255), 245 request_args varchar(255), 246 request_protocol varchar(10) , 247 status varchar(10) , 248 bytes_sent varchar(10) , 249 referer varchar(255) , 250 agent varchar(255) 251 ); 252 253 EOF 254 } 255 256 257 258 # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 259 # Prints the version information for this program 260 # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 261 sub VERSION_MESSAGE 262 { 263 print "mysql_import_combined_log.pl version 1.2\n"; 264 print "Version 1.0 Written by Aaron Jenson.\n"; 265 print "Update to work with perl 5.6.1 by Edward Rudd\n"; 266 } 267 268 1; 269