Never been to DZone Snippets before?

Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world

« Newer Snippets
Older Snippets »
Showing 1-3 of 3 total  RSS 

Split Apache logs according to GeoIP country

// Split Apache logs according to GeoIP country

#!/usr/bin/perl

# $Id$

# Split Apache logs according to GeoIP country

use strict;
use warnings;

## no critic (ValuesAndExpressions::RequireInterpolationOfMetachars)
our ($VERSION) = '$Revision$' =~ m{ \$Revision: \s+ (\S+) }xms;
## use critic

use Geo::IP;

my $gi = Geo::IP->open('/usr/local/share/GeoIP/GeoIPCity.dat', GEOIP_STANDARD);

my @logs = @ARGV;

my %record_for;

foreach my $log (@logs) {
    die "Can't read $log\n" if !-r $log;
    
    my %fh_for;
    my $num_lines_parsed = 0;
    
    my $log_fh;
    if ($log =~ m/ \.gz \z /xms) {
        open $log_fh, "gzip -cd $log |" or die "Can't open gzip pipe\n";
    }
    else {
        open $log_fh, '<', $log or die "Can't open $log\n";
    }
    
    my $log_base = $log;
    $log_base =~ s/ \.gz \z //xms;
    
    while (my $line = <$log_fh>) {
        $num_lines_parsed++;
        if (!($num_lines_parsed % 1000)) {
            print STDERR "Parsed $num_lines_parsed lines of $log\n";
        }
        
        my ($host) = $line =~ m/ \A (\S+) \s /xms;
        
        if (!exists $record_for{$host}) {
            my $record = $gi->record_by_name($host);
            $record_for{$host} = $record || 0;
        }
        
        my $country = 'unknown';
        if (exists $record_for{$host} && $record_for{$host}) {
            $country = lc($record_for{$host}->country_name());
            $country =~ s/\W+/_/gxms;
        }
        
        if (!exists $fh_for{$country}) {
            open $fh_for{$country}, '>', "$log_base.$country.out"
                or die "Can't write to $log_base.$country.out\n";
        }
        
        print {$fh_for{$country}} $line;
    }
    
    foreach my $fh (values %fh_for) {
        close $fh;
    }
    
    close $log_fh;
}

truncate Rails development/test logs

In rails applications development.log and test.log like to grow forever, which takes up space and makes them slow to grep. If I just delete them running processes with logs open might get confused. So I can use truncate instead:

truncate ~/www/*/log/*.log


Even easier, ask cron to do it for me every night:
4 22 * * * * truncate -s 0k  ~/www/*/log/*.log

Rewrite Apache logs that have incorrect dates

// Rewrite Apache logs that have incorrect dates

#!/usr/bin/perl

# $Id$

# Rewrite Apache logs that have incorrect dates.
# Example usage: $0 '28/May/2006:01:17:14 +0200' '19/Jan/2007:08:49:14 +0100' \
#                access_log.* error_log.*

use strict;
use warnings;

## no critic (ValuesAndExpressions::RequireInterpolationOfMetachars)
our ($VERSION) = '$Revision$' =~ m{ \$Revision: \s+ (\S+) }xms;
## use critic

use HTTP::Date;

my @DAYS   = qw(Sun Mon Tue Wed Thu Fri Sat);
my @MONTHS = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);

my $wrong_datetime = shift @ARGV;
my $right_datetime = shift @ARGV;

my ($timezone) = $right_datetime =~ m/ ([+-]\d\d\d\d)\z/xms;

my $seconds_diff = str2time($right_datetime) - str2time($wrong_datetime);

foreach my $file (@ARGV) {
    print "Rewriting $file\n";
    open my $IN,  '<', $file
        or die "Can't open $file: $!\n";
    open my $OUT, '>', "$file.rewritten"
        or die "Can't write to $file.rewritten: $!\n";
    while (<$IN>) {
        if (m{
               \A
               (.+\s+) # Before date and time (if any)
               \[
               (
                   \d\d/\w\w\w/\d\d\d\d # Date
                   :\d\d:\d\d:\d\d      # Time
                   \s
                   [\+\-]\d\d\d\d       # Time zone
               )
               \]
               (\s+.+) # After date and time
               \z
             }xms) {
            print {$OUT} 
              $1, q{[},
              rewrite_access_datetime($2, $seconds_diff, $timezone),
              q{]}, $3;
        }
        elsif (m{
               \A
               \[
               (
                   \w\w\w \s \w\w\w \s \d\d \s # Date
                   \d\d:\d\d:\d\d \s           # Time
                   \d\d\d\d                    # Year
               )
               \]
               (\s+.+) # After date and time
               \z
             }xms) {
            print {$OUT} 
              q{[},
              rewrite_error_datetime($1, $seconds_diff),
              q{]}, $2;
        }
        else {
            print {$OUT} $_;
        }
    }
}

sub rewrite_access_datetime {
    my ($datetime, $seconds_diff, $timezone) = @_;
    
    my ($sign, $hours, $minutes) = $timezone =~ m/\A([+-])(\d\d)(\d\d)\z/xms;
    my $seconds_offset = ($hours * 60 + $minutes) * 60;
    
    $datetime = str2time($datetime) + $seconds_diff;
    if    ($sign eq q{+}) {
        $datetime = $datetime + $seconds_offset;
    }
    elsif ($sign eq q{-}) {
        $datetime = $datetime - $seconds_offset;
    }
    
    my ($sec, $min, $hour, $mday, $mon, $year) = gmtime $datetime;
    return sprintf '%02d/%s/%04d:%02d:%02d:%02d %s',
        $mday, $MONTHS[$mon], $year + 1900, $hour, $min, $sec, $timezone;
}

sub rewrite_error_datetime {
    my ($datetime, $seconds_diff) = @_;
    
    $datetime = str2time($datetime) + $seconds_diff;
    
    my ($sec, $min, $hour, $mday, $mon, $year, $wday) = gmtime $datetime;
    return sprintf '%s %s %02d %02d:%02d:%02d %04d',
        $DAYS[$wday], $MONTHS[$mon], $mday, $hour, $min, $sec, $year + 1900;
}
« Newer Snippets
Older Snippets »
Showing 1-3 of 3 total  RSS