mike.mg2.org
the rants of me... mikey g
[+ all]
[Code Snippets] CLF Log Merger & Sorter by Michael @ 11/08/06 03:02:06 PM

#!/usr/bin/env perl

use Time::Local;

# A merge needs at least one source log plus the destination.  Counting
# the arguments (instead of testing $ARGV[0] for truth) also accepts a
# file that happens to be named "0".  Usage errors go to STDERR and exit
# non-zero so that calling scripts can detect the failure.
if ( @ARGV < 2 ) {
    print STDERR
      "Usage: merge_and_sort.pl <file1> <file2> <file3> <dst_file>\n";
    exit(1);
}

# Month abbreviation as it appears in a log timestamp => zero-based month
# index (Jan is 0 ... Dec is 11), which is what adate_to_epoch() hands to
# Time::Local as the month argument.
my %date_hash = do {
    my $index = 0;
    map { $_ => $index++ } qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
};

# Set to a true value to trace every step on STDOUT.
my $debug = 0;

# Every argument except the last is an input log; the last is the output.
my @files      = @ARGV[ 0 .. ( $#ARGV - 1 ) ];
my $dst_file   = $ARGV[$#ARGV];

# Captures the bracketed timestamp of a log line, for example
# [10/Oct/2000:13:55:36 +0100].  The '+' has to be part of the character
# class: without it a timestamp carrying a positive UTC offset never
# matches, the line gets no sort key, and the merge is no longer
# chronological.
my $date_regex = '\s+\[([\w\:\-\/\s\+]+)\]\s+';

# $open:   stringified filehandle => { line, time, file_name } for the
#          line currently pending from that file.
# $sorter: timestamp => array ref of filehandles whose pending line
#          carries that timestamp.
my $open       = {};
my $sorter     = {};

# An already existing destination is never overwritten: it is queued as one
# more input and the merged result goes to a new file named after the
# current timestamp.
if ( -e $dst_file ) {
    push @files, $dst_file;
    $dst_file = sprintf( '%s.merged.log', dim_now() );
    print "Destination file already exists, creating file $dst_file\n";
}

# Open every input and prime the merge state with its first line:
#   $open->{$fh}      remembers the pending line, its timestamp and the
#                     file name for that handle;
#   $sorter->{$time}  queues the handle under the pending line's timestamp.
foreach my $file (@files) {
    print "Opening $file!\n" if $debug;

    unless ( -e $file ) {

        # Always report this: a mistyped input would otherwise silently
        # vanish from the merged output.
        warn "File $file doesn't exist!  Skipping!\n";
        next;
    }

    my $fh;
    unless ( open( $fh, '<', $file ) ) {
        warn "Can't open $file: $!\n";
        next;
    }

    # prime the structure
    # Test for definedness, not truth: a file whose only content is "0"
    # still has a first line.
    my $first_line = <$fh>;
    unless ( defined $first_line ) {
        print
"Filehandle for $file returned nothing for the first line, not even bothering!\n"
          if $debug;
        close($fh);
        next;
    }

    my $time = adate_to_epoch( line_to_date($first_line) );
    $open->{$fh} = {
        line      => $first_line,
        time      => $time,
        file_name => $file,
    };

    # A line without a parsable timestamp is queued under the '' key,
    # which is what an undefined hash key turns into anyway.
    push( @{ $sorter->{ defined $time ? $time : '' } }, $fh );
}

open( my $dst_fh, '>', $dst_file )
  or die "Can't open destination file $dst_file: $!\n";

# main loop
# Loop on definedness rather than truth: get_next_line() returns undef
# once every input is exhausted, whereas a legitimate final line "0"
# (no trailing newline) is false and would end the merge early, dropping
# whatever the other files still hold.
while ( defined( my $line = get_next_line( $sorter, $open ) ) ) {

    # The last line of an input may lack its newline; add it so the next
    # line (coming from another file) is not glued onto it.
    $line .= "\n" unless $line =~ /\n\z/;

    print {$dst_fh} $line
      or die "Can't write to destination file $dst_file: $!\n";
    print "to $dst_file: $line" if $debug;
}

# Buffered write errors (disk full, quota) only surface at close time.
close($dst_fh)
  or die "Can't close destination file $dst_file: $!\n";

# Return the next line of the merged output, or undef once every input
# has been exhausted.
#
#   $srtr - hash ref: timestamp => array ref of filehandles whose pending
#           line carries that timestamp, served first-in first-out.
#           Lines without a parsable timestamp are queued under ''.
#   $open - hash ref keyed by the stringified filehandle; each value holds
#           the pending 'line', its 'time' and the 'file_name'.
#
# Both structures are updated in place: the chosen handle is refilled with
# its next line and re-queued, or closed and forgotten at end of file.
sub get_next_line {
    my ( $srtr, $open ) = @_;

    # Nothing queued: all inputs are done.  Returning here avoids
    # autovivifying a '' entry and reading from an undefined filehandle.
    return undef unless %$srtr;

    # Find the smallest pending timestamp with a single pass instead of
    # sorting all keys on every call.  The '' key counts as 0, so a line
    # without a parsable timestamp is emitted before any line that has a
    # positive one.
    my $earliest;
    foreach my $stamp ( keys %$srtr ) {
        $earliest = $stamp
          if !defined $earliest
          or ( $stamp || 0 ) < ( $earliest || 0 );
    }

    # get the first filehandle in the array (usually the only one) and
    # get rid of this sorter element once nothing else is queued on it
    my $queue = $srtr->{$earliest};
    my $fh    = shift @$queue;
    delete( $srtr->{$earliest} ) unless @$queue;

    my $entry       = $open->{$fh};
    my $return_line = $entry->{line};
    print "$earliest returning from " . $entry->{file_name} . "\n"
      if $debug;

    # Refill from the same file.  Check definedness, not truth: a final
    # line "0" without a newline is still a line.
    my $next_line = <$fh>;
    if ( defined $next_line ) {
        my $time = adate_to_epoch( line_to_date($next_line) );
        $entry->{line} = $next_line;
        $entry->{time} = $time;
        push( @{ $srtr->{ defined $time ? $time : '' } }, $fh );
    } else {
        print $entry->{file_name} . " reached end of file, removing!\n"
          if $debug;
        delete( $open->{$fh} );
        close($fh);
    }

    return $return_line;

}

# Extract the bracketed timestamp text from one log line.
#
#   $line - a single log line.
#
# Returns whatever the capture group of the file-level $date_regex
# matched, or undef when the line carries no such timestamp.
sub line_to_date {
    my ($line) = @_;

    # The obsolete /o flag is dropped; the pattern is a plain variable
    # that is simply interpolated on use.
    if ( defined $line and $line =~ /$date_regex/ ) {
        return $1;
    }

    print "$line doesn't look like date regex: $date_regex!\n" if $debug;

    # Explicit failure value: without it the sub returned the value of the
    # last evaluated expression, i.e. 0 or print's result depending on
    # $debug.
    return undef;
}

# Convert a log timestamp such as "10/Oct/2000:13:55:36 -0700" to epoch
# seconds.
#
#   $a_date - timestamp text as returned by line_to_date().
#
# When a numeric UTC offset (+HHMM / -HHMM) follows the time it is
# honoured, so lines written under different offsets (other hosts, or the
# same host across a daylight-saving change) compare correctly.  Without
# an offset the time is read as local time, as before.
#
# Returns the epoch value, or undef when the text does not look like such
# a timestamp or its fields are out of range.
sub adate_to_epoch {
    my ($a_date) = @_;

    # In list context the match returns the captures, or an empty list on
    # failure; the offset captures stay undef when no offset is present.
    my @captures =
      defined $a_date
      ? $a_date =~
      m{(\d+)/(\w\w\w)/(\d+):(\d+):(\d+):(\d+)(?:\s+([+-])(\d\d)(\d\d))?}
      : ();

    unless (@captures) {
        print "$a_date doesn't look like an Apache date!\n" if $debug;
        return undef;
    }

    my ( $mday, $mon_name, $year, $hour, $min, $sec, $sign, $off_h, $off_m )
      = @captures;
    my @fields = ( $sec, $min, $hour, $mday, $date_hash{$mon_name}, $year );

    # Time::Local croaks on out-of-range fields (day 45, hour 99, ...);
    # trap that so one corrupt line cannot abort the whole merge.
    my $epoch = eval {
        defined $sign
          ? timegm(@fields)
          - ( $sign eq '-' ? -1 : 1 ) * ( $off_h * 3600 + $off_m * 60 )
          : timelocal(@fields);
    };

    unless ( defined $epoch ) {
        print "$a_date doesn't look like an Apache date!\n" if $debug;
        return undef;
    }

    return $epoch;
}

# Today's local date as a three-element list: four-digit year, two-digit
# month and two-digit day (the latter two zero-padded strings).  Any
# arguments are ignored.  Not called anywhere in the visible part of this
# file.
sub ymd {
    my ( $mday, $mon, $year ) = (localtime)[ 3, 4, 5 ];

    # localtime counts years from 1900 and months from 0.
    return (
        1900 + $year,
        sprintf( '%02d', 1 + $mon ),
        sprintf( '%02d', $mday )
    );
}

# Current local time as one compact string, YYYYMMDDhhmmss.  Any arguments
# are ignored.  Used to name the output file when the requested
# destination already exists.
sub dim_now {
    my ( $sec, $min, $hour, $mday, $mon, $year ) = localtime(time);

    # localtime counts years from 1900 and months from 0; every field
    # after the year is zero-padded to two digits.
    my @padded = map { sprintf( '%02d', $_ ) }
      ( $mon + 1, $mday, $hour, $min, $sec );

    return join( '', $year + 1900, @padded );
}


Name:   Url:
Subject:
C:

back to the main page