#! @PERL@ -w # vim:syntax=perl use strict; use DB_File; use lib '@LR_PERL5LIBDIR@'; use Lire::Email; use Lire::Program qw( :msg :dlf ); my $debug = 0; sub debug { $debug and lr_debug( @_ ); } my $dbfile = shift or lr_err( "give dumpfilestem as arg\n" ); my %dlfids; # maps dlfids to nof occurrences in log if ( -e $dbfile ) { unlink $dbfile or lr_err( "cannot unlink $dbfile: $!\n"); } tie %dlfids, "DB_File", "$dbfile", or lr_err "cannot tie to $dbfile: $!\n"; my $schema = eval { Lire::DlfSchema::load_schema( "email" ) }; lr_err( "failed to load email schema: $@" ) if $@; my $dlf_maker = $schema->make_hashref2asciidlf_func( qw/time logrelay queueid msgid from_user from_domain from_relay_host from_relay_ip size delay to_user to_domain to_relay_host to_relay_ip stat xstat /); my $qid = ''; my %line; # line currently being processed my ($lines, $errorlines, $outlines) = (0, 0, 0); sub print_line { my $line = shift; for my $k (sort keys %{ $line }) { # print ", $k=" . $line->{$k}; print " $k " . $line->{$k}; } print "\n"; } sub store_dlfid { my $dlfid = shift; $dlfids{$dlfid}++; } sub parse_receive { my ( $type, $log ) = @_; if ( $type eq 'pickup' ) { $line{'from_relay_host'} = "localhost"; $line{'from_relay_ip'} = '127.0.0.1'; } else { unless ( defined $log->{client} ) { lr_notice ( "no client= field found for queueid '$qid', ", "will use default" ); $log->{client} = '-'; } ($line{from_relay_host}, $line{from_relay_ip}) = splitrelay($log->{client} ); my $fromrelayhost = $line{from_relay_host}; sanitize( 'relayhost', $fromrelayhost, $line{from_relay_host} ); my $fromrelayip = $line{from_relay_ip}; sanitize('relayip', $fromrelayip, $line{from_relay_ip}); } } sub parse_cleanup { my ( $log ) = $_[0]; my $mid; if (defined $log->{'message-id'}) { $mid = $log->{'message-id'}; } elsif (defined $log->{'resent-message-id'}) { $mid = $log->{'resent-message-id'}; } else { lr_warn "no (resent-)message-id= field found for " . "queueid '$qid', using default"; $mid = '-'; } sanitize( 'msgid', $mid, $line{'msgid'} ); } sub parse_send { my ( $log ) = $_[0]; for my $k ('to', 'delay', 'status', 'relay') { die "missing '$k'= field\n" unless defined $log->{$k} } my $to; # max one to-address per postfix log line, no need for sanitize_tos here sanitize('emailadress', $log->{'to'}, $to); my ($touser, $todomain) = splitemailadress($to); my $tmp; sanitize('stat', $log->{'status'}, $tmp); my ($stat, $xstat) = splitstat($tmp); my ($tmphost, $tmpip) = splitrelay($log->{relay}); my $torelayhost; sanitize('relayhost', $tmphost, $torelayhost); my $torelayip; sanitize('relayip', $tmpip, $torelayip); $line{'to_user'} = $touser; $line{'to_domain'} = $todomain; $line{'to_relay_host'} = $torelayhost; $line{'to_relay_ip'} = $torelayip; $line{'delay'} = $log->{'delay'}; $line{'stat'} = $stat; $line{'xstat'} = $xstat; } sub parse_qmgr { my ( $log ) = @_; for my $k ('size', 'from') { # lr_err " expected to find " . # "'$k'= field, substituting default for queueid '$qid', " . # "type '$type'\n" # can occurr in case message delivery gets deferred. we find the # real from and size later, in such cases. no need to warn. $log->{$k} ||= '-'; } my $size; sanitize('size', $log->{'size'}, $size); if (!$line{'size'} or $line{'size'} eq '-') { # only update size if we can 'improve' it. some qmgr lines have # to=, relay=, delay= and status=, but no size= debug("parse_qmgr: size was " . $log->{'size'} . ", assigning $size\n"); $line{'size'} = $size; } $line{'from_relay_host'} ||= "localhost"; $line{'from_relay_ip'} ||= "127.0.0.1"; my $tmp; sanitize('emailadress', $log->{'from'}, $tmp); ( $line{'from_user'}, $line{'from_domain'}) = splitemailadress($tmp); } sub parse_postfix { my ( $log ) = $_[0]; $qid = $log->{queueid}; my $logrelay = $log->{hostname}; my $dlfid = $log->{hostname} . $qid; my ($type) = $log->{process} =~ m!^postfix/(.*)!; # Initialize default %line = ( queueid => $qid, logrelay => $logrelay, dlfid => $dlfid, type => $type, time => $log->{timestamp}, ); SWITCH: for ($type) { /^(pickup|smtpd)$/ && do { parse_receive( $type, $log ); last SWITCH; }; /^cleanup$/ && do { parse_cleanup( $log ); last SWITCH; }; # lmtp: lmtp delivery via unix socket. looks like: # Aug 27 04:02:08 mailhost postfix/lmtp[28560]: C15C085E: # to=, # relay=/var/imap/socket/lmtp[/var/imap/socket/lmtp], delay=1, # status=sent (250 2.1.5 Ok) # # my goodness! we have # Jun 21 20:41:29 tiggr postfix/smtp[17847]: AE4255DEB: # to=, # relay=my.host.com[001:10:108:201:50:fcff:fe0b:28ec], # delay=1, status=sent (250 Ok: queued as 076EFD980) # but also: # Jun 21 20:41:28 tiggr postfix/qmgr[520]: AE4255DEB: # to=<-a@local.host.com>, relay=none, delay=0, status=bounced # (invalid recipient syntax: "-a@local.host.com") # (under regular circumstances, qmgr lines look like # Jun 21 20:47:08 tiggr postfix/qmgr[520]: 718E85DEB: # from=, size=685, nrcpt=1 (queue active) # # Jan 14 14:18:13 srv1 postfix/virtual[25001]: D33803A9E07: # to=, relay=virtual, delay=126, status=sent # (mailbox) ( /^(local|smtp|lmtp|virtual)$/ || ( ($type eq 'qmgr' || $type eq 'nqmgr') && defined $log->{to} ) ) && do { parse_send( $log ); last SWITCH; }; /^(qmgr|nqmgr)$/ && do { parse_qmgr( $log ); last SWITCH; }; /^(master|pipe)$/ && do { # Ignore those last SWITCH; }; # Default die "unknown type '$type'\n"; } # make sure the nof is in sync with what we're feeding to postfix2dlf_main, # beware of skipped lines store_dlfid($dlfid); print_line(\%line); $outlines++; } my $parser = new Lire::Email(); while (<>) { chomp; $lines++; my $log; eval { $log = $parser->parse( $_ ); }; if ($@) { lr_warn( $@ ); lr_warn( "failed to parse '$_'. skipping" ); $errorlines++; next; } next unless $log->{process} =~ m!^postfix/!; # skip lines like # Dec 1 06:58:22 internetsrv postfix/smtpd[21142]: connect from ... # Dec 1 06:59:22 internetsrv postfix/smtpd[21142]: disconnect from .... # we're not using this information. next unless defined $log->{'queueid'}; eval { parse_postfix( $log ); }; if ($@) { lr_warn( $@ ); lr_warn( "failed to parse '$_'. skipping" ); $errorlines++; } } # nope, can't do # end_dlf_converter( $lines, $outlines, $errorlines ); # here: lr_db_store should store dlflines only after _main has ran # overload dlfids to store info we'll need in lr_postfix2dlf_main's # end_dlf_converter call $dlfids{'loglines'} = $lines; $dlfids{'errorlines'} = $errorlines; untie %dlfids; lr_info( "read $lines lines; output $outlines internal lines; " . "$errorlines errors" ); # lr_db_store will be handled by lr_postfix2dlf_main's end_dlf_converter # call __END__ =pod =head1 NAME postfix2dlf_pre - preprocess postfix logfiles for postfix2dlf_main =head1 SYNOPSIS B I =head1 DESCRIPTION B expect a postfix logfile on STDIN. It prints preprocessed logfile lines to STDOUT and creates a Berkeley DB holding a mapping from hostname-queueid to number-of-lines-with-this-id. I can be e.g. /tmp/dump, ../../var/dump or dump. =head1 DEVELOPERS One can use the lr_anondump tool to view the contents of the dumpfile. E.g. run $ zcat postfix.log.gz | ./postfix2dlf_pre /tmp/stem > /tmp/pre $ lr_anondump /tmp/stem $ ./postfix2dlf_main /tmp/stem < /tmp/pre > /tmp/post . =head1 EXAMPLE The raw log file Dec 1 04:02:56 internetsrv postfix/pickup[20919]: 693A3578E: uid=0 from= Dec 1 04:02:56 internetsrv postfix/cleanup[20921]: 693A3578E: message-id= Dec 1 04:02:57 internetsrv postfix/qmgr[20164]: 693A3578E: from=, size=617 (queue active) Dec 1 04:02:58 internetsrv postfix/local[20924]: 693A3578E: to=, relay=local, delay=3, status=sent (forwarded as E325C578D) will get converted to dlfid internetsrv693A3578E from_relay_host localhost from_relay_ip 127.0.0.1 logrelay internetsrv queueid 693A3578E time 1007175776 type pickup dlfid internetsrv693A3578E logrelay internetsrv msgid queueid 693A3578E time 1007175776 type cleanup dlfid internetsrv693A3578E from_domain example.com from_relay_host localhost from_relay_ip 127.0.0.1 from_user john.doe.2 logrelay internetsrv queueid 693A3578E size 617 time 1007175777 type qmgr delay 3 dlfid internetsrv693A3578E logrelay internetsrv queueid 693A3578E stat sent time 1007175778 to_domain example.com to_relay_host localhost to_relay_ip 127.0.0.1 to_user john.doe.2 type local xstat (forwarded_as_e325c578d) =head1 EXAMPLES postfix2dlf_pre will be rarely used on its own: it is called by postfix2dlf(1). Refer to the postfix2dlf manpage for examples and usage information. =head1 SEE ALSO postfix2dlf(1), postfix2dlf_main(1), sendmail2dlf(1), qmail2dlf(1), exim2dlf(1) =head1 VERSION $Id: postfix2dlf_pre.in,v 1.13 2006/07/23 13:16:34 vanbaal Exp $ =head1 COPYRIGHT Copyright (C) 2002 Stichting LogReport Foundation LogReport@LogReport.org This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program (see COPYING); if not, check with http://www.gnu.org/copyleft/gpl.html. =head1 AUTHOR Joost van Baal =cut # Local Variables: # mode: cperl # End: