#! @PERL@ -w # vim:syntax=perl use strict; use lib '@LR_PERL5LIBDIR@'; use Lire::Email; use Lire::DlfSchema; use Lire::Program qw( :msg :dlf ); my $debug = 0; my $schema = eval { Lire::DlfSchema::load_schema( "email" ); }; lr_err( "error loading email schema: $@" ) if $@; my $hash2dlf = $schema->make_hashref2asciidlf_func( qw/time msgid from_user from_domain size delay xdelay to_user to_domain stat xstat/ ); my $msgno; my %msgs; init_dlf_converter( "email"); my $lines = 0; my $dlflines = 0; my $errorlines = 0; while (<>) { chomp; $lines++; my @line = split ' ', $_; if ( @line < 4 ) { lr_warn( "skipping line '$_': has " . @line . " field, should have at least 4" ); $errorlines++; next; } # get '972810464.144861' my $time = shift @line; lr_debug( "number of keys in msgs: ", scalar keys %msgs ) if $debug; if ($line[0] eq 'new') { if ($line[1] eq 'msg') { # 'new msg 6172' $msgno = $line[2]; if (defined $msgs{$msgno}) { lr_info( "deleting msgno '$msgno': number is being reused" ); delete $msgs{$msgno}; } $msgs{$msgno}{time} = $time; $msgs{$msgno}{msgid} = $time . '-' . $msgno; } else { lr_warn( "skipping line '$_': it has a 'new', but second " . "field is '" . $line[1] . "', not a 'msg'" ); $errorlines++; next; } } elsif ($line[0] eq 'end') { if ($line[1] eq 'msg') { $msgno = $line[2]; my $tmp; unless ( defined $msgs{$msgno}{fromaddress} ) { lr_notice( "skipping line '$_': fromaddress not " . "found yet" ); delete $msgs{$msgno}; next; } unless (sanitize('emailadress', $msgs{$msgno}{fromaddress}, $tmp)) { # delete $msgs{$msgno}; lr_err ("cannot sanitize 'emailadress', '$tmp'" ); } ($msgs{$msgno}{from_user}, $msgs{$msgno}{from_domain}) = splitemailadress($tmp); while (my ($dlvno, $dlv) = each %{ $msgs{$msgno}{deliveries} }) { # this delivery is inheriting properties from the message # it belongs to while (my ($k, $v) = each %{ $msgs{$msgno} }) { next if $k eq 'deliveries'; if ($k eq 'time') { unless (defined $dlv->{time}) { lr_info( "found 'end msg' line for msg " . "'$msgno', but delivery '$dlvno' hasn't yet " . "got a timestamp. using the end timestamp" ); $dlv->{time} = $time; } $dlv->{xdelay} = $dlv->{time} - $v; $dlv->{delay} = $dlv->{xdelay}; # XXX is this what # we want? $dlv->{time} = $v; next; } if ( defined $dlv->{$k} ) { delete $msgs{$msgno}; lr_err ( "key '$k' already in delivery '$dlvno' for " . "message '$msgno'" ); } $dlv->{$k} = $v; } unless (sanitize('emailadress', $dlv->{toaddress}, $tmp)) { delete $msgs{$msgno}; lr_err( "cannot sanitize 'emailadress', '$tmp'" ); } ($dlv->{to_user}, $dlv->{to_domain}) = splitemailadress( $tmp ); print join( " ", @{ $hash2dlf->( $dlv ) } ), "\n"; $dlflines++; delete $msgs{$msgno}{deliveries}{$dlvno}; } delete $msgs{$msgno}; } else { lr_warn( "skipping line '$_': it has a 'end', but second " . "field is '" . $line[1] . "', not a 'msg'" ); $errorlines++; next; } # make sure msgno gets flushed in _any_ case XXX } elsif ($line[0] eq 'info') { if ($line[1] eq 'msg') { # 'info msg 6161: bytes 18099 from # qp 16700 uid 1015' ($msgno = $line[2]) =~ s/:$//; if ( $line[3] ne 'bytes' ) { lr_warn ( "evil line '$_': 'info msg', but no 'bytes', " . "skipping" ); $errorlines++; next; } unless (defined $msgs{$msgno}) { lr_info( "found info msg $msgno line, but msg $msgno " . "was not yet announced in a new msg line. " . "pretending it was" ); $msgs{$msgno} = {}; } if (defined $msgs{$msgno}{size}) { lr_info( "size for msg $msgno already defined, but " . "found new one in line '$_'. overwriting." ); } $msgs{$msgno}{size} = $line[4]; if ( $line[5] ne 'from' ) { lr_warn ( "evil line '$_': 'info msg', but no 'from', " . "skipping" ); $errorlines++; next; } if (defined $msgs{$msgno}{fromaddress}) { lr_info( "fromaddress for msg $msgno already " . "defined, but found new one in line '$_'. " . "overwriting." ); } $msgs{$msgno}{fromaddress} = $line[6]; } else { lr_warn( "skipping line '$_': it has a 'info', but second " . "field is '" . $line[1] . "', not a 'msg'" ); $errorlines++; next; } } elsif ($line[0] eq 'starting') { if ($line[1] eq 'delivery') { # starting delivery 4472: msg 6161 to local vanbaal@mdcc.cx unless ( @line == 8 ) { lr_warn( "evil line '$_': 'starting delivery', but no " . "8 fields, skipping" ); next; } (my $dlvno = $line[2]) =~ s/:$//; if ( $line[3] ne 'msg' ) { lr_warn( "evil line '$_': 'starting delivery', but no " . "'msg', skipping" ); $errorlines++; next; } $msgno = $line[4]; if ( $line[5] ne 'to' ) { lr_warn( "evil line '$_': 'starting delivery', but no " . "'to', skipping" ); $errorlines++; next; } my $dest = $line[6]; # 'local' or 'remote' my $toaddress = $line[7]; unless (defined $msgs{$msgno}) { lr_info( "found info msg $msgno line, but msg $msgno " . "was not yet announced in a new msg line. " . "pretending it was" ); $msgs{$msgno} = {}; } unless (defined $msgs{$msgno}{deliveries}) { # the first delivery for this message $msgs{$msgno}{deliveries} = {}; } # go store this delivery if (defined $msgs{$msgno}{deliveries}{$dlvno}) { lr_info( "gonna flush msg '$msgno', delivery '$dlvno': ". "reused" ); delete $msgs{$msgno}{deliveries}{$dlvno}; } $msgs{$msgno}{deliveries}{$dlvno}{dest} = $dest; $msgs{$msgno}{deliveries}{$dlvno}{toaddress} = $toaddress; } else { lr_warn( "skipping line '$_': it has a 'starting', but " . "second field is '" . $line[1] . "', not a 'delivery'" ); $errorlines++; next; } } elsif ($line[0] eq 'delivery') { # 'delivery 4466: success: did_0+0+1/' # 'delivery 4469: success: 213.46.240.6_accepted_message./R # emote_host_said:_250_Message_received:_20001029065738.E # ZXJ24328.amsmta02-svc@mdcc.cx/' # 'delivery 5037: deferral: Connected_to_216.33.151.135_but_ # connection_died._Possible_duplicate!_(#4.4.2)/' # 'delivery 5540: success: ezmlm-manage:_info:_qp_32060/did_0+0+3/ # 'delivery 5641: failure: 64.4.42.7_failed_after_I_sent_the_ # message./Remote_host_said:_554_Transaction_failed/' # 'delivery 6266: deferral: Connected_to_195.193.176.2_but_se # nder_was_rejected./Remote_host_said:_451_..._Sender_domain_must_resolve/ # 'delivery 4464: deferral: CNAME_lookup_failed_temporarily._(#4.4.3)/ # 'delivery 4533: deferral: Connected_to_212.27.32.3_but_send # er_was_rejected./Remote_host_said:_451_DNS_temporary_fai # lure_(#4.3.0)/ unless ( @line == 4 ) { lr_warn( "evil line '$_': 'delivery', " . "but no 4 fields, skipping\n" ); $errorlines++; next; } (my $dlvno = $line[1]) =~ s/:$//; # 'deferral:' 'success:' 'failure:' (my $stat = $line[2]) =~ s/:$//; my $xstat = $line[3]; # find to which message this delivery belongs my $found = 0; # Bizarre copy of HASH in aassign at ./qmail2dlf line 236, <> line 694. # while ((my $msgno, my $msg) = each %msgs) { for my $msgno (keys %msgs) { if (defined $msgs{$msgno}{deliveries}{$dlvno}) { # found it if (defined $msgs{$msgno}{deliveries}{$dlvno}{stat}) { lr_info( "found line '$_', but status for delivery " . "'$dlvno' already defined: overwriting" ); } # translate to sendmail-style $msgs{$msgno}{deliveries}{$dlvno}{stat} = $stat eq 'success' ? 'sent' : ( $stat eq 'deferral' ? 'deferred' : $stat ); if (defined $msgs{$msgno}{deliveries}{$dlvno}{xstat}) { lr_info( "found line '$_', but status line for " . "delivery '$dlvno' already defined: " . "overwriting"); } $msgs{$msgno}{deliveries}{$dlvno}{xstat} = $xstat; if (defined $msgs{$msgno}{deliveries}{$dlvno}{time}) { lr_info( "found line '$_', but time for delivery " . "'$dlvno' already defined: overwriting" ); } $msgs{$msgno}{deliveries}{$dlvno}{time} = $time; $found = 1; last; } } lr_info( "cannot find delivery '$dlvno', so skipping line '$_'" ) unless $found; } elsif ($line[0] eq 'status:') { ; } elsif ($line[0] eq 'warning:') { ; } elsif ($line[0] eq 'bounce') { ; # skip this one: no sendmail or postfix counterpart } else { lr_warn( "skipping line '$_': first field should be one of ". "'new', 'info', 'starting', 'delivery', 'end', 'bounce', " . "'status:' or 'warning:', not '". $line[0] . "'"); $errorlines++; next; } } end_dlf_converter( $lines, $dlflines, $errorlines ); __END__ =pod =head1 NAME qmail2dlf - convert sanitized qmail-send logs to dlf =head1 SYNOPSIS B =head1 DESCRIPTION B reads qmail-send logs from stdin, writes dlf to stdout, and, in case of errors, complains on stderr. =head1 TIMESTAMPS We expect timestamps which look like e.g. `977359048.466280500' in our log: number of seconds since the epoch, in any precision. The log should feature lines which look something like: 982584201.511524 info msg 6426: bytes 3537 from qp 21089 uid 70 I, as distributed with qmail, writes `a numerical timestamp', ie something like 972802273.627578, indicating seconds and nanoseconds since the beginning of 1970. I, as distributed with daemontools, I, if invoked with action I, writes a `@' and a I format timestamp. These timestamps look like e.g. 4000000039ef8532346bb35c. Note that 0x400000000000000000000000 is 2^62. One can get the current time in I format by doing: echo 40000000`(echo obase = 16; date +%s) | bc`00000000 (assuming GNU date is installed) To convert I to numerical timestamps, one can use Russ Allbery's tai64nfraq. It's in the public domain, available from I. =head1 EXAMPLE To process a log as produced by splogger: $ lr_desyslog qmail < mail.log | qmail2dlf To process a log as produced by multilog: $ tai64nfrac < current | qmail2dlf qmail2dlf(1) will be rarely used on its own, but is more likely called by lr_log2report: $ tai64nfrac < /service/qmail-send/log/main/current | \ lr_log2report qmail =head1 BUGS We don't deal with bounces: email qmail none qmail2dlf info skipping line '986244190.800217 bounce msg 6306 qp 28445': first field should be one of 'new', 'info', 'starting', 'delivery', 'end' or 'status:', not 'bounce' qmail2dlf doesn't use any information from the qmail-smtpd(1) logs. (These look like 2002-06-16 09:34:57.798038500 tcpserver: pid 19385 from 100.61.24.7 2002-06-16 09:34:58.114198500 tcpserver: ok 19385 foo.example.com:100.163.25.11:25 logreport.iae.nl:212.61.24.7:postfix:1189 .) =head1 VERSION $Id: qmail2dlf.in,v 1.24 2006/07/23 13:16:34 vanbaal Exp $ =head1 COPYRIGHT Copyright (C) 2000, 2001 Stichting LogReport Foundation LogReport@LogReport.org This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program (see COPYING); if not, check with http://www.gnu.org/copyleft/gpl.html. =head1 AUTHOR Joost van Baal =cut # Local Variables: # mode: cperl # End: