#! @PERL@ -w # vim:syntax=perl use strict; use lib '@LR_PERL5LIBDIR@'; use Lire::DlfSchema; use Lire::Email; use Lire::Program qw( :msg :dlf ); # 1 is unassigned # my $debug_print_dlf = 2; my $debug_sanitize = 4; # $debug is bitmask # # my $debug = $debug_sanitize; my $debug = 0; my $schema = eval { Lire::DlfSchema::load_schema( "email" ) }; lr_err( "failed to load email schema: $@" ) if $@; my $dlf_maker = $schema->make_hashref2asciidlf_func( qw/time logrelay queueid msgid from_user from_domain from_relay_host from_relay_ip size delay xdelay to_user to_domain to_relay_host to_relay_ip stat xstat /); my $lines = 0; my $dlflines = 0; my $errorlines = 0; # keep track of being processed dlfid's (a dlfid is a hostname . queueid ) my %msg = (); # # merge hashes old and current in old # sub parse_sendmail_log { my ( $dlfid, $log ) = @_; my $time = $log->{timestamp}; if (defined $log->{'from'}) { # Skip SYSERR line # XXX: Does this send an email. We get nrcpts=1 after those. # Maybe we should set to_user in this case. return if $log->{content} =~ / SYSERR\(root\):/; my $cur = $msg{$dlfid} ||= { time => $time, logrelay => $log->{hostname}, queueid => $log->{queueid}, }; my $from; sanitize('emailadress', $log->{from}, $from ); ($cur->{from_user}, $cur->{from_domain}) = splitemailadress( $from ); die "fromline without a relay field\n" unless defined $log->{relay}; sanitize('relay', $log->{relay}, $log->{relay}); my ( $fromrelayhost, $fromrelayip) = splitrelay( $log->{relay} ); sanitize('relayhost', $fromrelayhost, $cur->{from_relay_host}); sanitize('relayip', $fromrelayip, $cur->{from_relay_ip}); die "fromline without a size field\n" unless defined $log->{size}; $cur->{size} = $log->{size}; # It is possible that nrcpts will be set to 0. This happens for # from line following check_*. But it can also happens on other # "normal" occasion for reason I don't know. Those will only be output # at the end. die "fromline without a nrcpts field\n" unless defined $log->{nrcpts}; $cur->{nrcpts} = $log->{nrcpts} || -1; # -1 = Guard value if ( $log->{msgid} ) { sanitize('msgid', $log->{msgid}, $cur->{msgid}); } else { lr_debug( "interesting line '$_': fromline without a msgid") if $debug; } } elsif (defined $log->{'to'}) { lr_debug "gonna sanitize_tos " . $log->{'to'} if $debug & $debug_sanitize; my $cur = $msg{$dlfid}; unless ( $cur ) { $cur = $msg{$dlfid} = { time => $time, logrelay => $log->{hostname}, queueid => $log->{queueid}, nrcpts => -1, # Guard value }; } # cur{'tos'} is gonna point to an array of adresses my $tos = sanitize_tos($log->{to}); lr_debug "sanitized tos to '@$tos'" if $debug & $debug_sanitize; # the following code snippet would yield a from address, even when # none is found in a regular from= line, based upon the ctladdr in # a to= line. We have seen lots of logs where ctladdr is the actual # sending user. E.g. # # PAA01703: from= # PAA01703: to=, ctladdr= # # However: # ' to="|exec /usr/local/bin/procmail", ctladdr=joe@droopey ' # means joe@droopey is the _receiving_ user. Therefore, we will no # longer use this trick: apparently, contents of ctladdr doesn't give # any reliable information.... :( # if ( exists $log->{ctladdr} && ! $cur->{from_user} ) { # # Determine from_user based on the controlling address # my $from; # sanitize('emailadress', $log->{ctladdr}, $from ); # # ($cur->{from_user}, $cur->{from_domain}) = splitemailadress($from); # } foreach my $to ( @$tos ) { my $del = $cur->{deliveries}{$to}; unless ( $del ) { $del = $cur->{deliveries}{$to} = {}; ($del->{to_user}, $del->{to_domain}) = splitemailadress($to); } if (defined $log->{'relay'}) { my $relay; sanitize('relay', $log->{'relay'}, $relay ); my ( $torelayhost, $torelayip) = splitrelay( $relay ); sanitize('relayhost', $torelayhost, $del->{to_relay_host}); sanitize('relayip', $torelayip, $del->{to_relay_ip}); } elsif (defined $log->{'mailer'} && ( $log->{'mailer'} eq 'local' || $log->{'mailer'} eq 'prog') ) { my ( $torelayhost, $torelayip) = splitrelay("localhost"); sanitize('relayhost', $torelayhost, $del->{to_relay_host}); sanitize('relayip', $torelayip, $del->{to_relay_ip}); } for my $flag ('delay', 'xdelay') { if (defined $log->{$flag}) { sanitize('delay', $log->{$flag}, $del->{$flag}); lr_debug( "sanitized $flag '", $log->{$flag}, "' to '", $del->{$flag}, "'" ) if $debug & $debug_sanitize; } } die "to-line without a stat field\n" unless defined $log->{stat}; my $stat; sanitize( 'stat', $log->{'stat'}, $stat); ($del->{stat}, $del->{xstat}) = splitstat( $stat ); if ( $del->{stat} ne "queued" && $del->{stat} ne 'deferred' ) { $dlflines += print_dlf( $cur, $dlf_maker, \%msg, $to ); } } } elsif ( defined $log->{ruleset} ) { my $cur = $msg{$dlfid} ||= { time => $time, logrelay => $log->{hostname}, queueid => $log->{queueid}, }; sanitize( 'stat', $log->{ruleset}, $cur->{stat}); sanitize( 'stat', $log->{reject}, $cur->{xstat}); } elsif ( $log->{content} =~ /^([0-9a-zA-Z]{12}): ([0-9a-zA-Z]{12}):/ ) { # Handle DSN, sender notify, postmaster notify, etc. # Oct 31 14:10:32 talyn sendmail[29217]: f9VNAM129215: f9VNAW129217: # postmaster notify: User unknown # Oct 31 14:10:33 talyn sendmail[29217]: f9VNAW129217: to=root, # delay=00:00:01, xdelay=00:00:00, mailer=local, pri=33136, # dsn=2.0.0, stat=Sent # Nov 1 11:30:15 talyn sendmail[1982]: fA1KUDK01980: fA1KUFK01982: # DSN: Return receipt # Nov 1 11:30:15 talyn sendmail[1982]: fA1KUFK01982: # to=, delay=00:00:00, # xdelay=00:00:00, mailer=esmtp, pri=30100, relay=[10.20.4.42] # [10.20.4.42], dsn=2.0.0, stat=Sent (OK) $dlfid = $log->{hostname} . $2; my $new = $msg{$dlfid} = { logrelay => $log->{hostname}, queueid => $2, time => $time, nrcpts => -1, # Guard value }; sanitize('emailadress', "<>", $new->{from} ); ($new->{from_user}, $new->{from_domain}) = splitemailadress( $new->{from} ); sanitize('relay', "localhost", $new->{relay}); my ( $fromrelayhost, $fromrelayip) = splitrelay( $new->{relay} ); sanitize('relayhost', $fromrelayhost, $new->{from_relay_host}); sanitize('relayip', $fromrelayip, $new->{from_relay_ip}); } else { # we don't wanna hear about this. happens quite often in typical # logfiles. # lr_warn "'$_': to- nor from-line: skipping\n"; } } # We don't sort on Queuid because there is no warranty that # the queueid of related messages will sort in order. Sendmail # will use lines like OLD_QUEUEID : NEW_QUEUEID for bounce and DSN message # and we NEW_QUEUEID could sort before OLD_QUEUEID and some information about # NEW_QUEUEID would be missing. my $parser = new Lire::Email(); init_dlf_converter( "email" ); while ( <> ) { lire_chomp; $lines++; lr_debug "next line '$_'" if $debug; my $log; eval { $log = $parser->parse($_); }; if ( $@ ) { lr_warn( $@ ); $errorlines++; next; } my $dlfid; if (defined $log->{'queueid'}) { if (defined $log->{'hostname'}) { $dlfid = $log->{'hostname'} . $log->{'queueid'}; } else { $log->{'hostname'} = ''; $dlfid = $log->{'queueid'}; } } else { # we don't wanna hear about this. # lr_warn "skipping line '$_': no queueid\n"; next; } eval { parse_sendmail_log( $dlfid, $log ); }; if ( $@ ) { lr_warn( $@ ); lr_warn( "error parsing line '$_' as a sendmail log. Skipping" ); $errorlines++; next; } } # flush other messages foreach my $dlfid ( keys %msg ) { $dlflines += print_dlf( $msg{$dlfid}, $dlf_maker, \%msg ); } end_dlf_converter( $lines, $dlflines, $errorlines ); __END__ =pod =head1 NAME sendmail2dlf - convert sendmail logfiles to dlf =head1 SYNOPSIS B =head1 DESCRIPTION sendmail2dlf(1) converts a LogLevel 9 sendmail (8.10.x or higher) logfile, as created using syslog, to a Lire email Distilled Log Format file. Input is one line per event. Outputted is one line per delivery: time logrelay queueid msgid fromuser fromdomain fromrelay \ size delay xdelay touser todomain torelay stat (This should be the format as defined in email/dlf.cfg.) =head1 EXAMPLE The lines Apr 20 03:00:11 firewall sendmail[442]: DAA00442: \ from=, size=4992, class=0, \ pri=34992, nrcpts=1, \ msgid=<200004192316.BAA19611@achilles.noot.com>, \ proto=ESMTP, relay=host.example.nl [150.0.0.45] Apr 20 03:00:11 firewall sendmail[442]: DAA00442: \ to=, delay=00:00:00, mailer=smtp, \ stat=queued Apr 20 05:00:11 firewall sendmail[503]: DAA00442: \ to=, delay=02:00:00, \ xdelay=00:00:03, mailer=smtp, relay=mailgw.aap.com. \ [3.4.64.199], stat=Sent (OK id=12i7CN-0001Kv-00) wil be converted to 956109611 firewall DAA00442 \ <200004192316.baa19611@achilles.noot.com> user \ example.com host.example.nl_[150.0.0.45] 4992 0 0 \ jan aap.com host.example.nl._[150.0.0.45] queued \ UNKNOWN 956116811 firewall DAA00442 \ <200004192316.baa19611@achilles.noot.com> user \ example.com host.example.nl_[150.0.0.45] 4992 \ 7200 3 jan aap.com mailgw.aap.com._[3.4.64.199] \ sent (ok_id=12i7cn-0001kv-00) The lines Mar 17 13:34:32 mailhost sendmail[8408]: NAA08408: \ from=, size=1890, class=0, \ pri=0, nrcpts=4, \ msgid=<000b01bf9009$f6885b20$6c062014@sabepc06.be.example.com>, \ proto=ESMTP, relay=root@[1.2.6.10] Mar 17 13:45:26 mailhost sendmail[8457]: NAA08408: \ to=lkrksen@www, delay=00:10:56, xdelay=00:00:01, \ mailer=smtp, relay=www.example.nl. [194.229.43.3], \ stat=Sent (NAA06261 Message accepted for delivery) \ Mar 17 13:45:27 mailhost sendmail[8457]: NAA08408: \ to=ll@host.example.com, delay=00:10:57, \ xdelay=00:00:01, mailer=smtp, relay=host.example.nl. \ [150.0.0.45], stat=Sent (OK) Mar 17 13:45:31 mailhost sendmail[8457]: NAA08408: \ to=,,, \ delay=00:11:01, xdelay=00:00:04, mailer=smtp, \ relay=mailgw.aap.com. [3.4.64.199], stat=Sent (OK \ id=12Vw8J-0001iT-00) will be converted to 953210726 mailhost NAA08408 \ <000b01bf9009$f6885b20$6c062014@sabepc06.be.example.com>\ piet example.com root@[1.2.6.10] 1890 656 1 lkrksen \ www www.example.nl._[194.229.43.3] sent \ (naa06261_message_accepted_for_delivery) 953210727 mailhost NAA08408 \ <000b01bf9009$f6885b20$6c062014@sabepc06.be.example.com> \ piet example.com root@[1.2.6.10] 1890 657 1 ll \ host.example.com host.example.nl._[150.0.0.45] sent (ok) 953210731 mailhost NAA08408 \ <000b01bf9009$f6885b20$6c062014@sabepc06.be.example.com> \ piet example.com root@[1.2.6.10] 1890 661 4 mvelsla \ aap.com mailgw.aap.com._[3.4.64.199] sent \ (ok_id=12vw8j-0001it-00) 953210731 mailhost NAA08408 \ <000b01bf9009$f6885b20$6c062014@sabepc06.be.example.com> \ piet example.com root@[1.2.6.10] 1890 661 4 pvhove \ aap.com mailgw.aap.com._[3.4.64.199] sent \ (ok_id=12vw8j-0001it-00) 953210731 mailhost NAA08408 \ <000b01bf9009$f6885b20$6c062014@sabepc06.be.example.com> \ piet example.com root@[1.2.6.10] 1890 661 4 pdebaerd \ aap.com mailgw.aap.com._[3.4.64.199] sent \ (ok_id=12vw8j-0001it-00) The lines Mar 15 13:34:09 firewall sendmail[279]: NAA00279: \ from=, size=2281952, class=0, \ pri=2311952, nrcpts=1, \ msgid=<200003151230.NAA00112@mailhost.example.nl>, \ proto=ESMTP, relay=host.example.nl [150.0.0.45] Mar 15 13:34:09 firewall sendmail[279]: NAA00279: \ to=, delay=00:00:04, mailer=smtp, \ stat=queued Mar 15 13:39:58 firewall sendmail[401]: NAA00279: \ to=, delay=00:05:53, xdelay=00:00:06, \ mailer=smtp, relay=mc5.law5.hotmail.com. \ [216.32.243.136], stat=Service unavailable Mar 15 13:39:58 firewall sendmail[401]: NAA00279: \ NAA00401: postmaster notify: Service unavailable Mar 15 13:40:04 firewall sendmail[401]: NAA00401: \ to=klaas@host.example.com, delay=00:00:06, \ xdelay=00:00:04, mailer=smtp, relay=host.example.nl. \ [150.0.0.45], stat=Sent (OK) will be converted to 953037249 firewall NAA00279 \ <200003151230.naa00112@mailhost.example.nl> klaas \ example.com host.example.nl_[150.0.0.45] 2281952 4 1 \ klaas hotmail.com mailgw.csc.com._[208.219.64.199] \ queued UNKNOWN 953037598 firewall NAA00279 \ <200003151230.naa00112@mailhost.example.nl> klaas \ example.com host.example.nl_[150.0.0.45] 2281952 353 6 \ klaas hotmail.com mc5.law5.hotmail.com._[216.32.243.136] \ service unavailable The fact that the delivery 'Mar 15 13:40:04 firewall sendmail[401]: NAA00401:' does not generate a dlf record is a bug. When the line Mar 15 19:39:40 mailhost sendmail[2178]: TAA02178: \ from=, size=0, class=0, pri=0, \ nrcpts=0, proto=SMTP, relay=[1.84.7.150] occurs in the input, and there is no line carrying the same queueid, the line is discarded, and reported as skipped: any to- or from- line, lacking any partner, will get discarded. Lines like: Mar 15 13:40:19 firewall sendmail[456]: alias database \ /etc/aliases.db out of date wil get discarded =head1 EXAMPLES To process a log as produced by sendmail: $ sendmail2dlf < mail.log sendmail2dlf will be rarely used on its own, but is more likely called by lr_log2report: $ lr_log2report sendmail < /var/log/maillog =head1 BUGS When queueids are being reused within one logfile, behaviour is unpredictable. Incomplete logsnippets (e.g. from-lines without to-lines) are not treated well. It is reported events like this occur in sendmail log files: SAA14845: from=<>, size=146990, class=0, pri=176990, nrcpts=1, msgid=<092246.09986@rly-yc03.mx.aol.com>, proto=ESMTP, relay=omr-d06.mx.aol.com [205.188.156.71] SAA14845: to=joe@mailhost, ctladdr=, delay=00:00:01, mailer=local, stat=User unknown SAA14845: to=, delay=00:00:01, mailer=local, stat=User unknown SAA14845: SAA14846: postmaster notify: User unknown SAA14846: to="|exec /usr/local/bin/procmail", ctladdr=ann@mailhost (2217/10), delay=00:00:00, xdelay=00:00:00, mailer=prog, stat=Sent SAA14846: to=bob@imap-ns, delay=00:00:01, xdelay=00:00:01, mailer=esmtp, relay=apex.example.edu. [152.19.4.80], stat=Sent (Message received: GVV8N400.CMX) SAA14846: to=eve@mailhost, delay=00:00:01, xdelay=00:00:00, mailer=local, stat=Sent Note that SAA14845 has _two_ final to= lines, while the from= line states nrcpts=1. This blows the axiom of this script away. We haven't decided yet on how to deal with this... =head1 THANKS Edward Eldred, for finding and reporting a bug. =head1 VERSION $Id: sendmail2dlf.in,v 1.32 2006/07/23 13:16:34 vanbaal Exp $ =head1 COPYRIGHT Copyright (C) 2000, 2001, 2002 Stichting LogReport Foundation LogReport@LogReport.org This program is part of Lire. Lire is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program (see COPYING); if not, check with http://www.gnu.org/copyleft/gpl.html. =head1 AUTHOR Joost van Baal =cut # Local Variables: # mode: cperl # End: