#! @PERL@ -w # vim:syntax=perl =pod =head1 NAME s1ms2dlf - convert SunONE Messaging Server SMTP log files to the email DLF =head1 SYNOPSIS B =head1 DESCRIPTION This program converts a SunONE Messaging Server log file generated by the SMTP service to the Lire email DLF. The log file is expected on stdin, the DLF file is printed to stdout. In order to correctly process the log file, you need to turn on logging of the following channels: FIXME =head1 LIMITATIONS This DLF converter was developed for the SunONE Messaging Server version 5.2. Other versions may or may not work. Contact the LogReport developers if you have problems with that converter. You can find information about the log format used by SunONE Messaging Server at the following URL: http://docs.sun.com/source/816-6009-10/logging.htm =head2 CONFIGURING LOGGING OF MAILBOX-DELIVER MODULE We found that there are no logs from the Mailbox-Delivery module by default. In order to fix that problem please make sure you do the following: =over =item 1 Go on the SunONE Messaging Server MTA or Message Store =item 2 Become the Mail Server user (usually this should not be root but typically a user like mailsrv or whatever was defined at installation time, check for the ownership of the files in your message server instance for example) =item 3 Go to the right location, something like: I/msg-instance For example it could be: F =item 4 Do: TO BE WRITTEN =head1 EXAMPLES To process a log as produced by Netscape Messaging Server $ s1ms2dlf < mail.log s1ms2dlf will be rarely used on its own, but is more likely called by lr_log2report: $ lr_log2report nms < /var/log/mail.log =head1 VERSION $Id: s1ms2dlf.in,v 1.5 2006/07/23 13:16:34 vanbaal Exp $ =head1 COPYRIGHT Copyright (C) 2002 Stichting LogReport Foundation Copyright (C) 2002 Arnaud Taddei Copyright (C) 2002, 2003 Arnaud Gaillard Copyright (C) 2002, 2003 Jean-Yves Monnier This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program (see COPYING); if not, check with http://www.gnu.org/copyleft/gpl.html. =head1 AUTHORS Francis J. Lacoste , Arnaud Taddei , Arnaud Gaillard and Jean-Yves Monnier =cut use strict; use lib '@LR_PERL5LIBDIR@'; use File::Basename; use Lire::DlfSchema; use Lire::Email qw/sanitize splitemailadress splitrelay/; use Lire::Syslog; use Lire::Program qw/ :msg :dlf /; # The following variables are global to the entire script. In particular: # @accept_queue this array lists the references of the # messages tables for which we have # an SMTP ACCEPT line in the log. This allows the # detection of orphan lines especially at the # beginning of the log # # %deferred a table which has deferred messages as values. # # $error_on_msg # # $error_envid # # $start_time # # $dlf_maker # # $dlflines use vars qw/ @accept_queue %deferred $error_on_msg $error_envid %error_headers @hed_list $hed_index $start_time $dlf_maker $dlflines $debug $mailbox_deliver_check/; $mailbox_deliver_check = 0; #----------------------------------------------------------------------- # Function print_dlf # This program takes a reference to a message table of the form # 'delay' => 1 # 'from' => '' # 'from_domain' => 'amail1.iorange.ch' # 'from_relay_host' => '192.168.40.120' # 'from_relay_ip' => '192.168.40.120' # 'from_user' => 'root' # 'logrelay' => 'amail1' # 'msgid' => '<20020219230013.AAA23658+3977504@amail1.iorange.ch>' # 'nrcpt' => 1 # 'queueid' => 'GRSYKQ01.900' # 'rcpt' => '' # 'size' => 839 # 'stat' => 'unknown error' # 'time' => 1014159626 # 'to_relay_host' => 'localhost' # For each rcpt element it will: # - sanitize the recipient address # - split the recipient address into 2 new attributes of the message # table above: # * the recipient user part called 'to_user' # * the recipient domain part called 'to_domain' # - Once this is done, it builds a DLF record called $rec, using the # dlf_maker object based on the message table that we just updated # with the to_user and to_domain fields. This means that we create a # list of 17 items, typically: # 0 1022709631 # 1 'amail1' # 2 'GWW7SU00.L01' # 3 '<20020529220015.AAA8615+3977504@amail1.iorange.ch>' # 4 'root' # 5 'amail1.iorange.ch' # 6 '192.168.40.120' # 7 '192.168.40.120' # 8 838 # 9 0 # 10 'LIRE_NOTAVAIL' # 11 'root' # 12 'amail1.iorange.ch' # 13 'localhost' # 14 'LIRE_NOTAVAIL' # 15 'unknown_error' # 16 'LIRE_NOTAVAIL' # - eventually we print the join of the elements of the record into the # STDOUT which is likely to be an intermediary DLF file. sub print_dlf { my ( $msg ) = @_; foreach my $to ( split ",", $msg->{rcpt} ) { my $email; sanitize( "emailadress", $to, $email ); ( $msg->{to_user}, $msg->{to_domain} ) = splitemailadress( $email); my $rec = $dlf_maker->( $msg ); my @rec = @$rec; # FIXME: When can this happen? if ($#rec < 0) { trace( "*** ERROR in PRINT DLF 0 fields" ); } else { print join( " ", @rec ), "\n"; } $dlflines++; } } #----------------------------------------------------------------------- # Function print_deferred_messages # This function looks for the messages which are in the deferred table. # For each of these messages, the delay attribute of each message is # calculated and the message is printed to the DLF file. sub print_deferred_messages { my ( $time ) = @_; foreach my $msg ( values %deferred ) { $msg->{delay} = $time - $msg->{time}; print_dlf( $msg ); } %deferred = (); } #----------------------------------------------------------------------- # Function Print_Server_Messages # A function to dump server messages sub Print_Server_Messages { my($list) = @_; lr_debug( < 0; $i--) { my $msg = $accept_queue[$i-1]; # print "\nfind_msg_in_accept_queue[$i] : msgid $msg->{msgid}, from $msg->{from}\n"; if ( $msg->{msgid} eq $msgid ) { if ( !$msg->{from} || !$from ) { return $i - 1; } else { if ( $msg->{from} eq $from ) { return $i - 1; } } } } return -1; } #----------------------------------------------------------------------- # Function find_msg_in_accept_queue_by_qid # This function returns a message reference if the message-id field of # the latter and the from field are EQUAL to the qid and msgid # If there is no pattern it returns -1; sub find_msg_in_accept_queue_by_qid { my ( $msgid, $qid ) = @_; for (my $i=@accept_queue; $i > 0; $i--) { my $msg = $accept_queue[$i-1]; if ( $msg->{msgid} eq $msgid && $msg->{queueid} eq $qid ) { return $i - 1; } } return -1; } #----------------------------------------------------------------------- # Function correct_fields # Since splitting the record is 81% faster than parsing it using # m//, we use a error detection and correction schema rather than # using the solution which is more expensive but give always the # good result. # Benchmark: timing 50000 iterations of parse_match, parse_split... # parse_match: 3 wallclock secs ( 2.81 usr + 0.00 sys = 2.81 CPU) @ 17793.59/s (n=50000) # parse_split: 2 wallclock secs ( 1.55 usr + 0.00 sys = 1.55 CPU) @ 32258.06/s (n=50000) # Rate parse_match parse_split # parse_match 17794/s -- -45% # parse_split 32258/s 81% -- sub correct_fields { my ( $fields, $module, $expected ) = @_; my $str = join ":", @$fields; $str =~ s/\s+\(.*\)//g; # Indeed we have field elements like: # <3D17A93C00282D9E@freesurfmta05.sunrise.ch> # (added by postmaster@freesurf.ch) my @fields = $str =~ m/((?:<[^>]+?>,?)+|[^:]+)/g; if (@fields != $expected) { die "invalid $module record: should contains $expected fields but ", "has ", scalar @fields, " after correction\n"; } return \@fields; } #----------------------------------------------------------------------- # Function smtp_enqueued_event sub smtp_enqueued_event { my ( $log ) = @_; my $relay_host; my $relay_ip; # fields[0] <- size in blocks # fields[1] <- envelope From: address # fields[2] <- original To: address (preceded by "rfc822;") # fields[3] <- active To: address # fields[4] <- queue filename # fields[5] <- message ID # fields[6] <- user executing process # remainder # fields[7] <- sending system: fdqn/domain # fields[8] <- sending system: (fdqn/localhost [@IP]) my ( @fields ) = split ' ', $log->{content}, 9; # print "\nsmtp_enqueued_event\n"; # print "fields[0] $fields[0]\n"; # print "fields[1] $fields[1]\n"; # print "fields[2] $fields[2]\n"; # print "fields[3] $fields[3]\n"; # print "fields[4] $fields[4]\n"; # print "fields[5] $fields[5]\n"; # print "fields[6] $fields[6]\n"; # print "fields[7] $fields[7]\n"; # Fields order is # envelopeID msgID peerAddress peerHost mailFrom msgSize \ # numRecipients recipientList # In the documentation, msgID and mailFrom are switched. # $fields = correct_fields( $fields, "SMTP-Accept", 8 ) # if @$fields != 8; my $from; # if From: is an empty field, "<>" due to LOG_FORMAT=2 if ( $fields[1] eq "<>" ) { $fields[1] = ""; } # if From: is missing, field beginning with "rfc822;" is original To: if ( $fields[1] =~ m/rfc822;/ ) { $fields[8] = $fields[7]; $fields[7] = $fields[6]; $fields[6] = $fields[5]; $fields[5] = $fields[4]; $fields[4] = $fields[3]; $fields[3] = $fields[2]; $fields[2] = $fields[1]; $fields[1] = ""; } # if MTA queue filename is missing, field surrounded by "< >" is msgID if ( $fields[4] =~ m/^<.*>$/ ) { $fields[9] = $fields[8]; $fields[8] = $fields[7]; $fields[7] = $fields[6]; $fields[6] = $fields[5]; $fields[5] = $fields[4]; $fields[4] = ""; } sanitize( "emailadress", $fields[1], $from ); my ( $user, $host ) = splitemailadress( $from ); my $dlf = { time => $log->{timestamp}, logrelay => ($fields[4]) ? guess_relay($fields[4]) : "localhost", queueid => ($fields[4]) ? basename($fields[4],"") : "queueid", msgid => $fields[5], from => $fields[1], from_user => $user, from_domain => $host, size => $fields[0], # Non DLF mapped fields nrcpt => 1, rcpt => $fields[3], }; # (host @IP) host could be a fdqn, IPV4 address, WHAT ABOUT IPV6 ???? if ($fields[8]) { ($relay_host) = ( $fields[8] =~ m/\(([a-zA-Z0-9_\-\.]+)/ ); if ($fields[9]) { ($relay_ip) = ( $fields[9] =~ m/\[([0-9\.]{7,15})\]\)/ ); } } else { if ($fields[7]) { $relay_host = $fields[7]; } else { $relay_host="localhost"; } } if (! $relay_host) { $relay_host="localhost"; } elsif ( $relay_host =~ m/in-addr.arpa/ ) { if ($fields[7]) { $relay_host = $fields[7]; } } if (! $relay_ip ) { $relay_ip="127.0.0.1"; } #print "fields[8] $fields[8], relay_ip $relay_ip, relay_host $relay_host\n"; sanitize( "relayhost", $relay_host, $dlf->{from_relay_host} ); sanitize( "relayip", $relay_ip, $dlf->{from_relay_ip} ); #print "sanitize dlf : relay_ip $dlf->{from_relay_ip}, relay_host $dlf->{from_relay_host}\n"; # # print "dlf->size $dlf->{size}\n"; # print "dlf->queueid $dlf->{queueid}\n"; # print "dlf->msgid $dlf->{msgid}\n"; # print "dlf->from $dlf->{from}\n"; # print "dlf->ip-relay $dlf->{from_relay_ip}\n"; # print "dlf->host_relay $dlf->{from_relay_host}\n"; push @accept_queue, $dlf; # Check if the accept_queue seems to be growing without bounds. # If we have more than 50 messages pending on the @accept_queue, # it is really probable that logging in the Mailbox-Deliver module # wasn't turned on. if ( !$mailbox_deliver_check && @accept_queue > 50 ) { $mailbox_deliver_check = 1; lr_warn( "More than 50 incoming messages not yet delivered. Did you turned on service.smtp.mailbox-deliver.log?" ); } } #----------------------------------------------------------------------- # Function smtp_deliver_event sub smtp_deliver_event { my ( $log ) = @_; # fields[0] <- size in blocks # fields[1] <- envelope From: address # fields[2] <- original To: address (preceded by "rfc822;") # fields[3] <- active To: address # fields[4] <- queue filename (could be missing : see LOG_FILENAME) # fields[5] <- message ID # fields[6] <- user executing process # fields[7] <- relay host # fields[8] <- dns;relay host (TCP|src@IP|port|dst@IP|port) # fields[9] <- relay host message # ou bien !!! # fields[7] <- dns;relay host (TCP|src@IP|port|dst@IP|port) # fields[8] <- relay host message # ou bien : nothing !!! my ( @fields ) = split ' ', $log->{content}; if ( $fields[1] eq "<>" ) { $fields[1] = ""; } # if From: is missing, field beginning with "rfc822;" is original To: if ( $fields[1] =~ m/rfc822;/ ) { $fields[8] = $fields[7]; $fields[7] = $fields[6]; $fields[6] = $fields[5]; $fields[5] = $fields[4]; $fields[4] = $fields[3]; $fields[3] = $fields[2]; $fields[2] = $fields[1]; $fields[1] = ""; } # if MTA queue filename is missing, field surrounded by "< >" is msgID if ( $fields[4] =~ m/^<.*>$/ ) { $fields[9] = $fields[8]; $fields[8] = $fields[7]; $fields[7] = $fields[6]; $fields[6] = $fields[5]; $fields[5] = $fields[4]; $fields[4] = ""; } if (! $fields[7] ) { $fields[7] = "localhost"; } elsif ( $fields[7] =~ m/^dns;/ ) { $fields[7] =~ s/^dns;//; } if ( $fields[8] && $fields[9] ) { if ( $fields[8] =~ m/^dns;/ && $fields[9] =~ m/^(TCP|.*)$/ ) { $fields[8] = $fields[9]; } } # print "\nsmtp_deliver_event\n"; # print "size fields[0] $fields[0]\n"; # print "from fields[1] $fields[1]\n"; # print "oTo fields[2] $fields[2]\n"; # print "rTo fields[3] $fields[3]\n"; # print "queue fields[4] $fields[4]\n"; # print "msgID fields[5] $fields[5]\n"; # print "user fields[6] $fields[6]\n"; # printf "relay fields[7] %s\n", $fields[7] ? $fields[7] : "NULL"; # printf "adIP fields[8] %s\n", $fields[8] ? $fields[8] : "NULL"; # # Fields order is: # envelopeID msgID status destHost mailFrom msgSize \ # numRecipients recipientList # In the documentation, msgID and mailFrom are switched. # $fields = correct_fields( $fields, "SMTP-Deliver", 8 ) # if @$fields != 8; my $msg = $deferred{$fields[0]}; unless ( defined $msg ) { # search with msgID and From: my $i = find_msg_in_accept_queue( $fields[5], $fields[1] ); if ( $i >= 0 ) { smtp_relayforward_event( $log, \@fields, $i ); return; } my $email; sanitize( "emailadress", $fields[1], $email ); my ( $user, $host ) = splitemailadress( $email ); # This deliver event doesn't have any corresponding SMTP-Accept # It's either a bounce or its SMTP-Accept event was in a previous # log file $msg = { logrelay => ($fields[4]) ? guess_relay($fields[4]) : "localhost", queueid => ($fields[4]) ? basename($fields[4],"") : "queueid", msgid => $fields[5], from_user => $user, from_domain => $host, size => $fields[0], # Non DLF mapped fields nrcpt => 1, rcpt => $fields[3], }; # test if empty From: if ( $fields[1] eq '<>' ) { # Bounce $msg->{time} = $log->{timestamp}; $msg->{from_relay_host} = "localhost"; $msg->{from_relay_ip} = "127.0.0.1"; } else { # Log file rotation $msg->{time} = $start_time; # from_relay_host and from_relay_ip are UNKNOWN } } # unless ( defined $msg ) sanitize( "relayhost", $fields[7], $msg->{to_relay_host} ); #print "smtp_deliver_event after sanitize to_relay_host=$msg->{to_relay_host}\n"; # if ( $fields->[2] eq 'Delivered' ) # if ( $log->{entry} =~ m/^D/ ) { $msg->{stat} = 'sent'; $msg->{delay} = $log->{timestamp} - $msg->{time}; $msg->{nrcpt} = 1; $msg->{rcpt} = $fields[3]; print_dlf( $msg ); # } else { # $msg->{stat} = 'deferred'; # $deferred{$fields[0]} = $msg; # } } #----------------------------------------------------------------------- # Function smtp_failure_event sub smtp_failure_event { my ( $log ) = @_; # fields[0] <- size in blocks # fields[1] <- envelope From: address # fields[2] <- original To: address (preceded by "rfc822;") # fields[3] <- active To: address # fields[4] <- queue filename # fields[5] <- message ID # fields[6] <- user executing process # fields[7] <- error message # could be "Over quota", "Mailbox is busy", # "TCP active open: Failed connect() Error: no route to host", # "TCP active open: Failed connect() Error: connection refused", # "dns;host.some.org (TCP|206.184.139.12|2788|192.1.1.1|25) \ # (All set, fire away) smtp; 250 Ok", # Issue with this last is that it will occurs many days later !!! # split in 8 slices, keep error message in fields[8] my ( @fields ) = split ' ', $log->{content}, 8; # print "\nsmtp_failure_event\n"; # print "fields[0] $fields[0]\n"; # print "fields[1] $fields[1]\n"; # print "fields[2] $fields[2]\n"; # print "fields[3] $fields[3]\n"; # print "fields[4] $fields[4]\n"; # print "fields[5] $fields[5]\n"; # print "fields[6] $fields[6]\n"; # printf "fields[7] %s\n", $fields[7] ? $fields[7] : "NULL"; # printf "fields[8] %s\n", $fields[8] ? $fields[8] : "NULL"; # if (! $fields[7] ) { # $fields[7] = "localhost"; # } if ( $fields[1] eq "<>" ) { $fields[1] = ""; } # if From: is missing, field beginning with "rfc822;" is original To: if ( $fields[1] =~ m/rfc822;/ ) { $fields[8] = $fields[7]; $fields[6] = $fields[5]; $fields[5] = $fields[4]; $fields[4] = $fields[3]; $fields[3] = $fields[2]; $fields[2] = $fields[1]; $fields[1] = ""; } # if MTA queue filename is missing, field surrounded by "< >" is msgID if ( $fields[4] =~ m/^<.*>$/ ) { $fields[9] = $fields[8]; $fields[7] = $fields[6]; $fields[6] = $fields[5]; $fields[5] = $fields[4]; $fields[4] = ""; } # Fields order is: # envelopeID msgID status destHost mailFrom msgSize \ # numRecipients recipientList # In the documentation, msgID and mailFrom are switched. # $fields = correct_fields( $fields, "SMTP-Deliver", 8 ) # if @$fields != 8; my $msg = $deferred{$fields[0]}; unless ( defined $msg ) { # search with msgID and From: my $i = find_msg_in_accept_queue( $fields[5], $fields[1] ); if ( $i >= 0 ) { smtp_relayforward_event( $log, \@fields, $i ); return; } my $email; sanitize( "emailadress", $fields[1], $email ); my ( $user, $host ) = splitemailadress( $email ); # This deliver event doesn't have any corresponding SMTP-Accept # It's either a bounce or its SMTP-Accept event was in a previous # log file $msg = { logrelay => ($fields[4]) ? guess_relay($fields[4]) : "localhost", queueid => ($fields[4]) ? basename($fields[4],"") : "queueid", msgid => $fields[5], from_user => $user, from_domain => $host, size => $fields[0], # Non DLF mapped fields nrcpt => 1, rcpt => $fields[3], }; # test if empty From: if ( $fields[1] eq '' || $fields[1] eq '<>' ) { # Bounce $msg->{time} = $log->{timestamp}; $msg->{from_relay_host} = "localhost"; $msg->{from_relay_ip} = "127.0.0.1"; } else { # Log file rotation $msg->{time} = $start_time; # from_relay_host and from_relay_ip are UNKNOWN } } # unless ( defined $msg ) sanitize( "relayhost", $fields[7], $msg->{to_relay_host} ); #print "smtp_deliver_event after sanitize to_relay_host=$msg->{to_relay_host}\n"; # if ( $fields->[2] eq 'Delivered' ) if ( $log->{entry} =~ m/^R/ ) { $msg->{stat} = 'rejected'; $deferred{$fields[0]} = $msg; } else { $msg->{stat} = 'deferred'; $deferred{$fields[0]} = $msg; } } #----------------------------------------------------------------------- # Function smtp_replayforward_event sub smtp_relayforward_event { my ( $log, $fields, $accept_idx ) = @_; my @forward = (); my @relay = (); my $or_msg = $accept_queue[$accept_idx]; # Try to find each recipient in the original recipient list # All recipient that can't be found is a forward (alias) my @or_rcpt = split ",", $or_msg->{rcpt}; foreach my $to ( split ",", $fields->[3] ) { if ( grep { $_ eq $to } @or_rcpt ) { push @relay, $to; } else { push @forward, $to; } } # Remove from accept queue if all relay recipients covers the original # message recipient list if ( @relay >= $or_msg->{nrcpt} ) { splice @accept_queue, $accept_idx, 1; } else { # Remove the relay recipients from the msg on the accept_queue # so that they aren't marked as delivered locally later on my @new_rcpt = (); foreach my $to ( split ",", $or_msg->{rcpt} ) { # Destination will be equal in the case of a relay next if grep { $_ eq $to } @relay; push @new_rcpt, $to; } $or_msg->{rcpt} = join ",", @new_rcpt; $or_msg->{nrcpt} = @new_rcpt; } if ( @relay ) { # Create by copying my %dlf = %$or_msg; sanitize( "relayhost", $fields->[7], $dlf{to_relay_host} ); $dlf{size} = $fields->[0]; if ( $log->{entry} =~ m/^D/ ) { $dlf{stat} = 'sent'; $dlf{delay} = $log->{timestamp} - $or_msg->{time}; $dlf{nrcpt} = @relay; $dlf{rcpt} = join ",", @relay; print_dlf( \%dlf ); } else { $dlf{stat} = 'deferred'; $deferred{$fields->[0]} = \%dlf; } } if ( @forward ) { my %dlf = ( time => $log->{timestamp}, logrelay => $log->{hostname}, queueid => ($fields->[4]) ? basename($fields->[4],"") : "queueid", msgid => $fields->[5], from => $fields->[1], from_user => $or_msg->{from_user}, from_domain => $or_msg->{from_domain}, size => $fields->[0], from_relay_host => "localhost", from_relay_ip => "127.0.0.1", # Non DLF mapped fields nrcpt => scalar @forward, rcpt => join( ",", @forward), ); sanitize( "relayhost", $fields->[7], $dlf{to_relay_host} ); if ( $log->{entry} =~ m/^D/ ) { $dlf{stat} = 'sent'; $dlf{delay} = 0; print_dlf( \%dlf ); } else { $dlf{stat} = 'deferred'; $deferred{$fields->[0]} = \%dlf; } } } #----------------------------------------------------------------------- # Function smtp_deny_event sub smtp_deny_event { my ( $log, $msg ) = @_; my ( $host, $ip ) = $msg =~ /Denied TCP access to (\S+) \[([\d.]+)\]/ or die "can't extract host and ip from Denied TCP event\n"; my $dlf = $dlf_maker->( { time => $log->{timestamp}, logrelay => $log->{hostname}, from_relay_host => $host, from_relay_ip => $ip, to_relay_host => "localhost", stat => "denied tcp access", } ); print join( " ", @$dlf ), "\n"; $dlflines++; } #----------------------------------------------------------------------- # Function Handler_Error_Event # # This function is called when an Handler Error Event line is parsed in # the log sub Handler_Error_Event { my ( $log, $fields ) = @_; trace( "*** Handler_Error_Event IN: $log" ); # Fields error # envelopeID mailFrom size msgID $fields = correct_fields( $fields, "ErrorHandler", 4 ) if @$fields != 4; # Find the message to which this error is related on the # accept_queue my $i = find_msg_in_accept_queue( $fields->[3], $fields->[1] ); die "error_event: can't find message $fields->[3] in \@accept_queue\n" if $i < 0; $error_envid = $fields->[0]; $error_on_msg = $i; Update_HED('error_on_msg',$i); Update_HED_On_Index($hed_index,'message-id', $error_envid); trace( "*** Handler_Error_Event OUT!" ); Debug(); } #----------------------------------------------------------------------- # Function Is_Orphan_Handler_Error # This function is testing if the General Error: ... dump line is # orphan or not. sub Is_Orphan_Handler_Error { if (defined $error_on_msg) { return 0; } else { return 1; } } #----------------------------------------------------------------------- # Function HED_Canonicalisation # The NMS4 logs dump errors in the form # General Error: Attribute: Value # or General Error: Attribute! Value # For the second form we need to canonicalise it back to the first form. # This is the goal of this function. # In addition we sanitise and trim the values. sub HED_Canonicalisation { my($hed_attribute, $hed_value) = @_; if ($hed_attribute =~ /^\s*([-\w]+)! (.*)/) { $hed_attribute = $1; $hed_value = $2 . $hed_value if defined $2; } $hed_attribute =~ s/^\s*//; $hed_value =~ s/\s*$//; if (defined $hed_value ) { $hed_value =~ s/^\s*//; $hed_value =~ s/\s*$//; } $hed_attribute = lc $hed_attribute; return ($hed_attribute, $hed_value); } #----------------------------------------------------------------------- # Function Is_HED_End # # A function to test if the HED is finished sub Is_HED_End { my($hed_attribute) = @_; if ( $hed_attribute =~ /^\s*$/ ) { return 1; } else { return 0; } } #----------------------------------------------------------------------- sub sortbynmuber { my($a,$b); return $a <=> $b; } #----------------------------------------------------------------------- # Function Update_HED # A function to update the HED Attribute and Value in the HED_table. sub Update_HED { my($hed_attribute, $hed_value) = @_; trace( "*** Update_HED IN: att: $hed_attribute val: $hed_value" ); my ($i, $new_hed, $value); if ($hed_attribute eq 'trace') { for ($i = 0; $i < $#hed_list + 1; $i++) { $value = Get_HED($i,'trace'); if (defined $value) { my(@l) = @{${$hed_list[$i]}{'trace'}}; if ($#l < 2) { push (@{${$hed_list[$i]}{'trace'}},$hed_value); last; } else { next; } } else { push (@{${$hed_list[$i]}{'trace'}},$hed_value); last; } } } elsif ($hed_attribute eq 'channel-to') { for ($i = 0; $i < $#hed_list + 1; $i++) { $value = Get_HED($i,'channel-to'); if (defined $value) { my(@l) = @{${$hed_list[$i]}{'channel-to'}}; if ($#l < 2) { push (@{${$hed_list[$i]}{'channel-to'}},$hed_value); last; } else { next; } } else { push (@{${$hed_list[$i]}{'channel-to'}},$hed_value); last; } } } elsif ($hed_attribute eq 'message-id') { for ( $i = 0; $i < $#hed_list + 1; $i++) { $value = Get_HED($i, $hed_attribute); if (defined $value && $value eq $hed_value) { trace( "*** Update_HED QID $hed_value already exists ", "for index $hed_index"); return; } } $hed_index = Add_HED($hed_attribute, $hed_value); } else { $new_hed = 1; for ( $i = 0; $i < $#hed_list + 1; $i++) { $value = Get_HED($i, $hed_attribute); if (defined $value) { next; } else { ${$hed_list[$i]}{$hed_attribute} = $hed_value; $hed_index = $i; $new_hed = 0; last; } } $hed_index = Add_HED($hed_attribute, $hed_value) if ($new_hed); } trace( "*** Update_HED OUT:" ); Debug(); } #----------------------------------------------------------------------- # Function Update_HED_On_Index # # Update HED based on an HED index sub Update_HED_On_Index { my ($pos,$hed_attribute,$hed_value) = @_; trace( "*** Update_HED_On_Index IN: pos: $pos att: ", $hed_attribute, " val: ", $hed_value ); ${$hed_list[$pos]}{$hed_attribute} = $hed_value; trace( "*** Update_HED_On_Index OUT" ); } #----------------------------------------------------------------------- # Function Add_HED # # A function to add a new HED in the error list sub Add_HED { my($hed_attribute, $hed_value) = @_; trace( "*** Add_HED IN: att: $hed_attribute val: $hed_value" ); my %hed = ($hed_attribute, $hed_value); $hed_list[$#hed_list + 1] = \%hed; trace( "*** Add_HED OUT" ); Debug(); return ($#hed_list); } #----------------------------------------------------------------------- # Function Get_HED # # A function to retrieve a value for a given message in the HED table # and for a given attribute sub Get_HED { my($position, $attribute) = @_; my $hed; $hed = $hed_list[$position]; return $$hed{$attribute}; } #----------------------------------------------------------------------- # Function shift2 sub shift2 { my($array) = @_; return splice(@{$array},0,1); } #----------------------------------------------------------------------- # Function Remove_HED # # A function to remove an HED from the error list sub Remove_HED { my($position) = @_; trace( "*** Remove_HED IN: $position" ); if ($position == 0) { @hed_list = pop(@hed_list); } else { splice @hed_list, 0, $position; } # &Debug; } #----------------------------------------------------------------------- # Function HED_Event # # When a line is of the form # # General Error: attribute: value # or General Error: attribute! value # # this function is called and it is going to do several things: # # # - If the line is an orphan line (i.e. there is no error message that # was identified before, for example because the log rotation put the # previous information in another file) we return which means we skip # the line. Actually we should put it into a continuation mechanism # (for the future) # # we are going to store the information in an Handler_HED_table which # will store: # # index_number --> Table # Attribute Value # Attribute Value # # What we need to be careful with # # - the Handler Error Dump Events can be intermixed for different # messages and the difficulty is to make sure we detect correctly to # which QID the event line belongs. # # - to finish an Handler Error Dump and then once the Dump is done, that # we signal it to the calling program to call the processing of the # error # # # The function receives 2 arguments: # - $dump_attribute: This is the attribute name # - $dump_value: This is the value name sub HED_Event { my ( $log, $hed_attribute, $hed_value ) = @_; trace( "*** HED_Event IN: $log $hed_attribute $hed_value" ); if ($hed_value =~ /Logging Error:(.*):SMTP-Router:(.*)/) { my($qid) = $1; my($error) = $2; return; } else { # Here we make sure that the line is not an orphan line return if Is_Orphan_Handler_Error(); # Handle Channel-To! form of the attribute and canonicalise attributes and values ($hed_attribute, $hed_value) = HED_Canonicalisation($hed_attribute, $hed_value); # Here we test if we reached the end of an HED and then we # process the error because it means we have everything to do # it and produce a DLF entry if ( Is_HED_End($hed_attribute)) { Process_HED( $log->{timestamp} ); Remove_HED(0); } # Now for each of the QID messages which are in the HED_table # We assume that the threads are working in sequence so if we # read a line and we read the error tables, the first QID for # which we find that the attribute is empty is the good one. # # Of course some attributes are multivalued. Update_HED($hed_attribute,$hed_value); } trace( "*** HED_Event OUT!" ); Debug(); } #----------------------------------------------------------------------- # Function Process_Unknown_Error sub Process_Unknown_Error { my ($time ) = @_; trace( "*** Process_Unknown_Error IN: $error_on_msg" ); my $msg = $accept_queue[$error_on_msg]; $msg->{delay} = $time - $msg->{time}; $msg->{stat} = "unknown error"; $msg->{to_relay_host} = "localhost"; # Mark all recipients as having an error, altough this isn't # necessarly the case print_dlf( $msg ); splice @accept_queue, $error_on_msg, 1; $error_on_msg = undef; $error_envid = undef; trace( "*** Process_Unknown_Error OUT!" ); Debug(); } #----------------------------------------------------------------------- # Function extract_rcpt sub extract_rcpt { return undef unless defined $_[0]; return $_[0] =~ /^SMTP\s+(<.+>)$/; } #----------------------------------------------------------------------- # Function Process_HED # # The function that extracts information from an HED when this is a # complete one. sub Process_HED { my ($time ) = @_; trace( "*** Process_HED IN: $time $error_on_msg" ); my ($i,$msg,@tos,@stat,$channel_to,$diagcode,$account_to,$host_from); $msg = $accept_queue[$error_on_msg]; if (defined Get_HED($hed_index, 'error_on_msg')) { # Here we used to do a sanity check # The original code was: # die "inconsistent message ID. Expected $error_envid. Found ", # $error_headers{'message-id'}, "\n" # unless $error_headers{'message-id'} eq $error_envid; } else { Process_Unknown_Error($time); return; } $channel_to = Get_HED($hed_index, 'channel-to'); $diagcode = Get_HED($hed_index, 'diagnostic-code'); $account_to = Get_HED($hed_index, 'account-to'); $host_from = Get_HED($hed_index, 'host-from'); if ( ref $channel_to) { for ( $i=0; $i < @{$channel_to}; $i++ ) { my $to = extract_rcpt( ${$channel_to}[$i] ); $to ||= $account_to; push @tos, $to; } push(@stat,$diagcode); } if (! defined $msg->{from_relay_ip} && defined $host_from) { my $relay; sanitize( "relay", $host_from, $relay ); ( $msg->{from_relay_host}, $msg->{from_relay_ip} ) = splitrelay( $relay ); } my $rcpt = $msg->{rcpt}; my %handled = (); $msg->{to_relay_host} = "localhost"; for ($i=0; $i < @tos; $i++ ) { my $to = $tos[$i]; my $stat = lc $stat[$i]; $stat =~ s/^\d*\s*//; # Remove error code $msg->{stat} = $stat; $msg->{delay} = $time - $msg->{time}; $msg->{nrcpt}--; $msg->{rcpt} = $to; $handled{$to} = 1; print_dlf( $msg ); } if ( $msg->{nrcpt} <= 0 ){ splice @accept_queue, $error_on_msg, 1 } else { # Remove recipients which had an error my @new_rcpt = grep { ! $handled{$_} } split ",", $rcpt; $msg->{rcpt} = join ",", @new_rcpt; $msg->{nrcpt} = @new_rcpt; } trace( "*** Process_HED OUT" ); Debug(); } #----------------------------------------------------------------------- # Function Mailbox_Deliver # # A function to fix the delivery of messages to a local mailbox. We can # avoid the 10s heuristic if the right logging level is done in the # configutil of the NMS4 log. # # The entry looks like: # # 'General Notice: Mailbox-Deliver:GZAI2W00.G14::2419:1:C.Tenthorey;' sub Mailbox_Deliver { my($fields,$module,$log) = @_; my($msgidx); # Mailbox-Deliver logging was enabled $mailbox_deliver_check ||= 1; # We parse the entry $fields = correct_fields($fields,'Mailbox-Deliver',5); # We find the message in accept_queue $msgidx = find_msg_in_accept_queue_by_qid($$fields[1],$$fields[0]); if ($msgidx != -1) { # We should write the DLF and remove the accept_queue message $accept_queue[$msgidx]->{to_relay_host} = "localhost"; $accept_queue[$msgidx]->{stat} = "sent"; # In principle we could do a better calculation $accept_queue[$msgidx]->{delay} = $log->{timestamp} - $accept_queue[$msgidx]->{time}; print_dlf($accept_queue[$msgidx]); # Remove from queue splice @accept_queue, 0, $msgidx if $msgidx; } else { # if the mail is not in accept_queue, we must invent a message to be # dumped on the system my($msg); $msg->{to_relay_host} = "localhost"; $msg->{stat} = "sent"; $msg->{msgid} = $$fields[1]; $msg->{nrcpt} = $$fields[3]; $msg->{queueid} = $$fields[0]; $msg->{rcpt} = $$fields[4]; $msg->{size} = $$fields[2]; $msg->{logrelay} = $log->{hostname}; $msg->{delay} = $log->{timestamp} - $start_time; $msg->{time} = $log->{timestamp}; print_dlf($msg); } } #----------------------------------------------------------------------- # Function trace # # Output a debug-level message only when $debug is set sub trace { lr_debug( @_ ) if $debug; } #----------------------------------------------------------------------- # Function Debug # # A function to help debugging the s1ms2dlf code. # sub Debug { if ($debug) { my ($QID,$HEDE) = ('',''); my ($d_hed_index,$d_error_on_msg,$d_error_envid) = ('undef','undef','undef'); $d_hed_index = $hed_index if (defined $hed_index); $d_error_on_msg = $error_on_msg if (defined $error_on_msg); $d_error_envid = $error_envid if (defined $error_envid); my ($d_nb_accept,$d_nb_hed) = ($#accept_queue + 1, $#hed_list + 1); for (my $i = 0; $i <= $#accept_queue; $i++) { $QID = $QID . " " . ${$accept_queue[$i]}{'queueid'} if (defined ${$accept_queue[$i]}{'queueid'}); } for (my $i = 0; $i <= $#hed_list; $i++) { $HEDE = $HEDE . " " . ${$hed_list[$i]}{'error_on_msg'} if (defined ${$hed_list[$i]}{'error_on_msg'}); } lr_debug( <make_hashref2asciidlf_func( qw/time logrelay queueid msgid from_user from_domain from_relay_host from_relay_ip size delay to_user to_domain to_relay_host stat /); my $lines = 0; $dlflines = 0; my $errorlines = 0; $error_on_msg = undef; $error_envid = undef; %error_headers = (); $start_time = 0; my $end_time = 0; my @server_msg = (); $debug = 0; my $parser = new Lire::Syslog; init_dlf_converter( "email" ); my $failed_line = undef; while ( <> ) { my $l = $_; chomp; $lines++; # Look for ^M in the log file which fooled the logging system if ( /\r$/ ) { $failed_line .= $_; next; } elsif ( defined $failed_line) { $_ = $failed_line . $_; $failed_line = undef; } eval { # In the following we are building a reference called $log to a table # of the form: # 'content' => 'Error-Handler:...' # 'facility' => "Account" # 'hostname' => 'amail1' # 'identifier' => undef # 'level' => "Information" # 'pid' => 23494 # 'process' => 'smtpd' # 'timestamp' => 1014159627 # print "parse $_\n\n"; my $log = $parser->parse( $_ ); # die "not a smtpd log line\n" unless $log->{process} eq 'smtpd'; die "in_channel and type_entry must be defined\n" unless exists $log->{in_chan} && exists $log->{entry}; $start_time = $log->{timestamp} unless $start_time; $end_time = $log->{timestamp} if $log->{timestamp} > $end_time; # print "$start_time $end_time \n"; # print "$log->{pid} $log->{count} $log->{in_chan} $log->{out_chan} $log->{entry}\n\n"; # At this stage we are building 3 variables: # Variable Typical value # --------------------------------- # $module Error-Handler # @fields 0 'GWW7SV01.L00' # 1 '' # 2 841 # 3 '<20020529220013.AAA8579+3977504@ama...>' # my ( $from, $oriTo, $curTo, @fields ) = split /:/, $log->{content}; # Happens at the end of the error dump #$module = "" unless defined $module; SWITCH: for ( $log->{entry} ) { /E|EA|ES|ESA/ && do { smtp_enqueued_event( $log ); last SWITCH; }; /D|DA|DS|DSA/ && do { smtp_deliver_event( $log ); last SWITCH; }; # /Denied TCP/ && do { # smtp_deny_event( $log, join( ":", $log ) ); # last SWITCH; # }; /J/ && do { # Skip those messages # push(@server_msg,$l); last SWITCH; }; /I/ && do { # ETRN should have been disabled, reconsider your config push(@server_msg,$l); last SWITCH; }; /Q|R|W|Z/ && do { # failure in dequeing message # log is the same as a local delivery plus the error smtp_failure_event( $log ); last SWITCH; }; /C|O|X|Y/ && do { # log connection should not be set # check that LOG_CONNECTION is set to 3 in mta_option.dat # Skip those messages last SWITCH; }; # $log->{facility} eq "General" && # $log->{level} eq 'Error' && do { # if ($log->{content} =~ /SSL/) { # push(@server_msg,$l); # last SWITCH; # } # lr_debug( "*** $l" ) if $debug; # # HED_Event( $log, $module, join( ":", @fields ) ); # last SWITCH; # }; # /Error-Handler/ && do { # lr_debug( "*** $l" ) if ($debug); # # We skip if this is the header before the HED is # # dumped. The useful data are in the HED # if ($log->{content} =~ /General Notice: Error-Handler/) { # last SWITCH; # } # # If this is an HED then we process it. # #Handler_Error_Event( $log, \@fields ); # last SWITCH; # }; # Unknown message die "unknown module: \n"; }; }; if ($@) { lr_warn( $@ ); lr_warn( "failed to parse line $. '$_'. Skipping." ); $errorlines++; } } # At the end of the file we should treat the last errors # eval { Process_HED( $end_time ) if $error_envid }; # if ($@) { lr_warn( $@ ) }; Print_Server_Messages(\@server_msg); print_deferred_messages( $end_time ); end_dlf_converter( $lines, $dlflines, $errorlines ); __END__ # Local Variables: # mode: cperl # End: