#! @PERL@ -w # vim:syntax=perl use strict; use lib '@LR_PERL5LIBDIR@'; use vars qw/%conn_cache %skipped @new_conn $dlf_maker $dlflines/; use Lire::DlfSchema; use Lire::Email; use Lire::Program qw( :msg :dlf ); use Time::Local; sub date2epoch { my ( $mon, $day, $year, $hour, $min, $sec, $ampm ) = $_[0] =~ m!^^(\d+)/(\d+)/(\d+) (\d+):(\d+):(\d+) (AM|PM)! or die "invalid ArGoSoft timestamp: $_[0] (should be mm/dd/yyyy HH:MM:SS AM|PM)\n"; # Checks are handled in timelocal $mon -= 1; $year -= 1900; $hour += 12 if $ampm eq 'PM'; timelocal( $sec, $min, $hour, $day, $mon, $year ); } sub lex_record { # Remove DOS line ending $_[0] =~ s/\r?\n$//; # Returns date, conn_id, msg return $_[0] =~ /^(.*?) - (?:[\[\(]\s*(\d+)[\]\)] )?(.*)$/ or die "Invalid ArGoSoft Mail Server log line\n"; } # We use a FIFO array to hold the different messages because ArGoSoft # log file doesn't contains any cross-connection (smtp-in, smtp-out, # local delivery, etc.) identifier. So we use the from and to # addresses. # # INVARIANT: We assume that ArGoSoft will deliver the message in the # order it received them. my @msg_queue; sub find_msg_in_queue { my ( $from, $to, $start_from_end ) = @_; # Find a message in the queue that was addressed to # those addresses if (!$start_from_end) { for (my $i=0; $i < @msg_queue; $i++ ) { my $msg = $msg_queue[$i]; next if defined $from && defined $msg->{from} && $msg->{from} ne $from; next if defined $to && defined $msg->{to} && $msg->{to} ne $to; # Found, keep the position in the message queue of the message $msg->{_queue_pos} = $i; return $msg; } } else { for (my $i=$#msg_queue; $i >= 0; $i-- ) { my $msg = $msg_queue[$i]; next if defined $from && defined $msg->{from} && $msg->{from} ne $from; next if defined $to && defined $msg->{to} && $msg->{to} ne $to; # Found, keep the position in the message queue of the message $msg->{_queue_pos} = $i; return $msg; } } return undef; } sub enqueue_msg { my ( $msg ) = @_; push @msg_queue, $msg; $msg->{_queue_pos} = $#msg_queue; } sub dequeue_msg { my ( $msg ) = @_; die "Invalid msg: missing _queue_pos attribute\n" unless defined $msg->{_queue_pos}; die "Invalid msg: invalid _queue_pos attribute: $msg->{_queue_pos}\n" if $msg->{_queue_pos} < 0 || $msg->{_queue_pos} > $#msg_queue; splice @msg_queue, $msg->{_queue_pos}, 1; } sub flush_queue { foreach my $msg ( @msg_queue ) { lr_warn( "Message from $msg->{from} to $msg->{to} still in queue but stat isn't 'deferred'" ) if $msg->{stat} ne 'deferred'; print_argo_dlf( $msg ); } } sub delivery_dlf { my ( $date, $conn, $stat, $xstat, $final ) = @_; my $time = date2epoch( $date ); foreach my $to ( @{$conn->{rcpt_to}} ) { my $msg; if ( $conn->{proto} eq 'smtp-in' ) { $msg = {}; $msg->{time} = $conn->{time_start}; $msg->{from} = $conn->{mail_from}; $msg->{to} = $to; $msg->{from_relay_host} = $conn->{from_host}; $msg->{from_relay_ip} = $conn->{from_ip}; $msg->{delay} = $time - $msg->{time}; enqueue_msg( $msg ); } elsif ( $conn->{proto} eq 'smtp-out' || $conn->{proto} eq 'delivery' ) { $msg = find_msg_in_queue( $conn->{mail_from}, $to ); # Try a bounce $msg = find_msg_in_queue( "<>", $to ) unless $msg; unless ($msg ) { lr_notice( "can't find origin of message from $conn->{mail_from} to $to at line $. assuming webmail post" ); $msg = { time => $conn->{time_start}, from => $conn->{mail_from}, from_relay_host => "webmail", from_relay_ip => "127.0.0.1", to => $to }; enqueue_msg( $msg ); } if ( $conn->{proto} eq 'delivery' ) { $msg->{to_relay_host} = "localhost"; $msg->{to_relay_ip} = "127.0.0.1"; } else { $msg->{to_relay_host} = $conn->{to_host}; $msg->{to_relay_ip} = $conn->{to_ip}; } $msg->{delay} = $time - $msg->{time}; } else { die "Unknown protocol in delivery_dlf: $conn->{proto}\n"; } $msg->{stat} = $stat if defined $stat; $msg->{xstat} = $xstat if defined $xstat; if ( $final ) { print_argo_dlf( $msg ); dequeue_msg( $msg ) } } } sub bounce_msg { my ( $date, $conn ) = @_; my $time = date2epoch( $date ); my $msg = {}; $msg->{time} = $conn->{time_start}; $msg->{from} = "<>"; $msg->{to} = $conn->{mail_from}; $msg->{from_relay_host} = "localhost"; $msg->{from_relay_ip} = "127.0.0.1"; $msg->{delay} = 0; enqueue_msg( $msg ); } sub print_argo_dlf { my ( $dlf ) = @_; if ( defined $dlf->{from} ) { my $email; sanitize( "emailadress", $dlf->{from}, $email ); ($dlf->{from_user}, $dlf->{from_domain}) = splitemailadress( $email ); } if ( defined $dlf->{to} ) { my $email; sanitize( "emailadress", $dlf->{to}, $email ); ($dlf->{to_user}, $dlf->{to_domain}) = splitemailadress( $email ); } sanitize( "relayhost", $dlf->{from_relay_host}, $dlf->{from_relay_host}) if $dlf->{from_relay_host}; sanitize( "relayhost", $dlf->{to_relay_host}, $dlf->{to_relay_host}) if $dlf->{to_relay_host}; sanitize( "relayip", $dlf->{from_relay_ip}, $dlf->{from_relay_ip}) if $dlf->{from_relay_host}; sanitize( "relayip", $dlf->{to_relay_ip}, $dlf->{to_relay_ip}) if $dlf->{to_relay_ip}; print join( " ", @{$dlf_maker->( $dlf )}), "\n"; $dlflines++; } sub new_connection { my ( $date, $proto, $host, $ip ) = @_; push @new_conn, { proto => $proto, from_host => $host, from_ip => $ip }; } sub start_connection { my ( $date, $conn_id, $msg ) = @_; my $proto; if ( $msg =~ /^220 ArGoSoft/ ) { $proto = "smtp-in"; } elsif ( $msg =~ /^\+OK ArGoSoft/ ) { $proto = "pop3"; } elsif ( $msg =~ /^(GET|POST|PUT|HEAD)/ ) { $proto = "web"; } else { die "Can't determine protocol from $msg\n"; } # Try to find a matching new connection for that line for (my $i=0; $i < @new_conn; $i++ ) { if ( $new_conn[$i]{proto} eq $proto ) { # Found $conn_cache{$conn_id} = { conn_id => $conn_id, time_start => date2epoch( $date ), %{$new_conn[$i]}, state => "start", }; splice @new_conn, $i, 1; return 1; } } return 0; } sub end_connection { my ( $date, $conn_id ) = @_; delete $conn_cache{$conn_id}; } sub start_delivery { my ( $date, $conn_id, $to_email ) = @_; my $nrcpt; if ( $to_email =~ /^(\d+) recipients$/ ) { $nrcpt = $1; $to_email = undef; } else { $nrcpt = 1; } $conn_cache{$conn_id} = { conn_id => $conn_id, proto => "delivery", time_start => date2epoch( $date ), nrcpt => $nrcpt, delivering_to => $to_email, } } my %smtp_state = ( start => \&smtp_start, mail_from_reply => \&smtp_mail_from_reply, rcpt_to => \&smtp_rcpt_to, rcpt_to_reply => \&smtp_rcpt_to_reply, data => \&smtp_data, ); sub smtp_reset { my ( $conn ) = @_; $conn->{state} = "start"; delete $conn->{mail_from}; delete $conn->{rcpt_to}; delete $conn->{last_rcpt}; } sub smtp_start { my ( $date, $conn, $msg ) = @_; if ( $msg =~ /^MAIL FROM:\s*(<[^>]*>|\S+)/ ) { sanitize( "emailadress", $1, $conn->{mail_from} ); $conn->{state} = "mail_from_reply"; } elsif ( $msg =~ /^RSET/ ) { smtp_reset( $conn ); } elsif ( $conn->{proto} eq 'smtp-out' ) { # Check for delivery status message smtp_delivery( $date, $conn, $msg ); } # Ignore other messages in the start state } sub smtp_mail_from_reply { my ( $date, $conn, $msg ) = @_; if ( $msg =~ /^250/ ) { if ( $msg =~ /^250 Sender "([^"]*) OK.../ ) { # ArGoSoft code $conn->{mail_from} = $1; } $conn->{state} = "rcpt_to"; } elsif ( $msg =~ /^(Checking|Address)/ ) { # Ignore those informational messages } elsif ( $msg =~ /^451 (.*)/ ) { # Mail from address rejected # The DLF record will be output when we see the Fatal Error message. # # FIXME: Check how the log looks in the case of failing SMTP relaying # attempt in inbound connections $conn->{state} = "start"; } elsif ( $msg =~ /^Error: \[(\d+)\](.*)/ ) { # WinSock error delivery_dlf( $date, $conn, "winsock error", $2, 1); } else { die "Unknown message in smtp_mail_from state\n"; } } sub smtp_rcpt_to { my ( $date, $conn, $msg ) = @_; if ( $msg =~ /^RCPT TO:\s*(<[^>]*>|\S+)/ ) { sanitize( "emailadress", $1, $conn->{last_rcpt} ); $conn->{state} = "rcpt_to_reply"; } elsif ( $msg =~ /^RSET/ ) { smtp_reset( $conn ); } elsif ( $msg =~ /^DATA/ ) { $conn->{state} = "data"; } elsif ( $msg =~ /^QUIT/ ) { $conn->{state} = "start"; } else { die "Unknown message in rcpt_to state\n"; } } sub smtp_rcpt_to_reply { my ( $date, $conn, $msg ) = @_; if ( $msg =~ /^250/ ) { if ( $msg =~ /^250 Recipient "([^"]*) OK.../ ) { # ArGoSoft code push @{$conn->{rcpt_to}}, $1; } elsif ( $msg =~ /^250/ ) { # Other server's code push @{$conn->{rcpt_to}}, $conn->{last_rcpt}; } delete $conn->{last_rcpt}; $conn->{state} = "rcpt_to"; } elsif ( $msg =~ /^(451|550) (.*)/ ) { # Recipient rejected # In the case of outgoing SMTP connections, we will # output the DLF record when we see the Fatal Error message if ( $conn->{proto} eq 'smtp-in' ) { my $old_to = $conn->{to}; $conn->{to} = [ $conn->{last_rcpt} ]; delivery_dlf( $date, $conn, $1, $1, 1 ); $conn->{to} = $old_to; } delete $conn->{last_rcpt}; $conn->{state} = "rcpt_to"; } else { die "Unknown message in smtp_rcpt_to_reply state\n"; } } sub smtp_data { my ( $date, $conn, $msg ) = @_; # Skip, lines like # 3/17/2002 3:45:49 PM - [ 601] return unless defined $msg && length $msg; if ( $msg =~ /^354 /) { # Beginning of the message # In the case of incoming SMTP connection, # we have to enqueue the message now, even though we didn't # received the 250 confirmation yet because the delivering message # often appears before it. delivery_dlf( $date, $conn, "sent", $1 ) if $conn->{proto} eq 'smtp-in'; } elsif ( $msg =~ /^550 (.*)/ ) { # Message rejected for some reason: content filtering, # message size, etc. # In the case of outgoing SMTP connections, we will # output the DLF record when we see the Fatal Error message if ( $conn->{proto} eq 'smtp-in' ) { my $xstat = $1; my $stat; if ( $xstat =~ /filter/ ) { $stat = "content_rejected"; } elsif ( $xstat =~ /size/ ) { $stat = "size"; } else { $stat = "unknown_error"; } foreach my $to ( @{$conn->{rcpt_to}} ) { my $msg = find_msg_in_queue( $conn->{mail_from}, $to ); # Since we queued the message on the 354, this shoudn't # happen die "missing message from $conn->{mail_form} to $to\n" unless $msg; $msg->{stat} = $stat; $msg->{xstat} = $xstat; print_argo_dlf( $msg ); dequeue_msg( $msg ); } } $conn->{state} = "start"; } elsif ( $msg =~ /^250 (.*)/ ) { # Message accepted for delivery delivery_dlf( $date, $conn, "sent", $1 ) if $conn->{proto} eq 'smtp-out'; $conn->{state} = "start"; } elsif ( $msg =~ /^\./ ) { # End of the message ; } else { die "Unknown message in smtp_data state\n"; } } # HEURISTICS: We assume that ArGoSoft will deliver only to one address at # a time. sub smtp_delivery { my ( $date, $conn, $msg ) = @_; if ( $msg =~ /^Message from.*?relayed/ ) { # Completed relaying delivery_dlf( $date, $conn, undef, undef, 1 ); smtp_reset( $conn ); $conn->{nrcpt}--; $conn->{proto} = "delivery"; } elsif ( $msg =~ /^Fatal SMTP Error: .*?\.\.\. (.*?) Will bounce back/ ) { # Bounce # Make sure we have a valid rcpt_to $conn->{rcpt_to} = [ $conn->{delivering_to} ] unless defined $conn->{rcpt_to}; delivery_dlf( $date, $conn, "bounced", $1, 1 ); # Add a bounce message to the queue bounce_msg( $date, $conn ); $conn->{nrcpt}--; $conn->{proto} = "delivery"; } elsif ( $msg =~ /^SMTP Server Error: (.*?) Will retry in/ ) { # Temporary failure delivery_dlf( $date, $conn, "deferred", $1 ); $conn->{nrcpt}--; $conn->{proto} = "delivery"; } # Remove from connection cache if this was the last recipient delete $conn_cache{$conn->{conn_id}} unless $conn->{nrcpt}; } sub updt_delivery { my ( $date, $conn, $msg ) = @_; if ( $msg =~ /^Forwarding mail to (.*)/ ) { # Forward my $forward = $1; die "ArGoSoft's logging s*cks: can't determine original destinator in forward to $forward\n" unless $conn->{delivering_to}; # Change the recipient of the message # HEURISTICS: Forward log messages always seem to appear # right after the 354 or 250 answer to the DATA command # The related message is thus nearer then end of the queue # rather than at the beginning $msg = find_msg_in_queue( undef, $conn->{delivering_to}, 1 ); $msg->{to} = $forward if ( $msg ); $conn->{nrcpt}--; } elsif ( $msg =~ /^Copy of the message from (\S+) kept in mailbox (.*)/ ){ # Copy before forward delivery_dlf( $date, $conn, "sent", undef ); } elsif ( $msg =~ /^Message from (.*?) delivered to (.*)/ ) { # Completed local delivery $conn->{mail_from} = $1; $conn->{rcpt_to} = [ $2 ]; delivery_dlf( $date, $conn, "sent", undef, 1); $conn->{nrcpt}--; } elsif ( $msg =~ /^Trying the server (\S+): ([\d.]+)/ ) { # Outgoing connection $conn->{proto} = "smtp-out"; $conn->{to_host} = $1; $conn->{to_ip} = $2; $conn->{state} = "start"; } elsif ( $msg =~ /^(Attempting to deliver|Retrieved \d+ MX records)/) { # Start of outgoing SMTP connection } else { die "Unknown message in updt_delivery state\n"; } # Remove from connection cache if this was the last recipient delete $conn_cache{$conn->{conn_id}} unless $conn->{nrcpt}; } sub parse_record { my ( $date, $conn_id, $msg ) = lex_record( $_[0] ); if ( defined $conn_id ) { my $conn = $conn_cache{$conn_id}; if ( $msg =~ /Delivering to (.*)/ ) { start_delivery( $date, $conn_id, $1 ); return; } elsif ( ! defined $conn ) { # This may be part of a new connection return if @new_conn && start_connection( $date, $conn_id, $msg ); # Connection that started before the beginning of # the log file lr_notice( "Skipping connection $conn_id which started before the beginning of the log file" ) unless $skipped{$conn_id}; $skipped{$conn_id} = 1; return; } if ( $conn->{proto} eq 'smtp-in' || $conn->{proto} eq 'smtp-out' ) { $smtp_state{$conn->{state}}->( $date, $conn, $msg ); } elsif ( $conn->{proto} eq 'delivery' ) { updt_delivery( $date, $conn, $msg ); } # Ignore other connections 'type } elsif ( $msg =~ /^Requested (SMTP|POP3|Web) connection from ([\d.]+) \[([^\]]+)\]/ ) { # Start of connection my $proto = lc $1; $proto .= "-in" if $proto eq 'smtp'; new_connection( $date, $proto, $3, $2 ); } elsif ( $msg =~ /ended. ID=(\d+)/ ) { # End of connection end_connection( $date, $1 ); } elsif ( $msg =~ /stopped|started/ ) { # INVARIANT: We assume that the servers can't be # restarted independantly from one another. # Reset connection cache lr_notice( "Servers restarted at line $. but there are still active connections: ", join ( ", ", keys %conn_cache ) ) if keys %conn_cache; %conn_cache = (); %skipped = (); } else { die "Unknown record\n"; } } my $lines = 0; $dlflines = 0; my $errorlines = 0; my $schema = eval { Lire::DlfSchema::load_schema( "email" ); }; lr_err( "error loading email schema: $@" ) if $@; $dlf_maker = $schema->make_hashref2asciidlf_func( qw/time from_user from_domain from_relay_host from_relay_ip delay to_user to_domain to_relay_host to_relay_ip stat xstat/ ); @new_conn = (); %conn_cache = (); %skipped = (); @msg_queue = (); init_dlf_converter( "email" ); my $line; while ( defined( $line = <> ) ) { $lines++; eval { parse_record( $line ) }; if ($@) { lr_warn( $@ ); lr_warn( "failed to parse '$line'. Skipping." ); $errorlines++; } } if ( keys %conn_cache ) { lr_notice( "There are still active connections at the end of the log file: ", join( ", ", keys %conn_cache )) } # Output all pending messages flush_queue(); end_dlf_converter( $lines, $dlflines, $errorlines ); __END__ =pod =head1 NAME argomail2dlf - converts ArGoSoft Mail Server log files to the email DLF format =head1 SYNOPSIS B =head1 DESCRIPTION B converts a ArGoSoft Mail Server log file to email DLF format. Information on the ArGoSoft Mail Server can be found on http://www.argosoft.com/applications/mailserver/. The generic email DLF format is described in email.xml. This DLF converter was developed and tested with the ArGoSoft Mail Server Pro. It could work with the Free and Plus versions, but it wasn't tested. =head1 LOGGING CONFIGURATION To operate properly with Lire, you have to make sure that the following logging features are enabled: - Log SMTP commands. - Log SMTP conversations. - Log to File. The other two options (Log POP Commands and Log Web Commands) aren't needed and can be left unchecked. =head1 LIMITATIONS Due to the limitations in the logging system of ArGoSoft Mail Server, the following information isn't available: - size of messages - message's ID Also, since ArGoSoft doesn't log "queue identifier" (that is an identifier that can be used to track the delivery of a message across the different components), we have to rely on heuristics and assumed invariants to track the messages. We use the sender and recipient addresses to track the messages. =over 4 =item Delivery in order We assume that ArGoSoft Mail Server will deliver its messages in the order it received them. =item One recipient delivery We assume that ArGoSoft Mail Server will only deliver to one recipient at a time. That is that if a message was adressed to two recipients to the same remote demain, it will use two connections to deliver this message. =item Local delivery happens immediately We assume that local delivery (and forward) will happen right after the 354 or 250 status code is logged. We have to rely on this, because the log messages related to forwarding don't mention the sender. =item Unknown messages were delivered through the web Messages that are delivered but weren't sent through SMTP are presumed to come from the webmail relay (sent through the web interface). They could also have been sent before the start of the log file. =back =head1 EXAMPLES To process a log as produced by argomail $ argomail2dlf < mail.log argomail2dlf will be rarely used on its own, but is more likely called by lr_log2report: $ lr_log2report argomail < /var/log/mail.log =head1 SEE ALSO exim2dlf(1), postfix2dlf(1), sendmail2dlf(1), qmail2dlf(1) =head1 VERSION $Id: argomail2dlf.in,v 1.10 2006/07/23 13:16:34 vanbaal Exp $ =head1 COPYRIGHT Copyright (C) 2000, 2001 Stichting LogReport Foundation LogReport@LogReport.org This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program (see COPYING); if not, check with http://www.gnu.org/copyleft/gpl.html. =head1 CREDITS We would like to thanks denon from denon.cx for contributing sample log files and helping debug this DLF converter. =head1 AUTHOR Francis J. Lacoste =cut # Local Variables: # mode: cperl # End: