#! @PERL@ -w

# vim:syntax=perl

# spamassassin2dlf - convert spamd syslog output into Lire spamfilter DLF.
#
# spamd logs two lines per scanned message, both tagged with the pid of
# the process that handled it:
#
#   processing message <msgid> for user:uid, expecting N bytes.
#   clean message (score/threshold) for user:uid in T seconds, N bytes.
#
# (or "identified spam ..." instead of "clean message ...").  The first
# line is remembered per pid; when the matching result line shows up, one
# DLF record is written to STDOUT.

use strict;

use lib '@LR_PERL5LIBDIR@';

use Lire::DlfSchema;
use Lire::Syslog;
use Lire::Program qw/:msg :dlf/;

use vars qw/ $dlf_maker $dlflines $debug /;

# Write one DLF record to STDOUT and count it.
#
# Takes a hash reference keyed by DLF field name.  $dlf_maker turns it
# into an array reference of field values, which are printed
# space-separated on a single line.
sub print_dlf {
    my ( $record ) = @_;

    my $dlf = $dlf_maker->( $record );
    print join( " ", @$dlf ), "\n";
    $dlflines++;
}

my $schema = eval { Lire::DlfSchema::load_schema( "spamfilter" ) };
lr_err( "failed to load spamfilter schema: $@" ) if $@;

# Field order of the emitted DLF lines.  host_originating and
# ip_originating are listed but never filled in by this converter (see
# LIMITATIONS in the POD below).
$dlf_maker = $schema->make_hashref2asciidlf_func(
    qw/time localserver host_originating ip_originating msgid user
       msgsize_in msgsize_out time_elapsed spam_score spam_result/
);

my $lines      = 0;
$dlflines      = 0;
my $errorlines = 0;
$debug         = 0;

my $syslog_parser = Lire::Syslog->new;

init_dlf_converter( "spamfilter" );

# Messages announced by a "processing message" line that have not been
# completed by a result line yet, keyed by spamd pid.
my %data = ();

while ( <> ) {
    chomp;
    $lines++;

    my $rec = eval { $syslog_parser->parse( $_ ) };
    if ( $@ ) {
        lr_warn( "line $. is an invalid syslog message: $@" );
        $errorlines++;
        # Nothing usable was parsed; do not fall through with an
        # undefined record.
        next;
    }

    next unless defined $rec->{process} && $rec->{process} =~ /^spamd/;

    # A missing pid is mapped to the empty string, which is the key Perl
    # would use anyway, but without the "uninitialized" warning under -w.
    my $pid  = defined $rec->{pid} ? $rec->{pid} : '';
    my $line = $rec->{content};

    if ( $line =~ /^processing message \<(.+?)\> for (.+?):\d+(, expecting (\d+) bytes)?\. *$/ ) {
        # Start of a message: remember it until the result line arrives.
        # The "expecting N bytes" part is optional, hence the 0 default.
        $data{$pid} = {
            msgid      => $1,
            user       => $2,
            msgsize_in => $4 || 0,
        };
        next;
    }

    if ( $line =~ /^(clean message|identified spam) \((-?[\d.]+)\/[\d.]+\) for .+?:\d+ in +([\d.]+) seconds, (\d+) bytes./ ) {
        # Save the captures before anything else can clobber them.
        my ( $verdict, $score, $elapsed, $size_out ) = ( $1, $2, $3, $4 );

        # Ignore leftovers from previous logfiles: a result line without
        # a preceding "processing message" line for the same pid.  The
        # lookup is done in two steps so that it does not autovivify an
        # entry in %data for every such pid.
        my $pending = $data{$pid};
        next unless $pending && defined $pending->{msgid};

        my %dlf = (
            spam_result  => ( $verdict eq "identified spam" ? 1 : 0 ),
            spam_score   => $score,
            time_elapsed => $elapsed,
            msgsize_out  => $size_out,
            msgid        => $pending->{msgid},
            user         => $pending->{user},
            msgsize_in   => $pending->{msgsize_in},
            time         => $rec->{timestamp},
            localserver  => $rec->{hostname},
        );
        print_dlf( \%dlf );

        # Drop the entry completely so %data does not grow with every
        # pid seen in the log.
        delete $data{$pid};
        next;
    }
}

end_dlf_converter( $lines, $dlflines, $errorlines );

__END__

=pod

=head1 NAME

spamassassin2dlf - convert SpamAssassin's log into Lire spamfilter DLF format

=head1 SYNOPSIS

B<spamassassin2dlf>

=head1 DESCRIPTION

This program converts SpamAssassin's ( http://spamassassin.org/ ) spamd
syslog files to the Lire spamfilter DLF.

=head1 LIMITATIONS

The originating host isn't used (yet) because there is no way to link
the receiving process and its children.

=head1 EXAMPLES

To process a log as produced by SpamAssassin:

 $ spamassassin2dlf < spamd-log

spamassassin2dlf will be rarely used on its own, but is more likely
called by lr_log2report:

 $ lr_log2report spamassassin < /var/log/spamd-log

=head1 BUGS

This manpage should feature an example SpamAssassin logfile snippet.

=head1 SEE ALSO

http://spamassassin.org

=head1 VERSION

$Id: spamassassin2dlf.in,v 1.7 2005/08/24 14:24:52 vanbaal Exp $

=head1 AUTHORS

Edwin Groothuis

=head1 COPYRIGHT

Copyright (C) 2002 Edwin Groothuis

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

3. The name of the copyright holder may not be used to endorse or
promote products derived from this software without specific prior
written permission.

THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

=cut