#! @PERL@ -w # vim:syntax=perl use strict; use lib '@LR_PERL5LIBDIR@'; use Lire::Config; use Lire::Program qw( :msg tempdir tempfile $PROG $LR_ID ); use Getopt::Long; use Mail::Address; use MIME::Parser; use MIME::WordDecoder; use File::Path qw/rmtree/; use File::Copy qw/move/; my $usage = "Usage: $PROG [-c type]"; my %opts; GetOptions( \%opts, "content-type=s" ) or lr_err( $usage ); my $tmpdir = eval { tempdir( "$PROG.$LR_ID.XXXXXX" ) }; lr_err( $@ ) if $@; my $parser = new MIME::Parser; $parser->output_dir( $tmpdir ); $parser->extract_uuencode(1); my $file = @ARGV ? $ARGV[0] : "-"; my $msg = eval { $parser->parse_open( $file ) }; if ($@) { # Report error my $head = $parser->last_head; print "${PROG}_OK=\n"; eval { print_header_vars( $head ) }; lr_err( $@ ); } eval { print_header_vars( $msg->head ); print_attachment( $msg, $opts{'content-type'} ); }; lr_err( $@ ) if $@; print qq{${PROG}_OK="1"\n}; # Unless we are searching for a specific content-type, # the extracted attachment will be the biggest one, so as to # skip small messages or signatures. sub print_attachment { my ( $msg, $type ) = @_; my ($fh, $tmpfile) = tempfile( "$PROG.$LR_ID.XXXXXX" ); my $target = find_attachment( $msg, $type ); unless ($target) { # Empty body print qq{${PROG}_FILE=""\n}; unlink $tmpfile; close $fh; return; } my $body = $target->bodyhandle; if ( $body->path ) { close $fh; # File is on disk, rename the file move( $body->path, $tmpfile ) or lr_err( "error renaming ", $body->path, " to $tmpfile: $!" ); } else { print $fh $body->as_string; close $fh; } print qq{${PROG}_FILE="$tmpfile"\n}; } sub find_attachment { my ( $ent, $type ) = @_; my $size = 0; my $part; # Iterate through all possible bodies: either the top-level body # in the case of a non-multipart message. Or all sub parts. my @possible = $ent->parts ? $ent->parts : $ent; foreach my $candidate ( @possible ) { # Possibly recurse if ( $candidate->parts ) { $candidate = find_attachment( $candidate, $type ); next unless $candidate; } if ( $type ) { return $candidate if ( $candidate->mime_type eq $type ); } else { # This is the one if it's bigger than the one we already # found. This heuristic may fail in the case that a really # log file smaller than a possible signature is sent, but # it's already an improvement over always returning the first or # last attachment. my $body = $candidate->bodyhandle; next unless $body; if ( $body->path ) { if ( -s $body->path > $size ) { $size = -s $body->path; $part = $candidate; } } else { if ( length $body->as_string > $size ) { $size = length $body->as_string; $part = $candidate; } } } } return $part; } sub extract_email { my ( $header ) = @_; return undef unless defined $header; my @addrs = Mail::Address->parse( unmime( $header) ); return undef unless @addrs; # _SUBMITTER is space separated in case of multiple addresses return join ' ', map { $_->address } @addrs; } sub print_header_vars { my ($head) = @_; my $from = extract_email( $head->get( "from" ) ); my $reply_to = extract_email( $head->get( "reply-to" ) ); my $sender = extract_email( $head->get( "sender" ) ); my $to = extract_email( $head->get( "to" ) ); my $submitter = $reply_to ? $reply_to : ( $from ? $from : $sender ); die "can't determine submitter\n" unless $submitter; print qq{${PROG}_SUBMITTER='$submitter'\n}; print qq{${PROG}_TO='$to'\n}; my $subject = $head->get( "subject" ); if ( defined $subject ) { $subject = unmime( $subject ); # Remove all non-safe characters: # Safe characters are: # alphanumeric characters # punctuation and other glyphs: @[](){}*=+-%^:.,;?#|/ # spaces: ' ' and \t # Extended Latin 1: 160-255 $subject =~ s{[^-\w\d\@\Q[](){}*=+-%^:.,;?#/|\E\xA8-\xFF \t]+}{}g; print qq{${PROG}_SUBJECT='$subject'\n}; } else { print qq{${PROG}_SUBJECT=''\n}; } my $date = $head->get( "date" ); if ( defined $date ) { $date = unmime( $date ); $date =~ tr/a-zA-Z0-9:+ -//cd; print qq{${PROG}_DATE='$date'\n}; } else { print qq{${PROG}_SUBJECT=''\n}; } } END { if ( defined $tmpdir && -d $tmpdir ) { if ( Lire::Config->get( 'lr_keep' ) ) { lr_info( "keeping temporary files in $tmpdir on your request" ); } else { rmtree( $tmpdir, 0, 1 ); } } } # Local Variables: # mode: cperl # End: __END__ =pod =head1 NAME B - Extract file, sender and subject information from an email =head1 SYNOPSIS eval `B [B<-c I>] ` =head1 DESCRIPTION B reads an email message on stdin (or specified as argument), will extract the submitter address and the subject from that email. It will also extract the biggest attachment or the first one having a content-type matching the I argument. This information is passed to the caller by printing that information in a format suitable for eval. The following variable will be printed on STDOUT: =over 4 =item lr_getbody_SUBMITTER Email address of the submitter. That is the first email that is found in the headers (searched in the following order): Reply-To, From, Sender. This will only contains the address portion (without the <>). =item lr_getbody_TO The email address to which the email was sent. =item lr_getbody_SUBJECT Subject of the message. That string is sanitized. =item lr_getbody_DATE The date of the message. That string is sanitized. =item lr_getbody_FILE Path to the temporary file holding the found attachment. =item lr_getbody_OK This will be set if the operation was completed successfully. It is possible for B to fail to extract the attachment, but the header information might still be available. =back This script is used by lr_processmail(1) and lr_rawmail2mail(1). =head1 VERSION $Id: lr_getbody.in,v 1.10 2006/07/23 13:16:33 vanbaal Exp $ =head1 COPYRIGHT Copyright (C) 2002 Stichting LogReport Foundation LogReport@LogReport.org This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program (see COPYING); if not, check with http://www.gnu.org/copyleft/gpl.html. =head1 AUTHOR Francis J. Lacoste =cut # Local Variables: # mode: cperl # End: