#!/usr/bin/perl -w use strict; use Mail::Field; use MIME::Parser; use MIME::Entity; use HTML::TreeBuilder; use HTML::FormatText; our $VERSION = 1.6; END{ $? = 75 if $? == 255 } #EX_TEMPFAIL #Configure my %cfg; { #An MTA-writeable scratch directory for MIME::Entity to make a mess in, #which should be periodically cleaned by something like tmpwatch(8) chdir('/var/ephemeral'); #1 for use as a Postfix filter, 0 for use in an MTA alias command pipe $cfg{pipe} = 1 ? "|/usr/sbin/sendmail -G -i @ARGV" : '>&STDOUT'; #Path to SpamAssassin client (req. IPC::Filter) False if you wish to bypass $cfg{spam} = '/usr/bin/spamc'; #Maximum size message to process. For spamc *and* internal bypass $cfg{most} = 500_000; #Remove Macintosh metadata (AppleDouble) attachments if true. $cfg{meta} = 1; } #End of user twiddlable bits. #Relatively simple SpamAssassin checking... my($src, $dst, $entity) = join('', ); if( $cfg{spam} && ($cfg{_src} = length($src)) < $cfg{most} ){ eval "use IPC::Filter 'filter'"; unless( $@ ){ $cfg{spam} .= " --max-size=$cfg{most}"; eval { $dst = filter($src, $cfg{spam}) }; die($!) if $@; # (length($dst) > $cfg{_src}) || (crc($dst) ne crc($src)) ? sprintf("%i[%i]", crc($dst), length($dst)) ne sprintf("%i[%i]", crc($src), $cfg{_src}) ? undef($src): die('SpamAssassin did nothing'); } } eval{ $entity = MIME::Parser->new->parse_data($dst || $src) }; die("MIME::Parser problem: $@") unless defined($entity); undef($_) foreach ($dst, $src); #Strip Macintosh meta-data attachments if( $cfg{meta} && $entity->is_multipart ){ my @idx; for(my $i=0; $i<$entity->parts; $i++){ my $part = $entity->parts($i); #RFC 1740 if( $part->head->mime_attr('content-type') eq 'application/applefile' ){ my $magic; $part->bodyhandle->open('r')->read($magic, 4); next if $magic eq "\000\005\026\007"; } push(@idx, $part); } $entity->parts(\@idx); } # Parse only non-multipart text/html mime-type # XXX (Luckily) this heuristic fails for multi-part of html + attachments unless( $entity->is_multipart || ($entity->mime_type ne "text/html") ){ my $body = $entity->bodyhandle->as_string; #Preserve headers but switch to multi/alt so RT doesn't show the body 2x { my $head = $entity->head(); $head->replace('Content-type', 'multipart/alternative'. '; boundary="' . MIME::Entity->make_boundary . '"'); $entity->head($head); } #Unfortunately doing so clobbers the original body! $entity->add_part(MIME::Entity->build(Data => $body, Encoding => '8bit', Type => 'text/html') ); my $tree = HTML::TreeBuilder->new_from_content($body); #XXX utf8_mode my $text = HTML::FormatText->new(lm => 0, rm => 75)->format($tree); $entity->add_part(MIME::Entity->build(Data => $text, Encoding => '8bit', Type => 'text/plain'), 0); } #Give it back open(INJECT, $cfg{pipe}) || die($!); $entity->print(\*INJECT); sub crc { unpack("%32C*", $_[0]) % 65535; } __END__ =pod =head1 NAME mailfilter - multi-functional mail filter =head1 SYNOPSIS This script was developed from the html2mime contribution for RT because I needed to do some additional processing to messages. Furthermore, it seemed silly to connect a series of perl invocations with pipes when the tasks could be done in a single program. C will pass messages to SpamAssassin (via spamc), ammend (most) impolitely composed pure-HTML messages to include an alternate text/plain part, and can remove fluffy Macintosh metadata attachments. =head1 DESCRIPTION =head2 Postfix filter =over =item 1 Create an unpriveleged user named filter. =item 2. Edit F to include the following lines: =back smtp inet n - n - - smtpd -o content_filter=filter:dummy #... filter unix - n n - 8 pipe flags=Rq user=filter argv=/usr/local/bin/mailfilter -f ${sender} -- ${recipient} =head2 Pipes =over =item 1. Change your delivery alias to something like the following: =back rt: "|/opt/rt3/local/bin/mailfilter |/opt/rt3/bin/rt-mailgate --queue "General" --action correspond --url http://localhost/" =head1 OPTIONS There are a few options set in I<%cfg> at the top of the script. =over =item pipe Trinary test to send output to L[B<1>] or F[B<0>] for a pipeline. The latter is also useful for testing. =item spam Path to SpamAssassin L client. =item most Maximum message size in bytes to process with SpamAssassin. =item meta Whether or not to strip out Macintosh metadata attachments. =back In addition, the script C's to a scribble-safe location. =head1 BUGS text/plain parts are not created for multi-part messages consisting of an html body and one or more attachments. =head1 AUTHOR Jerrad Pierce =head1 HISTORY Portions based on http://wiki.bestpractical.com/view/html2mime/523 =over =item 1.6 Fix to use MIME::Entity's native part clobbering =item 1.5 First public release. =back =head1 LICENSE Same terms as Perl itself i.e; Artistic License or GPL2. =cut TODO rtbouncehandler? purge? output_under? #simpler, FH-based checking that blocks on large messages use IPC::Open2; my($from, $to); my $pid = open2($from, $to, '/usr/bin/spamc') || die($!); print $to ; close($to); my $entity = MIME::Parser->new()->parse($from);