#!/usr/bin/perl # # Replacement for the elm frm(1) program which is increasingly # disappearing from computers as elm is superceded by other, more # recent and less useful, programs. # # Bruce Murphy, rattus.net software # November 2003 # pod documentation for this program. use strict; # debugging levels. my ($DEBUG) = 0; # general debugging output. # general variables. my (@tokens); # We should make rfrm behave just like frm if it's called with that name. my ($compat) = 0; print "called as '$0'\n" if $DEBUG; #@tokens = split /\//, $0; #if ($tokens[$#tokens] eq "frm") { if ($0 =~ /(^|\/)frm$/) { print "Using compatibility mode.\n" if $DEBUG; $compat = 1; } # Things we'll be looking for within the headers to create our summary # listing. The header token in question and the element name to store # it under in the summary table. my (%headerelements) = ( "Subject:" => "subject", "From:" => "sender1", "From " => "sender2", "Date: " => "date" ); # Formatting options for the various summary format we have available. my (%formats) = ( "standard" => { sender => 20, subject => 58 } ); # input file selection routine. For the time being we'll either take # one or more files from the command-line, or if we aren't given any, # try to deduce something from the MAIL environment variable. # # printheader, firstheader are variables we'll use to print some # optional information about what file we're currently looking at. if (@ARGV > 0) { # scan each of the items from the command line. my ($printheader) = 1 if @ARGV > 1; my ($firstheader) = 1; my ($mailhandle); foreach my $mailfile (@ARGV) { undef $mailhandle; if ($mailfile eq "-") { # traditional UNIX special case. $mailhandle = "STDIN"; } elsif (! open ($mailhandle, "$mailfile")) { printf STDERR "Unable to open file %s: %s\n", $mailfile, $!; next; } if ($printheader) { # hack to leave extra blank dividing lines between files # without causing any extra blank space. if ($firstheader) { $firstheader = 0; } else { print "\n"; } printf " Mail File: %s\n", $mailfile; } scanfile_mailbox($mailhandle); } } else { # no mail files were specified on the command line. Try the # environment variable and then give up. my $mailhandle; if (exists $ENV{'MAIL'} and -r $ENV{'MAIL'}) { undef $mailhandle; if (open($mailhandle, $ENV{'MAIL'})) { scanfile_mailbox($mailhandle); } else { printf "Unable to open %s: %s\n", $ENV{'MAIL'}, $!; } } else { printf STDERR "No mail files specified and \$MAIL not " . "defined or accessible\n"; exit 1; } } # This is our central subroutine which is responsible for grovelling # through a mailbox file in some format or other, pulling out the # appropriate headers and then feeding them to our summary function. # # This one recognises the generic Berkeley mailbox format used # wherever sensible mailreaders are found. sub scanfile_mailbox { my ($file) = @_; my ($inheader, $elements, $lastelement); my ($token, $data); $inheader = 0; $elements = { }; while (<$file>) { chomp; if (/^From /) { if ($inheader) { # we think we're already in a header. Print what we've # got and start a new one. printsummary($elements); $elements = {}; $lastelement = ""; } else { $inheader = 1; } # this wouldn't ordinarily make it through to the general # header processing steps in the next stanza. ($data) = $_ =~ /^From (.*)/; $elements->{$headerelements{"From "}} = $data; # we can't skip this line, we probably want it for our # header collection as a backup sender.... } if ($inheader) { if (/^$/) { # end of header reached. printsummary($elements); $elements = { }; $inheader = 0; $lastelement = ""; next; } elsif (($token, $data) = $_ =~ /^([-a-zA-Z0-9]+[: ])\s(.*)/ ) { # start of a normal header line. # Let's check to see whether it's one of the headers we're # storing for later. if (exists $headerelements{$token}) { $lastelement = $headerelements{$token}; $elements->{$lastelement} = $data; } else { $lastelement = ""; } } elsif (/^\s/) { # handle continuations. if ($lastelement) { s/^\s+//; # strip whitespace. $elements->{$lastelement} .= $_; } } } } if ($inheader) { printsummary($elements); print "Truncated mail file.\n" if $DEBUG; } } # This function takes a hash which may or may not have the elements # defined in the headerelements hash above and prints out a summary. sub printsummary { my ($elements) = @_; my ($subject, $sender); my ($formatstr); # build subject element. if (exists $elements->{'subject'}) { $subject = $elements->{'subject'}; $subject = cleanline($subject, {whitespace => 1, compresswhite => 1, trimends => 1, stripcontrol => 1, squashhigh => 1 }); $subject = substr($subject, 0, $formats{'standard'}->{'subject'}); } else { $subject = ""; } # build sender element. It would be nice to have a better-designed # fall-through here so that a From: header which doesn't yield # anything useful won't stop us looking at the envelope. if (exists $elements->{'sender1'}) { # grab from the From: header line. $sender = getsender($elements->{'sender1'}); # $sender =~ y/\t/ /; # whitespace # $sender =~ y/\000-\039//d; # bad characters. $sender = substr($sender, 0, $formats{'standard'}->{'sender'}); } elsif (exists $elements->{'sender2'}) { # last-ditch at reading the envelope. (Bearing in mind that # many mail systems throw this away) $sender = $elements->{'sender2'}; ($sender) = $sender =~ /(\S+@\S+)/; $sender = cleanline($sender, {stripcontrol => 1, squashhigh => 1}); if (!$sender) { $sender = "(no sender)"; } } else { $sender = "(no sender)"; } # This should probably be done more globally. $formatstr = sprintf ("%%-%ds %%-%ds\n", $formats{'standard'}->{'sender'}, $formats{'standard'}->{'subject'}); printf $formatstr, $sender, $subject; } ############ # small utility functions that have probably been written many times # before, but which I'm forced to write again becuase I can't find the # others right now. # getsender($fromheader, $fromenv) # # extracts a best guess sender from the two froms, with particular # preference to the real name parts. Understands various real-name # encoding formats as found off in the wider internet. sub getsender { my ($fromheader, $fromenv) = @_; my ($sender); # print STDERR "getsender called with '$fromheader'\n" if $DEBUG; if ($fromheader =~ /<[^>]+@[^>]+>/ ) { # Real Name # $sender = $fromheader; $sender =~ s/<[^>]+@[^>]+>//; $sender =~ y/"//d; } elsif ($fromheader =~ /@.*\(.*\)/ or $fromheader =~ /\(.*\).*@/ ) { # email@address.com (Real Name) # or reversed variants. ($sender) = $fromheader =~ /\((.*)\)/; } else { $sender = $fromheader; } $sender = cleanline($sender, {whitespace => 1, compresswhite => 1, trimends => 1, stripcontrol => 1}); if (! $sender or $sender =~ /^\s+$/) { $sender = "(no sender)"; } return $sender; } # cleanline($string, $opts) # # Our general string cleaning function. We want to be sure that no # nasty characters manage to get through to the general string # handling stuff. Options is a hash ref with significant keys and can # include: # # whitespace: converts all whitespace \r \n \t to spaces. # compresswhite: removes duplicate adjacent whitespace # trimends: removes leading and trailing whitespace. # stripcontrol: strips all control (mostly \000-\039) characters. # striphigh: removes all \200 and above characters. # squashhigh: replaces all \200 and above chars with '_'. # # multiple options are executed in the order specified above. # # example cleanline($string, {whitespace => 1, trimends => 1}) sub cleanline { my ($instring, $opts) = @_; my ($outstring) = $instring; if (not defined $opts) { warn "cleanline called without options on '$instring'"; return $outstring; } if (exists $opts->{'whitespace'}) { $outstring =~ y/\r\t\n/ /; } if (exists $opts->{'compresswhite'}) { $outstring =~ y/\r\t\n /\r\t\n /s; } if (exists $opts->{'trimends'}) { $outstring =~ s/^\s+//; $outstring =~ s/\s+$//; } if (exists $opts->{'stripcontrol'}) { $outstring =~ y/\000-\037/ /; } if (exists $opts->{'striphigh'}) { $outstring =~ y/\200-\377//d; } if (exists $opts->{'squashhigh'}) { $outstring =~ y/\200-\377/_/; } return $outstring; }