I call this program whitelist. It lets you run a command on a bunch of files depending on whether the file is an email and has a from address in a whitelist.
It's useful for maintaining whitelisted mailboxes and analysing mailboxes. With a few more tests it might be a generically useful tool.
import commands
import email.Parser
import sys
import re
import getopt
import os
import os.path
# Prefer parseaddr from email.utils; fall back to the rfc822 module when
# that import is unavailable. Only ImportError triggers the fallback, so
# unrelated failures (and KeyboardInterrupt/SystemExit) are not masked.
try:
    from email.utils import parseaddr
except ImportError:
    from rfc822 import parseaddr
def help():
    """Print the command-line usage text for whitelist.py to standard output.

    The name shadows the ``help`` builtin; it is kept because main() calls
    this function by that name.
    """
    # Raw string: the text shows literal backslashes (\{} and \;) exactly as
    # the user must type them in a shell, and those are not valid Python
    # escape sequences in a normal string literal.
    print(r"""whitelist.py -h
whitelist.py [-v] [-f whitelist filename] command ; filelist [-]
Execute the specified command (which must be shell escaped if calling
from shell) on all the files in the filelist or, if - is present in
the filelist, read from stdin (like xargs) whenever the file is an
email that contains a from address specified in the whitelist.
Like xargs, or find, the command can include {} as a replacement token
for the matched filename.
The command can also be a header reference, for example:
$FROM
will print the specified mails From address.
Options:
-v specifies that the test is to be negated, executing the action if
the file does NOT contain a from address in the whitelist.
-f specifies a whitelist, the default is $HOME/.addresses
For example:
whitelist.py -f .wlist wc \{} \; maildir/cur/*
runs wc on each file in maildir/cur with a FROM address matching
something in the whitelist; or:
find maildir/INBOX/cur -type f | whitelist.py -v mv \{} mailbox/TRASH/cur \; -
mv's all files in the INBOX with FROMs not matching the whitelist into
a TRASH folder.
find maildir/Greylist/new -type f | whitelist.py -v \$TO \; -
displays the TO address of all messages where the from didn't match
the whitelist.
""")
def read_whitelisted(filename):
    """Return the whitelist entries stored in *filename* as a list of strings.

    The file is read in full and split on any run of whitespace, so entries
    may be separated by spaces, tabs or newlines. An empty file gives [].
    Errors from opening or reading the file propagate to the caller.
    """
    # The with-block closes the file even when read() raises.
    with open(filename) as fd:
        return fd.read().split()
def get_msg(filename):
    """Parse the mail stored in *filename* and return its message object.

    Only the headers are parsed (headersonly=True); the body is not split
    into MIME parts. Headers are read from the returned object by name,
    e.g. ``msg["from"]``, which is None when the header is absent.
    Errors from opening the file propagate to the caller.
    """
    # Imported locally under the lowercase module name, which exists in
    # Python 2.5+ and Python 3; the CamelCase alias email.Parser is
    # Python 2 only.
    from email.parser import HeaderParser

    # The with-block closes the file on success and on parse errors alike.
    with open(filename) as fd:
        return HeaderParser().parse(fd, True)
# Matches the literal "{}" replacement token inside a command string.
# Raw string: "\{" is not a valid escape sequence in a normal literal.
action_re = re.compile(r"\{}")
def handle(filenames_fn, action, whitelist, negate=False):
    """Run *action* for every file whose From address passes the whitelist test.

    Parameters:
      filenames_fn -- zero-argument callable returning an iterable of
                      filenames to examine.
      action       -- either a header reference ("$" followed by a header
                      name, e.g. "$FROM"), in which case that header's value
                      is printed, or a shell command in which every "{}" is
                      replaced by the filename before it is run with
                      os.system().
      whitelist    -- collection of addresses to compare the From address
                      against (exact string comparison).
      negate       -- when true, act on the files that do NOT match.

    A file without a From header is treated as having an empty address, so
    it never matches the whitelist (and therefore is acted on when *negate*
    is true). A header reference to a header the mail lacks prints "None".
    """
    # Build the lookup set once instead of scanning a list for every file.
    allowed = frozenset(whitelist)

    # Decide once whether the action is a header reference or a command.
    header_ref = re.match(r"\$(.+)", action)
    if header_ref:
        header_name = header_ref.group(1)
    else:
        header_name = None

    for filename in filenames_fn():
        msg = get_msg(filename)
        # msg["from"] would be None for a missing header, which parseaddr()
        # cannot handle; substitute an empty string instead.
        addr = parseaddr(msg.get("from", ""))[1]
        matched = addr in allowed
        if negate:
            matched = not matched
        if not matched:
            continue

        if header_name is not None:
            print(msg[header_name])
        else:
            # A function replacement inserts the filename verbatim; passing
            # it as a template string would make re.sub() interpret any
            # backslashes it contains.
            # NOTE(review): the filename is not shell-quoted, so names with
            # spaces or shell metacharacters are interpreted by the shell.
            cmd_str = action_re.sub(lambda _match: filename, action)
            os.system(cmd_str)
def main(args):
    """Command-line entry point: parse *args* and process the listed files.

    *args* is the argument list without the program name. Recognised
    options are -h (print usage and exit 0), -v (negate the whitelist test)
    and -f FILE (whitelist file; defaults to .addresses in the user's home
    directory). The remaining arguments are "command ; filelist".

    Exits with status 1, after writing a message to stderr and printing the
    usage text, when an option is invalid, the whitelist file does not
    exist, or the " ;" separator is missing.
    """
    negate = False
    # expanduser() honours $HOME when it is set but, unlike
    # os.environ["HOME"], does not raise KeyError when it is not.
    whitelist_filename = os.path.join(os.path.expanduser("~"), ".addresses")

    try:
        # "f:" declares -f as an option that takes an argument.
        opts, args = getopt.getopt(args, "hvf:")
    except getopt.GetoptError as err:
        sys.stderr.write("whitelist.py - %s\n\n" % err)
        help()
        sys.exit(1)

    for opt, value in opts:
        if opt == "-h":
            help()
            sys.exit(0)
        elif opt == "-v":
            negate = True
        elif opt == "-f":
            whitelist_filename = value

    if not os.access(whitelist_filename, os.F_OK):
        sys.stderr.write("whitelist.py - no whitelist filename\n\n")
        help()
        sys.exit(1)

    # The arguments are re-joined and split at " ;": the greedy (.*) makes
    # the LAST " ;" the separator between command and file list.
    # NOTE: because of the re-join, a filename containing spaces is split
    # into several names.
    cmdstr = " ".join(args)
    m = re.match("(.*) ;([ ]*.*)", cmdstr)
    if not m:
        sys.stderr.write("whitelist.py - expected: command ; filelist\n\n")
        help()
        sys.exit(1)
    cmd = m.group(1)
    # Drop empty entries so an empty file list (or doubled spaces) does not
    # produce "" as a filename.
    files = [name for name in m.group(2).strip().split(" ") if name]

    def ffn():
        """Yield each filename; "-" expands to the lines read from stdin."""
        for name in files:
            if name == "-":
                for line in sys.stdin:
                    stripped = line.strip()
                    # Skip blank lines rather than trying to open "".
                    if stripped:
                        yield stripped
            else:
                yield name

    whitelist = read_whitelisted(whitelist_filename)
    handle(ffn, cmd, whitelist, negate)
# Script entry point: when the file is executed directly (not imported),
# pass the command-line arguments, minus the program name, to main().
if __name__ == "__main__":
    main(sys.argv[1:])