Process email files, like Unix find.
It's useful for maintaining whitelisted mailboxes and analysing mailboxes. With a few more tests it might be a generically useful tool.
#!/usr/bin/python

# Copyright (C) 2008 by Tapsell-Ferrier Limited

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; see the file COPYING.  If not, write to the
# Free Software Foundation, Inc.,   51 Franklin Street, Fifth Floor,
# Boston, MA  02110-1301  USA

"""Run a command on email files whose From address is (or is not) whitelisted."""

try:
    import commands  # Python 2 only and unused below; kept so the name stays bound
except ImportError:
    commands = None
try:
    import email.parser as email_parser
except ImportError:
    # Older Python 2 releases only provide the capitalised module name.
    import email.Parser as email_parser
import sys
import re
import getopt
import os
import os.path

try:
    from email.utils import parseaddr
except ImportError:
    from rfc822 import parseaddr


# Raw string: the backslashes shown in the examples are literal text.
HELP_TEXT = r"""whitelist.py -h
whitelist.py [-v] [-f whitelist filename] command ; filelist [-]

Execute the specified command (which must be shell escaped if calling
from shell) on all the files in the filelist or, if - is present in
the filelist, read from stdin (like xargs) whenever the file is an
email that contains a from address specified in the whitelist.

Like xargs, or find, the command can include {} as a replacement token
for the matched filename.

The command can also be a header reference, for example:

  $FROM

will print the specified mails From address.

Options:

 -v   specifies that the test is to be negated, executing the action
      if the file does NOT contain a from address in the whitelist.

 -f   specifies a whitelist, the default is $HOME/.addresses

For example:

  whitelist.py -f .wlist wc \{} \; maildir/cur/*

runs wc on each file in maildir/cur with a FROM address matching
something in the whitelist; or:

  find maildir/INBOX/cur -type f | whitelist.py -v mv \{} mailbox/TRASH/cur \; -

mv's all files in the INBOX with FROMs not matching the whitelist into
a TRASH folder.

  find maildir/Greylist/new -type f | whitelist.py -v $TO \; -

displays the TO address of all messages where the from didn't match
the whitelist.
"""


def help():
    """Print the usage text to standard output."""
    print(HELP_TEXT)


def read_whitelisted(filename):
    """Return the whitespace-separated entries of the whitelist file as a list."""
    fd = open(filename)
    try:
        return fd.read().split()
    finally:
        fd.close()


def get_msg(filename):
    """Parse only the headers of the mail stored in *filename*.

    The file is read as bytes so that mails containing non-ASCII octets do
    not abort the run with a decoding error on Python 3.
    """
    # Python 3 needs the bytes-aware parser for a binary file; Python 2 has
    # no such class and its HeaderParser already works on byte strings.
    parser_cls = getattr(
        email_parser, "BytesHeaderParser", email_parser.HeaderParser)
    fd = open(filename, "rb")
    try:
        return parser_cls().parse(fd, True)
    finally:
        fd.close()


# The "{}" token that is replaced by the matched filename.
action_re = re.compile(r"\{\}")

# An action of the form "$NAME" means "print header NAME" instead of
# running a command.
header_re = re.compile(r"\$(.+)")


def handle(filenames_fn, action, whitelist, negate=False):
    """Apply *action* to every mail whose From address passes the whitelist test.

    filenames_fn -- zero-argument callable returning an iterable of filenames
    action       -- either "$HEADER" (print that header of the mail) or a
                    shell command in which every "{}" is replaced by the
                    filename
    whitelist    -- iterable of addresses; matching is an exact string
                    comparison against the parsed From address
    negate       -- when true, act on the mails that do NOT match
    """
    allowed = frozenset(whitelist)
    header_match = header_re.match(action)

    for filename in filenames_fn():
        msg = get_msg(filename)
        # A missing From header is treated as an empty address; str() also
        # flattens the Header objects Python 3 returns for undecodable values.
        addr = parseaddr(str(msg.get("from", "")))[1]
        selected = addr in allowed
        if negate:
            selected = not selected
        if not selected:
            continue

        if header_match:
            # Prints "None" when the mail lacks the requested header.
            print(msg[header_match.group(1)])
        else:
            # A function replacement inserts the filename verbatim; a string
            # replacement would be interpreted as a regex template and
            # mangle backslashes.
            # NOTE(review): the filename reaches the shell unquoted, so names
            # with spaces or shell metacharacters are interpreted by the
            # shell. Not quoted here because callers may already quote "{}".
            cmd_str = action_re.sub(lambda _match: filename, action)
            os.system(cmd_str)


def _iter_filenames(files):
    """Yield each entry of *files*, expanding "-" to the lines of stdin."""
    for name in files:
        if name != "-":
            yield name
            continue
        for line in sys.stdin:
            line = line.strip()
            if line:  # a blank line is not a filename
                yield line


def _usage_error(message):
    """Report *message* on stderr, show the usage text and exit with status 1."""
    sys.stderr.write("whitelist.py - %s\n\n" % message)
    help()
    sys.exit(1)


def main(args):
    """Command-line entry point; *args* is the argument list without argv[0].

    Exits with status 0 after -h and with status 1 on a usage error or when
    the whitelist file does not exist.
    """
    negate = False
    whitelist_filename = os.path.join(os.path.expanduser("~"), ".addresses")

    try:
        # "f:" so that -f accepts the whitelist filename as its argument.
        opts, args = getopt.getopt(args, "hvf:")
    except getopt.GetoptError:
        _usage_error(sys.exc_info()[1])

    for o, a in opts:
        if o == "-h":
            help()
            sys.exit(0)
        elif o == "-v":
            negate = True
        elif o == "-f":
            whitelist_filename = a

    if not os.access(whitelist_filename, os.F_OK):
        _usage_error("no whitelist filename")

    # The last ";" argument separates the command from the file list.
    # Working on the argument list (rather than a joined string) keeps
    # filenames that contain spaces intact.
    separator = None
    for index, arg in enumerate(args):
        if arg == ";":
            separator = index
    if not separator:  # no ";" at all, or nothing in front of it
        _usage_error("expected: command ; filelist")

    cmd = " ".join(args[:separator])
    files = args[separator + 1:]

    whitelist = read_whitelisted(whitelist_filename)
    handle(lambda: _iter_filenames(files), cmd, whitelist, negate)


if __name__ == "__main__":
    main(sys.argv[1:])

# End