#!/usr/bin/python
"""Classify unseen IMAP messages with bogofilter.

Every UNSEEN message in INBOX is piped through ``bogofilter``. Messages
classified as spam are copied to the ``Junk`` folder and flagged as deleted,
unsure messages are copied to ``Unsure`` and flagged as deleted, and ham
messages get their ``\\Seen`` flag removed again (fetching the body sets it).

The IMAP login, password and server are read from the ``imap-training``
section of ``~/.bogofilter-imap-train-rc``.
"""
import email
import imaplib
import logging
import os.path
import re
import subprocess

try:
    from ConfigParser import ConfigParser  # Python 2
except ImportError:
    from configparser import ConfigParser  # Python 3

logging.basicConfig(format='%(levelname)s:%(funcName)s:%(message)s',
                    level=logging.INFO)
log = logging.getLogger('check_bogofilter')

# Turn on imaplib's protocol trace only when debug logging is enabled.
# (The comparison used to be ``>=``, which is true for every level at or
# above DEBUG and therefore enabled the trace unconditionally.)
if log.getEffectiveLevel() <= logging.DEBUG:
    imaplib.Debug = 4

# Matches a "FETCH <seq> (UID)" response item such as "12 (UID 3456)".
pattern_uid = re.compile(r'\d+ \(UID (?P<uid>\d+)\)')

# IMAP connection shared by the helpers below; assigned by the script entry
# point at the bottom of the file.
client = None


def parse_uid(data):
    """Return the UID contained in a ``FETCH ... (UID)`` response item.

    :param data: response item such as ``'12 (UID 3456)'``; a bytes object
        (as returned by imaplib on Python 3) is decoded as ASCII first.
    :returns: the UID as a string of digits.
    :raises ValueError: if ``data`` does not look like a UID response.
    """
    if not isinstance(data, str):
        # Python 3 imaplib hands back bytes; on Python 2 this is a no-op
        # because the response is already a ``str``.
        data = data.decode('ascii')
    match = pattern_uid.match(data)
    if match is None:
        raise ValueError('Cannot parse UID from response %r' % (data,))
    return match.group('uid')


def move_messages(ids, target):
    """Copy the messages with the given UIDs to ``target`` and flag the
    originals as deleted.

    Does nothing when ``ids`` is empty.

    :param ids: list of message UIDs (strings).
    :param target: name of the destination folder.
    """
    # Eventually we may move messages in groups of say 50
    if ids:
        ids_str = ','.join(ids)
        client.uid('COPY', ids_str, target)
        client.uid('STORE', ids_str, '+FLAGS', r'(\Deleted)')


def process_folder(proc_fld_name, junk_fld, unsure_fld,
                   bogofilter_param=('-l',)):
    """Run bogofilter over all unseen messages of a folder and sort them.

    :param proc_fld_name: folder whose UNSEEN messages are classified.
    :param junk_fld: folder that receives messages classified as spam.
    :param unsure_fld: folder that receives messages classified as unsure.
    :param bogofilter_param: extra command-line arguments for bogofilter.
    :returns: number of messages that were examined.
    :raises IOError: if bogofilter exits with a code other than 0, 1 or 2.
    """
    ham_msgs = []
    spam_msgs = []
    unsure_msgs = []
    command = ['bogofilter'] + list(bogofilter_param)

    client.select(proc_fld_name)
    _, resp = client.search(None, "UNSEEN")
    log.debug('resp = %s', resp)
    # An empty or missing search result means there is nothing to process.
    messages = resp[0].split() if resp and resp[0] else []
    log.debug('messages = %s', messages)
    proc_msg_count = len(messages)

    for msgId in messages:
        log.debug('msgId = %s', msgId)
        typ, msg_data = client.fetch(msgId, '(RFC822)')
        log.debug('fetch RFC822 result = %s', typ)

        # The -p (passthrough) option outputs the message with an
        # X-Bogosity line at the end of the message header. This requires
        # keeping the entire message in memory when it's read from stdin
        # (or from a pipe or socket). If the message is read from a file
        # that can be rewound, bogofilter will read it a second time.
        # The -e (embed) option tells bogofilter to exit with code 0 if
        # the message can be classified, i.e. if there is not an error.
        # Normally bogofilter uses different codes for spam, ham, and
        # unsure classifications, but this simplifies using bogofilter
        # with procmail or maildrop.
        # NOTE: neither option is passed by default here; the
        # classification below relies on the distinct exit codes.
        proc = subprocess.Popen(command,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        _out, err = proc.communicate(input=msg_data[0][1])
        ret_code = proc.returncode
        log.debug("ret.returncode = %s", ret_code)
        log.debug('----------------------------\nerr:\n%s', err)

        typ, data = client.fetch(msgId, "(UID)")
        log.debug('fetch UID result = %s, %s', typ, data)
        uid = parse_uid(data[0])
        log.debug('UID = %s', uid)

        if ret_code == 0:  # spam
            spam_msgs.append(uid)
        elif ret_code == 1:  # ham
            ham_msgs.append(uid)
        elif ret_code == 2:  # unsure
            unsure_msgs.append(uid)
        else:  # 3 or something else I/O error
            raise IOError('Bogofilter failed with error %d' % ret_code)

    move_messages(spam_msgs, junk_fld)
    move_messages(unsure_msgs, unsure_fld)
    if ham_msgs:
        # Fetching the body marked the ham messages as read; undo that.
        client.uid('STORE', ','.join(ham_msgs), '-FLAGS', r'(\Seen)')
    client.close()

    return proc_msg_count


if __name__ == "__main__":
    processedCounter = 0

    config = ConfigParser()
    config.read(os.path.expanduser("~/.bogofilter-imap-train-rc"))

    login = config.get("imap-training", "login")
    password = config.get("imap-training", "password")
    server = config.get("imap-training", "server")

    client = imaplib.IMAP4_SSL(server)
    try:
        client.login(login, password)
        processedCounter += process_folder('INBOX', 'Junk', 'Unsure')
    finally:
        # Always end the IMAP session, even if classification failed.
        client.logout()

    if processedCounter > 0:
        log.info("Processed %d messages.", processedCounter)