diff options
-rwxr-xr-x | check_bogofilter.py | 110 |
1 files changed, 74 insertions, 36 deletions
diff --git a/check_bogofilter.py b/check_bogofilter.py
index 48f20bc..f75c696 100755
--- a/check_bogofilter.py
+++ b/check_bogofilter.py
@@ -3,52 +3,93 @@ import email
 import imaplib
 import logging
 import os.path
+import re
 import subprocess
 from ConfigParser import ConfigParser
 
+
 logging.basicConfig(format='%(levelname)s:%(funcName)s:%(message)s',
-                    level=logging.DEBUG)
+                    level=logging.INFO)
+log = logging.getLogger('check_bogofilter')
+
+if log.getEffectiveLevel() == logging.DEBUG:
+    imaplib.Debug = 4
+
+pattern_uid = re.compile('\d+ \(UID (?P<uid>\d+)\)')
+
+
+def parse_uid(data):
+    match = pattern_uid.match(data)
+    return match.group('uid')
+
+
+def move_messages(ids, target):
+    # Eventually we may move messages in groups of say 50
+    if len(ids) > 0:
+        ids_str = ','.join(ids)
+        client.uid('COPY', ids_str, target)
+        client.uid('STORE', ids_str, '+FLAGS', r'(\Deleted)')
+
 
-CMD_STR = "/usr/bin/bogofilter -%s"
-imaplib.Debug = 4
+def process_folder(proc_fld_name, junk_fld, unsure_fld,
+                   bogofilter_param=['-l']):
+
+    ham_msgs = []
+    spam_msgs = []
+    unsure_msgs = []
 
-def process_folder(proc_fld_name, bogofilter_param, end_fld_name, mark_seen=True):
     client.select(proc_fld_name)
-    _, resp = client.search(None, "ALL")
+    _, resp = client.search(None, "UNSEEN")
+    log.debug('resp = %s', resp)
     messages = resp[0].split()
     logging.debug('messages = %s', messages)
-    proc_msg_count = 0
+    proc_msg_count = len(messages)
     for msgId in messages:
         logging.debug('msgId = %s', msgId)
         typ, msg_data = client.fetch(msgId, '(RFC822)')
+        log.debug('fetch RFC822 result = %s', typ)
+
+        # The -p (passthrough) option outputs the message with an
+        # X-Bogosity line at the end of the message header. This requires
+        # keeping the entire message in memory when it's read from stdin
+        # (or from a pipe or socket). If the message is read from a file
+        # that can be rewound, bogofilter will read it a second time.
 
-        msg = hparser.parsestr(msg_data[0][1])
+        # The -e (embed) option tells bogofilter to exit with code 0 if
+        # the message can be classified, i.e. if there is not an error.
+        # Normally bogofilter uses different codes for spam, ham, and
+        # unsure classifications, but this simplifies using bogofilter
+        # with procmail or maildrop.
 
-        ret = subprocess.Popen(CMD_STR % bogofilter_param,
+        ret = subprocess.Popen(['bogofilter'] + bogofilter_param,
                                stdin=subprocess.PIPE,
-                               shell=True)
-        ret.communicate(input=msg_data[0][1])
-
-        logging.debug("ret.returncode = %s", ret.returncode)
-        if ret.returncode == 0:
-            del msg['X-Bogosity']
-            typ, newmsg = client.append(end_fld_name, '', '',
-                                        msg.as_string(True))
-            logging.debug("typ = %s", typ)
-            logging.debug("newmsg = %s", newmsg)
-            # if mark_seen:
-            #     client.store(newmsg, '+FLAGS', r'(\Seen)')
-            if typ != 'OK':
-                raise IOError("Cannot store a message to the folder %s"
-                              % end_fld_name)
-            else:
-                client.store(msgId, '+FLAGS', r'(\Deleted)')
-                proc_msg_count += 1
-        else:
-            raise OSError("bogofilter finished with the returncode: %d"
-                          % ret)
-
-    client.expunge()
+                               stdout=subprocess.PIPE,
+                               stderr=subprocess.PIPE)
+        out, err = ret.communicate(input=msg_data[0][1])
+        ret_code = ret.returncode
+
+        logging.debug("ret.returncode = %s", ret_code)
+        log.debug('----------------------------\nerr:\n%s', err)
+
+        typ, data = client.fetch(msgId, "(UID)")
+        log.debug('fetch UID result = %s, %s', typ, data)
+        uid = parse_uid(data[0])
+        log.debug('UID = %s', uid)
+
+        if ret_code == 0:  # spam
+            spam_msgs.append(uid)
+        elif ret_code == 1:  # ham
+            ham_msgs.append(uid)
+        elif ret_code == 2:  # unsure
+            unsure_msgs.append(uid)
+        else:  # 3 or something else I/O error
+            raise IOError('Bogofilter failed with error %d', ret_code)
+
+    move_messages(spam_msgs, junk_fld)
+    move_messages(unsure_msgs, unsure_fld)
+    if ham_msgs:
+        client.uid('STORE', ','.join(ham_msgs), '-FLAGS', r'(\Seen)')
+    client.close()
     return proc_msg_count
 
 
@@ -64,12 +105,9 @@ server = config.get("imap-training", "server")
 client = imaplib.IMAP4_SSL(server)
 client.login(login, password)
 
-for box in [('Junk', 's', 'Trash', True), ('Ham', 'n', 'INBOX', False)]:
-    logging.debug('box = %s', box)
-    # processedCounter += process_folder(box[0], box[1], box[2], box[3)
-    processedCounter += process_folder(*box)
+processedCounter += process_folder('INBOX', 'Junk', 'Unsure')
 
 client.logout()
 
 if processedCounter > 0:
-    logging.info("Processed %d spam messages.", processedCounter)
+    logging.info("Processed %d messages.", processedCounter)