#!/usr/bin/python
import datetime
import email
import imaplib
import logging
import os.path
import re
import subprocess
from ConfigParser import ConfigParser
# Uncomment the following call to get debug output from this script:
# logging.basicConfig(format='%(levelname)s:%(funcName)s:%(message)s',
# level=logging.DEBUG)
log = logging.getLogger('train_bogofilter')
# Command used to run bogofilter; process_folder() replaces %s with the
# option letter(s) it receives as ``bogofilter_param``.
CMD_STR = "/usr/bin/bogofilter -%s"
# Uncomment to turn on imaplib's own debug output:
# imaplib.Debug = 4
# Pattern for the APPENDUID response code described in RFC 2359 (IMAP4
# UIDPLUS extension); the two groups capture the two numbers of the response.
# Example of a server answer as logged by process_folder():
# DEBUG:process_folder:newmsg = '[APPENDUID 1424278334 31178] Append completed.'
# NOTE(review): the pattern is not referenced anywhere in the visible code.
APPENDUID_RE = re.compile(r'\[APPENDUID (\d+) (\d+)\] Append completed\.')
def process_folder(proc_fld_name, bogofilter_param, end_fld_name, mark_seen=True):
    r"""Train bogofilter on every message of a folder and move the messages.

    Every message found in ``proc_fld_name`` is piped to bogofilter (started
    as ``CMD_STR % bogofilter_param``).  When bogofilter exits with 0, the
    message, without its ``X-Bogosity`` header, is appended to
    ``end_fld_name`` and the original is flagged ``\Deleted``.  After the
    last message the folder is expunged and closed.

    The function uses the module-level ``client`` (IMAP connection),
    ``hparser`` (message parser), ``log`` and ``CMD_STR``.

    :param proc_fld_name: name of the IMAP folder to process.
    :param bogofilter_param: option letter(s) substituted into ``CMD_STR``.
    :param end_fld_name: name of the IMAP folder the messages are appended to.
    :param mark_seen: when true, the source message is flagged ``\Seen``
        before it is flagged ``\Deleted``.
    :returns: number of messages that were trained and moved.
    :raises IOError: when the APPEND or the ``\Seen`` STORE command does not
        answer ``OK``.
    :raises OSError: when bogofilter exits with a non-zero return code (or
        cannot be started at all).
    """
    # Function-scope import: the module header does not import shlex.
    import shlex

    # Build the argument list once; running it without a shell avoids any
    # shell interpretation of the formatted command line.
    bogofilter_cmd = shlex.split(CMD_STR % bogofilter_param)

    client.select(proc_fld_name)
    _, resp = client.search(None, "ALL")
    messages = resp[0].split()
    log.debug('messages = %s', messages)
    proc_msg_count = 0

    for msg_id in messages:
        log.debug('msgId = %s', msg_id)
        _, msg_data = client.fetch(msg_id, '(RFC822)')
        raw_msg = msg_data[0][1]
        msg = hparser.parsestr(raw_msg)

        date_str = msg['Date']
        log.debug("date_str = %s", date_str)
        date_tuple = email.utils.parsedate_tz(date_str) if date_str else None
        log.debug("date_tuple = %s", date_tuple)
        if date_tuple is None:
            # Missing or unparseable Date header: pass no date to APPEND
            # instead of crashing on the slice of None.
            int_date = None
        else:
            # mktime_tz() honours the UTC offset of the header; simply
            # dropping the offset would make Time2Internaldate() treat the
            # sender's wall-clock time as local time.
            int_date = imaplib.Time2Internaldate(
                email.utils.mktime_tz(date_tuple))
        log.debug("int_date = %s", int_date)

        proc = subprocess.Popen(bogofilter_cmd, stdin=subprocess.PIPE)
        proc.communicate(input=raw_msg)
        log.debug("ret.returncode = %s", proc.returncode)
        if proc.returncode != 0:
            # Format the return code, not the Popen object itself.
            raise OSError("bogofilter finished with the returncode: %d"
                          % proc.returncode)

        del msg['X-Bogosity']
        # NOTE(review): as_string(True) asks for the unix "From " envelope
        # line to be included -- confirm that the server expects it.
        typ, newmsg = client.append(end_fld_name, None,
                                    int_date,
                                    msg.as_string(True))
        log.debug("typ = %s", typ)
        newmsg = newmsg[0]
        log.debug("newmsg = %s (%s)", newmsg, type(newmsg))
        # Check the APPEND result before anything can overwrite ``typ``;
        # otherwise a failed APPEND could still delete the original.
        if typ != 'OK':
            raise IOError("Cannot store a message to the folder %s"
                          % end_fld_name)

        if mark_seen:
            # NOTE(review): this flags the *source* message (msg_id), not the
            # appended copy -- confirm that this is the intended target.
            typ, data = client.store(msg_id, '+FLAGS', r'(\Seen)')
            log.debug("typ = %s", typ)
            log.debug("data = %s", data)
            if typ != 'OK':
                raise IOError("Cannot flag the message %s as seen in the "
                              "folder %s" % (msg_id, proc_fld_name))

        client.store(msg_id, '+FLAGS', r'(\Deleted)')
        proc_msg_count += 1

    client.expunge()
    client.close()
    return proc_msg_count
# Script body: train bogofilter from the configured IMAP folders.
processedCounter = 0

# Module-level parser; process_folder() uses it for every fetched message.
hparser = email.Parser.Parser()

# Login, password and server are read from the [imap-training] section of
# the per-user configuration file.
config = ConfigParser()
config.read(os.path.expanduser("~/.bogofilter-imap-train-rc"))
login = config.get("imap-training", "login")
password = config.get("imap-training", "password")
server = config.get("imap-training", "server")

# Module-level connection; process_folder() issues all its commands on it.
client = imaplib.IMAP4_SSL(server)
try:
    client.login(login, password)
    # Each tuple is (proc_fld_name, bogofilter_param, end_fld_name, mark_seen)
    # and is passed to process_folder() as is.
    for box in [('Junk', 's', 'Trash', True), ('_ham', 'n', 'INBOX', False)]:
        log.debug('box = %s', box)
        processedCounter += process_folder(*box)
finally:
    # process_folder() raises IOError/OSError on failures; log out in every
    # case so that the connection is not left open.
    client.logout()

if processedCounter > 0:
    log.info("Processed %d messages.", processedCounter)