summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rwxr-xr-x check_bogofilter.py 110
1 files changed, 74 insertions, 36 deletions
diff --git a/check_bogofilter.py b/check_bogofilter.py
index 48f20bc..f75c696 100755
--- a/check_bogofilter.py
+++ b/check_bogofilter.py
@@ -3,52 +3,93 @@ import email
import imaplib
import logging
import os.path
+import re
import subprocess
from ConfigParser import ConfigParser
+
logging.basicConfig(format='%(levelname)s:%(funcName)s:%(message)s',
- level=logging.DEBUG)
+ level=logging.INFO)
+log = logging.getLogger('check_bogofilter')
+
+# Turn on imaplib's own debugging output only when this script logs at
+# DEBUG level.
+if log.getEffectiveLevel() == logging.DEBUG:
+    imaplib.Debug = 4
+
+# Matches one item of a FETCH (UID) response, e.g. '12 (UID 3456)'.
+# A raw string keeps the regex escapes (\d, \() out of Python's own
+# string-escape processing.
+pattern_uid = re.compile(r'\d+ \(UID (?P<uid>\d+)\)')
+
+
+def parse_uid(data):
+    """Extract the UID from one item of a ``FETCH (UID)`` response.
+
+    ``data`` is expected to look like ``'12 (UID 3456)'`` (message
+    sequence number followed by the UID in parentheses).
+
+    Returns the UID as a string of digits.  Raises ValueError when
+    ``data`` is empty or does not have the expected form.
+    """
+    # An empty/None item (failed fetch) is treated like a non-matching one
+    # so the caller gets one clear error instead of an AttributeError or
+    # TypeError from deep inside the regex call.
+    match = pattern_uid.match(data) if data else None
+    if match is None:
+        raise ValueError('Cannot parse UID from the response %r' % (data,))
+    return match.group('uid')
+
+
+def move_messages(ids, target):
+    """Move the messages with the given UIDs to the folder ``target``.
+
+    The messages are copied to ``target`` with UID COPY and the originals
+    are then flagged as deleted with UID STORE; both commands go through
+    the module-level ``client`` connection.  ``ids`` is a list of UID
+    strings; an empty list is a no-op.
+
+    Raises IOError when the server does not answer the COPY with OK.  In
+    that case the originals are left untouched.
+    """
+    # Eventually we may move messages in groups of say 50
+    if not ids:
+        return
+    ids_str = ','.join(ids)
+    typ, data = client.uid('COPY', ids_str, target)
+    if typ != 'OK':
+        # imaplib reports a refused command (e.g. missing target folder)
+        # as a 'NO' result instead of raising.  Never flag the originals
+        # as deleted unless the copy succeeded, or the mail would be lost.
+        raise IOError('Cannot copy messages %s to the folder %s: %s'
+                      % (ids_str, target, data))
+    client.uid('STORE', ids_str, '+FLAGS', r'(\Deleted)')
+
-CMD_STR = "/usr/bin/bogofilter -%s"
-imaplib.Debug = 4
+def process_folder(proc_fld_name, junk_fld, unsure_fld,
+                   bogofilter_param=None):
+    """Classify the unseen messages of an IMAP folder with bogofilter.
+
+    Every UNSEEN message of ``proc_fld_name`` is piped to bogofilter and
+    sorted by the exit code: spam is moved to ``junk_fld``, unsure
+    messages are moved to ``unsure_fld`` and ham stays where it is,
+    marked as unread again.  Works on the module-level ``client``
+    connection and closes the selected folder when done.
+
+    ``bogofilter_param`` is the list of command line options passed to
+    bogofilter; it defaults to ``['-l']``.
+
+    Returns the number of messages that were examined.  Raises IOError
+    when bogofilter exits with a code other than 0, 1 or 2.
+    """
+    # None stands for the default option list, so no mutable list object
+    # is shared between calls.
+    if bogofilter_param is None:
+        bogofilter_param = ['-l']
+
+    ham_msgs = []
+    spam_msgs = []
+    unsure_msgs = []
-def process_folder(proc_fld_name, bogofilter_param, end_fld_name, mark_seen=True):
     client.select(proc_fld_name)
-    _, resp = client.search(None, "ALL")
+    _, resp = client.search(None, "UNSEEN")
+    log.debug('resp = %s', resp)
     messages = resp[0].split()
     logging.debug('messages = %s', messages)
-    proc_msg_count = 0
+    proc_msg_count = len(messages)
     for msgId in messages:
         logging.debug('msgId = %s', msgId)
         typ, msg_data = client.fetch(msgId, '(RFC822)')
+        log.debug('fetch RFC822 result = %s', typ)
+
+        # The -p (passthrough) option outputs the message with an
+        # X-Bogosity line at the end of the message header. This requires
+        # keeping the entire message in memory when it's read from stdin
+        # (or from a pipe or socket). If the message is read from a file
+        # that can be rewound, bogofilter will read it a second time.
-        msg = hparser.parsestr(msg_data[0][1])
+        # The -e (embed) option tells bogofilter to exit with code 0 if
+        # the message can be classified, i.e. if there is not an error.
+        # Normally bogofilter uses different codes for spam, ham, and
+        # unsure classifications, but this simplifies using bogofilter
+        # with procmail or maildrop.
-        ret = subprocess.Popen(CMD_STR % bogofilter_param,
+        ret = subprocess.Popen(['bogofilter'] + bogofilter_param,
                                stdin=subprocess.PIPE,
-                               shell=True)
-        ret.communicate(input=msg_data[0][1])
-
-        logging.debug("ret.returncode = %s", ret.returncode)
-        if ret.returncode == 0:
-            del msg['X-Bogosity']
-            typ, newmsg = client.append(end_fld_name, '', '',
-                                        msg.as_string(True))
-            logging.debug("typ = %s", typ)
-            logging.debug("newmsg = %s", newmsg)
-            # if mark_seen:
-            #     client.store(newmsg, '+FLAGS', r'(\Seen)')
-            if typ != 'OK':
-                raise IOError("Cannot store a message to the folder %s"
-                              % end_fld_name)
-            else:
-                client.store(msgId, '+FLAGS', r'(\Deleted)')
-                proc_msg_count += 1
-        else:
-            raise OSError("bogofilter finished with the returncode: %d"
-                          % ret)
-
-    client.expunge()
+                               stdout=subprocess.PIPE,
+                               stderr=subprocess.PIPE)
+        out, err = ret.communicate(input=msg_data[0][1])
+        ret_code = ret.returncode
+
+        log.debug("ret.returncode = %s", ret_code)
+        log.debug('----------------------------\nerr:\n%s', err)
+
+        typ, data = client.fetch(msgId, "(UID)")
+        log.debug('fetch UID result = %s, %s', typ, data)
+        uid = parse_uid(data[0])
+        log.debug('UID = %s', uid)
+
+        if ret_code == 0:  # spam
+            spam_msgs.append(uid)
+        elif ret_code == 1:  # ham
+            ham_msgs.append(uid)
+        elif ret_code == 2:  # unsure
+            unsure_msgs.append(uid)
+        else:  # 3 (I/O or other error) or anything unexpected
+            raise IOError('Bogofilter failed with error %d' % ret_code)
+
+    move_messages(spam_msgs, junk_fld)
+    move_messages(unsure_msgs, unsure_fld)
+    # Fetching RFC822 above marks a message as seen on the server; ham
+    # stays in this folder, so make it look unread again.
+    if ham_msgs:
+        client.uid('STORE', ','.join(ham_msgs), '-FLAGS', r'(\Seen)')
+
     client.close()
     return proc_msg_count
@@ -64,12 +105,9 @@ server = config.get("imap-training", "server")
client = imaplib.IMAP4_SSL(server)
client.login(login, password)
-for box in [('Junk', 's', 'Trash', True), ('Ham', 'n', 'INBOX', False)]:
- logging.debug('box = %s', box)
- # processedCounter += process_folder(box[0], box[1], box[2], box[3)
- processedCounter += process_folder(*box)
+processedCounter += process_folder('INBOX', 'Junk', 'Unsure')
client.logout()
if processedCounter > 0:
- logging.info("Processed %d spam messages.", processedCounter)
+ logging.info("Processed %d messages.", processedCounter)