diff options
author | Matěj Cepl <mcepl@redhat.com> | 2014-03-08 01:45:06 +0100 |
---|---|---|
committer | Matěj Cepl <mcepl@redhat.com> | 2014-03-08 02:00:40 +0100 |
commit | 2204de5201c339935c72cc23490f8ed6f47ac6d9 (patch) | |
tree | 578e2bc389643ad3fa4192d61fe8b9fd57863fd1 /train_bogofilter | |
parent | f182ad26146f0c266217e95d67912efda1233264 (diff) | |
download | imap-folder-training-2204de5201c339935c72cc23490f8ed6f47ac6d9.tar.gz |
First version of train_bogofilter.
Diffstat (limited to 'train_bogofilter')
-rwxr-xr-x | train_bogofilter | 86 |
1 files changed, 56 insertions, 30 deletions
```diff
diff --git a/train_bogofilter b/train_bogofilter
index 7a4eff2..56d1086 100755
--- a/train_bogofilter
+++ b/train_bogofilter
@@ -1,16 +1,62 @@
 #!/usr/bin/python
-import imaplib, subprocess, email, sys
+import imaplib
+import subprocess
+import email
 import logging
 import email.Parser
 from ConfigParser import ConfigParser
 
 logging.basicConfig(format='%(levelname)s:%(funcName)s:%(message)s',
                     level=logging.DEBUG)
 
-cmd_string = "/usr/bin/dspam --user dspam --class=spam "\
-    + "--source=error --deliver=summary --stdout --signature='%s'"
+CMD_STR = "/usr/bin/bogofilter -%s"
 
-DEBUG = False
+def process_folder(proc_fld_name, bogofilter_param, end_fld_name):
+    client.select(proc_fld_name)
+    _, resp = client.search(None, "ALL")
+    messages = resp[0].split()
+    proc_msg_count = 0
+    logging.debug(messages)
+    for msgId in messages:
+        # FIXME or no .PEEK ... do I want to mark a message as Seen?
+        # If not, I have to unset \Seen flag
+        # fetch(msgId, '(RFC822)') gets whole message as it is
+        _, msg_data = client.fetch(msgId,
+                                   '(BODY.PEEK[HEADER.FIELDS ' +
+                                   '(SUBJECT FROM X-DSPAM-SIGNATURE)])')
+        logging.debug("msgId = %s", msgId)
+        headers = hparser.parsestr(msg_data[0][1], headersonly=True)
+        logging.debug("headers:\n%s", headers)
+        if 'X-Dspam-Signature' in headers.keys():
+            ret = subprocess.Popen(CMD_STR % bogofilter_param,
+                                   stdin=subprocess.Popen,
+                                   shell=True)
+            ret.communicate(input=msg_data)  # FIXME Whole message
+
+            if ret.returncode == 0:
+                typ, _ = client.copy(msgId, end_fld_name)
+                # Also
+                # http://bytes.com/topic/python/answers\
+                # /41900-copying-moving-mail-message-using-imaplib
+                # Also RFC 2060, section 6.4.8, UID command
+                # Also http://pymotw.com/2/imaplib/
+                # folder.copy(msg_ids, where_folder)
+                if typ != 'OK':
+                    raise IOError("Cannot store a message to the folder %s"
+                                  % end_fld_name)
+                else:
+                    client.store(msgId, '+FLAGS', r'(\Deleted)')
+                    proc_msg_count += 1
+            else:
+                raise OSError("bogofilter finished with the returncode: %d"
+                              % ret)
+
+    client.expunge()
+    client.close()
+
+    return proc_msg_count
+
+processedCounter = 0
 hparser = email.Parser.Parser()
 config = ConfigParser()
 config.read("/etc/bogofilter-imap-train.cfg")
@@ -19,31 +65,11 @@ login = config.get("imap-training", "login")
 password = config.get("imap-training", "password")
 client = imaplib.IMAP4_SSL("luther.ceplovi.cz")
 client.login(login, password)
-client.select("Public folders/Junk")
-status, resp = client.search(None, "ALL")
-messages = resp[0].split()
-processedCounter = 0
-logging.debug(messages)
-
-for msgId in messages:
-    # or no .PEEK ... do I want to mark a message as Seen?
-    typ, msg_data = client.fetch(
-        msgId, '(BODY.PEEK[HEADER.FIELDS (SUBJECT FROM X-DSPAM-SIGNATURE)])')
-    logging.debug("msgId = %s" % msgId)
-    headers = hparser.parsestr(msg_data[0][1], headersonly=True)
-    logging.debug("headers:\n%s" % headers)
-    if 'X-Dspam-Signature' in headers.keys():
-        ret = subprocess.Popen(cmd_string
-                               % headers['X-Dspam-Signature'],
-                               shell=True).wait()
-        if ret == 0:
-            typ, response = client.store(msgId, '+FLAGS', r'(\Deleted)')
-            processedCounter += 1
-        else:
-            raise OSError("dspam finished with failure code: %d" % ret)
-
-client.expunge()
-client.close()
+
+for box in [('Spam', 's', 'Deleted Items'), ('Ham', 'n', 'INBOX')]:
+    processedCounter += process_folder(box[0], box[1], box[2])
+client.logout()
+
 
 if processedCounter > 0:
-    logging.info("Processed %d spam messages." % processedCounter)
+    logging.info("Processed %d spam messages.", processedCounter)
```