summaryrefslogtreecommitdiffstats
path: root/check_bogofilter.py
blob: cd0fb720c2fef2d336b0531458b8b11542e43e23 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#!/usr/bin/python
import email
import imaplib
import logging
import os.path
import re
import subprocess
from ConfigParser import ConfigParser

# Log records show the level, the emitting function and the message.
logging.basicConfig(format='%(levelname)s:%(funcName)s:%(message)s',
                    level=logging.INFO)
log = logging.getLogger('check_bogofilter')

# Enable imaplib's own debug output only when debug logging is requested.
# Lower numeric levels are more verbose, so the comparison must be "<=";
# ">=" is true for every level and would switch the output on permanently.
if log.getEffectiveLevel() <= logging.DEBUG:
    imaplib.Debug = 4

# Matches a "FETCH (UID)" response item such as "12 (UID 345)" and captures
# the UID.  Raw string so the backslashes reach the regex engine untouched.
pattern_uid = re.compile(r'\d+ \(UID (?P<uid>\d+)\)')


def parse_uid(data):
    """Extract the UID from one ``FETCH (UID)`` response item.

    :param data: response item as matched by ``pattern_uid``,
        e.g. ``'12 (UID 345)'``.
    :returns: the UID as a string of digits.
    :raises ValueError: if ``data`` does not match ``pattern_uid``.
    """
    match = pattern_uid.match(data)
    if match is None:
        # Fail with the offending data instead of an AttributeError on None.
        raise ValueError('Cannot parse UID from response %r' % (data,))
    return match.group('uid')


def move_messages(ids, target):
    """Move messages of the selected mailbox to the folder ``target``.

    The messages are copied with ``UID COPY`` and, only if that succeeded,
    the originals are flagged as deleted.  Uses the module-level ``client``.

    :param ids: list of message UIDs (strings); an empty list is a no-op.
    :param target: name of the destination folder.
    """
    # Eventually we may move messages in groups of say 50
    if not ids:
        return

    ids_str = ','.join(ids)
    typ, data = client.uid('COPY', ids_str, target)
    if typ != 'OK':
        # imaplib reports a "NO" answer (e.g. missing target folder) through
        # the return value, not an exception.  Flagging the originals as
        # deleted now would lose them once the mailbox is closed.
        log.error('Cannot copy messages %s to %s: %s %s',
                  ids_str, target, typ, data)
        return
    client.uid('STORE', ids_str, '+FLAGS', r'(\Deleted)')


def process_folder(proc_fld_name, junk_fld, unsure_fld,
                   bogofilter_param=None):
    """Classify the unseen messages of an IMAP folder with bogofilter.

    Every message matching ``UNSEEN`` in ``proc_fld_name`` is piped to the
    ``bogofilter`` executable.  Spam is moved to ``junk_fld``, unsure
    messages are moved to ``unsure_fld`` and ham stays in the folder with
    its Seen flag removed again.  The folder is closed afterwards.  Uses the
    module-level ``client``.

    :param proc_fld_name: name of the folder to process.
    :param junk_fld: name of the folder receiving spam.
    :param unsure_fld: name of the folder receiving unsure messages.
    :param bogofilter_param: sequence of extra command line arguments for
        bogofilter; defaults to ``['-l']``.
    :returns: number of messages returned by the search.
    :raises IOError: if the search or a message fetch is not answered with
        ``OK``, or if bogofilter exits with a code other than 0, 1 or 2.
    """
    if bogofilter_param is None:
        bogofilter_param = ['-l']

    ham_msgs = []
    spam_msgs = []
    unsure_msgs = []

    client.select(proc_fld_name)
    typ, resp = client.search(None, "UNSEEN")
    log.debug('resp = %s', resp)
    if typ != 'OK':
        raise IOError('Search in %s failed: %s %s'
                      % (proc_fld_name, typ, resp))
    messages = resp[0].split()
    log.debug('messages = %s', messages)
    proc_msg_count = len(messages)

    for msg_id in messages:
        log.debug('msgId = %s', msg_id)
        typ, msg_data = client.fetch(msg_id, '(RFC822)')
        log.debug('fetch RFC822 result = %s', typ)
        if typ != 'OK':
            # Without this check the error text itself would be indexed
            # below and fed to bogofilter as if it were the message.
            raise IOError('Cannot fetch message %s: %s %s'
                          % (msg_id, typ, msg_data))

        # The -p (passthrough) option outputs the message with an
        # X-Bogosity line at the end of the message header. This requires
        # keeping the entire message in memory when it's read from stdin
        # (or from a pipe or socket). If the message is read from a file
        # that can be rewound, bogofilter will read it a second time.

        # The -e (embed) option tells bogofilter to exit with code 0 if
        # the message can be classified, i.e. if there is not an error.
        # Normally bogofilter uses different codes for spam, ham, and
        # unsure classifications, but this simplifies using bogofilter
        # with procmail or maildrop.

        # Neither option is passed by default: the classification is read
        # from the distinct exit codes below.
        proc = subprocess.Popen(['bogofilter'] + list(bogofilter_param),
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        _, err = proc.communicate(input=msg_data[0][1])
        ret_code = proc.returncode

        log.debug("ret.returncode = %s", ret_code)
        log.debug('----------------------------\nerr:\n%s', err)

        # Later commands address the message by UID, so translate the
        # number returned by the search into its UID.
        typ, data = client.fetch(msg_id, "(UID)")
        log.debug('fetch UID result = %s, %s', typ, data)
        uid = parse_uid(data[0])
        log.debug('UID = %s', uid)

        if ret_code == 0:  # spam
            spam_msgs.append(uid)
        elif ret_code == 1:  # ham
            ham_msgs.append(uid)
        elif ret_code == 2:  # unsure
            unsure_msgs.append(uid)
        else:  # 3 or something else I/O error
            raise IOError('Bogofilter failed with error %d' % ret_code)

    move_messages(spam_msgs, junk_fld)
    move_messages(unsure_msgs, unsure_fld)
    if ham_msgs:
        # Presumably needed because fetching RFC822 marks a message as seen;
        # ham should look untouched to the user.
        client.uid('STORE', ','.join(ham_msgs), '-FLAGS', r'(\Seen)')

    client.close()

    return proc_msg_count

# Script body: read the IMAP credentials, classify the unseen messages of
# the INBOX and report how many messages were looked at.
processedCounter = 0
config = ConfigParser()
config.read(os.path.expanduser("~/.bogofilter-imap-train-rc"))

login = config.get("imap-training", "login")
password = config.get("imap-training", "password")
server = config.get("imap-training", "server")
client = imaplib.IMAP4_SSL(server)

try:
    client.login(login, password)
    processedCounter += process_folder('INBOX', 'Junk', 'Unsure')
finally:
    # Always end the session, even when login or processing failed.
    client.logout()

if processedCounter > 0:
    log.info("Processed %d messages.", processedCounter)