summaryrefslogtreecommitdiffstats
path: root/check_bogofilter.py
blob: c36cf44cfa66a91095e6e75f239c00ec31bac9b3 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#!/usr/bin/python
import imaplib
import logging
import os.path
import re
import subprocess
from collections import namedtuple
from ConfigParser import ConfigParser

# Logging setup: each record is printed as "LEVEL:function:message" and only
# WARNING and above are shown.
logging.basicConfig(level=logging.WARNING,
                    format='%(levelname)s:%(funcName)s:%(message)s')
log = logging.getLogger('check_bogofilter')

# Uncomment while debugging the IMAP conversation:
# imaplib.Debug = 4

# Matches a text of the form "<digits> (UID <digits>)"; the named group
# "uid" captures the second number.
pattern_uid = re.compile(r'\d+ \(UID (?P<uid>\d+)\)')

# Flags telling which optional capabilities the current server offers.
# Extend the field list when more capabilities need to be tracked.
AvailableCapabilities = namedtuple('Capas', 'MOVE UIDPLUS')


def parse_uid(data):
    """Extract the UID from a response item such as ``'12 (UID 345)'``.

    The item is matched from its start against the module-level
    ``pattern_uid`` regular expression.

    :param data: one item of the data returned by a ``(UID)`` fetch.
    :returns: the text captured by the pattern's ``uid`` group.
    :raises ValueError: if ``data`` does not match ``pattern_uid``.
    """
    match = pattern_uid.match(data)
    if match is None:
        # Without this guard a non-matching item surfaced as an opaque
        # "'NoneType' object has no attribute 'group'" AttributeError.
        raise ValueError('Cannot parse UID from response: %r' % (data,))
    return match.group('uid')


def move_messages(ids, target):
    """Move messages, given by UID, from the selected mailbox to ``target``.

    The move is carried out on the module-level ``client`` connection as
    UID COPY, then UID STORE adding the Deleted flag to the originals, then
    UID EXPUNGE if ``client._features_available.UIDPLUS`` is true.  When
    that flag is false the originals are only flagged here.

    :param ids: sequence of UID strings; an empty sequence is a no-op.
    :param target: name of the destination mailbox.
    :raises client.error: if the COPY does not come back with status
        ``'OK'``; in that case nothing is flagged or expunged.
    """
    # Eventually we may move messages in groups of say 50
    if not ids:
        return

    ids_str = ','.join(ids)
    typ, data = client.uid('COPY', ids_str, target)
    if typ != 'OK':
        # The originals must only be deleted once the copy is known to
        # exist; otherwise a failed COPY would lose the messages.
        raise client.error('COPY of UIDs %s to %s failed: %s %s'
                           % (ids_str, target, typ, data))
    client.uid('STORE', ids_str, '+FLAGS', r'(\Deleted)')
    if client._features_available.UIDPLUS:
        client.uid('EXPUNGE', ids_str)


def process_folder(proc_fld_name, target_fld, unsure_fld, spam_fld=None,
                   bogofilter_param=None):
    """Classify the unseen messages of one mailbox with bogofilter.

    Every message returned by ``SEARCH UNSEEN`` in ``proc_fld_name`` is
    fetched, piped to a ``bogofilter`` subprocess and sorted by the exit
    code of that process:

    * 0 (spam): handed to ``move_messages`` with ``spam_fld``; when
      ``spam_fld`` is None the messages are flagged Deleted and Seen
      instead of being moved.
    * 1 (ham): left in the mailbox; when ``spam_fld`` is None the Seen
      flag is removed from them.
    * 2 (unsure): handed to ``move_messages`` with ``unsure_fld``.

    Works on the module-level ``client`` connection and closes the selected
    mailbox before returning.

    :param proc_fld_name: name of the mailbox to select and scan.
    :param target_fld: not used by this function.
        NOTE(review): presumably meant as a destination for ham -- confirm.
    :param unsure_fld: mailbox receiving the messages classified as unsure.
    :param spam_fld: mailbox receiving spam, or None (see above).
    :param bogofilter_param: sequence of extra command line arguments for
        bogofilter; None (the default) means ``['-l']``.
    :returns: number of messages returned by the search.
    :raises IOError: if bogofilter exits with a code other than 0, 1 or 2.
    """
    if bogofilter_param is None:
        bogofilter_param = ['-l']
    command = ['bogofilter'] + list(bogofilter_param)

    ham_msgs = []
    spam_msgs = []
    unsure_msgs = []

    client.select(proc_fld_name)
    _, resp = client.search(None, "UNSEEN")
    log.debug('resp = %s', resp)
    messages = resp[0].split()
    log.debug('messages = %s', messages)
    proc_msg_count = len(messages)

    for msg_num in messages:
        log.debug('msgId = %s', msg_num)
        typ, msg_data = client.fetch(msg_num, '(RFC822)')
        log.debug('fetch RFC822 result = %s', typ)

        # Notes on bogofilter options:
        #
        # The -p (passthrough) option outputs the message with an
        # X-Bogosity line at the end of the message header. This requires
        # keeping the entire message in memory when it's read from stdin
        # (or from a pipe or socket). If the message is read from a file
        # that can be rewound, bogofilter will read it a second time.
        #
        # The -e (embed) option tells bogofilter to exit with code 0 if
        # the message can be classified, i.e. if there is not an error.
        # Normally bogofilter uses different codes for spam, ham, and
        # unsure classifications, but this simplifies using bogofilter
        # with procmail or maildrop.
        #
        # NOTE(review): the exit code dispatch below relies on the distinct
        # codes, so with -e every classified message would count as spam.

        proc = subprocess.Popen(command,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        _out, err = proc.communicate(input=msg_data[0][1])
        ret_code = proc.returncode

        log.debug("ret.returncode = %s", ret_code)
        log.debug('----------------------------\nerr:\n%s', err)

        # The search returned message numbers; the UID is looked up so
        # that the later STORE/COPY/EXPUNGE commands can address by UID.
        typ, data = client.fetch(msg_num, "(UID)")
        log.debug('fetch UID result = %s, %s', typ, data)
        uid = parse_uid(data[0])
        log.debug('UID = %s', uid)

        if ret_code == 0:  # spam
            spam_msgs.append(uid)
        elif ret_code == 1:  # ham
            ham_msgs.append(uid)
        elif ret_code == 2:  # unsure
            unsure_msgs.append(uid)
        else:  # 3 or something else I/O error
            # Format the message here: passing ret_code as a second
            # exception argument would leave the "%d" unexpanded.
            raise IOError('Bogofilter failed with error %d' % ret_code)

    move_messages(unsure_msgs, unsure_fld)
    log.debug('ham_msgs = %s, spam_msgs = %s', ham_msgs, spam_msgs)
    if ham_msgs and spam_fld is None:
        # NOTE(review): presumably undoes the Seen flag set by fetching
        # the message body above -- confirm.
        client.uid('STORE', ','.join(ham_msgs), '-FLAGS', r'(\Seen)')
    if spam_msgs:
        if spam_fld is not None:
            move_messages(spam_msgs, spam_fld)
        else:
            spam_str = ','.join(spam_msgs)
            client.uid('STORE', spam_str,
                       '+FLAGS.SILENT', r'(\Deleted \Seen)')
            # Same guard as in move_messages: UID EXPUNGE is only sent
            # when the server advertised UIDPLUS.  imaplib documents
            # close() as removing deleted messages, so the flagged spam is
            # expected to go away there otherwise.
            if client._features_available.UIDPLUS:
                client.uid('EXPUNGE', spam_str)

    client.close()

    return proc_msg_count


# Script body: read the credentials, connect, classify INBOX, log out.
processedCounter = 0
config = ConfigParser()
config.read(os.path.expanduser("~/.bogofilter-imap-train-rc"))

login = config.get("imap-training", "login")
password = config.get("imap-training", "password")
server = config.get("imap-training", "server")
client = imaplib.IMAP4_SSL(server)
# Placeholder until the server's capability list has been read.
client._features_available = AvailableCapabilities(False, False)

try:
    client.login(login, password)

    ok, dat = client.capability()
    if ok != 'OK':
        raise client.error(dat[-1])
    # Compare whole, upper-cased capability names: a substring test on the
    # raw line would also be true for any name merely containing "MOVE"
    # or "UIDPLUS".
    capas = dat[0].decode().upper().split()
    client._features_available = AvailableCapabilities._make(
        ['MOVE' in capas, 'UIDPLUS' in capas])

    processedCounter += process_folder('INBOX', '_suspects', '_unsure',
                                       '_spam')
finally:
    # Log out even when login, capability detection or processing raised,
    # so the connection is not left open.
    client.logout()

if processedCounter > 0:
    log.info("Processed %d messages.", processedCounter)