diff options
Diffstat (limited to 'imapArch.py')
-rwxr-xr-x | imapArch.py | 325 |
1 files changed, 325 insertions, 0 deletions
#!/usr/bin/env python3
# Reformatted from the cgit diff view of imapArch.py
# (diff --git a/imapArch.py b/imapArch.py, new file mode 100755,
# index 0000000..2ac91fd).
# note http://docs.python.org/lib/module-doctest.html
# resp. file:///usr/share/doc/python-docs-*/html/lib/module-doctest.html
"""Move old messages of an IMAP folder into per-year archive folders.

The account to use is read from ``~/.config/imap_archiver.cfg``.  Messages
older than a given number of days are moved from the source folder to
``<archive root><sep><year><sep><source folder>`` on the same server.
"""
import argparse
import collections
import configparser
import email
import email.header
import email.utils
import locale
import logging
import imaplib
import os
import re
import sys
from datetime import date, timedelta

logging.basicConfig(format='%(levelname)s:%(funcName)s:%(message)s',
                    stream=sys.stdout, level=logging.INFO)
log = logging.getLogger('imapArch')


# Server capabilities this script cares about (both are booleans).
Capas = collections.namedtuple('Capas', ['MOVE', 'UIDPLUS'])
# Captures the hierarchy separator from one LIST response line.
SEP_RE = re.compile(r'\s+"([/.])"\s+')
# Splits one LIST response line into (attributes, folder name).
FOLDER_RE = re.compile(r'\s+"[/.]"\s+')
# IMAP dates always use English month abbreviations, whatever the locale.
IMAP_MONTHS = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
               'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')


def imap_date(day):
    # type: (date) -> str
    """
    Format a date the way IMAP SEARCH expects it (e.g. ``07-May-2020``).

    Unlike ``strftime('%d-%b-%Y')`` the result does not depend on the
    process locale.

    :param day: date (or datetime) to format
    :return: the date as ``DD-Mon-YYYY``
    """
    return '%02d-%s-%04d' % (day.day, IMAP_MONTHS[day.month - 1], day.year)


class Message(object):
    """Abstraction over one email message."""

    def __init__(self, folder, uid):
        """
        Fetch message ``uid`` from the currently selected ``folder``.

        :param folder: Folder the message lives in (provides the connection)
        :param uid: UID of the message as a string
        :raises IOError: when the message cannot be fetched
        """
        self.folder = folder
        self.box = self.folder.box
        self.uid = uid
        self.msg = self.__get_body()
        date_header = self.msg['Date']
        try:
            self.date = email.utils.parsedate_to_datetime(date_header)
        except (TypeError, ValueError):
            # Missing or malformed Date header; such messages get no date.
            log.warning('Cannot parse date string %s', date_header)
            self.date = None
        self.subject = self.__get_subject()

    def __get_body(self):
        """
        Download the whole message and parse it.

        :return: the parsed ``email.message.Message``
        :raises IOError: when the server refuses the FETCH or returns no
            message payload
        """
        typ, data = self.box.uid('FETCH', '%s (RFC822)' % self.uid)
        if typ != 'OK':
            raise IOError('Cannot parse message %s:\n%s!' %
                          (self.uid, data))
        # A successful FETCH of a vanished UID yields [None], not a tuple.
        if not data or not isinstance(data[0], tuple):
            raise IOError('Cannot parse message %s:\n%s!' %
                          (self.uid, data))
        return email.message_from_bytes(data[0][1])

    def __get_subject(self):
        """
        Decode the Subject header into one unicode string.

        :return: decoded subject, or ``''`` when the header is missing
        :raises LookupError: when a header chunk names an unknown charset
        """
        subj_str = self.msg['Subject']
        if subj_str is None:
            return ''

        chunks = []
        for raw_str, enc in email.header.decode_header(subj_str):
            if not isinstance(raw_str, bytes):
                chunks.append(raw_str)
                continue
            if enc == 'windows-874':
                # Python has no codec of that name; use the ISO equivalent.
                enc = 'iso8859_11'
            elif enc is None or ('unknown' in enc):
                enc = None
            try:
                chunks.append(raw_str.decode(enc or 'utf8', 'replace'))
            except LookupError:
                log.error('Cannot find encoding %s', enc)
                raise
        return ''.join(chunks)

    def __str__(self):
        return "%s in %s: %s (%s)" % (self.uid, self.folder.fld_name,
                                      self.subject, self.date)


class Folder(object):
    """One IMAP folder on an EmailServer connection."""

    def __init__(self, server, fld_name, create=False):
        """
        :param server: EmailServer providing ``box`` and ``all_folders``
        :param fld_name: full name of the folder
        :param create: create the folder (and its parents) when missing
        :raises IOError: when the hierarchy separator cannot be found
        """
        self.server = server
        self.box = server.box
        self.fld_name = fld_name
        self.selected = False
        self.__create_missing = create
        self.folder_sep = self.__get_separator()

        missing = fld_name not in self.server.all_folders
        log.debug('self.__create_missing = %s, fld_name = %s, missing %s',
                  self.__create_missing, fld_name, missing)
        if self.__create_missing and missing:
            self.__create_folder()

    def select(self):
        """
        Make this folder the selected mailbox of the connection.

        :return: ``self``, so the call can be chained
        :raises IOError: when the server refuses the SELECT
        """
        ok, _ = self.box.select(mailbox=self.fld_name)
        if ok != 'OK':
            raise IOError('Cannot select folder %s' % self.fld_name)
        self.selected = True
        return self

    def __create_folder(self):
        """Create and subscribe every missing level of the folder path."""
        sep = self.folder_sep
        split_name = self.fld_name.split(sep)
        log.debug('split_name = %s', split_name)
        target = ''
        for part in split_name:
            log.debug('part = %s', part)
            target += '{}{}'.format(part, sep)
            fld = target.strip(sep)
            log.debug('fld = %s', fld)
            if fld not in self.server.all_folders:
                log.debug('Creating folder %s', fld)
                ok, data = self.box.create(fld)
                if ok != 'OK':
                    # Only warn: the folder may exist under a differently
                    # quoted name, in which case later commands still work.
                    log.warning('Cannot create folder %s: %s', fld, data)
                self.server.all_folders.append(fld)
                self.box.subscribe(fld)

    def __list_folder(self, name=None, wildcards='*'):
        """
        Run LIST and return its first response item.

        :param name: reference name; defaults to this folder followed by
            ``/`` (the real separator is not known yet at this point)
        :param wildcards: mailbox pattern
        :return: first item of the response, ``None`` when nothing matched
        :raises IOError: when the server refuses the LIST
        """
        if name is None:
            name = '{}/'.format(self.fld_name)
        ok, data = self.box.list(name, wildcards)
        if ok != 'OK':
            raise IOError('Cannot list folder %s' % self.fld_name)
        log.debug('data = %s', data)
        return data[0]

    def __get_separator(self):
        # type: () -> str
        """
        Find current folder hierarchy separator, either from a listing
        below the current folder, or if that yields nothing from the
        ``LIST "" ""`` query.

        :return: the separator character
        :raises IOError: when no separator can be determined
        """
        data = self.__list_folder()

        if data is None:
            data = self.__list_folder('""', '""')

        # imaplib delivers a tuple when the folder name came as a literal;
        # the separator is then in the first element.
        if isinstance(data, tuple):
            data = data[0]
        if data is None:
            raise IOError('Cannot find folder separator from %s' % data)

        parse_data = SEP_RE.search(data.decode())
        if parse_data is None:
            raise IOError('Cannot find folder separator from %s' % data)
        return parse_data.group(1)

    def __emails_search(self, search_type, date_str):
        """
        Run ``UID SEARCH <search_type> <date_str>`` on the selected folder.

        :param search_type: search key, e.g. ``BEFORE``
        :param date_str: date in IMAP format (``DD-Mon-YYYY``)
        :return: list of Message objects, one per matching UID
        :raises IOError: when the search or a message download fails
        """
        ok, res = self.box.uid('SEARCH',
                               '%s %s' % (search_type.upper(), date_str))
        if ok != 'OK':
            raise IOError('SEARCH %s %s failed!' %
                          (search_type.lower(), date_str))

        # An empty result may come back as [b''] or as [None].
        found = res[0] if res and res[0] is not None else b''
        return [Message(self, uid) for uid in found.decode().split()]

    def emails_before(self, before_str):
        """
        Return the messages of the selected folder older than a date.

        :param before_str: date in IMAP format (``DD-Mon-YYYY``)
        :return: list of Message objects
        """
        return self.__emails_search('BEFORE', before_str)

    def get_archive_folder(self, msg, aroot):
        """
        Compute the archive folder name for one message.

        :param msg: Message with a non-``None`` ``date``
        :param aroot: root folder of the archive (``None`` means no root)
        :return: ``<aroot><sep><year><sep><this folder>`` without leading
            or trailing separators
        """
        sep = self.folder_sep
        raw_name = sep.join((aroot or '', msg.date.strftime("%Y"),
                             self.fld_name))
        # Strip the server's own separator, not a hard-coded '/'.
        return raw_name.strip(sep)

    def _checked_uid(self, error_msg, command, *cmd_args):
        """Run one UID command; raise IOError(error_msg) unless it is OK."""
        ok, data = self.box.uid(command, *cmd_args)
        log.debug('%s ok = %s, data = %s', command, ok, data)
        if ok != 'OK':
            raise IOError(error_msg)
        return data

    def move_messages(self, messages):
        """
        Move messages from the currently selected folder into this one.

        Uses ``UID MOVE`` when the server supports it, otherwise copies
        the messages and flags the originals as deleted.  With UIDPLUS
        the originals are expunged right away; without it they are left
        flagged for whatever expunges the mailbox later.

        :param messages: comma separated UIDs of the messages to move
        :raises IOError: when any of the IMAP commands fails
        """
        assert self.selected is False, 'Target folder should not be selected.'
        if self.box.features_present.MOVE:
            # We cannot move self.box.uid('MOVE', messages, self.fld_name)
            # because IMAP4_SSL.uid has a protection against unknown
            # commands. https://bugs.python.org/issue33336
            typ, dat = self.box._simple_command('UID', 'MOVE',
                                                messages, self.fld_name)
            ok, data = self.box._untagged_response(typ, dat, 'MOVE')
            log.debug('MOVE ok = %s, data = %s', ok, data)
            if ok != 'OK':
                raise IOError('Cannot move messages to folder %s' %
                              self.fld_name)
            return

        self._checked_uid('Cannot copy messages to folder %s' %
                          self.fld_name,
                          'COPY', messages, self.fld_name)
        # UID STORE takes the message set first, then the item and value.
        self._checked_uid('Cannot delete messages.',
                          'STORE', messages, '+FLAGS.SILENT', r'(\Deleted)')
        if self.box.features_present.UIDPLUS:
            # UID EXPUNGE removes only the messages we have just copied.
            self._checked_uid('Cannot expunge messages.',
                              'EXPUNGE', messages)


class EmailServer(object):
    """Logged-in connection to one configured IMAP account."""

    def __init__(self, serverKey=None, archive_root=None, verbosity=0):
        """
        :param serverKey: name of the config section describing the
            account; defaults to ``account`` from section ``general``
        :param archive_root: root folder for the per-year archives
        :param verbosity: values above 1 switch on imaplib debug output
        """
        config = self.get_config()
        acc_name = serverKey if serverKey is not None \
            else config['general']['account']
        self.cfg = dict(config.items(acc_name))
        self.archive_root = archive_root
        self.box, self.all_folders = self.__login(**self.cfg)
        if verbosity > 1:
            self.box.debug = 4

    @staticmethod
    def _parse_folder_name(entry):
        """
        Extract the folder name from one item of a LIST response.

        :param entry: bytes line, or a tuple when the name was sent as a
            literal
        :return: folder name, or ``None`` when the item carries no name
        """
        if isinstance(entry, tuple):
            # (b'(attrs) "/" {n}', b'<name>') - the name is the payload.
            return entry[1].decode()
        if not isinstance(entry, bytes):
            return None
        parts = FOLDER_RE.split(entry.decode(), maxsplit=1)
        if len(parts) != 2:
            return None
        return parts[1]

    def __login(self, host='localhost', username=None, password=None,
                ssl=None):
        """
        Connect over SSL, log in and collect capabilities and folders.

        :param host: IMAP server host name
        :param username: login name
        :param password: login password
        :param ssl: accepted for compatibility with existing config
            files; not used, the connection is always SSL
        :return: tuple ``(connection, list of folder names)``
        :raises IOError: when login or the folder listing fails
        """
        box = imaplib.IMAP4_SSL(host=host)
        ok, data = box.login(username, password)
        if ok != 'OK':
            # Never put the password into an exception message.
            raise IOError('Cannot login with credentials %s' %
                          str((host, username)))

        ok, data = box.capability()
        capas = data[0].decode().upper().split()
        log.debug('capas = %s', capas)
        # Match whole capability tokens, not substrings.
        box.features_present = Capas._make(
            ['MOVE' in capas, 'UIDPLUS' in capas])

        ok, data = box.list()
        if ok != 'OK':
            raise IOError('Cannot list folders!')

        folders = []
        for entry in data:
            name = self._parse_folder_name(entry)
            if name is None:
                log.debug('Skipping unparsable LIST item %s', entry)
            else:
                folders.append(name)

        return box, folders

    @staticmethod
    def get_config():
        """
        Read ``~/.config/imap_archiver.cfg``.

        :return: the ConfigParser; it has no sections when the file is
            missing
        """
        config = configparser.ConfigParser()
        config.read(os.path.expanduser('~/.config/imap_archiver.cfg'))
        return config

    def archive_folder(self, folder_name, before_date):
        # type: (str, date) -> None
        """
        Archive one folder to the proper archiving positions.

        Messages without a parsable date are left where they are.

        :param folder_name: name of the folder to archive
        :param before_date: messages older than this date are moved
        :raises IOError: when an IMAP command fails
        """
        print('Archiving {}'.format(folder_name))
        copy_cache = collections.defaultdict(list)
        fld = Folder(self, folder_name).select()
        before_str = imap_date(before_date)
        for msg in fld.emails_before(before_str):
            sys.stdout.write('.')
            if msg.date is None:
                log.debug('Skipping message without date: %s', msg.uid)
                continue
            arch_folder = fld.get_archive_folder(msg, self.archive_root)
            copy_cache[arch_folder].append(msg)
        sys.stdout.write('\n')

        for key, cached in copy_cache.items():
            log.debug('***** %s:', key)
            for msg in cached:
                log.debug('\tmsg: %s', str(msg))

        # Go through the cache and make moves.
        for arch_dir, cached in copy_cache.items():
            msg_ids = ','.join([x.uid for x in cached])
            log.debug('arch_dir = %s, msgs = %s', arch_dir,
                      msg_ids)
            target = Folder(self, arch_dir, create=True)
            target.move_messages(msg_ids)

    def list_folders(self):
        """Return all folder names sorted case-insensitively."""
        return sorted(self.all_folders, key=str.lower)

    def __enter__(self):
        return self

    def __exit__(self, *args):
        """Close the selected mailbox (if any) and log out."""
        if args != (None, None, None):
            log.warning('args = %s', args)
        try:
            try:
                # CLOSE is only legal while a mailbox is selected.
                if self.box.state == 'SELECTED':
                    self.box.close()
            finally:
                self.box.logout()
        except (imaplib.IMAP4.error, OSError) as exc:
            # Do not mask an exception coming from the with-block.
            log.warning('Cannot cleanly close the connection: %s', exc)


def main(argv=None):
    """
    Command line entry point.

    :param argv: argument list without the program name; ``None`` means
        ``sys.argv[1:]``
    """
    try:
        locale.setlocale(locale.LC_ALL, 'en_US')
    except locale.Error:
        # en_US is not installed everywhere; the C locale always is.
        locale.setlocale(locale.LC_ALL, 'C')

    argp = argparse.ArgumentParser()
    subps = argp.add_subparsers(dest='cmd')
    subps.add_parser('servers',
                     help='List available servers (by keywords)')
    subps.add_parser('list', help='List all folders')
    subp_arc = subps.add_parser('archive', help='Archive folder[s]')
    subp_arc.add_argument('-d', type=int, default=14, dest='days',
                          help='How old messages we should keep')
    subp_arc.add_argument('-s', '--server',
                          help='Symbolic name of the server to be used')
    subp_arc.add_argument('folder', help='Folder which should be archived')
    subp_arc.add_argument('archive',
                          help='Root folder to store annual archives to')
    argp.add_argument('-v', action='count', dest='verbosity', default=0,
                      help='Verbosity of the operation '
                           '(-vv shows IMAP chatter)')

    args = argp.parse_args(argv)
    log.debug('args = %s', args)

    if args.verbosity > 0:
        log.setLevel(logging.DEBUG)

    if args.cmd is None:
        # Without this the archive branch would fail on missing options.
        argp.error('a command is required (servers, list or archive)')
    elif args.cmd == 'servers':
        config = EmailServer.get_config()
        sects = set(config.keys()) - {'DEFAULT', 'general'}
        print('Available servers:\n%s' % '\n'.join(sorted(sects)))
    elif args.cmd == 'list':
        with EmailServer(verbosity=args.verbosity) as myStore:
            print('\n'.join(myStore.list_folders()))
    else:
        before = date.today() - timedelta(days=args.days)
        with EmailServer(args.server, args.archive,
                         verbosity=args.verbosity) as myStore:
            myStore.archive_folder(args.folder, before)


if __name__ == '__main__':
    main()