#!/usr/bin/env python3.6
# note http://docs.python.org/lib/module-doctest.html
# resp. file:///usr/share/doc/python-docs-*/html/lib/module-doctest.html
"""Archive old messages of an IMAP folder into per-year archive folders.

Account settings are read from ``~/.config/imap_archiver.cfg``.  The
``[general]`` section names the default account via ``account``; every other
section describes one server with the keyword arguments accepted by the login
routine (``host``, ``username``, ``password``, ``ssl``).
"""
import argparse
import collections
import configparser
import email
import email.header
import email.utils
import functools
import locale
import logging
import imaplib
import os
import re
import sys
from datetime import date, timedelta

logging.basicConfig(format='%(levelname)s:%(funcName)s:%(message)s',
                    stream=sys.stdout, level=logging.DEBUG)
log = logging.getLogger('imapArch')


class ServerError(IOError):
    """Problem with the server connection, its configuration or replies."""


class FolderError(IOError):
    """A folder could not be selected or messages could not be moved."""


class MessageError(IOError):
    """A message could not be searched for, fetched or interpreted."""


# Server capabilities which decide how messages are moved.
Capas = collections.namedtuple('Capas', ['MOVE', 'UIDPLUS'])

# Hierarchy separator inside one line of a LIST response (captured).
SEP_RE = re.compile(r'\s+"([/.])"\s+')
# The same separator column, used to split flags from the folder name.
FOLDER_RE = re.compile(r'\s+"[/.]"\s+')

# IMAP dates always use English month abbreviations, whatever the locale is.
IMAP_MONTHS = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
               'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')


def imap_date(day):
    # type: (date) -> str
    """Format ``day`` as ``DD-Mon-YYYY`` independently of the locale."""
    return '%02d-%s-%04d' % (day.day, IMAP_MONTHS[day.month - 1], day.year)


def format_servers(config):
    # type: (configparser.ConfigParser) -> str
    """Return the text listing all configured servers, one name per line.

    The ``DEFAULT`` and ``general`` sections are not servers and are left
    out.
    """
    sects = set(config.keys()) - {'DEFAULT', 'general'}
    return 'Available servers:\n%s' % '\n'.join(sorted(sects))


class Message(object):
    """Abstraction over one email message.

    The whole message is fetched from the server on construction.

    :param client: connection with an ``imaplib``-like ``uid()`` method and
        the source folder already selected
    :param uid: UID of the message as a string
    :raises MessageError: when the message cannot be fetched or has no
        usable ``Date`` header
    """

    def __init__(self, client, uid):
        self.client = client
        self.uid = uid
        self.msg = self.__get_body()
        self.date = self.__get_date()
        self.subject = self.__get_subject()

    def __get_body(self):
        """Fetch the raw message and parse it into an email object."""
        typ, data = self.client.uid('FETCH', '%s (RFC822)' % self.uid)
        # A successful fetch of an existing message yields a
        # (envelope, payload) tuple as the first item; anything else
        # (an error text, None for an unknown UID) cannot be parsed.
        if typ != 'OK' or not data or not isinstance(data[0], tuple):
            raise MessageError('Cannot parse message %s:\n%s!'
                               % (self.uid, data))
        return email.message_from_bytes(data[0][1])

    def __get_date(self):
        """Parse the ``Date`` header into a datetime."""
        raw_date = self.msg['Date']
        try:
            return email.utils.parsedate_to_datetime(raw_date)
        except (TypeError, ValueError) as ex:
            # TypeError/ValueError depending on the Python version when the
            # header is missing or malformed.
            raise MessageError('Message %s has no usable Date header: %r'
                               % (self.uid, raw_date)) from ex

    def __get_subject(self):
        """Decode the (possibly RFC 2047 encoded) ``Subject`` header."""
        raw_subject = self.msg['Subject']
        if raw_subject is None:
            return ''
        parts = []
        for raw_str, enc in email.header.decode_header(raw_subject):
            if isinstance(raw_str, bytes):
                try:
                    parts.append(raw_str.decode(enc or 'utf8'))
                except (LookupError, UnicodeDecodeError):
                    # Unknown or wrongly declared charset; keep what we can.
                    parts.append(raw_str.decode('utf8', errors='replace'))
            else:
                parts.append(raw_str)
        return ''.join(parts)

    def __str__(self):
        return "%s: %s (%s)" % (self.uid, self.subject, self.date)


class Folder(object):
    """One IMAP folder reachable through an already logged-in connection.

    :param box: ``imaplib``-like connection; ``move_messages`` additionally
        expects a ``features_present`` attribute (a ``Capas`` tuple) on it
    :param fld_name: full name of the folder
    :param create: create (and subscribe to) the folder when LIST finds
        nothing for it
    :raises ServerError: when LIST fails or the hierarchy separator cannot
        be determined
    """

    def __init__(self, box, fld_name, create=False):
        self.box = box
        self.fld_name = fld_name
        self.selected = False
        self.__create_missing = create
        # Per-instance cache of LIST responses keyed by (name, wildcards).
        self.__list_cache = {}
        self.folder_sep = self.__get_separator()
        if self.__create_missing and self.__list_folder() is None:
            self.__create_folder()

    def select(self):
        """Select this folder on the connection and return ``self``.

        :raises FolderError: when the server refuses the SELECT
        """
        ok, _ = self.box.select(mailbox=self.fld_name)
        if ok != 'OK':
            raise FolderError('Cannot select folder %s' % self.fld_name)
        self.selected = True
        return self

    def __create_folder(self):
        """Create every missing level of the folder path and subscribe it."""
        sep = self.folder_sep
        target = ''
        for part in self.fld_name.split(sep):
            # NOTE(review): every level, including the last one, is created
            # with a trailing separator, exactly as before -- confirm this
            # is what the target servers expect.
            target += '{}{}'.format(part, sep)
            if self.__list_folder(target) is None:
                ok, data = self.box.create(target)
                if ok != 'OK':
                    # Not fatal: the level may exist without any children.
                    log.warning('CREATE %s: ok = %s, data = %s',
                                target, ok, data)
                self.box.subscribe(target)
        # Responses cached before the creation are stale now.
        self.__list_cache.clear()

    def __list_folder(self, name=None, wildcards='*'):
        """Return the first line of the LIST response (cached).

        :param name: first LIST argument, this folder's name by default
        :param wildcards: second LIST argument
        :returns: raw first response item, ``None`` when nothing matched
        :raises ServerError: when the LIST command fails
        """
        if name is None:
            name = self.fld_name
        key = (name, wildcards)
        if key not in self.__list_cache:
            ok, data = self.box.list(name, wildcards)
            if ok != 'OK':
                raise ServerError('Cannot list folder %s' % self.fld_name)
            self.__list_cache[key] = data[0] if data else None
        return self.__list_cache[key]

    def __get_separator(self):
        # type: () -> str
        """
        Find current folder hierarchy separator, either from the listing of
        the current folder, or if that is empty from ``LIST "" ""``.

        :raises ServerError: when no separator can be found
        """
        data = self.__list_folder()
        if data is None:
            data = self.__list_folder('""', '""')
        if data is None:
            raise ServerError('Cannot find folder separator for %s'
                              % self.fld_name)
        parse_data = SEP_RE.search(data.decode())
        if parse_data is None:
            raise ServerError('Cannot find folder separator from %s' % data)
        return parse_data.group(1)

    def __emails_search(self, search_type, date_str):
        """Run ``UID SEARCH <search_type> <date_str>`` and fetch the hits.

        :returns: list of ``Message`` objects, empty when nothing matched
        :raises MessageError: when the SEARCH command fails
        """
        ok, res = self.box.uid('SEARCH',
                               '%s %s' % (search_type.upper(), date_str))
        if ok != 'OK':
            raise MessageError('SEARCH %s %s failed!'
                               % (search_type.lower(), date_str))
        found = res[0] if res else None
        if not found:
            # b'' or None: nothing matched, there is nothing to fetch.
            return []
        # split() without an argument never yields empty UIDs.
        return [Message(self.box, uid) for uid in found.decode().split()]

    def emails_before(self, before_str):
        """Return messages older than ``before_str`` (``DD-Mon-YYYY``)."""
        return self.__emails_search('BEFORE', before_str)

    def get_archive_folder(self, msg, aroot):
        """Return ``<aroot><sep><year of msg><sep><this folder's name>``."""
        return self.folder_sep.join((aroot, msg.date.strftime("%Y"),
                                     self.fld_name))

    def __uid_checked(self, error_msg, command, *args):
        """Run one UID command, log its result, raise on failure."""
        ok, data = self.box.uid(command, *args)
        log.debug('%s ok = %s, data = %s', command, ok, data)
        if ok != 'OK':
            raise FolderError(error_msg)

    def move_messages(self, messages):
        """Move messages from the selected source folder into this one.

        Uses ``MOVE`` when the server offers it; otherwise copies the
        messages, flags the originals as deleted and, with ``UIDPLUS``,
        expunges exactly those messages.

        :param messages: UID set as a string, e.g. ``'1,2,5'``
        :raises FolderError: when this (target) folder is selected or any
            of the commands fails
        """
        if self.selected:
            raise FolderError('Target folder should not be selected.')
        features = self.box.features_present
        if features.MOVE:
            self.__uid_checked(
                'Cannot move messages to folder %s' % self.fld_name,
                'MOVE', messages, self.fld_name)
            return
        self.__uid_checked(
            'Cannot copy messages to folder %s' % self.fld_name,
            'COPY', messages, self.fld_name)
        # The message set goes before the data item name and the flag list.
        self.__uid_checked('Cannot delete messages-',
                           'STORE', messages, '+FLAGS.SILENT', r'(\DELETED)')
        if features.UIDPLUS:
            self.__uid_checked('Cannot expunge messages.',
                               'EXPUNGE', messages)


class EmailServer(object):
    """Logged-in IMAP connection together with the archiving logic.

    Usable as a context manager which releases the connection on exit.

    :param serverKey: name of the configuration section to use; the
        ``account`` value of ``[general]`` when ``None``
    :param archive_root: root folder of the per-year archives
    :raises ServerError: when the account is not configured or the login
        is refused
    """

    def __init__(self, serverKey=None, archive_root=None):
        config = self.get_config()
        if serverKey is not None:
            acc_name = serverKey
        else:
            try:
                acc_name = config['general']['account']
            except KeyError as ex:
                raise ServerError(
                    'No default account configured in [general]') from ex
        try:
            self.cfg = dict(config.items(acc_name))
        except configparser.NoSectionError as ex:
            raise ServerError('Server %s is not configured'
                              % acc_name) from ex
        self.archive_root = archive_root
        self.box = self.__login(**self.cfg)

    def __login(self, host='localhost', username=None, password=None,
                ssl=None):
        """Connect over SSL, log in and record the server capabilities.

        ``ssl`` is accepted so that it may appear in the configuration, but
        it is not used: the connection is always made with ``IMAP4_SSL``.
        """
        box = imaplib.IMAP4_SSL(host=host)
        ok, data = box.login(username, password)
        if ok != 'OK':
            # The password is deliberately kept out of the message.
            raise ServerError('Cannot login with credentials %s'
                              % str((host, username,)))
        ok, data = box.capability()
        if ok != 'OK' or not data or data[0] is None:
            raise ServerError('Cannot get capabilities of %s' % host)
        capas = data[0].decode()
        box.features_present = Capas._make(['MOVE' in capas,
                                            'UIDPLUS' in capas])
        return box

    @staticmethod
    def get_config():
        """Read the configuration file of the script."""
        # In case the configuration file is missing, the parser stays empty.
        config = configparser.ConfigParser()
        config.read(os.path.expanduser('~/.config/imap_archiver.cfg'))
        return config

    def archive_folder(self, folder_name, before_date):
        # type: (str, date) -> None
        """
        Archive one folder to the proper archiving positions.

        Messages older than ``before_date`` are grouped by their archive
        folder, the groups are logged and the archive folders are created.

        :param: folder_name
        :param: before_date
        """
        copy_cache = collections.defaultdict(list)
        fld = Folder(self.box, folder_name).select()
        for msg in fld.emails_before(imap_date(before_date)):
            arch_folder = fld.get_archive_folder(msg, self.archive_root)
            copy_cache[arch_folder].append(msg)

        for key, msgs in copy_cache.items():
            log.info('***** %s:', key)
            for msg in msgs:
                log.info('\tmsg: %s', str(msg))

        # Go through the cache and make moves.
        for arch_dir, msgs in copy_cache.items():
            msg_ids = ','.join([x.uid for x in msgs])
            log.debug('arch_dir = %s, msgs = %s', arch_dir, msg_ids)
            arch_fld = Folder(self.box, arch_dir, create=True)
            # NOTE: the move itself is still switched off, as before; only
            # the archive folders get created.
            # arch_fld.move_messages(msg_ids)

    def list_folders(self):
        """Return the names of all folders, sorted case-insensitively.

        :raises IOError: when the LIST command fails
        """
        ok, data = self.box.list()
        if ok != 'OK':
            raise IOError('Cannot list folders!')
        # imaplib reports "no folders" as [None].
        return sorted([FOLDER_RE.split(x.decode())[1]
                       for x in data if x is not None],
                      key=str.lower)

    def __enter__(self):
        return self

    def __exit__(self, *args):
        if args != (None, None, None):
            log.warning('args = %s', args)
        try:
            # CLOSE is valid only while a folder is selected; calling it in
            # any other state would hide the exception being propagated.
            if self.box.state == 'SELECTED':
                self.box.close()
        finally:
            self.box.logout()


def main(argv=None):
    """Command line entry point; returns the process exit code."""
    try:
        locale.setlocale(locale.LC_ALL, 'en_US')
    except locale.Error:
        # Search dates are formatted without the locale, so go on.
        log.warning('Locale en_US is not available.')

    argp = argparse.ArgumentParser()
    subps = argp.add_subparsers(dest='cmd')
    subps.add_parser('servers',
                     help='List available servers (by keywords)')
    subps.add_parser('list', help='List all folders')
    subp_arc = subps.add_parser('archive', help='Archive folder[s]')
    subp_arc.add_argument('-d', type=int, default=14, dest='days',
                          help='How old messages we should keep')
    subp_arc.add_argument('-s', '--server',
                          help='Symbolic name of the server to be used')
    subp_arc.add_argument('folder',
                          help='Folder which should be archived')
    subp_arc.add_argument('archive',
                          help='Root folder to store annual archives to')
    args = argp.parse_args(argv)

    if args.cmd == 'servers':
        print(format_servers(EmailServer.get_config()))
    elif args.cmd == 'list':
        with EmailServer() as myStore:
            print('\n'.join(myStore.list_folders()))
    elif args.cmd == 'archive':
        before = date.today() - timedelta(days=args.days)
        with EmailServer(args.server, args.archive) as myStore:
            myStore.archive_folder(args.folder, before)
    else:
        # No subcommand given.
        argp.print_help()
        return 2
    return 0


if __name__ == '__main__':
    sys.exit(main())