aboutsummaryrefslogtreecommitdiffstats
path: root/imapArch.py
diff options
context:
space:
mode:
Diffstat (limited to 'imapArch.py')
-rwxr-xr-ximapArch.py325
1 files changed, 325 insertions, 0 deletions
diff --git a/imapArch.py b/imapArch.py
new file mode 100755
index 0000000..2ac91fd
--- /dev/null
+++ b/imapArch.py
@@ -0,0 +1,325 @@
+#!/usr/bin/env python3
+# note http://docs.python.org/lib/module-doctest.html
+# resp. file:///usr/share/doc/python-docs-*/html/lib/module-doctest.html
+import argparse
+import collections
+import configparser
+import email
+import email.header
+import email.utils
+import locale
+import logging
+import imaplib
+import os
+import re
+import sys
+from datetime import date, timedelta
+
# All diagnostics go to stdout, prefixed with the level and the name of the
# function that emitted them.
logging.basicConfig(
    level=logging.INFO,
    stream=sys.stdout,
    format='%(levelname)s:%(funcName)s:%(message)s',
)
log = logging.getLogger('imapArch')


# Flags recording which optional IMAP capabilities were seen on the server.
Capas = collections.namedtuple('Capas', ('MOVE', 'UIDPLUS'))
# Matches the quoted hierarchy separator ("/" or ".") in a LIST response line
# and captures the separator character itself.
SEP_RE = re.compile(r'\s+"([/.])"\s+')
# Same pattern without the capture group; used to split a LIST response line
# into the part before the separator and the part after it.
FOLDER_RE = re.compile(r'\s+"[/.]"\s+')
+
+
class Message(object):
    """Abstraction over one email message.

    Constructing an instance immediately fetches the complete message
    (``RFC822``) by UID through ``folder.box`` and parses it.

    Attributes:
        folder: the folder object the message was found in.
        box: the IMAP connection, taken from ``folder.box``.
        uid: the message UID as passed in by the caller.
        msg: the parsed ``email.message.Message``.
        date: value of the ``Date`` header as a ``datetime``, or ``None``
            when the header is missing or cannot be parsed.
        subject: the decoded ``Subject`` header ('' when there is none).
    """

    def __init__(self, folder, uid):
        """Fetch and parse message ``uid`` from ``folder``.

        :param folder: object providing ``box`` (IMAP connection) and
            ``fld_name`` attributes.
        :param uid: UID of the message, interpolated into the FETCH command.
        :raises IOError: when the message cannot be fetched.
        """
        self.folder = folder
        self.box = self.folder.box
        self.uid = uid
        self.msg = self.__get_body()
        date_header = self.msg['Date']
        try:
            self.date = email.utils.parsedate_to_datetime(date_header)
        except (TypeError, ValueError):
            # A missing or malformed Date header is not fatal; the message
            # is kept with date set to None.
            log.warning('Cannot parse date string %s', date_header)
            self.date = None
        self.subject = self.__get_subject()

    def __get_body(self):
        """Fetch the whole message and return it parsed.

        :raises IOError: when the server does not answer OK or when the
            response carries no message payload (e.g. ``[None]``).
        """
        typ, data = self.box.uid('FETCH', '%s (RFC822)' % self.uid)
        if typ != 'OK':
            raise IOError('Cannot parse message %s:\n%s!' %
                          (self.uid, data))
        # The payload arrives as a (header, literal) tuple; other response
        # items (None, closing b')') are plain values and are skipped.
        for item in data or ():
            if isinstance(item, tuple) and len(item) > 1:
                return email.message_from_bytes(item[1])
        raise IOError('Cannot parse message %s:\n%s!' % (self.uid, data))

    def __get_subject(self):
        """Return the Subject header decoded to ``str``.

        :raises LookupError: when a header fragment names a codec Python
            does not know (logged before re-raising).
        """
        subj_str = self.msg['Subject']
        if subj_str is None:
            return ''

        chunks = []
        for raw_str, enc in email.header.decode_header(subj_str):
            if enc == 'windows-874':
                # Decode this charset label with the iso8859_11 codec.
                enc = 'iso8859_11'
            elif enc is None or ('unknown' in enc):
                enc = None
            if isinstance(raw_str, bytes):
                try:
                    chunks.append(raw_str.decode(enc or 'utf8', 'replace'))
                except LookupError:
                    log.error('Cannot find encoding %s', enc)
                    raise
            else:
                chunks.append(raw_str)
        return ''.join(chunks)

    def __str__(self):
        return "%s in %s: %s (%s)" % (self.uid, self.folder.fld_name,
                                      self.subject, self.date)
+
+
class Folder(object):
    """One mailbox (folder) reachable through an ``EmailServer``.

    Attributes:
        server: the owning server object (provides ``box`` and
            ``all_folders``).
        box: the IMAP connection, taken from ``server.box``.
        fld_name: name of the folder as given by the caller.
        selected: True once ``select()`` has succeeded on this object.
        folder_sep: hierarchy separator reported by the server.
    """

    def __init__(self, server, fld_name, create=False):
        """Look up the hierarchy separator and optionally create the folder.

        :param server: object providing ``box`` and ``all_folders``.
        :param fld_name: folder name.
        :param create: when true and ``fld_name`` is not in
            ``server.all_folders``, create it (including parents).
        :raises IOError: when the separator cannot be determined.
        """
        self.server = server
        self.box = server.box
        self.fld_name = fld_name
        self.selected = False
        self.__create_missing = create
        self.folder_sep = self.__get_separator()

        exists = fld_name in self.server.all_folders
        log.debug('self.__create_missing = %s, fld_name = %s, exists %s',
                  self.__create_missing, fld_name, exists)
        if self.__create_missing and not exists:
            self.__create_folder()

    def select(self):
        """SELECT this folder on the connection and return ``self``.

        :raises IOError: when the server does not answer OK.
        """
        ok, _ = self.box.select(mailbox=self.fld_name)
        if ok != 'OK':
            raise IOError('Cannot select folder %s' % self.fld_name)
        self.selected = True
        return self

    def __create_folder(self):
        """Create and subscribe every missing level of ``fld_name``."""
        sep = self.folder_sep
        split_name = self.fld_name.split(sep)
        log.debug('split_name = %s', split_name)
        for depth in range(1, len(split_name) + 1):
            fld = sep.join(split_name[:depth]).strip(sep)
            log.debug('fld = %s', fld)
            # Skip empty prefixes (name starting with the separator) and
            # levels the server already has.
            if not fld or fld in self.server.all_folders:
                continue
            log.debug('Creating folder %s', fld)
            ok, data = self.box.create(fld)
            if ok != 'OK':
                log.warning('Creating folder %s answered %s: %s',
                            fld, ok, data)
            self.server.all_folders.append(fld)
            self.box.subscribe(fld)

    def __list_folder(self, name=None, wildcards='*'):
        """Run LIST and return the first response item (may be ``None``).

        :param name: reference name; defaults to ``fld_name`` followed by
            ``'/'``.
        :param wildcards: mailbox pattern passed to LIST.
        :raises IOError: when the server does not answer OK.
        """
        if name is None:
            name = '{}/'.format(self.fld_name)
        ok, data = self.box.list(name, wildcards)
        if ok != 'OK':
            raise IOError('Cannot list folder %s' % self.fld_name)
        log.debug('data = %s', data)
        return data[0] if data else None

    def __get_separator(self):
        # type: () -> str
        """
        Find the folder hierarchy separator.

        First LIST is issued below the current folder; when that yields
        nothing, ``LIST "" ""`` is tried instead.

        :raises IOError: when neither query yields a parsable separator.
        """
        data = self.__list_folder()

        if data is None:
            data = self.__list_folder('""', '""')

        if data is None:
            raise IOError('Cannot find folder separator for %s' %
                          self.fld_name)

        parse_data = SEP_RE.search(data.decode())
        if parse_data is None:
            raise IOError('Cannot find folder separator from %s' % data)
        return parse_data.group(1)

    def __emails_search(self, search_type, date_str):
        """Return ``Message`` objects for ``UID SEARCH <type> <date>``.

        :raises IOError: when the server does not answer OK.
        """
        ok, res = self.box.uid('SEARCH',
                               '%s %s' % (search_type.upper(), date_str))
        if ok != 'OK':
            raise IOError('SEARCH %s %s failed!' %
                          (search_type.lower(), date_str))

        # No hits may be reported as [b''] or as [None]; both mean "empty".
        found = res[0] if res else None
        uids = (found or b'').decode().split()
        return [Message(self, uid) for uid in uids]

    def emails_before(self, before_str):
        """Return the messages matched by ``SEARCH BEFORE before_str``."""
        return self.__emails_search('BEFORE', before_str)

    def get_archive_folder(self, msg, aroot):
        """Return ``<aroot><sep><year of msg.date><sep><fld_name>``.

        Leading and trailing separators are removed, so an empty ``aroot``
        does not produce a name starting with the separator.
        """
        raw_name = self.folder_sep.join((aroot, msg.date.strftime("%Y"),
                                         self.fld_name))
        return raw_name.strip(self.folder_sep)

    def __copy_and_flag_deleted(self, messages):
        """UID COPY ``messages`` into this folder, then flag them deleted.

        :raises IOError: when COPY or STORE does not answer OK.
        """
        ok, data = self.box.uid('COPY', messages, self.fld_name)
        log.debug('COPY ok = %s, data = %s', ok, data)
        if ok != 'OK':
            raise IOError('Cannot copy messages to folder %s' %
                          self.fld_name)
        ok, data = self.box.uid('STORE',
                                r'+FLAGS.SILENT (\DELETED)', messages)
        log.debug('STORE ok = %s, data = %s', ok, data)
        if ok != 'OK':
            raise IOError('Cannot delete messages-')

    def move_messages(self, messages):
        """Move ``messages`` from the selected mailbox into this folder.

        Uses UID MOVE when the server has MOVE; otherwise COPY plus a
        ``\\DELETED`` flag, followed by UID EXPUNGE when UIDPLUS is present.
        Without either capability the messages are only flagged.
        NOTE(review): presumably they are expunged when the source mailbox
        is closed -- confirm against the caller.

        :param messages: comma separated UIDs (IMAP message set).
        :raises IOError: when any of the commands does not answer OK.
        """
        assert self.selected is False, 'Target folder should not be selected.'
        if self.box.features_present.MOVE:
            # We cannot move self.box.uid('MOVE', messages, self.fld_name)
            # because IMAP4_SSL.uid has a protection against unknown
            # commands. https://bugs.python.org/issue33336
            typ, dat = self.box._simple_command('UID', 'MOVE',
                                                messages, self.fld_name)
            ok, data = self.box._untagged_response(typ, dat, 'MOVE')
            log.debug('MOVE ok = %s, data = %s', ok, data)
            if ok != 'OK':
                raise IOError('Cannot move messages to folder %s' %
                              self.fld_name)
            return

        self.__copy_and_flag_deleted(messages)
        if self.box.features_present.UIDPLUS:
            ok, data = self.box.uid('EXPUNGE', messages)
            log.debug('EXPUNGE ok = %s, data = %s', ok, data)
            if ok != 'OK':
                raise IOError('Cannot expunge messages.')
+
+
class EmailServer(object):
    """Connection to one IMAP account described in the configuration file.

    Usable as a context manager; leaving the ``with`` block closes the
    selected mailbox (if any) and logs out.

    Attributes:
        cfg: dict of the options of the chosen account section.
        archive_root: root folder under which archives are stored.
        box: the logged-in ``imaplib`` connection; it carries an extra
            ``features_present`` attribute (a ``Capas`` tuple).
        all_folders: list of folder names parsed from LIST.
    """

    # English month abbreviations for IMAP date strings.
    _MONTHS = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
               'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')

    def __init__(self, serverKey=None, archive_root=None, verbosity=0):
        """Read the configuration and log in.

        :param serverKey: name of the config section to use; when ``None``
            the ``account`` option of section ``general`` is used.
        :param archive_root: root folder for the archives.
        :param verbosity: values above 1 turn on imaplib protocol debugging.
        """
        config = self.get_config()
        acc_name = serverKey if serverKey is not None \
            else config['general']['account']
        # Every option of the section becomes a keyword argument of
        # __login, so the section may only hold options __login accepts.
        self.cfg = dict(config.items(acc_name))
        self.archive_root = archive_root
        self.box, self.all_folders = self.__login(**self.cfg)
        if verbosity > 1:
            self.box.debug = 4

    def __login(self, host='localhost', username=None, password=None,
                ssl=None):
        """Connect over SSL, log in, and collect capabilities and folders.

        :param ssl: accepted but not used; the connection is always made
            with ``imaplib.IMAP4_SSL``.
        :returns: tuple ``(box, folders)``.
        :raises IOError: when login, CAPABILITY or LIST does not answer OK.
        """
        box = imaplib.IMAP4_SSL(host=host)
        ok, data = box.login(username, password)
        if ok != 'OK':
            # The password is deliberately kept out of the message.
            raise IOError('Cannot login with credentials %s' %
                          str((host, username)))

        ok, data = box.capability()
        if ok != 'OK':
            raise IOError('Cannot get server capabilities!')
        capas = data[0].decode()
        log.debug('capas = %s', capas)
        # Compare whole tokens so that e.g. "X-REMOVE" is not taken for
        # "MOVE".
        tokens = capas.upper().split()
        box.features_present = Capas._make(
            ['MOVE' in tokens, 'UIDPLUS' in tokens])

        ok, data = box.list()
        if ok != 'OK':
            raise IOError('Cannot list folders!')

        folders = []
        for line in data:
            parts = []
            if isinstance(line, bytes):
                parts = FOLDER_RE.split(line.decode(), maxsplit=1)
            if len(parts) == 2:
                folders.append(parts[1])
            else:
                # No quoted "/" or "." separator in this line.
                log.warning('Cannot parse LIST response %r', line)

        return box, folders

    @staticmethod
    def get_config():
        """Return a ``ConfigParser`` loaded from the user's config file.

        The file is ``~/.config/imap_archiver.cfg``. ``ConfigParser.read``
        ignores files it cannot open, so a missing file yields a parser
        without any account sections.
        """
        config = configparser.ConfigParser()
        config.read(os.path.expanduser('~/.config/imap_archiver.cfg'))
        return config

    @classmethod
    def _imap_date(cls, day):
        """Format ``day`` as ``DD-Mon-YYYY`` independent of the locale."""
        return '%02d-%s-%04d' % (day.day, cls._MONTHS[day.month - 1],
                                 day.year)

    def archive_folder(self, folder_name, before_date):
        # type: (str, date) -> None
        """
        Archive one folder to the proper archiving positions.

        Messages found by ``SEARCH BEFORE before_date`` are grouped by the
        archive folder derived from their Date header and each group is
        moved with one command. Messages without a usable date are left
        where they are.

        :param: folder_name name of the folder to archive
        :param: before_date only messages before this date are archived
        """
        print('Archiving {}'.format(folder_name))
        fld = Folder(self, folder_name).select()
        before_str = self._imap_date(before_date)

        copy_cache = collections.defaultdict(list)
        for msg in fld.emails_before(before_str):
            sys.stdout.write('.')
            if msg.date is not None:
                arch_folder = fld.get_archive_folder(msg, self.archive_root)
                copy_cache[arch_folder].append(msg)
        sys.stdout.write('\n')

        for key, msgs in copy_cache.items():
            log.debug('***** %s:', key)
            for msg in msgs:
                log.debug('\tmsg: %s', str(msg))

        # Go through the cache and make moves.
        for arch_dir, msgs in copy_cache.items():
            msg_ids = ','.join(x.uid for x in msgs)
            log.debug('arch_dir = %s, msgs = %s', arch_dir, msg_ids)
            target = Folder(self, arch_dir, create=True)
            target.move_messages(msg_ids)

    def list_folders(self):
        """Return all folder names sorted case-insensitively."""
        return sorted(self.all_folders, key=str.lower)

    def __enter__(self):
        return self

    def __exit__(self, *args):
        """Close the selected mailbox and log out; never hide an error."""
        if args != (None, None, None):
            log.warning('args = %s', args)
        try:
            # CLOSE is rejected by imaplib when no mailbox is selected;
            # that must not prevent the logout below.
            self.box.close()
        except imaplib.IMAP4.error as ex:
            log.debug('CLOSE failed: %s', ex)
        try:
            self.box.logout()
        except (imaplib.IMAP4.error, OSError) as ex:
            log.debug('LOGOUT failed: %s', ex)
+
+
def main(argv=None):
    """Command line entry point.

    Sub-commands: ``servers`` prints the account sections of the
    configuration file, ``list`` prints all folders of the default account,
    ``archive`` moves old messages of one folder below the archive root.

    :param argv: argument list; ``None`` makes argparse use ``sys.argv``.
    """
    try:
        locale.setlocale(locale.LC_ALL, 'en_US')
    except locale.Error:
        # 'en_US' is not installed everywhere; fall back to the C locale.
        locale.setlocale(locale.LC_ALL, 'C')

    argp = argparse.ArgumentParser()
    subps = argp.add_subparsers(dest='cmd')
    subps.add_parser('servers',
                     help='List available servers (by keywords)')
    subps.add_parser('list', help='List all folders')
    subp_arc = subps.add_parser('archive', help='Archive folder[s]')
    subp_arc.add_argument('-d', type=int, default=14, dest='days',
                          help='How old messages we should keep')
    subp_arc.add_argument('-s', '--server',
                          help='Symbolic name of the server to be used')
    subp_arc.add_argument('folder', help='Folder which should be archived')
    subp_arc.add_argument('archive',
                          help='Root folder to store annual archives to')
    argp.add_argument('-v', action='count', dest='verbosity', default=0,
                      help='Verbosity of the operation (-vv shows IMAP chatter)')

    args = argp.parse_args(argv)

    # Raise the log level first, otherwise the debug line below is dropped.
    if args.verbosity > 0:
        log.setLevel(logging.DEBUG)
    log.debug('args = %s', args)

    if args.cmd == 'servers':
        config = EmailServer.get_config()
        sects = set(config.keys()) - {'DEFAULT', 'general'}
        print('Available servers:\n%s' % '\n'.join(sorted(sects)))
    elif args.cmd == 'list':
        myStore = EmailServer(verbosity=args.verbosity)
        try:
            folders = myStore.list_folders()
            print('\n'.join(folders))
        finally:
            # No mailbox is selected here, so only LOGOUT is needed.
            myStore.box.logout()
    elif args.cmd == 'archive':
        before = date.today() - timedelta(days=args.days)
        with EmailServer(args.server, args.archive,
                         verbosity=args.verbosity) as myStore:
            myStore.archive_folder(args.folder, before)
    else:
        # No sub-command given: argparse leaves args.cmd as None.
        argp.error('no command given (choose from servers, list, archive)')


if __name__ == '__main__':
    main()