diff options
author | Matěj Cepl <mcepl@cepl.eu> | 2018-04-20 17:25:11 +0200 |
---|---|---|
committer | Matěj Cepl <mcepl@cepl.eu> | 2018-04-20 17:25:11 +0200 |
commit | 1e87030163b836dffd9ea9f20d5a442f302b0efb (patch) | |
tree | f8b96c7d654b66ff3087a7ac6f3d268dd469b54a | |
parent | 302a4932c633e79d34043250a2701c72d9a34d2f (diff) | |
download | imapArch-1e87030163b836dffd9ea9f20d5a442f302b0efb.tar.gz |
Some progress
-rwxr-xr-x | archive_folder.py | 323 | ||||
-rwxr-xr-x | listFolders.py | 28 | ||||
-rw-r--r-- | previousAttempts/archiveIMAP.pl (renamed from archiveIMAP.pl) | 0 |
3 files changed, 171 insertions, 180 deletions
diff --git a/archive_folder.py b/archive_folder.py index 7859bea..d4aca54 100755 --- a/archive_folder.py +++ b/archive_folder.py @@ -9,213 +9,186 @@ import argparse import configparser import email +import email.header +import email.utils +import locale import logging +import imaplib import os +import pprint import sys -from datetime import date +from datetime import date, timedelta logging.basicConfig(format='%(levelname)s:%(funcName)s:%(message)s', stream=sys.stdout, level=logging.DEBUG) log = logging.getLogger('imapArch') -# from java.util import Properties, Date -# from java.lang import System -# from javax.mail import * -# from jarray import array - - -class FolderNotFoundError(IOError): +class ServerError(IOError): pass -def __getMessageDate(msg): - """ - Return the date of the message - - @param msg analyzed message - @return GregorianCalendar of the messages date - @throws MessagingException - """ - dateMS = msg.getReceivedDate().getTime() // 1000 - dateStruct = date.fromtimestamp(dateMS) - return dateStruct - +class FolderError(IOError): + pass -class ArchivableFolder(list): - def __init__(self, source, year): - """ - Constructor for the folder. - - @param source folder from the messages should be archived - @param year int of the year for which the messages are - archived - """ - self.sourceFolder = source - targetName = self.__getArchiveFolderName(source.getFullName(), year) - self.targetFolder = self.sourceFolder.getFolder(targetName) +class MessageError(IOError): + pass - def __getArchiveFolderName(self, srcFldName, year): - """ - @param folder string with the folder name - @param year int - @return - """ - archFolder = "INBOX/Archiv/" - rootLen = len("INBOX/") - if not(srcFldName[:rootLen] == "/INBOX/"): - raise FolderNotFoundError( - "We expect all folders to be under INBOX folder.") - baseName = srcFldName[rootLen:] - archFolder = "/" + archFolder + year + "/" + baseName - return archFolder - - def add(self, msg): - self.append(msg) - - def doArchive(self): - for message in self: - log.debug("Moving %s from %s to %s.", - message, self.sourceFolder.getFullName(), - self.targetFolder.getFullName()) - # self.sourceFolder.copyMessages(array(self, Message), - # self.targetFolder) - # for message in self: - # message.setFlag(Flags.Flag.DELETED, true) - # self.sourceFolder.expunge() - - -class Archives(dict): - """Collects archivable folders indexed by tuple of - folderName and year - """ - - def __init__(self): - pass - - def add(self, msg): - """ - Check whether the ArchivableFolder (@see ArchivableFolder) - for the particular year actually exists, and if not then create - it. In any event add - @param msg - """ - fld = msg.getFolder() - msgDate = __getMessageDate(msg) - year = msgDate.year - if not(self.search(fld, year)): - archfld = ArchivableFolder(fld, year) - self[self.__createKey(fld, year)] = archfld +def get_config(): + config = configparser.ConfigParser() + config.read(os.path.expanduser('~/.config/imap_archiver.cfg')) + return config + + +class Message(object): + """Abstraction over one email message.""" + def __init__(self, client, uid): + self.client = client + self.uid = uid + self.msg = self.__get_body() + self.date = email.utils.parsedate_to_datetime(self.msg['Date']) + self.subject = self.__get_subject() + + def __get_body(self): + typ, data = self.client.uid('FETCH', '%s (RFC822)' % self.uid) + if typ == 'OK': + return email.message_from_bytes(data[0][1]) + else: + raise MessageError('Cannot parse message %s:\n%s!' % + (self.uid, data[0][1])) + + def __get_subject(self): + out_str = '' + for raw_str, enc in email.header.decode_header(self.msg['Subject']): + if isinstance(raw_str, bytes): + out_str += raw_str.decode(enc or 'utf8') + else: + out_str += raw_str + return out_str + + def __str__(self): + return "%s: %s (%s)" % (self.uid, self.subject, self.date) + + +class Folder(object): + def __init__(self, box, folder_sep, fld_name): + self.box = box + self.fld_name = fld_name + self.folder_separator = folder_sep + self.selected = False + + def select(self): + ret = self.box.select(mailbox=self.fld_name) + self.selected = True + return self - def __createKey(self, name, year): - """ - Create a key for the list - @param fldName String with the full name of the folder - @param year int of the year of the messages to be stored - there - @return tuple consisting from the both parameters of - this function. - """ - return(name, year) + def __emails_search(self, search_type, date_str): + ok, res = self.box.uid('SEARCH', '%s %s' % (search_type.upper(), date_str)) + if ok != 'OK': + raise MessageError('SEARCH %s %s failed!' % + (search_type.lower(), date_str)) - def search(self, fld, year): - """ - Find out whether the object with the key consisting of - the folder name and year exists in this object. + msgs = [] + for uid in res[0].decode().split(' '): + msgs.append(Message(self.box, uid)) + return msgs - @param fld Folder where the message is stored - @param year int year of the message date - @return boolean saying whether the folder for this message - has been already added to this object. - """ - key = self.__createKey(fld.getFullName(), year) - return key in self + def emails_before(self, before_str): + return self.__emails_search('BEFORE', before_str) - def archive(self): - for key in self: - self.doArchive() + def get_archive_folder(self, msg, aroot): + return self.folder_separator.join((aroot, msg.date.strftime("%Y"), self.fld_name)) -class ArchivedStore(object): - def __init__(self, serverKey=None): - config = configparser.ConfigParser() - config.read(os.path.expanduser('~/.config/imap_archiver.cfg')) +class EmailServer(object): + def __init__(self, serverKey=None, archive_root=None): + config = get_config() acc_name = serverKey if serverKey is not None \ else config['general']['account'] self.cfg = dict(config.items(acc_name)) - # if debug: - # self.session.setDebug(True) - # try: - # self.store = self.session.getStore("imaps") - # except: - # print >> sys.stderr, "Cannot get Store" - # raise - - self._threeMonthAgo = date.today() - newmonth = self._threeMonthAgo.month - 3 - self._threeMonthAgo = self._threeMonthAgo.replace(month=newmonth) - - log.debug("host = %s, user = %s, password = %s", - self.cfg['host'], self.cfg['username'], - self.cfg['password']) - - self.__login(host, user, password) - - def __login(self, server, user, password): - try: - self.store.connect(server, user, password) - except: - log.debug("Cannot connect to %s as %s with password %s", - server, user, password) - raise - - def archive(self, from_folder, archive_base): - # type: (str, str) -> None - inboxfolder = self.store.getDefaultFolder().getFolder("INBOX") - folderList = inboxfolder.list('*') - - for folder in folderList: - if folder.getFullName()[:len("INBOX/Archiv")] != "INBOX/Archiv": - archMsgsCnt = self.__archiveFolder(folder) - # folder.close(False) - print("Processed messages = %d" % archMsgsCnt) - - def __archiveFolder(self, fld): - fld.open(Folder.READ_WRITE) - for msg in fld.getMessages(): - msgDate = __getMessageDate(msg) - print >>sys.stderr, str(msgDate), str(self._threeMonthAgo) - if msgDate < self._threeMonthAgo: - archFolder = self.__getArchiveFolderName(msg) -# print >> sys.stderr, archFolder -# fld.copyMessages(array([msg],type(msg)), archFolder) -# msg.setFlag(Flags.Flag.DELETED, true) - print("%s -> %s : %s" % (fld.getFullName(), - archFolder.getFullName(), msgDate)) - folderLen = len(fld.getMessages()) - fld.close(False) - return(folderLen) + self.archive_root = archive_root + + self.__box = self.__login(**self.cfg) + self.__folder_sep = self.__get_separator() + + def __login(self, host='localhost', username=None, password=None, ssl=None): + box = imaplib.IMAP4_SSL(host=host) + ok, data = box.login(username, password) + if ok == 'OK': + return box + else: + raise ServerError('Cannot login with credentials %s' % + str((host, username, password,))) + + def __get_separator(self): + # type: () -> None + ok, data = self.__box.list('""', '""') + if ok != 'OK': + raise ServerError('Cannot list known folders') + + data = data[0].decode().split(' ') + if len(data) == 3: + return data[1].strip(' "') + else: + raise ServerError('Cannot find folder separator from %s' % data) + + def archive_folder(self, folder_name, before_date): + # type: (str, datetime.date) -> None + """ + Archive one folder to the proper archiving positioins. + + :param: folder_name + :param: before_date + """ + copy_cache = {} + fld = Folder(self.__box, self.__folder_sep, folder_name).select() + before_str = before_date.strftime('%d-%b-%Y') + for msg in fld.emails_before(before_str): + arch_folder = fld.get_archive_folder(msg, self.archive_root) + if arch_folder in copy_cache: + copy_cache[arch_folder].append(msg) + else: + copy_cache[arch_folder] = [msg] + + for key in copy_cache: + log.info('\n\n%s:', key) + for msg in copy_cache[key]: + log.info('\tmsg: %s', str(msg)) + + # TODO: Projdi keš a proveď operaci def __enter__(self): return self - def __exit__(self): - pass # FIXME self.something.close() + def __exit__(self, *args): + if args != (None, None, None): + log.warning('args = %s', args) + else: + self.__box.close() if __name__ == '__main__': + locale.setlocale(locale.LC_ALL, 'en_US') argp = argparse.ArgumentParser() - argp.add_argument('-d', - help='How old messages we should keep') - argp.add_argument('server', - help='Symbolic name of the server to be used') - argp.add_argument('folder', - help='Folder which should be archived') - argp.add_argument('archive', - help='Root folder to store annual archives to') + subps = argp.add_subparsers(dest='cmd') + subp_list = subps.add_parser('servers', help='List available servers (by keywords)') + subp_arc = subps.add_parser('archive', help='Archive folder[s]') + subp_arc.add_argument('-d', type=int, default=14, dest='days', + help='How old messages we should keep') + subp_arc.add_argument('-s', '--server', default='localhost', + help='Symbolic name of the server to be used') + subp_arc.add_argument('folder', help='Folder which should be archived') + subp_arc.add_argument('archive', help='Root folder to store annual archives to') + args = argp.parse_args() - with ArchivedStore(args.server) as myStore: - myStore.archive(args.folder, args.archive) + if args.cmd == 'list': + config = get_config() + sects = set(config.keys()) - {'DEFAULT', 'general'} + print('Available servers:\n%s' % tuple(sects)) + else: + before = date.today() - timedelta(days=args.days) + with EmailServer(args.server, args.archive) as myStore: + myStore.archive_folder(args.folder, before) diff --git a/listFolders.py b/listFolders.py index db75596..b815d30 100755 --- a/listFolders.py +++ b/listFolders.py @@ -1,14 +1,32 @@ #!/usr/bin/env python3.6 import configparser +import logging import os.path -import imapy +import imaplib + +logging.basicConfig(format='%(levelname)s:%(funcName)s:%(message)s', + level=logging.DEBUG) +log = logging.getLogger('listFolders') config = configparser.ConfigParser() config.read(os.path.expanduser('~/.config/imap_archiver.cfg')) -cfg = dict(config.items(config['general']['account'])) -box = imapy.connect(**cfg) +acc_name = config['general']['account'] +cfg = dict(config.items(acc_name)) + +box = imaplib.IMAP4_SSL(host=cfg['host']) +ok, data = box.login(cfg['username'], cfg['password']) +if ok != 'OK': + raise IOError('Cannot login with credentials %s' % str(cfg)) + +ok, data = box.list('""', '""') +sep = data[0].split()[1].decode() + +ok, data = box.list() +if ok != 'OK': + raise IOError('Cannot list known folders') -for fold in box.folders(): - print(fold) +for fld in data: + spl_fld = fld.decode().split(sep) + print(spl_fld[1].strip()) diff --git a/archiveIMAP.pl b/previousAttempts/archiveIMAP.pl index a1833de..a1833de 100644 --- a/archiveIMAP.pl +++ b/previousAttempts/archiveIMAP.pl |