From 1e87030163b836dffd9ea9f20d5a442f302b0efb Mon Sep 17 00:00:00 2001 From: Matěj Cepl Date: Fri, 20 Apr 2018 17:25:11 +0200 Subject: Some progress --- archiveIMAP.pl | 144 ------------------ archive_folder.py | 323 ++++++++++++++++++---------------------- listFolders.py | 28 +++- previousAttempts/archiveIMAP.pl | 144 ++++++++++++++++++ 4 files changed, 315 insertions(+), 324 deletions(-) delete mode 100644 archiveIMAP.pl create mode 100644 previousAttempts/archiveIMAP.pl diff --git a/archiveIMAP.pl b/archiveIMAP.pl deleted file mode 100644 index a1833de..0000000 --- a/archiveIMAP.pl +++ /dev/null @@ -1,144 +0,0 @@ -#!/usr/bin/perl - -use strict; -use warnings; -use Mail::IMAPClient; -use IO::Socket::SSL; -use Data::Dumper; -use DateTime; -use DateTime::Format::Strptime; -use Config::IniFiles; - -# possible values are currently -- zimbra, localhost, pobox -my $account = "localhost"; - -# How many months before today the cut date should be? -my $howManyMonths = 3; -my $debug = 0; - -# get configuration for the account -my $conf = Config::IniFiles->new( -file => "/home/matej/.bugzillarc"); -die "No configuration for account $account" unless $conf->SectionExists($account); -my $hostname = $conf->val($account,'host'); -my $login = $conf->val($account,'name'); -my $password = $conf->val($account,'password'); -my $ssl= $conf->val($account,'ssl'); - -sub getTargetFolder { - my $source = shift; - my $year = shift; - - $source =~ s/^\/*(.*)\/*$/$1/; - return "/INBOX/Archiv/" . $year . '/' . $source; -} - -# makes sure that the folder including its parents -# RFC2060 says in 6.3.3 that server SHOULD create -# parents, so just to be sure if it doesn't. -sub assureFolder { - my $imaphandle = shift; - my $fullfoldername = shift; - my $sep = $imap->separator($fullfoldername); - - if ($imaphandle->exists($fullfoldername)) { - return 1; - } - my $parentpath = join ($sep, - (split /$sep/,$fullfoldername)[0,-1] - ); - assureFolder($imaphandle,$parentpath); - $imaphandle->create($fullfoldername) or - die "Unable to create folder $fullfoldername"; -} - -our $Strp = new DateTime::Format::Strptime( - pattern => '%a, %d %b %Y %H:%M:%S %z' -); -our $StrpNoTZ = new DateTime::Format::Strptime( - pattern => '%a, %d %b %Y %H:%M:%S' -); - -sub getMessageYear { - my $msgStr = shift; - my $msgDt = $Strp->parse_datetime($msgStr); - - if (!$msgDt) { - $msgDt = $StrpNoTZ->parse_datetime($msgStr); - } - if (!$msgDt) { - print "Date EMPTY.\n"; - return ""; # TODO: message without Date: - # not sure what to do about it - # Currently just do nothing and - # return empty string. - } - my $year = $msgDt->year; - if ($debug) { - print "\$msgStr = $msgStr, \$msgDt = $msgDt, year = $year\n"; - } - - return $year; -} - -my $imap = Mail::IMAPClient->new(); -if ($ssl) { - my $sslSocket=IO::Socket::SSL->new("$hostname:imaps"); - die ("Error connecting - $@") unless defined $sslSocket; - $sslSocket->autoflush(1); - - $imap = Mail::IMAPClient->new( - Server => $hostname, - Socket => $sslSocket, - User => $login, - Debug => $debug, - Password => $password, - UID => 1 - ) or die "Cannot connect to localhost as matej: $@"; -} else { - $imap = Mail::IMAPClient->new( - Server => $hostname, - User => $login, - Debug => $debug, - Password => $password, - UID => 1 - ) or die "Cannot connect to localhost as matej: $@"; -} - -my $cutDate = DateTime->now(); -$cutDate->add( months => -$howManyMonths ); - -my @sourceFolders = grep(!/^INBOX\/Archiv/,$imap->folders()); -my %targetedMessages; -my ($msgYear,$msgDateStr,$targetFolder); - -foreach my $folder (@sourceFolders) { - $imap->select($folder); - die "Cannot select folder $folder\n" if $@; - my @msgsProc = $imap->search(" UNDELETED BEFORE " . $cutDate->strftime("%d-%b-%Y")); - if ($#msgsProc > 0) { - print "Move $#msgsProc in $folder.\n"; - foreach my $msg (@msgsProc) { - $msgYear = getMessageYear($imap->date($msg)); - if ($msgYear !~ /^\s*$/) { - $targetFolder = getTargetFolder($folder,$msgYear); - if ($debug) { - print "Move message $msg from the folder $folder to $targetFolder.\n"; - } - push ( @{ $targetedMessages{$folder} } , $msg); - } - } - } - foreach my $tFolder (keys %targetedMessages) { - if (!($imap->exists($tFolder))) { - # Not sure how would following deal with non-existent - # parent folder, so rather recursively create - # parents. - # - #$imap->create($tFolder) - # or die "Could not create $tFolder: $@\n"; - assureFolder($imap,$tFolder); - } - $imap->move($tFolder,$targetedMessages{$tFolder}); - } -} -$imap->close(); \ No newline at end of file diff --git a/archive_folder.py b/archive_folder.py index 7859bea..d4aca54 100755 --- a/archive_folder.py +++ b/archive_folder.py @@ -9,213 +9,186 @@ import argparse import configparser import email +import email.header +import email.utils +import locale import logging +import imaplib import os +import pprint import sys -from datetime import date +from datetime import date, timedelta logging.basicConfig(format='%(levelname)s:%(funcName)s:%(message)s', stream=sys.stdout, level=logging.DEBUG) log = logging.getLogger('imapArch') -# from java.util import Properties, Date -# from java.lang import System -# from javax.mail import * -# from jarray import array - - -class FolderNotFoundError(IOError): +class ServerError(IOError): pass -def __getMessageDate(msg): - """ - Return the date of the message - - @param msg analyzed message - @return GregorianCalendar of the messages date - @throws MessagingException - """ - dateMS = msg.getReceivedDate().getTime() // 1000 - dateStruct = date.fromtimestamp(dateMS) - return dateStruct - +class FolderError(IOError): + pass -class ArchivableFolder(list): - def __init__(self, source, year): - """ - Constructor for the folder. - - @param source folder from the messages should be archived - @param year int of the year for which the messages are - archived - """ - self.sourceFolder = source - targetName = self.__getArchiveFolderName(source.getFullName(), year) - self.targetFolder = self.sourceFolder.getFolder(targetName) +class MessageError(IOError): + pass - def __getArchiveFolderName(self, srcFldName, year): - """ - @param folder string with the folder name - @param year int - @return - """ - archFolder = "INBOX/Archiv/" - rootLen = len("INBOX/") - if not(srcFldName[:rootLen] == "/INBOX/"): - raise FolderNotFoundError( - "We expect all folders to be under INBOX folder.") - baseName = srcFldName[rootLen:] - archFolder = "/" + archFolder + year + "/" + baseName - return archFolder - - def add(self, msg): - self.append(msg) - - def doArchive(self): - for message in self: - log.debug("Moving %s from %s to %s.", - message, self.sourceFolder.getFullName(), - self.targetFolder.getFullName()) - # self.sourceFolder.copyMessages(array(self, Message), - # self.targetFolder) - # for message in self: - # message.setFlag(Flags.Flag.DELETED, true) - # self.sourceFolder.expunge() - - -class Archives(dict): - """Collects archivable folders indexed by tuple of - folderName and year - """ - - def __init__(self): - pass - - def add(self, msg): - """ - Check whether the ArchivableFolder (@see ArchivableFolder) - for the particular year actually exists, and if not then create - it. In any event add - @param msg - """ - fld = msg.getFolder() - msgDate = __getMessageDate(msg) - year = msgDate.year - if not(self.search(fld, year)): - archfld = ArchivableFolder(fld, year) - self[self.__createKey(fld, year)] = archfld +def get_config(): + config = configparser.ConfigParser() + config.read(os.path.expanduser('~/.config/imap_archiver.cfg')) + return config + + +class Message(object): + """Abstraction over one email message.""" + def __init__(self, client, uid): + self.client = client + self.uid = uid + self.msg = self.__get_body() + self.date = email.utils.parsedate_to_datetime(self.msg['Date']) + self.subject = self.__get_subject() + + def __get_body(self): + typ, data = self.client.uid('FETCH', '%s (RFC822)' % self.uid) + if typ == 'OK': + return email.message_from_bytes(data[0][1]) + else: + raise MessageError('Cannot parse message %s:\n%s!' % + (self.uid, data[0][1])) + + def __get_subject(self): + out_str = '' + for raw_str, enc in email.header.decode_header(self.msg['Subject']): + if isinstance(raw_str, bytes): + out_str += raw_str.decode(enc or 'utf8') + else: + out_str += raw_str + return out_str + + def __str__(self): + return "%s: %s (%s)" % (self.uid, self.subject, self.date) + + +class Folder(object): + def __init__(self, box, folder_sep, fld_name): + self.box = box + self.fld_name = fld_name + self.folder_separator = folder_sep + self.selected = False + + def select(self): + ret = self.box.select(mailbox=self.fld_name) + self.selected = True + return self - def __createKey(self, name, year): - """ - Create a key for the list - @param fldName String with the full name of the folder - @param year int of the year of the messages to be stored - there - @return tuple consisting from the both parameters of - this function. - """ - return(name, year) + def __emails_search(self, search_type, date_str): + ok, res = self.box.uid('SEARCH', '%s %s' % (search_type.upper(), date_str)) + if ok != 'OK': + raise MessageError('SEARCH %s %s failed!' % + (search_type.lower(), date_str)) - def search(self, fld, year): - """ - Find out whether the object with the key consisting of - the folder name and year exists in this object. + msgs = [] + for uid in res[0].decode().split(' '): + msgs.append(Message(self.box, uid)) + return msgs - @param fld Folder where the message is stored - @param year int year of the message date - @return boolean saying whether the folder for this message - has been already added to this object. - """ - key = self.__createKey(fld.getFullName(), year) - return key in self + def emails_before(self, before_str): + return self.__emails_search('BEFORE', before_str) - def archive(self): - for key in self: - self.doArchive() + def get_archive_folder(self, msg, aroot): + return self.folder_separator.join((aroot, msg.date.strftime("%Y"), self.fld_name)) -class ArchivedStore(object): - def __init__(self, serverKey=None): - config = configparser.ConfigParser() - config.read(os.path.expanduser('~/.config/imap_archiver.cfg')) +class EmailServer(object): + def __init__(self, serverKey=None, archive_root=None): + config = get_config() acc_name = serverKey if serverKey is not None \ else config['general']['account'] self.cfg = dict(config.items(acc_name)) - # if debug: - # self.session.setDebug(True) - # try: - # self.store = self.session.getStore("imaps") - # except: - # print >> sys.stderr, "Cannot get Store" - # raise - - self._threeMonthAgo = date.today() - newmonth = self._threeMonthAgo.month - 3 - self._threeMonthAgo = self._threeMonthAgo.replace(month=newmonth) - - log.debug("host = %s, user = %s, password = %s", - self.cfg['host'], self.cfg['username'], - self.cfg['password']) - - self.__login(host, user, password) - - def __login(self, server, user, password): - try: - self.store.connect(server, user, password) - except: - log.debug("Cannot connect to %s as %s with password %s", - server, user, password) - raise - - def archive(self, from_folder, archive_base): - # type: (str, str) -> None - inboxfolder = self.store.getDefaultFolder().getFolder("INBOX") - folderList = inboxfolder.list('*') - - for folder in folderList: - if folder.getFullName()[:len("INBOX/Archiv")] != "INBOX/Archiv": - archMsgsCnt = self.__archiveFolder(folder) - # folder.close(False) - print("Processed messages = %d" % archMsgsCnt) - - def __archiveFolder(self, fld): - fld.open(Folder.READ_WRITE) - for msg in fld.getMessages(): - msgDate = __getMessageDate(msg) - print >>sys.stderr, str(msgDate), str(self._threeMonthAgo) - if msgDate < self._threeMonthAgo: - archFolder = self.__getArchiveFolderName(msg) -# print >> sys.stderr, archFolder -# fld.copyMessages(array([msg],type(msg)), archFolder) -# msg.setFlag(Flags.Flag.DELETED, true) - print("%s -> %s : %s" % (fld.getFullName(), - archFolder.getFullName(), msgDate)) - folderLen = len(fld.getMessages()) - fld.close(False) - return(folderLen) + self.archive_root = archive_root + + self.__box = self.__login(**self.cfg) + self.__folder_sep = self.__get_separator() + + def __login(self, host='localhost', username=None, password=None, ssl=None): + box = imaplib.IMAP4_SSL(host=host) + ok, data = box.login(username, password) + if ok == 'OK': + return box + else: + raise ServerError('Cannot login with credentials %s' % + str((host, username, password,))) + + def __get_separator(self): + # type: () -> None + ok, data = self.__box.list('""', '""') + if ok != 'OK': + raise ServerError('Cannot list known folders') + + data = data[0].decode().split(' ') + if len(data) == 3: + return data[1].strip(' "') + else: + raise ServerError('Cannot find folder separator from %s' % data) + + def archive_folder(self, folder_name, before_date): + # type: (str, datetime.date) -> None + """ + Archive one folder to the proper archiving positioins. + + :param: folder_name + :param: before_date + """ + copy_cache = {} + fld = Folder(self.__box, self.__folder_sep, folder_name).select() + before_str = before_date.strftime('%d-%b-%Y') + for msg in fld.emails_before(before_str): + arch_folder = fld.get_archive_folder(msg, self.archive_root) + if arch_folder in copy_cache: + copy_cache[arch_folder].append(msg) + else: + copy_cache[arch_folder] = [msg] + + for key in copy_cache: + log.info('\n\n%s:', key) + for msg in copy_cache[key]: + log.info('\tmsg: %s', str(msg)) + + # TODO: Projdi keš a proveď operaci def __enter__(self): return self - def __exit__(self): - pass # FIXME self.something.close() + def __exit__(self, *args): + if args != (None, None, None): + log.warning('args = %s', args) + else: + self.__box.close() if __name__ == '__main__': + locale.setlocale(locale.LC_ALL, 'en_US') argp = argparse.ArgumentParser() - argp.add_argument('-d', - help='How old messages we should keep') - argp.add_argument('server', - help='Symbolic name of the server to be used') - argp.add_argument('folder', - help='Folder which should be archived') - argp.add_argument('archive', - help='Root folder to store annual archives to') + subps = argp.add_subparsers(dest='cmd') + subp_list = subps.add_parser('servers', help='List available servers (by keywords)') + subp_arc = subps.add_parser('archive', help='Archive folder[s]') + subp_arc.add_argument('-d', type=int, default=14, dest='days', + help='How old messages we should keep') + subp_arc.add_argument('-s', '--server', default='localhost', + help='Symbolic name of the server to be used') + subp_arc.add_argument('folder', help='Folder which should be archived') + subp_arc.add_argument('archive', help='Root folder to store annual archives to') + args = argp.parse_args() - with ArchivedStore(args.server) as myStore: - myStore.archive(args.folder, args.archive) + if args.cmd == 'list': + config = get_config() + sects = set(config.keys()) - {'DEFAULT', 'general'} + print('Available servers:\n%s' % tuple(sects)) + else: + before = date.today() - timedelta(days=args.days) + with EmailServer(args.server, args.archive) as myStore: + myStore.archive_folder(args.folder, before) diff --git a/listFolders.py b/listFolders.py index db75596..b815d30 100755 --- a/listFolders.py +++ b/listFolders.py @@ -1,14 +1,32 @@ #!/usr/bin/env python3.6 import configparser +import logging import os.path -import imapy +import imaplib + +logging.basicConfig(format='%(levelname)s:%(funcName)s:%(message)s', + level=logging.DEBUG) +log = logging.getLogger('listFolders') config = configparser.ConfigParser() config.read(os.path.expanduser('~/.config/imap_archiver.cfg')) -cfg = dict(config.items(config['general']['account'])) -box = imapy.connect(**cfg) +acc_name = config['general']['account'] +cfg = dict(config.items(acc_name)) + +box = imaplib.IMAP4_SSL(host=cfg['host']) +ok, data = box.login(cfg['username'], cfg['password']) +if ok != 'OK': + raise IOError('Cannot login with credentials %s' % str(cfg)) + +ok, data = box.list('""', '""') +sep = data[0].split()[1].decode() + +ok, data = box.list() +if ok != 'OK': + raise IOError('Cannot list known folders') -for fold in box.folders(): - print(fold) +for fld in data: + spl_fld = fld.decode().split(sep) + print(spl_fld[1].strip()) diff --git a/previousAttempts/archiveIMAP.pl b/previousAttempts/archiveIMAP.pl new file mode 100644 index 0000000..a1833de --- /dev/null +++ b/previousAttempts/archiveIMAP.pl @@ -0,0 +1,144 @@ +#!/usr/bin/perl + +use strict; +use warnings; +use Mail::IMAPClient; +use IO::Socket::SSL; +use Data::Dumper; +use DateTime; +use DateTime::Format::Strptime; +use Config::IniFiles; + +# possible values are currently -- zimbra, localhost, pobox +my $account = "localhost"; + +# How many months before today the cut date should be? +my $howManyMonths = 3; +my $debug = 0; + +# get configuration for the account +my $conf = Config::IniFiles->new( -file => "/home/matej/.bugzillarc"); +die "No configuration for account $account" unless $conf->SectionExists($account); +my $hostname = $conf->val($account,'host'); +my $login = $conf->val($account,'name'); +my $password = $conf->val($account,'password'); +my $ssl= $conf->val($account,'ssl'); + +sub getTargetFolder { + my $source = shift; + my $year = shift; + + $source =~ s/^\/*(.*)\/*$/$1/; + return "/INBOX/Archiv/" . $year . '/' . $source; +} + +# makes sure that the folder including its parents +# RFC2060 says in 6.3.3 that server SHOULD create +# parents, so just to be sure if it doesn't. +sub assureFolder { + my $imaphandle = shift; + my $fullfoldername = shift; + my $sep = $imap->separator($fullfoldername); + + if ($imaphandle->exists($fullfoldername)) { + return 1; + } + my $parentpath = join ($sep, + (split /$sep/,$fullfoldername)[0,-1] + ); + assureFolder($imaphandle,$parentpath); + $imaphandle->create($fullfoldername) or + die "Unable to create folder $fullfoldername"; +} + +our $Strp = new DateTime::Format::Strptime( + pattern => '%a, %d %b %Y %H:%M:%S %z' +); +our $StrpNoTZ = new DateTime::Format::Strptime( + pattern => '%a, %d %b %Y %H:%M:%S' +); + +sub getMessageYear { + my $msgStr = shift; + my $msgDt = $Strp->parse_datetime($msgStr); + + if (!$msgDt) { + $msgDt = $StrpNoTZ->parse_datetime($msgStr); + } + if (!$msgDt) { + print "Date EMPTY.\n"; + return ""; # TODO: message without Date: + # not sure what to do about it + # Currently just do nothing and + # return empty string. + } + my $year = $msgDt->year; + if ($debug) { + print "\$msgStr = $msgStr, \$msgDt = $msgDt, year = $year\n"; + } + + return $year; +} + +my $imap = Mail::IMAPClient->new(); +if ($ssl) { + my $sslSocket=IO::Socket::SSL->new("$hostname:imaps"); + die ("Error connecting - $@") unless defined $sslSocket; + $sslSocket->autoflush(1); + + $imap = Mail::IMAPClient->new( + Server => $hostname, + Socket => $sslSocket, + User => $login, + Debug => $debug, + Password => $password, + UID => 1 + ) or die "Cannot connect to localhost as matej: $@"; +} else { + $imap = Mail::IMAPClient->new( + Server => $hostname, + User => $login, + Debug => $debug, + Password => $password, + UID => 1 + ) or die "Cannot connect to localhost as matej: $@"; +} + +my $cutDate = DateTime->now(); +$cutDate->add( months => -$howManyMonths ); + +my @sourceFolders = grep(!/^INBOX\/Archiv/,$imap->folders()); +my %targetedMessages; +my ($msgYear,$msgDateStr,$targetFolder); + +foreach my $folder (@sourceFolders) { + $imap->select($folder); + die "Cannot select folder $folder\n" if $@; + my @msgsProc = $imap->search(" UNDELETED BEFORE " . $cutDate->strftime("%d-%b-%Y")); + if ($#msgsProc > 0) { + print "Move $#msgsProc in $folder.\n"; + foreach my $msg (@msgsProc) { + $msgYear = getMessageYear($imap->date($msg)); + if ($msgYear !~ /^\s*$/) { + $targetFolder = getTargetFolder($folder,$msgYear); + if ($debug) { + print "Move message $msg from the folder $folder to $targetFolder.\n"; + } + push ( @{ $targetedMessages{$folder} } , $msg); + } + } + } + foreach my $tFolder (keys %targetedMessages) { + if (!($imap->exists($tFolder))) { + # Not sure how would following deal with non-existent + # parent folder, so rather recursively create + # parents. + # + #$imap->create($tFolder) + # or die "Could not create $tFolder: $@\n"; + assureFolder($imap,$tFolder); + } + $imap->move($tFolder,$targetedMessages{$tFolder}); + } +} +$imap->close(); \ No newline at end of file -- cgit