about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Matěj Cepl <mcepl@cepl.eu> 2018-04-20 17:25:11 +0200
committer: Matěj Cepl <mcepl@cepl.eu> 2018-04-20 17:25:11 +0200
commit: 1e87030163b836dffd9ea9f20d5a442f302b0efb (patch)
tree: f8b96c7d654b66ff3087a7ac6f3d268dd469b54a
parent: 302a4932c633e79d34043250a2701c72d9a34d2f (diff)
download: imapArch-1e87030163b836dffd9ea9f20d5a442f302b0efb.tar.gz
Some progress
-rwxr-xr-x  archive_folder.py  323
-rwxr-xr-x  listFolders.py  28
-rw-r--r--  previousAttempts/archiveIMAP.pl (renamed from archiveIMAP.pl)  0
3 files changed, 171 insertions, 180 deletions
diff --git a/archive_folder.py b/archive_folder.py
index 7859bea..d4aca54 100755
--- a/archive_folder.py
+++ b/archive_folder.py
@@ -9,213 +9,186 @@
import argparse
import configparser
import email
+import email.header
+import email.utils
+import locale
import logging
+import imaplib
import os
+import pprint
import sys
-from datetime import date
+from datetime import date, timedelta
logging.basicConfig(format='%(levelname)s:%(funcName)s:%(message)s',
stream=sys.stdout, level=logging.DEBUG)
log = logging.getLogger('imapArch')
-# from java.util import Properties, Date
-# from java.lang import System
-# from javax.mail import *
-# from jarray import array
-
-
-class FolderNotFoundError(IOError):
+class ServerError(IOError):
pass
-def __getMessageDate(msg):
- """
- Return the date of the message
-
- @param msg analyzed message
- @return GregorianCalendar of the messages date
- @throws MessagingException
- """
- dateMS = msg.getReceivedDate().getTime() // 1000
- dateStruct = date.fromtimestamp(dateMS)
- return dateStruct
-
+class FolderError(IOError):
+ pass
-class ArchivableFolder(list):
- def __init__(self, source, year):
- """
- Constructor for the folder.
-
- @param source folder from the messages should be archived
- @param year int of the year for which the messages are
- archived
- """
- self.sourceFolder = source
- targetName = self.__getArchiveFolderName(source.getFullName(), year)
- self.targetFolder = self.sourceFolder.getFolder(targetName)
+class MessageError(IOError):
+ pass
- def __getArchiveFolderName(self, srcFldName, year):
- """
- @param folder string with the folder name
- @param year int
- @return
- """
- archFolder = "INBOX/Archiv/"
- rootLen = len("INBOX/")
- if not(srcFldName[:rootLen] == "/INBOX/"):
- raise FolderNotFoundError(
- "We expect all folders to be under INBOX folder.")
- baseName = srcFldName[rootLen:]
- archFolder = "/" + archFolder + year + "/" + baseName
- return archFolder
-
- def add(self, msg):
- self.append(msg)
-
- def doArchive(self):
- for message in self:
- log.debug("Moving %s from %s to %s.",
- message, self.sourceFolder.getFullName(),
- self.targetFolder.getFullName())
- # self.sourceFolder.copyMessages(array(self, Message),
- # self.targetFolder)
- # for message in self:
- # message.setFlag(Flags.Flag.DELETED, true)
- # self.sourceFolder.expunge()
-
-
-class Archives(dict):
- """Collects archivable folders indexed by tuple of
- folderName and year
- """
-
- def __init__(self):
- pass
-
- def add(self, msg):
- """
- Check whether the ArchivableFolder (@see ArchivableFolder)
- for the particular year actually exists, and if not then create
- it. In any event add
- @param msg
- """
- fld = msg.getFolder()
- msgDate = __getMessageDate(msg)
- year = msgDate.year
- if not(self.search(fld, year)):
- archfld = ArchivableFolder(fld, year)
- self[self.__createKey(fld, year)] = archfld
+def get_config():
+ config = configparser.ConfigParser()
+ config.read(os.path.expanduser('~/.config/imap_archiver.cfg'))
+ return config
+
+
+class Message(object):
+ """Abstraction over one email message."""
+ def __init__(self, client, uid):
+ self.client = client
+ self.uid = uid
+ self.msg = self.__get_body()
+ self.date = email.utils.parsedate_to_datetime(self.msg['Date'])
+ self.subject = self.__get_subject()
+
+ def __get_body(self):
+ typ, data = self.client.uid('FETCH', '%s (RFC822)' % self.uid)
+ if typ == 'OK':
+ return email.message_from_bytes(data[0][1])
+ else:
+ raise MessageError('Cannot parse message %s:\n%s!' %
+ (self.uid, data[0][1]))
+
+ def __get_subject(self):
+ out_str = ''
+ for raw_str, enc in email.header.decode_header(self.msg['Subject']):
+ if isinstance(raw_str, bytes):
+ out_str += raw_str.decode(enc or 'utf8')
+ else:
+ out_str += raw_str
+ return out_str
+
+ def __str__(self):
+ return "%s: %s (%s)" % (self.uid, self.subject, self.date)
+
+
+class Folder(object):
+ def __init__(self, box, folder_sep, fld_name):
+ self.box = box
+ self.fld_name = fld_name
+ self.folder_separator = folder_sep
+ self.selected = False
+
+ def select(self):
+ ret = self.box.select(mailbox=self.fld_name)
+ self.selected = True
+ return self
- def __createKey(self, name, year):
- """
- Create a key for the list
- @param fldName String with the full name of the folder
- @param year int of the year of the messages to be stored
- there
- @return tuple consisting from the both parameters of
- this function.
- """
- return(name, year)
+ def __emails_search(self, search_type, date_str):
+ ok, res = self.box.uid('SEARCH', '%s %s' % (search_type.upper(), date_str))
+ if ok != 'OK':
+ raise MessageError('SEARCH %s %s failed!' %
+ (search_type.lower(), date_str))
- def search(self, fld, year):
- """
- Find out whether the object with the key consisting of
- the folder name and year exists in this object.
+ msgs = []
+ for uid in res[0].decode().split(' '):
+ msgs.append(Message(self.box, uid))
+ return msgs
- @param fld Folder where the message is stored
- @param year int year of the message date
- @return boolean saying whether the folder for this message
- has been already added to this object.
- """
- key = self.__createKey(fld.getFullName(), year)
- return key in self
+ def emails_before(self, before_str):
+ return self.__emails_search('BEFORE', before_str)
- def archive(self):
- for key in self:
- self.doArchive()
+ def get_archive_folder(self, msg, aroot):
+ return self.folder_separator.join((aroot, msg.date.strftime("%Y"), self.fld_name))
-class ArchivedStore(object):
- def __init__(self, serverKey=None):
- config = configparser.ConfigParser()
- config.read(os.path.expanduser('~/.config/imap_archiver.cfg'))
+class EmailServer(object):
+ def __init__(self, serverKey=None, archive_root=None):
+ config = get_config()
acc_name = serverKey if serverKey is not None \
else config['general']['account']
self.cfg = dict(config.items(acc_name))
- # if debug:
- # self.session.setDebug(True)
- # try:
- # self.store = self.session.getStore("imaps")
- # except:
- # print >> sys.stderr, "Cannot get Store"
- # raise
-
- self._threeMonthAgo = date.today()
- newmonth = self._threeMonthAgo.month - 3
- self._threeMonthAgo = self._threeMonthAgo.replace(month=newmonth)
-
- log.debug("host = %s, user = %s, password = %s",
- self.cfg['host'], self.cfg['username'],
- self.cfg['password'])
-
- self.__login(host, user, password)
-
- def __login(self, server, user, password):
- try:
- self.store.connect(server, user, password)
- except:
- log.debug("Cannot connect to %s as %s with password %s",
- server, user, password)
- raise
-
- def archive(self, from_folder, archive_base):
- # type: (str, str) -> None
- inboxfolder = self.store.getDefaultFolder().getFolder("INBOX")
- folderList = inboxfolder.list('*')
-
- for folder in folderList:
- if folder.getFullName()[:len("INBOX/Archiv")] != "INBOX/Archiv":
- archMsgsCnt = self.__archiveFolder(folder)
- # folder.close(False)
- print("Processed messages = %d" % archMsgsCnt)
-
- def __archiveFolder(self, fld):
- fld.open(Folder.READ_WRITE)
- for msg in fld.getMessages():
- msgDate = __getMessageDate(msg)
- print >>sys.stderr, str(msgDate), str(self._threeMonthAgo)
- if msgDate < self._threeMonthAgo:
- archFolder = self.__getArchiveFolderName(msg)
-# print >> sys.stderr, archFolder
-# fld.copyMessages(array([msg],type(msg)), archFolder)
-# msg.setFlag(Flags.Flag.DELETED, true)
- print("%s -> %s : %s" % (fld.getFullName(),
- archFolder.getFullName(), msgDate))
- folderLen = len(fld.getMessages())
- fld.close(False)
- return(folderLen)
+ self.archive_root = archive_root
+
+ self.__box = self.__login(**self.cfg)
+ self.__folder_sep = self.__get_separator()
+
+ def __login(self, host='localhost', username=None, password=None, ssl=None):
+ box = imaplib.IMAP4_SSL(host=host)
+ ok, data = box.login(username, password)
+ if ok == 'OK':
+ return box
+ else:
+ raise ServerError('Cannot login with credentials %s' %
+ str((host, username, password,)))
+
+ def __get_separator(self):
+ # type: () -> None
+ ok, data = self.__box.list('""', '""')
+ if ok != 'OK':
+ raise ServerError('Cannot list known folders')
+
+ data = data[0].decode().split(' ')
+ if len(data) == 3:
+ return data[1].strip(' "')
+ else:
+ raise ServerError('Cannot find folder separator from %s' % data)
+
+ def archive_folder(self, folder_name, before_date):
+ # type: (str, datetime.date) -> None
+ """
+ Archive one folder to the proper archiving positioins.
+
+ :param: folder_name
+ :param: before_date
+ """
+ copy_cache = {}
+ fld = Folder(self.__box, self.__folder_sep, folder_name).select()
+ before_str = before_date.strftime('%d-%b-%Y')
+ for msg in fld.emails_before(before_str):
+ arch_folder = fld.get_archive_folder(msg, self.archive_root)
+ if arch_folder in copy_cache:
+ copy_cache[arch_folder].append(msg)
+ else:
+ copy_cache[arch_folder] = [msg]
+
+ for key in copy_cache:
+ log.info('\n\n%s:', key)
+ for msg in copy_cache[key]:
+ log.info('\tmsg: %s', str(msg))
+
+ # TODO: Projdi keš a proveď operaci
def __enter__(self):
return self
- def __exit__(self):
- pass # FIXME self.something.close()
+ def __exit__(self, *args):
+ if args != (None, None, None):
+ log.warning('args = %s', args)
+ else:
+ self.__box.close()
if __name__ == '__main__':
+ locale.setlocale(locale.LC_ALL, 'en_US')
argp = argparse.ArgumentParser()
- argp.add_argument('-d',
- help='How old messages we should keep')
- argp.add_argument('server',
- help='Symbolic name of the server to be used')
- argp.add_argument('folder',
- help='Folder which should be archived')
- argp.add_argument('archive',
- help='Root folder to store annual archives to')
+ subps = argp.add_subparsers(dest='cmd')
+ subp_list = subps.add_parser('servers', help='List available servers (by keywords)')
+ subp_arc = subps.add_parser('archive', help='Archive folder[s]')
+ subp_arc.add_argument('-d', type=int, default=14, dest='days',
+ help='How old messages we should keep')
+ subp_arc.add_argument('-s', '--server', default='localhost',
+ help='Symbolic name of the server to be used')
+ subp_arc.add_argument('folder', help='Folder which should be archived')
+ subp_arc.add_argument('archive', help='Root folder to store annual archives to')
+
args = argp.parse_args()
- with ArchivedStore(args.server) as myStore:
- myStore.archive(args.folder, args.archive)
+ if args.cmd == 'list':
+ config = get_config()
+ sects = set(config.keys()) - {'DEFAULT', 'general'}
+ print('Available servers:\n%s' % tuple(sects))
+ else:
+ before = date.today() - timedelta(days=args.days)
+ with EmailServer(args.server, args.archive) as myStore:
+ myStore.archive_folder(args.folder, before)
diff --git a/listFolders.py b/listFolders.py
index db75596..b815d30 100755
--- a/listFolders.py
+++ b/listFolders.py
@@ -1,14 +1,32 @@
#!/usr/bin/env python3.6
import configparser
+import logging
import os.path
-import imapy
+import imaplib
+
+logging.basicConfig(format='%(levelname)s:%(funcName)s:%(message)s',
+ level=logging.DEBUG)
+log = logging.getLogger('listFolders')
config = configparser.ConfigParser()
config.read(os.path.expanduser('~/.config/imap_archiver.cfg'))
-cfg = dict(config.items(config['general']['account']))
-box = imapy.connect(**cfg)
+acc_name = config['general']['account']
+cfg = dict(config.items(acc_name))
+
+box = imaplib.IMAP4_SSL(host=cfg['host'])
+ok, data = box.login(cfg['username'], cfg['password'])
+if ok != 'OK':
+ raise IOError('Cannot login with credentials %s' % str(cfg))
+
+ok, data = box.list('""', '""')
+sep = data[0].split()[1].decode()
+
+ok, data = box.list()
+if ok != 'OK':
+ raise IOError('Cannot list known folders')
-for fold in box.folders():
- print(fold)
+for fld in data:
+ spl_fld = fld.decode().split(sep)
+ print(spl_fld[1].strip())
diff --git a/archiveIMAP.pl b/previousAttempts/archiveIMAP.pl
index a1833de..a1833de 100644
--- a/archiveIMAP.pl
+++ b/previousAttempts/archiveIMAP.pl