diff options
Diffstat (limited to 'interfaces/xml')
-rwxr-xr-x | interfaces/xml/be-mbox-to-xml | 39 | ||||
-rwxr-xr-x | interfaces/xml/be-xml-to-mbox | 6 |
2 files changed, 33 insertions, 12 deletions
diff --git a/interfaces/xml/be-mbox-to-xml b/interfaces/xml/be-mbox-to-xml index 335f92f..a740117 100755 --- a/interfaces/xml/be-mbox-to-xml +++ b/interfaces/xml/be-mbox-to-xml @@ -25,11 +25,10 @@ followed by a blank line. import base64 import email.utils from libbe.encoding import get_encoding, set_IO_stream_encodings +from libbe.utility import time_to_str from mailbox import mbox, Message # the mailbox people really want an on-disk copy -from time import asctime, gmtime +from time import asctime, gmtime, mktime import types -from xml.sax import make_parser -from xml.sax.handler import ContentHandler from xml.sax.saxutils import escape DEFAULT_ENCODING = get_encoding() @@ -37,14 +36,34 @@ set_IO_stream_encodings(DEFAULT_ENCODING) KNOWN_IDS = [] +def normalize_email_address(address): + """ + Standardize whitespace, etc. + """ + return email.utils.formataddr(email.utils.parseaddr(address)) + +def normalize_RFC_2822_date(date): + """ + Some email clients write non-RFC 2822-compliant date tags like: + Fri, 18 Sep 2009 08:49:02 -0400 (EDT) + with the non-standard (EDT) timezone name. This function attempts + to deal with such inconsistencies. 
+ """ + time_tuple = email.utils.parsedate(date) + assert time_tuple != None, \ + 'unparsable date: "%s"' % date + return time_to_str(mktime(time_tuple)) + def comment_message_to_xml(message, fields=None): if fields == None: fields = {} new_fields = {} new_fields[u'alt-id'] = message[u'message-id'] new_fields[u'in-reply-to'] = message[u'in-reply-to'] - new_fields[u'from'] = message[u'from'] + new_fields[u'author'] = normalize_email_address(message[u'from']) new_fields[u'date'] = message[u'date'] + if new_fields[u'date'] != None: + new_fields[u'date'] = normalize_RFC_2822_date(new_fields[u'date']) new_fields[u'content-type'] = message.get_content_type() for k,v in new_fields.items(): if v != None and type(v) != types.UnicodeType: @@ -67,25 +86,27 @@ def comment_message_to_xml(message, fields=None): fields[u'in-reply-to'] = refs[0] # default to the first else: # check for mutliple in-reply-to references. refs = fields[u'in-reply-to'].split() + found_ref = False for ref in refs: # search for a known reference id. 
if ref in KNOWN_IDS: fields[u'in-reply-to'] = ref + found_ref = True break - if fields[u'in-reply-to'] == None and len(refs) > 0: + if found_ref == False and len(refs) > 0: fields[u'in-reply-to'] = refs[0] # default to the first - if fields['alt-id'] != None: - KNOWN_IDS.append(fields['alt-id']) + if fields[u'alt-id'] != None: + KNOWN_IDS.append(fields[u'alt-id']) if message.is_multipart(): ret = [] alt_id = fields[u'alt-id'] - from_str = fields[u'from'] + from_str = fields[u'author'] date = fields[u'date'] for m in message.walk(): if m == message: continue - fields[u'from'] = from_str + fields[u'author'] = from_str fields[u'date'] = date if len(ret) > 0: # we've added one part already fields.pop(u'alt-id') # don't pass alt-id to other parts diff --git a/interfaces/xml/be-xml-to-mbox b/interfaces/xml/be-xml-to-mbox index ea77c34..c630447 100755 --- a/interfaces/xml/be-xml-to-mbox +++ b/interfaces/xml/be-xml-to-mbox @@ -129,7 +129,7 @@ class Comment (LimitedAttrDict): u"alt-id", u"short-name", u"in-reply-to", - u"from", + u"author", u"date", u"content-type", u"body"] @@ -137,7 +137,7 @@ class Comment (LimitedAttrDict): if bug == None: bug = Bug() bug[u"uuid"] = u"no-uuid" - name,addr = email.utils.parseaddr(self["from"]) + name,addr = email.utils.parseaddr(self["author"]) print "From %s %s" % (addr, rfc2822_to_asctime(self["date"])) if "uuid" in self: id = self["uuid"] elif "alt-id" in self: id = self["alt-id"] @@ -145,7 +145,7 @@ class Comment (LimitedAttrDict): if id != None: print "Message-ID: <%s@%s>" % (id, DEFAULT_DOMAIN) print "Date: %s" % self["date"] - print "From: %s" % self["from"] + print "From: %s" % self["author"] subject = "" if "short-name" in self: subject += self["short-name"]+u": " |