diff options
author | W. Trevor King <wking@drexel.edu> | 2009-09-23 12:18:31 -0400 |
---|---|---|
committer | W. Trevor King <wking@drexel.edu> | 2009-09-23 12:18:31 -0400 |
commit | a183301a11d67ef35727d3dec8dbef6b3529f631 (patch) | |
tree | 955ec4191f740f529d2e05c9bd6f01999dd4b548 /interfaces/xml | |
parent | b76fa539c4e8eb6b2d9bb6f34841c7c21b52e498 (diff) | |
download | bugseverywhere-a183301a11d67ef35727d3dec8dbef6b3529f631.tar.gz |
Added normalize_RFC_2822_date() to be-mbox-to-xml.
The
if new_fields[u'date'] != None:
bit avoids attempting to normalize missing dates (which fails). You
get messages with missing dates when comment_message_to_xml() is
called recursively for multipart messages.
Also fixed some unicode keys (['X'] -> [u'X']) for consistency.
Diffstat (limited to 'interfaces/xml')
-rwxr-xr-x | interfaces/xml/be-mbox-to-xml | 24 |
1 files changed, 21 insertions, 3 deletions
diff --git a/interfaces/xml/be-mbox-to-xml b/interfaces/xml/be-mbox-to-xml index 338982e..a740117 100755 --- a/interfaces/xml/be-mbox-to-xml +++ b/interfaces/xml/be-mbox-to-xml @@ -25,8 +25,9 @@ followed by a blank line. import base64 import email.utils from libbe.encoding import get_encoding, set_IO_stream_encodings +from libbe.utility import time_to_str from mailbox import mbox, Message # the mailbox people really want an on-disk copy -from time import asctime, gmtime +from time import asctime, gmtime, mktime import types from xml.sax.saxutils import escape @@ -36,8 +37,23 @@ set_IO_stream_encodings(DEFAULT_ENCODING) KNOWN_IDS = [] def normalize_email_address(address): + """ + Standardize whitespace, etc. + """ return email.utils.formataddr(email.utils.parseaddr(address)) +def normalize_RFC_2822_date(date): + """ + Some email clients write non-RFC 2822-compliant date tags like: + Fri, 18 Sep 2009 08:49:02 -0400 (EDT) + with the non-standard (EDT) timezone name. This funtion attempts + to deal with such inconsistencies. 
+ """ + time_tuple = email.utils.parsedate(date) + assert time_tuple != None, \ + 'unparsable date: "%s"' % date + return time_to_str(mktime(time_tuple)) + def comment_message_to_xml(message, fields=None): if fields == None: fields = {} @@ -46,6 +62,8 @@ def comment_message_to_xml(message, fields=None): new_fields[u'in-reply-to'] = message[u'in-reply-to'] new_fields[u'author'] = normalize_email_address(message[u'from']) new_fields[u'date'] = message[u'date'] + if new_fields[u'date'] != None: + new_fields[u'date'] = normalize_RFC_2822_date(new_fields[u'date']) new_fields[u'content-type'] = message.get_content_type() for k,v in new_fields.items(): if v != None and type(v) != types.UnicodeType: @@ -77,8 +95,8 @@ def comment_message_to_xml(message, fields=None): if found_ref == False and len(refs) > 0: fields[u'in-reply-to'] = refs[0] # default to the first - if fields['alt-id'] != None: - KNOWN_IDS.append(fields['alt-id']) + if fields[u'alt-id'] != None: + KNOWN_IDS.append(fields[u'alt-id']) if message.is_multipart(): ret = [] |