aboutsummaryrefslogtreecommitdiffstats
path: root/interfaces
diff options
context:
space:
mode:
author: W. Trevor King <wking@drexel.edu> 2009-09-23 12:18:31 -0400
committer: W. Trevor King <wking@drexel.edu> 2009-09-23 12:18:31 -0400
commit: a183301a11d67ef35727d3dec8dbef6b3529f631 (patch)
tree: 955ec4191f740f529d2e05c9bd6f01999dd4b548 /interfaces
parent: b76fa539c4e8eb6b2d9bb6f34841c7c21b52e498 (diff)
download: bugseverywhere-a183301a11d67ef35727d3dec8dbef6b3529f631.tar.gz
Added normalize_RFC_2822_date() to be-mbox-to-xml.
The "if new_fields[u'date'] != None:" bit avoids attempting to normalize missing dates (which fails). You get messages with missing dates when comment_message_to_xml() is called recursively for multipart messages. Also fixed some unicode keys (['X'] -> [u'X']) for consistency.
Diffstat (limited to 'interfaces')
-rwxr-xr-x interfaces/xml/be-mbox-to-xml | 24
1 files changed, 21 insertions, 3 deletions
diff --git a/interfaces/xml/be-mbox-to-xml b/interfaces/xml/be-mbox-to-xml
index 338982e..a740117 100755
--- a/interfaces/xml/be-mbox-to-xml
+++ b/interfaces/xml/be-mbox-to-xml
@@ -25,8 +25,9 @@ followed by a blank line.
import base64
import email.utils
from libbe.encoding import get_encoding, set_IO_stream_encodings
+from libbe.utility import time_to_str
from mailbox import mbox, Message # the mailbox people really want an on-disk copy
-from time import asctime, gmtime
+from time import asctime, gmtime, mktime
import types
from xml.sax.saxutils import escape
@@ -36,8 +37,23 @@ set_IO_stream_encodings(DEFAULT_ENCODING)
KNOWN_IDS = []
def normalize_email_address(address):
+ """
+ Standardize whitespace, etc.
+ """
return email.utils.formataddr(email.utils.parseaddr(address))
+def normalize_RFC_2822_date(date):
+ """
+ Some email clients write non-RFC 2822-compliant date tags like:
+ Fri, 18 Sep 2009 08:49:02 -0400 (EDT)
+ with the non-standard (EDT) timezone name. This function attempts
+ to deal with such inconsistencies.
+ """
+ time_tuple = email.utils.parsedate(date)
+ assert time_tuple != None, \
+ 'unparsable date: "%s"' % date
+ return time_to_str(mktime(time_tuple))
+
def comment_message_to_xml(message, fields=None):
if fields == None:
fields = {}
@@ -46,6 +62,8 @@ def comment_message_to_xml(message, fields=None):
new_fields[u'in-reply-to'] = message[u'in-reply-to']
new_fields[u'author'] = normalize_email_address(message[u'from'])
new_fields[u'date'] = message[u'date']
+ if new_fields[u'date'] != None:
+ new_fields[u'date'] = normalize_RFC_2822_date(new_fields[u'date'])
new_fields[u'content-type'] = message.get_content_type()
for k,v in new_fields.items():
if v != None and type(v) != types.UnicodeType:
@@ -77,8 +95,8 @@ def comment_message_to_xml(message, fields=None):
if found_ref == False and len(refs) > 0:
fields[u'in-reply-to'] = refs[0] # default to the first
- if fields['alt-id'] != None:
- KNOWN_IDS.append(fields['alt-id'])
+ if fields[u'alt-id'] != None:
+ KNOWN_IDS.append(fields[u'alt-id'])
if message.is_multipart():
ret = []