about summary refs log tree commit diff stats
path: root/interfaces/xml
diff options
context:
space:
mode:
Diffstat (limited to 'interfaces/xml')
-rwxr-xr-x interfaces/xml/be-mbox-to-xml 39
-rwxr-xr-x interfaces/xml/be-xml-to-mbox 6
2 files changed, 33 insertions, 12 deletions
diff --git a/interfaces/xml/be-mbox-to-xml b/interfaces/xml/be-mbox-to-xml
index 335f92f..a740117 100755
--- a/interfaces/xml/be-mbox-to-xml
+++ b/interfaces/xml/be-mbox-to-xml
@@ -25,11 +25,10 @@ followed by a blank line.
import base64
import email.utils
from libbe.encoding import get_encoding, set_IO_stream_encodings
+from libbe.utility import time_to_str
from mailbox import mbox, Message # the mailbox people really want an on-disk copy
-from time import asctime, gmtime
+from time import asctime, gmtime, mktime
import types
-from xml.sax import make_parser
-from xml.sax.handler import ContentHandler
from xml.sax.saxutils import escape
DEFAULT_ENCODING = get_encoding()
@@ -37,14 +36,34 @@ set_IO_stream_encodings(DEFAULT_ENCODING)
KNOWN_IDS = []
+def normalize_email_address(address):
+ """
+ Standardize whitespace, etc.
+ """
+ return email.utils.formataddr(email.utils.parseaddr(address))
+
+def normalize_RFC_2822_date(date):
+ """
+ Some email clients write non-RFC 2822-compliant date tags like:
+ Fri, 18 Sep 2009 08:49:02 -0400 (EDT)
+ with the non-standard (EDT) timezone name. This function attempts
+ to deal with such inconsistencies.
+ """
+ time_tuple = email.utils.parsedate(date)
+ assert time_tuple != None, \
+ 'unparsable date: "%s"' % date
+ return time_to_str(mktime(time_tuple))
+
def comment_message_to_xml(message, fields=None):
if fields == None:
fields = {}
new_fields = {}
new_fields[u'alt-id'] = message[u'message-id']
new_fields[u'in-reply-to'] = message[u'in-reply-to']
- new_fields[u'from'] = message[u'from']
+ new_fields[u'author'] = normalize_email_address(message[u'from'])
new_fields[u'date'] = message[u'date']
+ if new_fields[u'date'] != None:
+ new_fields[u'date'] = normalize_RFC_2822_date(new_fields[u'date'])
new_fields[u'content-type'] = message.get_content_type()
for k,v in new_fields.items():
if v != None and type(v) != types.UnicodeType:
@@ -67,25 +86,27 @@ def comment_message_to_xml(message, fields=None):
fields[u'in-reply-to'] = refs[0] # default to the first
else: # check for mutliple in-reply-to references.
refs = fields[u'in-reply-to'].split()
+ found_ref = False
for ref in refs: # search for a known reference id.
if ref in KNOWN_IDS:
fields[u'in-reply-to'] = ref
+ found_ref = True
break
- if fields[u'in-reply-to'] == None and len(refs) > 0:
+ if found_ref == False and len(refs) > 0:
fields[u'in-reply-to'] = refs[0] # default to the first
- if fields['alt-id'] != None:
- KNOWN_IDS.append(fields['alt-id'])
+ if fields[u'alt-id'] != None:
+ KNOWN_IDS.append(fields[u'alt-id'])
if message.is_multipart():
ret = []
alt_id = fields[u'alt-id']
- from_str = fields[u'from']
+ from_str = fields[u'author']
date = fields[u'date']
for m in message.walk():
if m == message:
continue
- fields[u'from'] = from_str
+ fields[u'author'] = from_str
fields[u'date'] = date
if len(ret) > 0: # we've added one part already
fields.pop(u'alt-id') # don't pass alt-id to other parts
diff --git a/interfaces/xml/be-xml-to-mbox b/interfaces/xml/be-xml-to-mbox
index ea77c34..c630447 100755
--- a/interfaces/xml/be-xml-to-mbox
+++ b/interfaces/xml/be-xml-to-mbox
@@ -129,7 +129,7 @@ class Comment (LimitedAttrDict):
u"alt-id",
u"short-name",
u"in-reply-to",
- u"from",
+ u"author",
u"date",
u"content-type",
u"body"]
@@ -137,7 +137,7 @@ class Comment (LimitedAttrDict):
if bug == None:
bug = Bug()
bug[u"uuid"] = u"no-uuid"
- name,addr = email.utils.parseaddr(self["from"])
+ name,addr = email.utils.parseaddr(self["author"])
print "From %s %s" % (addr, rfc2822_to_asctime(self["date"]))
if "uuid" in self: id = self["uuid"]
elif "alt-id" in self: id = self["alt-id"]
@@ -145,7 +145,7 @@ class Comment (LimitedAttrDict):
if id != None:
print "Message-ID: <%s@%s>" % (id, DEFAULT_DOMAIN)
print "Date: %s" % self["date"]
- print "From: %s" % self["from"]
+ print "From: %s" % self["author"]
subject = ""
if "short-name" in self:
subject += self["short-name"]+u": "