aboutsummaryrefslogblamecommitdiffstats
path: root/misc/xml/be-xml-to-mbox
blob: c8b7479a7a3acf160bcf168488ebe7bde3d7c493 (plain) (tree)
1
2
3
4
5
6
7
8
                     
                                                           
 



                                                                      
 



                                                                 
 


                                                                         











                                                                                    

                                                                       
                                




                                              




                                         
                                        



































                                                                                    



                          
                          
                               
                            







                                                                            

                                                  


                                                                           
                                                  



                                                     




















                                                                                 
 




                                               

                                
                        

                             
                        
                      
                              

                               



                                      
                                                         
                                                                     



                                                  


                                                        
                                       
                                         







                                               

                                             
                                                              
                                   
                                              
                                                 

                                                    






                                                              
                



                                                                                 








                                                      
 








                                  


                                         

                          
                 
              


                                                               




                                                                            


                                                                       
#!/usr/bin/env python
# Copyright (C) 2009-2010 W. Trevor King <wking@drexel.edu>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""
Convert xml output of `be list --xml` into mbox format for browsing
with a mail reader.  For example
  $ be list --xml --status=all | be-xml-to-mbox | catmutt

mbox is a flat-file format, consisting of a series of messages.
Messages begin with a From_ line, followed by an RFC 822 email,
followed by a blank line.
"""

#from mailbox import mbox, Message  # the mailbox people really want an on-disk copy
import email.utils
from libbe.util.encoding import get_output_encoding
from libbe.util.utility import str_to_time as rfc2822_to_gmtime_integer
from time import asctime, gmtime
import types
try: # import core module, Python >= 2.5
    from xml.etree import ElementTree
except ImportError: # look for non-core module
    from elementtree import ElementTree
from xml.sax.saxutils import unescape


# Domain appended to ids that lack an "@" when building Message-id and
# In-Reply-To header values.
DEFAULT_DOMAIN = "invalid.com"
# Placeholder address in DEFAULT_DOMAIN.
DEFAULT_EMAIL = "dummy@%s" % (DEFAULT_DOMAIN,)
# Encoding used for the Content-Type charset and for wrapping the
# standard streams when run as a script.
DEFAULT_ENCODING = get_output_encoding()

def rfc2822_to_asctime(rfc2822_string):
    """Convert an RFC 2822-formatted date string into an asctime string.

    The result is the `time.asctime` rendering of the date in GMT, as
    used on the mbox "From " separator line.  An empty string is
    treated as the epoch (time 0).

    >>> rfc2822_to_asctime("")
    'Thu Jan  1 00:00:00 1970'
    """
    if rfc2822_string != "":
        seconds = rfc2822_to_gmtime_integer(rfc2822_string)
    else:
        # No date available: fall back to the epoch.
        seconds = 0
    return asctime(gmtime(seconds))

class LimitedAttrDict (dict):
    """
    Dict with error checking, to avoid invalid bug/comment fields.

    Only keys listed in the class attribute `_attrs` may be stored.
    Subclasses override `_attrs` with their list of valid names.
    Keys are validated by `__init__` and `__setitem__`; the other
    mutators inherited from dict (e.g. `update`, `setdefault`) are
    not overridden and therefore not validated.

    >>> class Point (LimitedAttrDict):
    ...     _attrs = ["x", "y"]
    >>> p = Point(x=1)
    >>> p["y"] = 2
    >>> p["z"] = 3
    Traceback (most recent call last):
      ...
    ValueError: Invalid attribute name 'z'
    """
    _attrs = [] # override with list of valid attribute names
    # Types accepted as string keys.  types.StringTypes only exists on
    # Python 2; fall back to plain str where it is missing.
    _string_types = getattr(types, "StringTypes", (str,))
    def __init__(self, **kwargs):
        """Create the dict, validating every keyword as a key."""
        dict.__init__(self)
        for key,value in kwargs.items():
            self[key] = value
    def __setitem__(self, key, item):
        """Store `item` under `key` after checking that `key` is valid.

        Raises TypeError or ValueError (see `_validate_key`) and leaves
        the dict unchanged when the key is rejected.
        """
        self._validate_key(key)
        dict.__setitem__(self, key, item)
    def _validate_key(self, key):
        """Return silently if `key` is listed in `_attrs`.

        Raises:
          TypeError: `key` is not listed and is not a string.
          ValueError: `key` is a string that is not listed.
        """
        if key in self._attrs:
            return
        # isinstance (rather than an exact type match) so that subclasses
        # of the string types are reported as bad names, not bad types.
        if not isinstance(key, self._string_types):
            raise TypeError("Invalid attribute type %s for '%s'"
                            % (type(key), key))
        raise ValueError("Invalid attribute name '%s'" % key)

class Bug (LimitedAttrDict):
    _attrs = [u"uuid",
              u"short-name",
              u"severity",
              u"status",
              u"assigned",
              u"reporter",
              u"creator",
              u"created",
              u"summary",
              u"comments",
              u"extra-strings"]
    def print_to_mbox(self):
        if "creator" in self:
            # otherwise, probably a `be show` uuid-only bug to avoid
            # root comments.
            name,addr = email.utils.parseaddr(self["creator"])
            print "From %s %s" % (addr, rfc2822_to_asctime(self["created"]))
            print "Message-id: <%s@%s>" % (self["uuid"], DEFAULT_DOMAIN)
            print "Date: %s" % self["created"]
            print "From: %s" % self["creator"]
            print "Content-Type: %s; charset=%s" \
                % ("text/plain", DEFAULT_ENCODING)
            print "Content-Transfer-Encoding: 8bit"
            print "Subject: %s: %s" % (self["short-name"], self["summary"])
            if "extra-strings" in self:
                for estr in self["extra-strings"]:
                    print "X-Extra-String: %s" % estr
            print ""
            print self["summary"]
            print ""
        if "comments" in self:
            for comment in self["comments"]:
                comment.print_to_mbox(self)            
    def init_from_etree(self, element):
        assert element.tag == "bug", element.tag
        for field in element.getchildren():
            text = unescape(unicode(field.text).decode("unicode_escape").strip())
            if field.tag == "comment":
                comm = Comment()
                comm.init_from_etree(field)
                if "comments" in self:
                    self["comments"].append(comm)
                else:
                    self["comments"] = [comm]
            elif field.tag == "extra-string":
                if "extra-strings" in self:
                    self["extra-strings"].append(text)
                else:
                    self["extra-strings"] = [text]
            else:
                self[field.tag] = text

def wrap_id(id):
    """Return `id` in a form usable as a message-id header value.

    An id that already contains "@" is returned unchanged; any other
    id is rendered as "<id@DEFAULT_DOMAIN>".
    """
    if "@" in id:
        return id
    return "<%s@%s>" % (id, DEFAULT_DOMAIN)

class Comment (LimitedAttrDict):
    _attrs = [u"uuid",
              u"alt-id",
              u"short-name",
              u"in-reply-to",
              u"author",
              u"date",
              u"content-type",
              u"body",
              u"extra-strings"]
    def print_to_mbox(self, bug=None):
        if bug == None:
            bug = Bug()
            bug[u"uuid"] = u"no-uuid"
        name,addr = email.utils.parseaddr(self["author"])
        print "From %s %s" % (addr, rfc2822_to_asctime(self["date"]))
        if "uuid" in self:     id = self["uuid"]
        elif "alt-id" in self: id = self["alt-id"]
        else:                  id = None
        if id != None:
            print "Message-id: %s" % wrap_id(id)
        if "alt-id" in self:
            print "Alt-id: %s" % wrap_id(self["alt-id"])
        print "Date: %s" % self["date"]
        print "From: %s" % self["author"]
        subject = ""
        if "short-name" in self:
            subject += self["short-name"]+u": "
        if "summary" in bug:
            subject += bug["summary"]
        else:
            subject += u"no-subject"
        print "Subject: %s" % subject
        if "in-reply-to" not in self.keys():
            self["in-reply-to"] = bug["uuid"]
        print "In-Reply-To: %s" % wrap_id(self["in-reply-to"])
        if "extra-strings" in self:
            for estr in self["extra-strings"]:
                print "X-Extra-String: %s" % estr
        if self["content-type"].startswith("text/"):
            print "Content-Transfer-Encoding: 8bit"
            print "Content-Type: %s; charset=%s" \
                % (self["content-type"], DEFAULT_ENCODING)
        else:
            print "Content-Transfer-Encoding: base64"
            print "Content-Type: %s;" % (self["content-type"])
        print ""
        print self["body"]
        print ""
    def init_from_etree(self, element):
        assert element.tag == "comment", element.tag
        for field in element.getchildren():
            text = unescape(unicode(field.text).decode("unicode_escape").strip())
            if field.tag == "extra-string":
                if "extra-strings" in self:
                    self["extra-strings"].append(text)
                else:
                    self["extra-strings"] = [text]
            else:
                if field.tag == "body":
                    text+="\n"
                self[field.tag] = text

def print_to_mbox(element):
    """Print the bugs and comments found in `element` as mbox messages.

    `element` is an ElementTree element.  A <bug> is loaded into a Bug
    and a <comment> into a Comment, and each is printed to stdout.  A
    <be-xml> container is walked recursively.  Elements with any other
    tag are ignored.
    """
    if element.tag == "bug":
        b = Bug()
        b.init_from_etree(element)
        b.print_to_mbox()
    elif element.tag == "comment":
        c = Comment()
        c.init_from_etree(element)
        c.print_to_mbox()
    elif element.tag == "be-xml":
        # Iterate the element directly; Element.getchildren() is
        # deprecated and absent from newer ElementTree versions.
        for elt in element:
            print_to_mbox(elt)

if __name__ == "__main__":
    import codecs
    import sys

    # Wrap the standard streams so that reads yield, and writes accept,
    # unicode in DEFAULT_ENCODING.
    sys.stdin = codecs.getreader(DEFAULT_ENCODING)(sys.stdin)
    sys.stdout = codecs.getwriter(DEFAULT_ENCODING)(sys.stdout)

    if len(sys.argv) == 1: # no filename given, use stdin
        xml_unicode = sys.stdin.read()
    else:
        xml_file = codecs.open(sys.argv[1], "r", DEFAULT_ENCODING)
        try:
            xml_unicode = xml_file.read()
        finally:
            # Close the input file even if reading or decoding fails.
            xml_file.close()
    # Hand the parser a backslash-escaped byte string; the
    # init_from_etree methods reverse this with decode("unicode_escape").
    # Real newlines are restored here because the escaping turns them
    # into a backslash followed by "n".
    # NOTE(review): the replace also matches a literal backslash followed
    # by "n" in the input (escaped as two backslashes and "n") -- confirm
    # whether such input can occur.
    xml_str = xml_unicode.encode("unicode_escape").replace(r"\n", "\n")
    elist = ElementTree.XML(xml_str)
    print_to_mbox(elist)