aboutsummaryrefslogblamecommitdiffstats
path: root/misc/xml/be-xml-to-mbox
blob: 48454f9056c56cb96157aa9ea590d8d8e0d76ae6 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
                     
                                                     
                                                           
                                                           
 
                                       
 



                                                                               
 
                                                                        


                                                                               
 

                                                                              









                                                                   
                                                                                     
                  

                                                                       
                                
            
                                 

                                     

                                         
                                        
 
 








                                                                     

                            


                                                                  

                                                              

                                 
                                         
                             
 


                                         
 



                                                
                                                                                    
             

                                                                 
 
                           




                            



                          
                          
                               
 
                            


                                                                    








                                                                             
                                       
                                                  



                                                      

                                            

                                           


                                                
                                                                               













                                                      
 
 




                                               

                               
                      
                        

                             
                        
                      
                              

                               
 
                                      
                       

                                     









                                                                      
                            


                                                         

                                
                                                 



                                     
                                      

                                             
                                                               
                                   
                                              
                                                  
                                                    


                                                             
             





                                                               


                                                    
                                                                               






                                                      
                                
                                      
 
 








                                  


                                         
 
 
                          
                 
              
 

                                                               
 
                                                          


                                                                            


                                                                       
#!/usr/bin/env python
# Copyright (C) 2009-2012 Chris Ball <cjb@laptop.org>
#                         Gianluca Montecchi <gian@grys.it>
#                         W. Trevor King <wking@tremily.us>
#
# This file is part of Bugs Everywhere.
#
# Bugs Everywhere is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 2 of the License, or (at your option) any
# later version.
#
# Bugs Everywhere is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# Bugs Everywhere.  If not, see <http://www.gnu.org/licenses/>.
"""
Convert xml output of `be list --xml` into mbox format for browsing
with a mail reader.  For example
  $ be list --xml --status=all | be-xml-to-mbox | catmutt

mbox is a flat-file format, consisting of a series of messages.
Messages begin with a From_ line, followed by RFC 822 email,
followed by a blank line.
"""

# from mailbox import mbox, Message  # the mailbox people really want an on-disk copy
import email.utils
from libbe.util.encoding import get_output_encoding
from libbe.util.utility import str_to_time as rfc2822_to_gmtime_integer
from time import asctime, gmtime
import types
from xml.etree import ElementTree
from xml.sax.saxutils import unescape

# Domain appended to bug/comment ids when building Message-id and
# In-Reply-To header values.
DEFAULT_DOMAIN = "invalid.com"
# Placeholder address on DEFAULT_DOMAIN (not referenced by the code
# visible in this file).
DEFAULT_EMAIL = "dummy@" + DEFAULT_DOMAIN
# Encoding used for the charset of text/* messages and for reading the
# XML input / writing the mbox output when run as a script.
DEFAULT_ENCODING = get_output_encoding()


def rfc2822_to_asctime(rfc2822_string):
    """Convert an RFC 2822-formatted date string into an asctime string.

    The date is converted to an integer timestamp with
    ``rfc2822_to_gmtime_integer`` and then formatted with
    ``asctime(gmtime(...))``.  An empty string is treated as timestamp 0
    (the Unix epoch).

    >>> rfc2822_to_asctime("Thu, 01 Jan 1970 00:00:00 +0000")
    'Thu Jan  1 00:00:00 1970'
    >>> rfc2822_to_asctime("")
    'Thu Jan  1 00:00:00 1970'
    """
    if rfc2822_string == "":
        seconds = 0
    else:
        seconds = rfc2822_to_gmtime_integer(rfc2822_string)
    return asctime(gmtime(seconds))


class LimitedAttrDict(dict):
    """
    Dict with error checking, to avoid invalid bug/comment fields.

    Only keys listed in the class attribute ``_attrs`` may be assigned.
    The check runs in ``__setitem__``, which the constructor also uses for
    its keyword arguments; other ``dict`` mutators are not overridden here.
    """
    _attrs = []  # override with list of valid attribute names

    def __init__(self, **kwargs):
        """Create the dict, validating every keyword argument as a key."""
        dict.__init__(self)
        for key, value in kwargs.items():
            self[key] = value

    def __setitem__(self, key, item):
        """Store ``item`` under ``key`` after checking the key is allowed."""
        self._validate_key(key)
        dict.__setitem__(self, key, item)

    def _validate_key(self, key):
        """Return silently if ``key`` is allowed, otherwise raise.

        Raises:
            TypeError: ``key`` is not allowed and is not a string.
            ValueError: ``key`` is a string but not listed in ``_attrs``.
        """
        if key in self._attrs:
            return
        # ``types.StringTypes`` only exists on Python 2; fall back to
        # ``str`` so that Python 3 raises the intended error instead of an
        # AttributeError.
        string_types = getattr(types, "StringTypes", (str,))
        if not isinstance(key, string_types):
            raise TypeError("Invalid attribute type %s for '%s'" % (type(key), key))
        raise ValueError("Invalid attribute name '%s'" % key)


class Bug(LimitedAttrDict):
    """A bug loaded from a ``<bug>`` element and printable as mbox messages.

    Keys are restricted to the names in ``_attrs``.  ``comments`` holds a
    list of ``Comment`` objects and ``extra-strings`` a list of strings;
    every other key holds the text of the matching child element.
    """
    _attrs = [u"uuid",
              u"short-name",
              u"severity",
              u"status",
              u"assigned",
              u"reporter",
              u"creator",
              u"created",
              u"summary",
              u"comments",
              u"extra-strings"]

    def print_to_mbox(self):
        """Print this bug, then each of its comments, as mbox messages.

        The bug's own message is only printed when a ``creator`` is set;
        comments are printed either way.
        """
        if "creator" in self:
            # otherwise, probably a `be show` uuid-only bug to avoid
            # root comments.
            addr = email.utils.parseaddr(self["creator"])[1]
            print("From %s %s" % (addr, rfc2822_to_asctime(self["created"])))
            print("Message-id: <%s@%s>" % (self["uuid"], DEFAULT_DOMAIN))
            print("Date: %s" % self["created"])
            print("From: %s" % self["creator"])
            print("Content-Type: %s; charset=%s"
                  % ("text/plain", DEFAULT_ENCODING))
            print("Content-Transfer-Encoding: 8bit")
            print("Subject: %s: %s" % (self["short-name"], self["summary"]))
            for estr in self.get("extra-strings", []):
                print("X-Extra-String: %s" % estr)
            print()
            print(self["summary"])
            print()
        for comment in self.get("comments", []):
            comment.print_to_mbox(self)

    def init_from_etree(self, element):
        """Populate this bug from a ``<bug>`` ElementTree element.

        ``<comment>`` children become ``Comment`` objects appended to
        ``comments``, ``<extra-string>`` children are appended to
        ``extra-strings``, and any other child is stored under its tag
        name (which must be listed in ``_attrs``).

        Raises:
            AssertionError: ``element`` is not a ``<bug>`` element.
            ValueError: a child tag is not a valid bug attribute.
        """
        assert element.tag == "bug", element.tag
        # Iterate the element directly: Element.getchildren() was removed
        # in Python 3.9.
        for field in element:
            if field.tag == "comment":
                comm = Comment()
                comm.init_from_etree(field)
                if "comments" in self:
                    self["comments"].append(comm)
                else:
                    self["comments"] = [comm]
                continue
            # The script entry point of this file unicode_escape-encodes
            # the XML before parsing, so undo that here.  Encoding with
            # latin-1/backslashreplace yields the byte string the codec
            # expects on both Python 2 and 3 and keeps any unescaped
            # non-ASCII character intact.  Empty elements have text None.
            raw = (field.text or "").encode("latin-1", "backslashreplace")
            text = unescape(raw.decode("unicode_escape").strip())
            if field.tag == "extra-string":
                if "extra-strings" in self:
                    self["extra-strings"].append(text)
                else:
                    self["extra-strings"] = [text]
            else:
                self[field.tag] = text


def wrap_id(id):
    """Return ``id`` in Message-id form.

    An ``id`` that already contains an "@" is returned unchanged; any
    other value is wrapped as ``<id@DEFAULT_DOMAIN>``.
    """
    return id if "@" in id else "<%s@%s>" % (id, DEFAULT_DOMAIN)


class Comment(LimitedAttrDict):
    """A comment loaded from a ``<comment>`` element, printable as mbox.

    Keys are restricted to the names in ``_attrs``.  ``extra-strings``
    holds a list of strings; every other key holds the text of the
    matching child element.
    """
    _attrs = [u"uuid",
              u"alt-id",
              u"short-name",
              u"in-reply-to",
              u"author",
              u"date",
              u"content-type",
              u"body",
              u"extra-strings"]

    def print_to_mbox(self, bug=None):
        """Print this comment as a single mbox message.

        Args:
            bug: the ``Bug`` this comment belongs to.  Its ``summary`` is
                used for the subject and its ``uuid`` as the reply target
                when the comment has no ``in-reply-to``.  When omitted, a
                placeholder bug with uuid ``no-uuid`` is used.

        Side effect: a missing ``in-reply-to`` key is filled in with the
        bug's uuid.
        """
        if bug is None:
            bug = Bug()
            bug[u"uuid"] = u"no-uuid"
        addr = email.utils.parseaddr(self["author"])[1]
        print("From %s %s" % (addr, rfc2822_to_asctime(self["date"])))
        # Prefer the uuid as message id, falling back to the alt-id.
        if "uuid" in self:
            msg_id = self["uuid"]
        elif "alt-id" in self:
            msg_id = self["alt-id"]
        else:
            msg_id = None
        if msg_id is not None:
            print("Message-id: %s" % wrap_id(msg_id))
        if "alt-id" in self:
            print("Alt-id: %s" % wrap_id(self["alt-id"]))
        print("Date: %s" % self["date"])
        print("From: %s" % self["author"])
        subject = ""
        if "short-name" in self:
            subject += self["short-name"] + u": "
        if "summary" in bug:
            subject += bug["summary"]
        else:
            subject += u"no-subject"
        print("Subject: %s" % subject)
        if "in-reply-to" not in self:
            self["in-reply-to"] = bug["uuid"]
        print("In-Reply-To: %s" % wrap_id(self["in-reply-to"]))
        for estr in self.get("extra-strings", []):
            print("X-Extra-String: %s" % estr)
        if self["content-type"].startswith("text/"):
            print("Content-Transfer-Encoding: 8bit")
            print("Content-Type: %s; charset=%s"
                  % (self["content-type"], DEFAULT_ENCODING))
        else:
            print("Content-Transfer-Encoding: base64")
            print("Content-Type: %s;" % (self["content-type"]))
        print()
        print(self["body"])
        print()

    def init_from_etree(self, element):
        """Populate this comment from a ``<comment>`` ElementTree element.

        ``<extra-string>`` children are appended to ``extra-strings``;
        any other child is stored under its tag name (which must be listed
        in ``_attrs``).  The stripped ``body`` text gets one trailing
        newline.

        Raises:
            AssertionError: ``element`` is not a ``<comment>`` element.
            ValueError: a child tag is not a valid comment attribute.
        """
        assert element.tag == "comment", element.tag
        # Iterate the element directly: Element.getchildren() was removed
        # in Python 3.9.
        for field in element:
            # The script entry point of this file unicode_escape-encodes
            # the XML before parsing, so undo that here.  Encoding with
            # latin-1/backslashreplace yields the byte string the codec
            # expects on both Python 2 and 3 and keeps any unescaped
            # non-ASCII character intact.  Empty elements have text None.
            raw = (field.text or "").encode("latin-1", "backslashreplace")
            text = unescape(raw.decode("unicode_escape").strip())
            if field.tag == "extra-string":
                if "extra-strings" in self:
                    self["extra-strings"].append(text)
                else:
                    self["extra-strings"] = [text]
            else:
                if field.tag == "body":
                    text += "\n"
                self[field.tag] = text


def print_to_mbox(element):
    """Print the mbox rendering of an ElementTree element.

    ``<bug>`` and ``<comment>`` elements are loaded into ``Bug`` /
    ``Comment`` objects and printed; a ``<be-xml>`` container is walked
    recursively.  Elements with any other tag are silently ignored.
    """
    if element.tag == "bug":
        bug = Bug()
        bug.init_from_etree(element)
        bug.print_to_mbox()
    elif element.tag == "comment":
        comment = Comment()
        comment.init_from_etree(element)
        comment.print_to_mbox()
    elif element.tag == "be-xml":
        # Iterate the element directly: Element.getchildren() was removed
        # in Python 3.9.
        for child in element:
            print_to_mbox(child)


if __name__ == "__main__":
    import codecs
    import sys

    # The codecs reader/writer must wrap byte streams.  On Python 3 the
    # standard streams are text streams whose byte stream is exposed as
    # ``.buffer``; on Python 2 they are already byte streams.
    sys.stdin = codecs.getreader(DEFAULT_ENCODING)(
        getattr(sys.stdin, "buffer", sys.stdin))
    sys.stdout = codecs.getwriter(DEFAULT_ENCODING)(
        getattr(sys.stdout, "buffer", sys.stdout))

    if len(sys.argv) == 1:  # no filename given, use stdin
        xml_unicode = sys.stdin.read()
    else:
        # Close the input file deterministically once it has been read.
        with codecs.open(sys.argv[1], "r", DEFAULT_ENCODING) as xml_file:
            xml_unicode = xml_file.read()
    # Hand the parser an ASCII-only byte string: non-ASCII characters are
    # turned into backslash escapes (decoded again in init_from_etree),
    # while escaped newlines are restored to real newlines.  Byte
    # literals are required because encode() returns bytes on Python 3.
    xml_str = xml_unicode.encode("unicode_escape").replace(b"\\n", b"\n")
    elist = ElementTree.XML(xml_str)
    print_to_mbox(elist)