From 76d552e5401df990a601f245f30f45d7c13cdd1e Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Sun, 12 Jul 2009 08:38:40 -0400 Subject: Added be-mbox-to-xml. Reworked to allow "be comment" to handle unicode strings (see bug e4ed63f6-9000-4d0b-98c3-487269140141). The solution was to escape all the unicode to produce an ASCII string before calling ElementTree.XML, and then convert back to unicode afterwards. Added a unicode-containing comment to the end of bug f7ccd916-b5c7-4890-a2e3-8c8ace17ae3a so that there's a handy unicode comment for testing. XML headers (e.g. '<?xml version="1.0" encoding="UTF-8" ?>') are now added to all xml output from be. Switched non-text/* encoding library to base64 instead of email.encoders, which makes that code in libbe/comment.py simpler. Changed libbe/mapfile.py error encoding from string_escape to unicode_escape so it can handle unicode. Everything's still untested, and be-xml-to-mbox doesn't handle unicode yet, but I felt this commit was getting a bit unwieldy ;). --- .../07fc448f-c42e-4846-929a-8924de485766/body | 8 ++++++++ .../07fc448f-c42e-4846-929a-8924de485766/values | 11 +++++++++++ .../520a9829-8d90-43ce-be64-868b8321e5b0/body | 1 + .../520a9829-8d90-43ce-be64-868b8321e5b0/values | 11 +++++++++++ .../8b54e56e-c693-4594-998f-5bd6c1f385d7/body | 5 +++++ .../8b54e56e-c693-4594-998f-5bd6c1f385d7/values | 11 +++++++++++ .../bb124fd9-08f5-4f82-a035-6355e8403075/body | 1 + .../bb124fd9-08f5-4f82-a035-6355e8403075/values | 11 +++++++++++ .../faa686bf-c0eb-48bf-8a0b-d9a2e02bd132/body | 5 +++++ .../faa686bf-c0eb-48bf-8a0b-d9a2e02bd132/values | 8 ++++++++ .be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/values | 17 +++++++++++++++++ .../028d2e8d-5b0f-4c43-a913-35a1709b2276/values | 19 +++---------------- .../15602c0c-25e4-4c2c-9e24-79bdb90721b1/values | 19 +++---------------- .../3f556a48-c538-4569-8609-3e829b561d78/values | 19 +++---------------- .../f376debf-9f7e-4347-807f-00e7263487c7/body | 1 + .../f376debf-9f7e-4347-807f-00e7263487c7/values | 8 ++++++++ 16 files 
changed, 107 insertions(+), 48 deletions(-) create mode 100644 .be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/07fc448f-c42e-4846-929a-8924de485766/body create mode 100644 .be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/07fc448f-c42e-4846-929a-8924de485766/values create mode 100644 .be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/520a9829-8d90-43ce-be64-868b8321e5b0/body create mode 100644 .be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/520a9829-8d90-43ce-be64-868b8321e5b0/values create mode 100644 .be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/8b54e56e-c693-4594-998f-5bd6c1f385d7/body create mode 100644 .be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/8b54e56e-c693-4594-998f-5bd6c1f385d7/values create mode 100644 .be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/bb124fd9-08f5-4f82-a035-6355e8403075/body create mode 100644 .be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/bb124fd9-08f5-4f82-a035-6355e8403075/values create mode 100644 .be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/faa686bf-c0eb-48bf-8a0b-d9a2e02bd132/body create mode 100644 .be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/faa686bf-c0eb-48bf-8a0b-d9a2e02bd132/values create mode 100644 .be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/values create mode 100644 .be/bugs/f7ccd916-b5c7-4890-a2e3-8c8ace17ae3a/comments/f376debf-9f7e-4347-807f-00e7263487c7/body create mode 100644 .be/bugs/f7ccd916-b5c7-4890-a2e3-8c8ace17ae3a/comments/f376debf-9f7e-4347-807f-00e7263487c7/values (limited to '.be') diff --git a/.be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/07fc448f-c42e-4846-929a-8924de485766/body b/.be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/07fc448f-c42e-4846-929a-8924de485766/body new file mode 100644 index 0000000..0598d70 --- /dev/null +++ b/.be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/07fc448f-c42e-4846-929a-8924de485766/body @@ -0,0 +1,8 @@ +<type 'unicode'> ሴ +Traceback (most recent call last): + File "<string>", line 1, in <module> + File 
"/usr/lib/python2.5/xml/etree/ElementTree.py", line 963, in XML + parser.feed(text) + File "/usr/lib/python2.5/xml/etree/ElementTree.py", line 1245, in feed + self._parser.Parse(data, 0) +UnicodeEncodeError: 'ascii' codec can't encode character u'\u1234' in position 6: ordinal not in range(128) diff --git a/.be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/07fc448f-c42e-4846-929a-8924de485766/values b/.be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/07fc448f-c42e-4846-929a-8924de485766/values new file mode 100644 index 0000000..cd8d8b9 --- /dev/null +++ b/.be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/07fc448f-c42e-4846-929a-8924de485766/values @@ -0,0 +1,11 @@ +Content-type: text/plain + + +Date: Sun, 12 Jul 2009 11:34:22 +0000 + + +From: W. Trevor King + + +In-reply-to: faa686bf-c0eb-48bf-8a0b-d9a2e02bd132 + diff --git a/.be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/520a9829-8d90-43ce-be64-868b8321e5b0/body b/.be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/520a9829-8d90-43ce-be64-868b8321e5b0/body new file mode 100644 index 0000000..397d4b6 --- /dev/null +++ b/.be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/520a9829-8d90-43ce-be64-868b8321e5b0/body @@ -0,0 +1 @@ +It looks like etree wants a byte string, not unicode input diff --git a/.be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/520a9829-8d90-43ce-be64-868b8321e5b0/values b/.be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/520a9829-8d90-43ce-be64-868b8321e5b0/values new file mode 100644 index 0000000..8bdaf52 --- /dev/null +++ b/.be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/520a9829-8d90-43ce-be64-868b8321e5b0/values @@ -0,0 +1,11 @@ +Content-type: text/plain + + +Date: Sun, 12 Jul 2009 11:42:16 +0000 + + +From: W. 
Trevor King + + +In-reply-to: faa686bf-c0eb-48bf-8a0b-d9a2e02bd132 + diff --git a/.be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/8b54e56e-c693-4594-998f-5bd6c1f385d7/body b/.be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/8b54e56e-c693-4594-998f-5bd6c1f385d7/body new file mode 100644 index 0000000..ce2bb8d --- /dev/null +++ b/.be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/8b54e56e-c693-4594-998f-5bd6c1f385d7/body @@ -0,0 +1,5 @@ +For example, this works: + +python -c 'from xml.etree import ElementTree; a=u"\u1234"; print type(a), a; b=ElementTree.XML(a.encode("unicode_escape")); print type(b.text), unicode(b.text).decode("unicode_escape");' + +Ugly though :p. Ah well. diff --git a/.be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/8b54e56e-c693-4594-998f-5bd6c1f385d7/values b/.be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/8b54e56e-c693-4594-998f-5bd6c1f385d7/values new file mode 100644 index 0000000..1784e0e --- /dev/null +++ b/.be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/8b54e56e-c693-4594-998f-5bd6c1f385d7/values @@ -0,0 +1,11 @@ +Content-type: text/plain + + +Date: Sun, 12 Jul 2009 11:46:57 +0000 + + +From: W. 
Trevor King + + +In-reply-to: 520a9829-8d90-43ce-be64-868b8321e5b0 + diff --git a/.be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/bb124fd9-08f5-4f82-a035-6355e8403075/body b/.be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/bb124fd9-08f5-4f82-a035-6355e8403075/body new file mode 100644 index 0000000..89a8f8d --- /dev/null +++ b/.be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/bb124fd9-08f5-4f82-a035-6355e8403075/body @@ -0,0 +1 @@ +That's with Python 2.5.2 and ElementTree "2326 2005-03-17 07:45:21Z fredrik" diff --git a/.be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/bb124fd9-08f5-4f82-a035-6355e8403075/values b/.be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/bb124fd9-08f5-4f82-a035-6355e8403075/values new file mode 100644 index 0000000..cca07c3 --- /dev/null +++ b/.be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/bb124fd9-08f5-4f82-a035-6355e8403075/values @@ -0,0 +1,11 @@ +Content-type: text/plain + + +Date: Sun, 12 Jul 2009 11:37:55 +0000 + + +From: W. 
Trevor King + + +In-reply-to: 07fc448f-c42e-4846-929a-8924de485766 + diff --git a/.be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/faa686bf-c0eb-48bf-8a0b-d9a2e02bd132/body b/.be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/faa686bf-c0eb-48bf-8a0b-d9a2e02bd132/body new file mode 100644 index 0000000..57e050d --- /dev/null +++ b/.be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/faa686bf-c0eb-48bf-8a0b-d9a2e02bd132/body @@ -0,0 +1,5 @@ +Isolated problem to: + +python -c 'from xml.etree import ElementTree; a=u"\u1234"; print type(a), a; b=ElementTree.XML(a);' + +Output attached below diff --git a/.be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/faa686bf-c0eb-48bf-8a0b-d9a2e02bd132/values b/.be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/faa686bf-c0eb-48bf-8a0b-d9a2e02bd132/values new file mode 100644 index 0000000..e430ea0 --- /dev/null +++ b/.be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/comments/faa686bf-c0eb-48bf-8a0b-d9a2e02bd132/values @@ -0,0 +1,8 @@ +Content-type: text/plain + + +Date: Sun, 12 Jul 2009 11:31:13 +0000 + + +From: W. Trevor King + diff --git a/.be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/values b/.be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/values new file mode 100644 index 0000000..4bc81f5 --- /dev/null +++ b/.be/bugs/e4ed63f6-9000-4d0b-98c3-487269140141/values @@ -0,0 +1,17 @@ +creator: W. Trevor King + + +reporter: W. 
Trevor King + + +severity: minor + + +status: fixed + + +summary: utf8 problems in xml parsing + + +time: Sat, 11 Jul 2009 15:48:32 +0000 + diff --git a/.be/bugs/f7ccd916-b5c7-4890-a2e3-8c8ace17ae3a/comments/028d2e8d-5b0f-4c43-a913-35a1709b2276/values b/.be/bugs/f7ccd916-b5c7-4890-a2e3-8c8ace17ae3a/comments/028d2e8d-5b0f-4c43-a913-35a1709b2276/values index eb56317..d39c4a1 100644 --- a/.be/bugs/f7ccd916-b5c7-4890-a2e3-8c8ace17ae3a/comments/028d2e8d-5b0f-4c43-a913-35a1709b2276/values +++ b/.be/bugs/f7ccd916-b5c7-4890-a2e3-8c8ace17ae3a/comments/028d2e8d-5b0f-4c43-a913-35a1709b2276/values @@ -1,21 +1,8 @@ +Content-type: text/plain - -Content-type=text/plain - - - - - - -Date=Tue, 25 Nov 2008 19:41:02 +0000 - - - - - - -From=W. Trevor King +Date: Tue, 25 Nov 2008 19:41:02 +0000 +From: W. Trevor King diff --git a/.be/bugs/f7ccd916-b5c7-4890-a2e3-8c8ace17ae3a/comments/15602c0c-25e4-4c2c-9e24-79bdb90721b1/values b/.be/bugs/f7ccd916-b5c7-4890-a2e3-8c8ace17ae3a/comments/15602c0c-25e4-4c2c-9e24-79bdb90721b1/values index f976972..639fd4a 100644 --- a/.be/bugs/f7ccd916-b5c7-4890-a2e3-8c8ace17ae3a/comments/15602c0c-25e4-4c2c-9e24-79bdb90721b1/values +++ b/.be/bugs/f7ccd916-b5c7-4890-a2e3-8c8ace17ae3a/comments/15602c0c-25e4-4c2c-9e24-79bdb90721b1/values @@ -1,21 +1,8 @@ +Content-type: text/plain - -Content-type=text/plain - - - - - - -Date=Tue, 25 Nov 2008 02:36:16 +0000 - - - - - - -From=W. Trevor King +Date: Tue, 25 Nov 2008 02:36:16 +0000 +From: W. 
Trevor King diff --git a/.be/bugs/f7ccd916-b5c7-4890-a2e3-8c8ace17ae3a/comments/3f556a48-c538-4569-8609-3e829b561d78/values b/.be/bugs/f7ccd916-b5c7-4890-a2e3-8c8ace17ae3a/comments/3f556a48-c538-4569-8609-3e829b561d78/values index bf5085b..2821b2f 100644 --- a/.be/bugs/f7ccd916-b5c7-4890-a2e3-8c8ace17ae3a/comments/3f556a48-c538-4569-8609-3e829b561d78/values +++ b/.be/bugs/f7ccd916-b5c7-4890-a2e3-8c8ace17ae3a/comments/3f556a48-c538-4569-8609-3e829b561d78/values @@ -1,21 +1,8 @@ +Content-type: text/plain - -Content-type=text/plain - - - - - - -Date=Tue, 25 Nov 2008 03:02:59 +0000 - - - - - - -From=W. Trevor King +Date: Tue, 25 Nov 2008 03:02:59 +0000 +From: W. Trevor King diff --git a/.be/bugs/f7ccd916-b5c7-4890-a2e3-8c8ace17ae3a/comments/f376debf-9f7e-4347-807f-00e7263487c7/body b/.be/bugs/f7ccd916-b5c7-4890-a2e3-8c8ace17ae3a/comments/f376debf-9f7e-4347-807f-00e7263487c7/body new file mode 100644 index 0000000..b441da9 --- /dev/null +++ b/.be/bugs/f7ccd916-b5c7-4890-a2e3-8c8ace17ae3a/comments/f376debf-9f7e-4347-807f-00e7263487c7/body @@ -0,0 +1 @@ +Test unicode �quotes� diff --git a/.be/bugs/f7ccd916-b5c7-4890-a2e3-8c8ace17ae3a/comments/f376debf-9f7e-4347-807f-00e7263487c7/values b/.be/bugs/f7ccd916-b5c7-4890-a2e3-8c8ace17ae3a/comments/f376debf-9f7e-4347-807f-00e7263487c7/values new file mode 100644 index 0000000..a67680d --- /dev/null +++ b/.be/bugs/f7ccd916-b5c7-4890-a2e3-8c8ace17ae3a/comments/f376debf-9f7e-4347-807f-00e7263487c7/values @@ -0,0 +1,8 @@ +Content-type: text/plain + + +Date: Sat, 11 Jul 2009 18:28:57 +0000 + + +From: W. Trevor King + -- cgit