about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Matěj Cepl <mcepl@cepl.eu> 2024-01-08 17:31:09 +0100
committer: Matěj Cepl <mcepl@cepl.eu> 2024-01-08 17:31:09 +0100
commit: da2e12c20265160a3a684ae92f2f0817f73e3a34 (patch)
tree: 2e0965f33c9bcff04b199a98f6c865587439d616
parent: f4fdcb4adb3e346c42f3be1dcb60ce0a151b1b71 (diff)
download: disqus_xml2static_rst-da2e12c20265160a3a684ae92f2f0817f73e3a34.tar.gz
More steps towards parsing the file.
-rw-r--r-- comments.xml 2
-rw-r--r-- xml2static_rst.py 30
2 files changed, 28 insertions, 4 deletions
diff --git a/comments.xml b/comments.xml
index cd257b1..ffe98a9 100644
--- a/comments.xml
+++ b/comments.xml
@@ -8680,7 +8680,7 @@ xsi:schemaLocation="http://disqus.com/api/schemas/1.0/disqus.xsd http://disqus.c
</message>
<createdAt>2021-11-18T16:16:33Z</createdAt>
<isDeleted>false</isDeleted>
- <isSpam>false</isSpam>
+ <isSpam>true</isSpam>
<author>
<name>morgan</name>
<isAnonymous>false</isAnonymous>
diff --git a/xml2static_rst.py b/xml2static_rst.py
index 9bbe0d6..f5c0d68 100644
--- a/xml2static_rst.py
+++ b/xml2static_rst.py
@@ -1,6 +1,8 @@
#!/usr/bin/python3
-import os.path
+from os.path import basename, exists, normpath, splitext
+from pprint import pprint
+from urllib.parse import urlparse
from xml.etree import ElementTree as ET
ns = {'dflt': 'http://disqus.com',
@@ -9,9 +11,31 @@ ns = {'dflt': 'http://disqus.com',
tree = ET.parse("comments.xml")
root = tree.getroot()
+threads = {}
+for thread in root.findall("dflt:thread", ns):
+ thread_id = thread.attrib[f"{{{ns['xsi']}}}id"]
+ thread_link = thread.find("dflt:link", ns).text.strip()
+ if thread_id not in threads:
+ threads[thread_id] = {}
+ threads[thread_id]['link'] = thread_link
+ threads[thread_id]['blurb'] = splitext(basename(normpath(urlparse(thread_link).path)))[0].strip()
+ threads[thread_id]['msgs'] = []
+
+out = {}
for com_post in root.findall("dflt:post", ns):
if com_post.find('dflt:isSpam', ns).text == 'false':
- print(ET.tostring(com_post, encoding='unicode', default_namespace=ns['dflt']))
-
+ thread = com_post.find('dflt:thread', ns)
+ thread_id = thread.attrib[f"{{{ns['xsi']}}}id"]
+ blurb = threads[thread_id]['blurb']
+ if blurb not in out:
+ out[blurb] = ""
+ msg = com_post.find('dflt:message', ns).text
+ if msg is not None:
+ out[blurb] += msg + '\n'
+
+for post in out:
+ if len(out[post].strip()) > 0:
+ print(post + '\n' + out[post])
+
# if not os.path.exists('comments/'):
# os.mkdir('comments')