aboutsummaryrefslogtreecommitdiffstats
path: root/xml2static_rst.py
blob: f5c0d68e42ce31d4234b6189bcce96a676eb3c67 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#!/usr/bin/python3

from os.path import basename, exists, normpath, splitext
from pprint import pprint
from urllib.parse import urlparse
from xml.etree import ElementTree as ET

# XML namespaces of the comment export.  Element tags live in the default
# (``dflt``) namespace; the ``id`` attributes read below live in the
# "disqus-internals" namespace, registered here under the ``dsq`` prefix.
ns = {'dflt': 'http://disqus.com',
      'dsq': 'http://disqus.com/disqus-internals',
      'xsi': 'http://www.w3.org/2001/XMLSchema-instance'}

# Fully qualified (Clark notation) name of the namespaced ``id`` attribute,
# built once instead of on every loop iteration.
_ID_ATTR = f"{{{ns['dsq']}}}id"


def _child_text(parent, tag):
    """Return the text of *parent*'s first ``dflt:<tag>`` child, or None.

    None is returned both when the child element is absent and when it is
    present but empty, so callers never have to dereference a missing node.
    """
    child = parent.find(f"dflt:{tag}", ns)
    return None if child is None else child.text


def load_threads(root):
    """Map each thread id under *root* to its link and blurb.

    The blurb is the last path component of the thread's link with its file
    extension removed (``.../blog/first-post.html`` -> ``first-post``).

    A thread without an id attribute is skipped.  An empty or missing
    ``<link>`` is treated as an empty URL instead of raising.  If an id
    occurs more than once, the last occurrence wins.

    Returns:
        dict mapping thread id -> ``{'link': str, 'blurb': str}``.
    """
    threads = {}
    for thread in root.findall("dflt:thread", ns):
        thread_id = thread.attrib.get(_ID_ATTR)
        if thread_id is None:
            continue
        link = (_child_text(thread, "link") or "").strip()
        page = basename(normpath(urlparse(link).path))
        threads[thread_id] = {'link': link,
                              'blurb': splitext(page)[0].strip()}
    return threads


def collect_messages(root, threads):
    """Group the non-spam post messages under *root* by thread blurb.

    Only posts whose ``<isSpam>`` text is exactly ``'false'`` are used; a
    post without that element is ignored.  Posts whose ``<thread>``
    reference is missing or does not match a key of *threads* are skipped,
    since there is no blurb to file them under.

    Args:
        root: root element of the parsed export.
        threads: mapping as returned by :func:`load_threads`.

    Returns:
        dict mapping blurb -> all messages of that blurb, each followed by a
        newline, in document order.  A blurb whose posts all have empty
        messages maps to ``''``.
    """
    grouped = {}
    for post in root.findall("dflt:post", ns):
        if _child_text(post, "isSpam") != 'false':
            continue
        ref = post.find("dflt:thread", ns)
        info = None if ref is None else threads.get(ref.attrib.get(_ID_ATTR))
        if info is None:
            continue
        # Register the blurb even when the message is empty, so the result
        # has one key per blurb that received at least one non-spam post.
        parts = grouped.setdefault(info['blurb'], [])
        message = _child_text(post, "message")
        if message is not None:
            parts.append(message + '\n')
    # Join once per blurb rather than growing a string with ``+=``.
    return {blurb: "".join(parts) for blurb, parts in grouped.items()}


def main(xml_path="comments.xml"):
    """Parse *xml_path* and print each blurb followed by its messages.

    Blurbs whose collected text is empty or whitespace-only are not printed.
    """
    root = ET.parse(xml_path).getroot()
    out = collect_messages(root, load_threads(root))
    for blurb, text in out.items():
        if text.strip():
            print(blurb + '\n' + text)


if __name__ == "__main__":
    main()
    
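# NOTE(review): disabled leftover snippet.  If it is ever re-enabled, `os`
# must be imported first -- this file only imports names from `os.path`.
# if not os.path.exists('comments/'):
#     os.mkdir('comments')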