path: root/xml2static_rst.py


                  
#!/usr/bin/python3

from os.path import basename, exists, normpath, splitext
from pprint import pprint
from urllib.parse import urlparse
from xml.etree import ElementTree as ET

# Namespace prefixes used by Disqus XML exports.  Elements live in the
# default ("dflt") namespace; the id attribute on <thread>/<post> lives in
# the "dsq" (disqus-internals) namespace; "xsi" is the standard
# XMLSchema-instance namespace.
ns = {'dflt': 'http://disqus.com',
      'dsq': 'http://disqus.com/disqus-internals',
      'xsi': 'http://www.w3.org/2001/XMLSchema-instance'}

# The dsq:id attribute spelled in ElementTree's "{uri}name" form.
_ID_ATTR = f"{{{ns['dsq']}}}id"


def _blurb_from_link(link):
    """Return the last path component of the URL *link*, minus its extension."""
    return splitext(basename(normpath(urlparse(link).path)))[0].strip()


def _is_flagged(post, tag):
    """Return True when the child element *tag* of *post* has the text 'true'.

    A missing or empty flag element counts as not flagged.
    """
    return post.findtext(f'dflt:{tag}', default='', namespaces=ns) == 'true'


def _thread_blurbs(root):
    """Map the dsq:id of every <thread> directly under *root* to its blurb.

    A thread whose <link> is missing or empty is treated as having an empty
    link instead of raising.

    Raises:
        KeyError: if a thread has no dsq:id attribute.
    """
    blurbs = {}
    for thread in root.findall('dflt:thread', ns):
        link = thread.findtext('dflt:link', default='', namespaces=ns).strip()
        blurbs[thread.attrib[_ID_ATTR]] = _blurb_from_link(link)
    return blurbs


def collect_comments(root):
    """Group post messages under *root* by the blurb of their thread.

    Posts flagged as spam or deleted are skipped.  A blurb is registered as
    soon as one of its posts is kept, even when that post has no message
    text, so the returned lists may be empty.

    Args:
        root: the root element of a parsed Disqus export.

    Returns:
        A dict mapping blurb -> list of message strings, in document order.

    Raises:
        KeyError: if a post refers to a thread id that is not declared.
        AttributeError: if a kept post has no <thread> child.
    """
    blurbs = _thread_blurbs(root)
    grouped = {}
    for post in root.findall('dflt:post', ns):
        if _is_flagged(post, 'isSpam') or _is_flagged(post, 'isDeleted'):
            continue
        thread_id = post.find('dflt:thread', ns).attrib[_ID_ATTR]
        messages = grouped.setdefault(blurbs[thread_id], [])
        message = post.findtext('dflt:message', namespaces=ns)
        # findtext gives None for a missing element and '' for an empty one;
        # neither contributes a line.
        if message:
            messages.append(message)
    return grouped


def format_comments(grouped):
    """Yield one text block per blurb whose messages are not all blank.

    Each block is the blurb on its own line followed by every message, each
    terminated by a newline.
    """
    for blurb, messages in grouped.items():
        body = ''.join(message + '\n' for message in messages)
        if body.strip():
            yield blurb + '\n' + body


def main(xml_path="comments.xml"):
    """Parse the Disqus export at *xml_path* and print its comments by blurb."""
    root = ET.parse(xml_path).getroot()
    for block in format_comments(collect_comments(root)):
        print(block)


if __name__ == "__main__":
    import sys

    # Optional first argument overrides the default "comments.xml".
    main(*sys.argv[1:2])
    
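# NOTE(review): unfinished sketch -- presumably the output was meant to be
# written under comments/ rather than printed; confirm before enabling.  As
# written it would also need `import os` (only names from os.path are
# imported at the top of this file).
# if not os.path.exists('comments/'):
#     os.mkdir('comments')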