about | summary | refs | log | tree | commit | diff | stats
path: root/xml2static_rst.py
diff options
context:
space:
mode:
Diffstat (limited to 'xml2static_rst.py')
-rw-r--r-- xml2static_rst.py 44
1 files changed, 34 insertions, 10 deletions
diff --git a/xml2static_rst.py b/xml2static_rst.py
index be936a2..54989a8 100644
--- a/xml2static_rst.py
+++ b/xml2static_rst.py
@@ -1,5 +1,6 @@
#!/usr/bin/python3
+from datetime import datetime
import logging
import sys
from os.path import basename, exists, normpath, splitext
@@ -11,10 +12,10 @@ from urllib.parse import urlparse
from xml.etree import ElementTree as ET
ns = {'dflt': 'http://disqus.com',
- 'dsi': 'http://disqus.com/disqus-internals',
- 'dsq': 'http://www.w3.org/2001/XMLSchema-instance'}
+ 'dsq': 'http://disqus.com/disqus-internals',
+ 'xsi': 'http://www.w3.org/2001/XMLSchema-instance'}
logging.basicConfig(format='%(levelname)s:%(funcName)s:%(message)s',
- level=logging.INFO)
+ level=logging.DEBUG)
log = logging.getLogger()
def init(filename: str) -> ET.Element:
@@ -30,7 +31,7 @@ def init(filename: str) -> ET.Element:
def collect_threads(root: ET.Element) -> Dict[int, Dict[str, str|List]]:
threads: Dict[int, Dict[str, str|List]] = {}
for thread in root.findall("dflt:thread", ns):
- thread_id = int(thread.get(f"{{{ns['dsi']}}}id", 0))
+ thread_id = int(thread.get(f"{{{ns['dsq']}}}id", 0))
thread_link = thread.findtext("dflt:link", '', ns).strip()
if thread_id not in threads:
threads[thread_id] = {}
@@ -49,11 +50,26 @@ def collect_posts(root: ET.Element, threads: dict) -> dict:
is_del = com_post.findtext('dflt:isDeleted', '', ns) == 'true'
if not (is_spam or is_del):
print('.', end='', file=sys.stderr, flush=True)
+ comment_id = int(com_post.get(f"{{{ns['dsq']}}}id", 0))
thread = com_post.find('dflt:thread', ns)
- thread_id = int(thread.get(f"{{{ns['dsi']}}}id", 0))
+ thread_id = int(thread.get(f"{{{ns['dsq']}}}id", 0))
blurb = threads[thread_id]['blurb']
if blurb not in out:
- out[blurb] = ""
+ out[blurb] = {}
+ out[blurb]['msg'] = ''
+
+ parent = com_post.find('dflt:parent', ns)
+ if parent is not None:
+ parent_id = int(parent.get(f"{{{ns['dsq']}}}id", 0))
+ else:
+ parent_id = 0
+
+ created_str = com_post.findtext('dflt:createdAt', '', ns).strip()
+ if created_str:
+ created_at = datetime.strptime(created_str, "%Y-%m-%dT%H:%M:%SZ")
+ else:
+ created_at = datetime.fromtimestamp(0)
+
html_msg = com_post.find('dflt:message', ns)
if html_msg is not None:
html_msg = html_msg.text
@@ -61,10 +77,18 @@ def collect_posts(root: ET.Element, threads: dict) -> dict:
stderr=DEVNULL, stdout=PIPE, stdin=PIPE,
encoding='utf-8')
html_msg = pd_proc.communicate(html_msg)[0]
- out[blurb] += html_msg + '\n'
+ if created_at:
+ html_msg = f":date: {created_at:%d.%m.%Y}\n\n{html_msg}"
+ if parent_id:
+ html_msg = f":parentID#: {parent_id}\n{html_msg}"
+ if comment_id:
+ html_msg = f":commentID#: {comment_id}\n{html_msg}"
+ out[blurb]['msg'] += html_msg + '\n'
+
+
print('', file=sys.stderr)
return out
-
+
# if not os.path.exists('comments/'):
# os.mkdir('comments')
@@ -73,6 +97,6 @@ if __name__=='__main__':
threads = collect_threads(root)
posts = collect_posts(root, threads)
for post in posts:
- if len(posts[post].strip()) > 0:
- print(post + '\n' + posts[post])
+ print('\n' + post)
+ pprint(posts[post])