aboutsummaryrefslogtreecommitdiffstats
path: root/gg_scrapper.py
diff options
context:
space:
mode:
author Matěj Cepl <mcepl@redhat.com> 2013-12-30 01:27:56 +0100
committer Matěj Cepl <mcepl@redhat.com> 2013-12-30 01:30:36 +0100
commit 163aa69fd2b435b2ef180a2fe91c8112e12e15c3 (patch)
tree 596d82e7f985f0f0a77d188397e462bd92ff4643 /gg_scrapper.py
parent 6ce68fd69aa0403766bac31c85be6bb4a3a286cc (diff)
download gg_scraper-163aa69fd2b435b2ef180a2fe91c8112e12e15c3.tar.gz
General structure of operation and MBOX writing.
So far, only unit test for the latter.
Diffstat (limited to 'gg_scrapper.py')
-rwxr-xr-x gg_scrapper.py 30
1 files changed, 30 insertions, 0 deletions
diff --git a/gg_scrapper.py b/gg_scrapper.py
index 13da91d..2ea9f92 100755
--- a/gg_scrapper.py
+++ b/gg_scrapper.py
@@ -1,5 +1,6 @@
#!/usr/bin/python3
+import mailbox
import re
import subprocess
import urllib.request
@@ -169,3 +170,32 @@ class Group(Page):
'There must be either one or none link to the next page!')
return out
+
def collect_group(self):
    """Collect every topic, article and raw message of this group.

    Calls ``self.get_topics()``, then ``get_articles()`` on each topic
    and ``collect_message()`` on each article. The results are stored
    on the objects themselves:

    * ``self.topics`` -- whatever ``get_topics()`` returned,
    * ``topic.articles`` -- whatever ``get_articles()`` returned,
    * ``article.raw_message`` -- whatever ``collect_message()`` returned.

    Returns:
        None. The collected data is reachable through ``self.topics``.
    """
    # Keep the topics on the instance; a local variable would be dropped
    # on return, leaving callers that receive this group with nothing to
    # walk when they want to write the messages out.
    self.topics = self.get_topics()
    for topic in self.topics:
        topic.articles = topic.get_articles()
        for article in topic.articles:
            article.raw_message = article.collect_message()
+
+
class MBOX(mailbox.mbox):
    """An ``mbox`` mailbox that remembers the file name it was opened with."""

    def __init__(self, filename):
        """Open (creating it if necessary) the mbox file at *filename*.

        Args:
            filename: Path of the mbox file; also kept as ``self.box_name``.
        """
        # mailbox.mbox requires the path as its first argument; calling the
        # parent initializer without it raises TypeError.
        super().__init__(filename)
        self.box_name = filename

    def write_group(self, group_object):
        """Placeholder for writing the messages of *group_object*.

        Not implemented yet: the call is accepted, does nothing and
        returns None.
        """
        # TODO: add the collected messages of group_object to this mailbox.
+
+
def main(group_name, group_URL):
    """Collect a whole group and write it to ``<group_name>.mbx``.

    Args:
        group_name: Base name of the output file; ``.mbx`` is appended.
        group_URL: Passed unchanged to ``Group`` to identify the group.
    """
    # Collect all messages to the internal variables
    grp = Group(group_URL)
    grp.collect_group()

    # Write MBOX. The constructor needs the target file name, and the
    # writer method MBOX defines is write_group(); neither MBOX nor
    # mailbox.mbox has format_mbox() or save().
    mbx = MBOX("{}.mbx".format(group_name))
    try:
        mbx.write_group(grp)
    finally:
        # close() flushes pending changes and releases the file.
        mbx.close()