diff options
author | Matěj Cepl <mcepl@redhat.com> | 2013-12-30 01:27:56 +0100 |
---|---|---|
committer | Matěj Cepl <mcepl@redhat.com> | 2013-12-30 01:30:36 +0100 |
commit | 163aa69fd2b435b2ef180a2fe91c8112e12e15c3 (patch) | |
tree | 596d82e7f985f0f0a77d188397e462bd92ff4643 /gg_scrapper.py | |
parent | 6ce68fd69aa0403766bac31c85be6bb4a3a286cc (diff) | |
download | gg_scraper-163aa69fd2b435b2ef180a2fe91c8112e12e15c3.tar.gz |
General structure of operation and MBOX writing.
So far, only unit test for the latter.
Diffstat (limited to 'gg_scrapper.py')
-rwxr-xr-x | gg_scrapper.py | 30 |
1 files changed, 30 insertions, 0 deletions
```diff
diff --git a/gg_scrapper.py b/gg_scrapper.py
index 13da91d..2ea9f92 100755
--- a/gg_scrapper.py
+++ b/gg_scrapper.py
@@ -1,5 +1,6 @@
 #!/usr/bin/python3
 
+import mailbox
 import re
 import subprocess
 import urllib.request
@@ -169,3 +170,32 @@ class Group(Page):
                     'There must be either one or none link to the next page!')
 
         return out
+
+    def collect_group(self):
+        topics = self.get_topics()
+        for top in topics:
+            arts = top.get_articles()
+            top.articles = arts
+            for a in arts:
+                msg = a.collect_message()
+                a.raw_message = msg
+
+
+class MBOX(mailbox.mbox):
+    def __init__(self, filename):
+        super(MBOX, self).__init__()
+        self.box_name = filename
+
+    def write_group(self, group_object):
+        pass
+
+
+def main(group_name, group_URL):
+    # Collect all messages to the internal variables
+    grp = Group(group_URL)
+    grp.collect_group()
+
+    # Write MBOX
+    mbx = MBOX()
+    mbx.format_mbox(grp)
+    mbx.save("{}.mbx".format(group_name))
```