diff options
author | Matěj Cepl <mcepl@redhat.com> | 2014-01-09 00:44:42 +0100 |
---|---|---|
committer | Matěj Cepl <mcepl@cepl.eu> | 2016-04-16 12:15:16 +0200 |
commit | a0efefaaa17651ba01633edae80d8b2e3ebeed85 (patch) | |
tree | 6b3b41ae85dbcf0ae28f6c81c61a613d8d4510ee /gg_scraper.py | |
parent | 4dfe96db384a98c654f31063736a2e0c84a9ea69 (diff) | |
download | gg_scraper-a0efefaaa17651ba01633edae80d8b2e3ebeed85.tar.gz |
More debugging
Diffstat (limited to 'gg_scraper.py')
-rwxr-xr-x | gg_scraper.py | 4 |
1 file changed, 3 insertions, 1 deletion
diff --git a/gg_scraper.py b/gg_scraper.py
index 4c7fef7..50c437a 100755
--- a/gg_scraper.py
+++ b/gg_scraper.py
@@ -145,7 +145,8 @@ class Topic(Page):
     def __init__(self, URL, name):
         super(Topic, self).__init__()
         self.name = name
-        self.root = self.unenscape_Google_bang_URL(URL)
+        root_URL = self.unenscape_Google_bang_URL(URL)
+        self.root = root_URL
         self.articles = []
 
     def __unicode__(self):
@@ -213,6 +214,7 @@ class Group(Page):
     def get_one_topic(elem):
         sys.stdout.write('. ')
         sys.stdout.flush()
+        #logging.debug('URL collected = {0}'.format(elem['href']))
         if 'title' in elem.attrs:
             # filter out all-non-topic <a>s
             return True, Topic(elem['href'], elem['title'])