about | summary | refs | log | tree | commit | diff | stats
path: root/gg_scraper.py
diff options
context:
space:
mode:
author Matěj Cepl <mcepl@redhat.com> 2014-01-09 00:44:42 +0100
committer Matěj Cepl <mcepl@cepl.eu> 2016-04-16 12:15:16 +0200
commit a0efefaaa17651ba01633edae80d8b2e3ebeed85 (patch)
tree 6b3b41ae85dbcf0ae28f6c81c61a613d8d4510ee /gg_scraper.py
parent 4dfe96db384a98c654f31063736a2e0c84a9ea69 (diff)
download gg_scraper-a0efefaaa17651ba01633edae80d8b2e3ebeed85.tar.gz
More debugging
Diffstat (limited to 'gg_scraper.py')
-rwxr-xr-x gg_scraper.py 4
1 file changed, 3 insertions, 1 deletion
diff --git a/gg_scraper.py b/gg_scraper.py
index 4c7fef7..50c437a 100755
--- a/gg_scraper.py
+++ b/gg_scraper.py
@@ -145,7 +145,8 @@ class Topic(Page):
def __init__(self, URL, name):
super(Topic, self).__init__()
self.name = name
- self.root = self.unenscape_Google_bang_URL(URL)
+ root_URL = self.unenscape_Google_bang_URL(URL)
+ self.root = root_URL
self.articles = []
def __unicode__(self):
@@ -213,6 +214,7 @@ class Group(Page):
def get_one_topic(elem):
sys.stdout.write('. ')
sys.stdout.flush()
+ #logging.debug('URL collected = {0}'.format(elem['href']))
if 'title' in elem.attrs:
# filter out all-non-topic <a>s
return True, Topic(elem['href'], elem['title'])