about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Matěj Cepl <mcepl@redhat.com> 2014-01-08 00:54:57 +0100
committer: Matěj Cepl <mcepl@redhat.com> 2014-01-11 10:50:35 +0100
commit: 532b2f96e27201df320988ab2a61dadd230843a7 (patch)
tree: 46543b200036f09778f629d8c3b2bb8792f4a975
parent: 0009e30e4ad61f40ca56c58ecbdfbdc73809beee (diff)
download: gg_scraper-532b2f96e27201df320988ab2a61dadd230843a7.tar.gz
Rewrite Group.get_topics to be iterative rather than recursive.
Fixes #284
-rwxr-xr-x gg_scraper.py 32
1 files changed, 17 insertions, 15 deletions
diff --git a/gg_scraper.py b/gg_scraper.py
index 0628dd3..556fbb9 100755
--- a/gg_scraper.py
+++ b/gg_scraper.py
@@ -199,21 +199,23 @@ class Group(Page):
of the topic page.
'''
out = []
- other = []
- BS = self._get_page_BS(self.group_URL)
- for a_elem in BS.find_all('a'):
- is_topic, res = self.get_one_topic(a_elem)
- if is_topic:
- out.append(res)
- else:
- other.append(res)
-
- if len(other) == 1:
- new_bs = Group(other[0]['href'])
- out.extend(new_bs.get_topics())
- elif len(other) != 0:
- raise ValueError(
- 'There must be either one or none link to the next page!')
+ target_stack = [self.group_URL]
+
+ while target_stack:
+ other = []
+ BS = self._get_page_BS(target_stack.pop(0))
+ for a_elem in BS.find_all('a'):
+ is_topic, res = self.get_one_topic(a_elem)
+ if is_topic:
+ out.append(res)
+ else:
+ other.append(res)
+
+ if len(other) == 1:
+ target_stack.append(other[0]['href'])
+ elif len(other) != 0:
+ raise ValueError(
+ 'There must be either one or none link to the next page!')
sys.stdout.write('\n')
sys.stdout.flush()