From 532b2f96e27201df320988ab2a61dadd230843a7 Mon Sep 17 00:00:00 2001
From: Matěj Cepl
Date: Wed, 8 Jan 2014 00:54:57 +0100
Subject: Rewrite Group.get_topics to be iterative rather than recursive.

Fixes #284
---
 gg_scraper.py | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/gg_scraper.py b/gg_scraper.py
index 0628dd3..556fbb9 100755
--- a/gg_scraper.py
+++ b/gg_scraper.py
@@ -199,21 +199,23 @@ class Group(Page):
         of the topic page.
         '''
         out = []
-        other = []
-        BS = self._get_page_BS(self.group_URL)
-        for a_elem in BS.find_all('a'):
-            is_topic, res = self.get_one_topic(a_elem)
-            if is_topic:
-                out.append(res)
-            else:
-                other.append(res)
-
-        if len(other) == 1:
-            new_bs = Group(other[0]['href'])
-            out.extend(new_bs.get_topics())
-        elif len(other) != 0:
-            raise ValueError(
-                'There must be either one or none link to the next page!')
+        target_stack = [self.group_URL]
+
+        while target_stack:
+            other = []
+            BS = self._get_page_BS(target_stack.pop(0))
+            for a_elem in BS.find_all('a'):
+                is_topic, res = self.get_one_topic(a_elem)
+                if is_topic:
+                    out.append(res)
+                else:
+                    other.append(res)
+
+            if len(other) == 1:
+                target_stack.append(other[0]['href'])
+            elif len(other) != 0:
+                raise ValueError(
+                    'There must be either one or none link to the next page!')
 
         sys.stdout.write('\n')
         sys.stdout.flush()
-- 
cgit