diff options
-rwxr-xr-x | gg_scraper.py | 32 |
1 files changed, 17 insertions, 15 deletions
diff --git a/gg_scraper.py b/gg_scraper.py index 0628dd3..556fbb9 100755 --- a/gg_scraper.py +++ b/gg_scraper.py @@ -199,21 +199,23 @@ class Group(Page): of the topic page. ''' out = [] - other = [] - BS = self._get_page_BS(self.group_URL) - for a_elem in BS.find_all('a'): - is_topic, res = self.get_one_topic(a_elem) - if is_topic: - out.append(res) - else: - other.append(res) - - if len(other) == 1: - new_bs = Group(other[0]['href']) - out.extend(new_bs.get_topics()) - elif len(other) != 0: - raise ValueError( - 'There must be either one or none link to the next page!') + target_stack = [self.group_URL] + + while target_stack: + other = [] + BS = self._get_page_BS(target_stack.pop(0)) + for a_elem in BS.find_all('a'): + is_topic, res = self.get_one_topic(a_elem) + if is_topic: + out.append(res) + else: + other.append(res) + + if len(other) == 1: + target_stack.append(other[0]['href']) + elif len(other) != 0: + raise ValueError( + 'There must be either one or none link to the next page!') sys.stdout.write('\n') sys.stdout.flush() |