From 691a5d554757b6ccb45baa70691f0a730c5f5a0c Mon Sep 17 00:00:00 2001 From: Izidor Matušov Date: Sat, 8 Feb 2014 00:55:54 +0000 Subject: Ignore links in welcome message Some groups, e.g. django-oscar [1], have links in welcome message. Those are not supposed to be a link to the next page, ignore them. 1: https://groups.google.com/forum/#!forum/django-oscar --- gg_scraper.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gg_scraper.py b/gg_scraper.py index cb6bc4e..640d304 100755 --- a/gg_scraper.py +++ b/gg_scraper.py @@ -220,9 +220,11 @@ class Group(Page): BS = self._get_page_BS(target_stack.pop(0)) for a_elem in BS.find_all('a'): is_topic, res = self.get_one_topic(a_elem) + # Ignore link in welcome message, e.g. django-oscar group + is_welcomemsg = a_elem.get('target') == 'welcomeMsg' if is_topic: out.append(res) - else: + elif not is_welcomemsg: other.append(res) if len(other) == 1: -- cgit