diff options
author | Matěj Cepl <mcepl@redhat.com> | 2013-12-27 03:42:30 +0100 |
---|---|---|
committer | Matěj Cepl <mcepl@redhat.com> | 2013-12-28 00:04:17 +0100 |
commit | 9dcfa6e10d512cb767dac9c410c96072f7cbd166 (patch) | |
tree | 38b59a4d256a23f7aef31122b45283b420cb21cd /bs_test.py | |
parent | b33d37e962697141043e3c6d71417a525808d405 (diff) | |
download | gg_scraper-9dcfa6e10d512cb767dac9c410c96072f7cbd166.tar.gz |
Collecting topics.
Also added some testing pages.
Diffstat (limited to 'bs_test.py')
-rw-r--r-- | bs_test.py | 14 |
1 file changed, 14 insertions, 0 deletions
```diff
diff --git a/bs_test.py b/bs_test.py
new file mode 100644
index 0000000..5b6a840
--- /dev/null
+++ b/bs_test.py
@@ -0,0 +1,14 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+from bs4 import BeautifulSoup
+import sys
+import re
+
+TOPIC_COUNT_RE = re.compile(r'\D+ \d+ - \d+ \D+ (\d+) \D+$')
+
+bs = BeautifulSoup(open(sys.argv[1]))
+i_str = bs.find_all('i')[0].string
+
+print("i = %s" % i_str)
+count = int(TOPIC_COUNT_RE.match(i_str).group(1))
+print("match i = %d" % count)
```