about | summary | refs | log | tree | commit | diff | stats
path: root/bs_test.py
diff options
context:
space:
mode:
author Matěj Cepl <mcepl@redhat.com> 2013-12-27 03:42:30 +0100
committer Matěj Cepl <mcepl@redhat.com> 2013-12-28 00:04:17 +0100
commit 9dcfa6e10d512cb767dac9c410c96072f7cbd166 (patch)
tree 38b59a4d256a23f7aef31122b45283b420cb21cd /bs_test.py
parent b33d37e962697141043e3c6d71417a525808d405 (diff)
download gg_scraper-9dcfa6e10d512cb767dac9c410c96072f7cbd166.tar.gz
Collecting topics.
Added also some testing pages.
Diffstat (limited to 'bs_test.py')
-rw-r--r-- bs_test.py 14
1 files changed, 14 insertions, 0 deletions
diff --git a/bs_test.py b/bs_test.py
new file mode 100644
index 0000000..5b6a840
--- /dev/null
+++ b/bs_test.py
@@ -0,0 +1,14 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+from bs4 import BeautifulSoup
+import sys
+import re
+
+# Matches "<text> N - M <text> COUNT <text>" and captures COUNT.
+TOPIC_COUNT_RE = re.compile(r'\D+ \d+ - \d+ \D+ (\d+) \D+$')
+with open(sys.argv[1]) as page:  # closes the file once it is parsed
+    bs = BeautifulSoup(page)
+i_str = bs.find_all('i')[0].string  # taken from the first <i> tag
+print("i = %s" % i_str)
+count = int(TOPIC_COUNT_RE.match(i_str).group(1))  # COUNT as an integer
+print("match i = %d" % count)