aboutsummaryrefslogtreecommitdiffstats
path: root/gg_scraper.py
diff options
context:
space:
mode:
author Matěj Cepl <mcepl@cepl.eu> 2014-12-12 22:32:47 +0100
committer Matěj Cepl <mcepl@cepl.eu> 2014-12-12 22:43:36 +0100
commit 2416450498a418bd6daf9f8bac255a47064079e6 (patch)
tree 7fc51a851a60c8613e5625c7df1dfcfdc49228e4 /gg_scraper.py
parent 9f4254aad67783926ae8dc74a0b0ad2d528dd0f1 (diff)
download gg_scraper-2416450498a418bd6daf9f8bac255a47064079e6.tar.gz
Make python 2.7 default again and clean up. 0.9.0
Switch setup.py to use setuptools. Fixes #1, fixes #2
Diffstat (limited to 'gg_scraper.py')
-rwxr-xr-x gg_scraper.py 25
1 files changed, 15 insertions, 10 deletions
diff --git a/gg_scraper.py b/gg_scraper.py
index b1c3260..89939bf 100755
--- a/gg_scraper.py
+++ b/gg_scraper.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python3
+#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Download a Google Group to MBOX
@@ -54,13 +54,17 @@ MANGLED_ADDR_RE = re.compile(
r'([a-zA-Z0-9_.+-]+(\.)+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)',
re.IGNORECASE)
-__version__ = '0.8'
+__version__ = '0.9.0'
pyver = sys.version_info
py26 = pyver[:2] < (2, 7)
py3k = pyver[0] == 3
+class BadURLError(ValueError):
+ pass
+
+
class Page(object):
verb_handler = HTTPHandler()
if logging.getLogger().getEffectiveLevel() == logging.DEBUG:
@@ -172,14 +176,18 @@ class Topic(Page):
class Group(Page):
+ GOOD_URL_RE = re.compile(r'https://groups.google.com/forum/#!forum/(.+)')
+
def __init__(self, URL):
super(Group, self).__init__()
self.group_URL = URL
self.topics = []
- match = re.match(r'https://groups.google.com/forum/#!forum/(.+)',
- URL)
- if match is not None:
- self.name = match.group(1)
+ match = self.GOOD_URL_RE.match(URL)
+ logging.debug('match = %s', match)
+ if match is None:
+ raise BadURLError("Required URL in form 'https://groups.google.com/forum/#!forum/GROUPNAME'")
+
+ self.name = match.group(1)
@staticmethod
def get_count_topics(BS):
@@ -305,10 +313,7 @@ class MBOX(mailbox.mbox):
self.lock()
for mbx_str in group_object.all_messages():
try:
- if not py26:
- self.add(mbx_str.encode())
- else:
- self.add(mbx_str.encode('utf8'))
+ self.add(mbx_str.encode('utf8'))
except UnicodeDecodeError:
logging.warning('mbx_str = type {0}'.format(type(mbx_str)))
self.unlock()