about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- .travis.yml 10
-rwxr-xr-x gg_scrapper.py 50
-rw-r--r-- setup.py 5
-rw-r--r-- test/test_functional.py 3
-rw-r--r-- test/test_unit.py 3
5 files changed, 49 insertions, 22 deletions
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..57f70f9
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,10 @@
+language: python
+python:
+ - "2.6"
+ - "2.7"
+ - "pypy"
+ - "3.2"
+ - "3.3"
+before_script:
+ - "pip install --use-mirrors ."
+script: PYTHONPATH=$PYTHONPATH:. python test/test_html2text.py -v
diff --git a/gg_scrapper.py b/gg_scrapper.py
index c11d69f..40a255e 100755
--- a/gg_scrapper.py
+++ b/gg_scrapper.py
@@ -1,4 +1,5 @@
#!/usr/bin/python3
+# -*- coding: utf-8 -*-
"""
Download a Google Group to MBOX
Copyright (C) 2014 Matěj Cepl
@@ -16,18 +17,26 @@ General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program. If not, see <http://www.gnu.org/licenses/>.'
"""
+from __future__ import absolute_import, print_function, unicode_literals
import argparse
-from configparser import ConfigParser
+try:
+ from configparser import ConfigParser
+except ImportError:
+ from ConfigParser import ConfigParser
import mailbox
import os.path
import re
import shutil
import subprocess
import sys
-import urllib.error
-import urllib.parse
-import urllib.request
+try:
+ from urllib.error import HTTPError
+ from urllib.request import HTTPHandler, HTTPRedirectHandler, \
+ build_opener
+except ImportError:
+ from urllib2 import (HTTPError, HTTPHandler, HTTPRedirectHandler,
+ build_opener)
#from concurrent.futures import ProcessPoolExecutor
from bs4 import BeautifulSoup
import logging
@@ -43,11 +52,11 @@ __version__ = '0.3'
class Page(object):
- verb_handler = urllib.request.HTTPHandler()
+ verb_handler = HTTPHandler()
if logging.getLogger().getEffectiveLevel() == logging.DEBUG:
verb_handler.set_http_debuglevel(2)
- redir_handler = urllib.request.HTTPRedirectHandler()
- opener = urllib.request.build_opener(verb_handler, redir_handler)
+ redir_handler = HTTPRedirectHandler()
+ opener = build_opener(verb_handler, redir_handler)
def __init__(self):
pass
@@ -72,7 +81,7 @@ class Page(object):
new_URL = res.geturl()
return cls.unenscape_Google_bang_URL(new_URL)
else:
- raise urllib.error.HTTPError('Unknown URL: {}'.format(URL))
+ raise HTTPError('Unknown URL: {}'.format(URL))
def _get_page_BS(self, URL):
res = self.opener.open(self.do_redirect(URL))
@@ -91,17 +100,20 @@ class Article(Page):
def collect_message(self):
logging.debug('self.root = {}'.format(self.root))
try:
- with self.opener.open(self.root) as res:
- raw_msg = res.read()
- proc = subprocess.Popen(['/usr/bin/formail'],
- stdin=subprocess.PIPE,
- stdout=subprocess.PIPE,
- universal_newlines=True)
- result = proc.communicate(raw_msg.decode())[0]
- return result
- except urllib.error.HTTPError as exc:
+ res = self.opener.open(self.root)
+ raw_msg = res.read()
+ proc = subprocess.Popen(['/usr/bin/formail'],
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ universal_newlines=True)
+ result = proc.communicate(raw_msg.decode())[0]
+ except HTTPError as exc:
logging.warning('Exception on downloading {}:\n{}'.format(
self.root, exc))
+ finally:
+ res.close()
+
+ return result
class Topic(Page):
@@ -248,8 +260,8 @@ class Group(Page):
class MBOX(mailbox.mbox):
def __init__(self, filename):
if os.path.exists(filename):
- shutil.move(filename, '{}.bak'.format(filename))
- super(MBOX, self).__init__(filename)
+ shutil.move(filename, '{0}.bak'.format(filename))
+ mailbox.mbox.__init__(self, filename)
self.box_name = filename
def write_group(self, group_object):
diff --git a/setup.py b/setup.py
index a29fd4e..bec4529 100644
--- a/setup.py
+++ b/setup.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import, print_function, unicode_literals
from distutils.core import setup, Command
import unittest
@@ -51,4 +53,5 @@ setup(name='gg_scrapper',
classifiers=classifiers,
cmdclass={
'test': RunTests,
- })
+ },
+ requires=['beautifulsoup4', 'PyYAML'])
diff --git a/test/test_functional.py b/test/test_functional.py
index 3e8d874..ec76998 100644
--- a/test/test_functional.py
+++ b/test/test_functional.py
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
import logging
+import io
import os.path
import unittest
import gg_scrapper
@@ -40,7 +41,7 @@ class TestGGScrapperFunctional(unittest.TestCase):
rfc_msg = '\n'.join(rfc_msg.split('\n')[1:])
exp_file_name = os.path.join(os.path.dirname(__file__), 'message.eml')
- with open(exp_file_name, 'r', encoding='utf8') as exp_f:
+ with io.open(exp_file_name, 'r', encoding='utf8') as exp_f:
self.assertEqual(rfc_msg, exp_f.read())
diff --git a/test/test_unit.py b/test/test_unit.py
index 9138466..70b001b 100644
--- a/test/test_unit.py
+++ b/test/test_unit.py
@@ -61,7 +61,8 @@ class TestDemangle(unittest.TestCase):
with open('unmangled.mbx') as obs_mbx_f:
with open('test/mbox_unmangled.mbx') as exp_mbx_f:
self.assertAlmostEqual(len(obs_mbx_f.read()),
- len(exp_mbx_f.read()))
+ len(exp_mbx_f.read()),
+ delta=100)
if __name__ == '__main__':
unittest.main()