aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--README.rst4
-rwxr-xr-xgg_scraper.py (renamed from gg_scrapper.py)2
-rw-r--r--setup.py10
-rw-r--r--test/group.yaml22
-rw-r--r--test/test_functional.py12
-rw-r--r--test/test_unit.py14
6 files changed, 32 insertions, 32 deletions
diff --git a/README.rst b/README.rst
index 8e09332..0d7e98a 100644
--- a/README.rst
+++ b/README.rst
@@ -12,11 +12,11 @@ Current bugs are filed at my bugzilla_ and new ones can be reported via
email (one of my many addresses is available on my `Github page`_ )
.. _bugzilla:
- https://luther.ceplovi.cz/bugzilla/buglist.cgi?quicksearch=product%3Agg_scrapper
+ https://luther.ceplovi.cz/bugzilla/buglist.cgi?quicksearch=product%3Agg_scraper
.. _`Github page`:
https://github.com/mcepl
Of course pull requests are more than welcome in the same places as well. Currently all development is done with Python 3.3, but tests are run on Travis-CI for 2.7 and pypy as well.
-.. image:: https://secure.travis-ci.org/mcepl/gg_scrapper.png
+.. image:: https://secure.travis-ci.org/mcepl/gg_scraper.png
:alt: Build Status
diff --git a/gg_scrapper.py b/gg_scraper.py
index 62f93bd..209b92c 100755
--- a/gg_scrapper.py
+++ b/gg_scraper.py
@@ -48,7 +48,7 @@ MANGLED_ADDR_RE = re.compile(
r'([a-zA-Z0-9_.+-]+\.\.\.@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)',
re.IGNORECASE)
-__version__ = '0.4'
+__version__ = '0.5'
class Page(object):
diff --git a/setup.py b/setup.py
index 74d98dc..ccaec04 100644
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,7 @@ from __future__ import absolute_import, print_function, unicode_literals
from distutils.core import setup, Command
import unittest
-import gg_scrapper
+import gg_scraper
class RunTests(Command):
@@ -41,14 +41,14 @@ def get_long_description():
lines = open('README.rst').read().splitlines(False)
return '\n' + '\n'.join(lines) + '\n'
-setup(name='gg_scrapper',
- version=gg_scrapper.__version__,
+setup(name='gg_scraper',
+ version=gg_scraper.__version__,
description='Download a Google Group to MBOX',
long_description=get_long_description(),
author='Matěj Cepl',
author_email='mcepl@cepl.eu',
- url='http://luther.ceplovi.cz/git/gg_scrapper.git',
- scripts=['gg_scrapper.py'],
+ url='http://luther.ceplovi.cz/git/gg_scraper.git',
+ scripts=['gg_scraper.py'],
keywords=['email', 'Google Groups', 'scrap', 'backup'],
license='GNU GPL',
classifiers=classifiers,
diff --git a/test/group.yaml b/test/group.yaml
index 59a33d4..82f0e31 100644
--- a/test/group.yaml
+++ b/test/group.yaml
@@ -1,10 +1,10 @@
-!!python/object:gg_scrapper.Group
+!!python/object:gg_scraper.Group
group_URL: https://groups.google.com/forum/#!forum/javascriptcz
name: javascriptcz
topics:
-- !!python/object:gg_scrapper.Topic
+- !!python/object:gg_scraper.Topic
articles:
- - !!python/object:gg_scrapper.Article {raw_message: "From scho...@schovi.cz Thu Jan\
+ - !!python/object:gg_scraper.Article {raw_message: "From scho...@schovi.cz Thu Jan\
\ 2 16:12:04 2014\nReceived: by 10.224.192.193 with SMTP id dr1mr1092656qab.9.1306595926917;\n\
\ Sat, 28 May 2011 08:18:46 -0700 (PDT)\nX-BeenThere: javascriptcz@googlegroups.com\n\
Received: by 10.224.187.145 with SMTP id cw17ls698645qab.2.gmail; Sat, 28 May\n\
@@ -23,9 +23,9 @@ topics:
\n", root: 'https://groups.google.com/forum/message/raw?msg=javascriptcz/5tbTMhlt4s0/t7QWA3IHsV0J'}
name: "Zdroje, kter\xE9 byste nem\u011Bli minout."
root: https://groups.google.com/forum/?_escaped_fragment_=topic/javascriptcz/5tbTMhlt4s0
-- !!python/object:gg_scrapper.Topic
+- !!python/object:gg_scraper.Topic
articles:
- - !!python/object:gg_scrapper.Article {raw_message: "From richte...@gmail.com Thu\
+ - !!python/object:gg_scraper.Article {raw_message: "From richte...@gmail.com Thu\
\ Jan 2 16:12:06 2014\nReceived: by 10.224.136.200 with SMTP id s8mr198006qat.21.1306398867334;\n\
\ Thu, 26 May 2011 01:34:27 -0700 (PDT)\nX-BeenThere: javascriptcz@googlegroups.com\n\
Received: by 10.224.187.145 with SMTP id cw17ls261575qab.2.gmail; Thu, 26 May\n\
@@ -46,7 +46,7 @@ topics:
\ v=B9echny pages, panes, buttons, widgets, atd. tak=BEe t=\ny\nappky pak vypadaly\
\ v=B9echny stejn=EC (hnusn=EC) a bylo to t=EC=BEkop=E1dn=\n=FD.\n\nhttp://blog.sproutcore.com/announcing-sproutcore-2-0/\n\
\n", root: 'https://groups.google.com/forum/message/raw?msg=javascriptcz/-4cy0XTGGaU/pUoGBDjK_HcJ'}
- - !!python/object:gg_scrapper.Article {raw_message: "From damn...@gmail.com Thu Jan\
+ - !!python/object:gg_scraper.Article {raw_message: "From damn...@gmail.com Thu Jan\
\ 2 16:12:07 2014\nReceived: by 10.100.168.2 with SMTP id q2mr1217945ane.14.1306495789592;\n\
\ Fri, 27 May 2011 04:29:49 -0700 (PDT)\nX-BeenThere: javascriptcz@googlegroups.com\n\
Received: by 10.90.58.22 with SMTP id g22ls443766aga.2.gmail; Fri, 27 May 2011\n\
@@ -71,7 +71,7 @@ topics:
\ pages, panes, buttons, widgets, atd. tak=BEe=\n ty\n> appky pak vypadaly v=B9echny\
\ stejn=EC (hnusn=EC) a bylo to t=EC=BEkop=E1d=\nn=FD.\n>\n> http://blog.sproutcore.com/announcing-sproutcore-2-0/\n\
\n", root: 'https://groups.google.com/forum/message/raw?msg=javascriptcz/-4cy0XTGGaU/Gxus9ddtp5wJ'}
- - !!python/object:gg_scrapper.Article {raw_message: "From richte...@gmail.com Thu\
+ - !!python/object:gg_scraper.Article {raw_message: "From richte...@gmail.com Thu\
\ Jan 2 16:12:07 2014\nReceived: by 10.224.9.144 with SMTP id l16mr693260qal.26.1306497361290;\n\
\ Fri, 27 May 2011 04:56:01 -0700 (PDT)\nX-BeenThere: javascriptcz@googlegroups.com\n\
Received: by 10.224.175.74 with SMTP id w10ls508844qaz.0.gmail; Fri, 27 May\n\
@@ -109,9 +109,9 @@ topics:
\n", root: 'https://groups.google.com/forum/message/raw?msg=javascriptcz/-4cy0XTGGaU/IpL3eL0yancJ'}
name: SproutCore
root: https://groups.google.com/forum/?_escaped_fragment_=topic/javascriptcz/-4cy0XTGGaU
-- !!python/object:gg_scrapper.Topic
+- !!python/object:gg_scraper.Topic
articles:
- - !!python/object:gg_scrapper.Article {raw_message: "From richte...@gmail.com Thu\
+ - !!python/object:gg_scraper.Article {raw_message: "From richte...@gmail.com Thu\
\ Jan 2 16:12:09 2014\nReceived: by 10.224.126.72 with SMTP id b8mr1923833qas.13.1306351509553;\n\
\ Wed, 25 May 2011 12:25:09 -0700 (PDT)\nX-BeenThere: javascriptcz@googlegroups.com\n\
Received: by 10.224.181.131 with SMTP id by3ls169717qab.3.gmail; Wed, 25 May\n\
@@ -127,9 +127,9 @@ topics:
\ charset=ISO-8859-1\n\n\n#javascript.cz na irc.freenode.net\n\n", root: 'https://groups.google.com/forum/message/raw?msg=javascriptcz/KpLLZ7thax4/nxKqd5qBVTIJ'}
name: "IRC kan\xE1l"
root: https://groups.google.com/forum/?_escaped_fragment_=topic/javascriptcz/KpLLZ7thax4
-- !!python/object:gg_scrapper.Topic
+- !!python/object:gg_scraper.Topic
articles:
- - !!python/object:gg_scrapper.Article {raw_message: "From richte...@gmail.com Thu\
+ - !!python/object:gg_scraper.Article {raw_message: "From richte...@gmail.com Thu\
\ Jan 2 16:12:10 2014\nReceived: by 10.224.215.3 with SMTP id hc3mr1823425qab.4.1306325120198;\n\
\ Wed, 25 May 2011 05:05:20 -0700 (PDT)\nX-BeenThere: javascriptcz@googlegroups.com\n\
Received: by 10.224.138.148 with SMTP id a20ls76694qau.5.gmail; Wed, 25 May\n\
diff --git a/test/test_functional.py b/test/test_functional.py
index ec76998..c8f5bf2 100644
--- a/test/test_functional.py
+++ b/test/test_functional.py
@@ -4,7 +4,7 @@ import logging
import io
import os.path
import unittest
-import gg_scrapper
+import gg_scraper
IN_URL = 'https://groups.google.com/forum/#!forum/jbrout'
ORIG_URL = 'http://groups.google.com/d/forum/jbrout'
@@ -17,16 +17,16 @@ ARTICLE_URL = 'https://groups.google.com/d/msg/jbrout' + \
class TestGGScrapperFunctional(unittest.TestCase):
def test_collecting_topics(self):
- page = gg_scrapper.Group(IN_URL)
+ page = gg_scraper.Group(IN_URL)
topics = page.get_topics()
logging.debug("number of topics = %d", len(topics))
self.assertGreater(len(topics), 0)
def test_collecting_articles(self):
logging.debug('topic = URL {}'.format(TOPIC_URL))
- topic = gg_scrapper.Topic(TOPIC_URL,
- 'repo version incompatible with ' +
- 'ubuntu 11.04 ?')
+ topic = gg_scraper.Topic(TOPIC_URL,
+ 'repo version incompatible with ' +
+ 'ubuntu 11.04 ?')
articles = topic.get_articles()
article_count = topic.get_count_articles()
logging.debug('article_count = {0:d}'.format(article_count))
@@ -35,7 +35,7 @@ class TestGGScrapperFunctional(unittest.TestCase):
def test_get_raw_article(self):
self.maxDiff = None
- article = gg_scrapper.Article(ARTICLE_URL)
+ article = gg_scraper.Article(ARTICLE_URL)
rfc_msg = article.collect_message().replace('\r\n', '\n')
rfc_msg = '\n'.join(rfc_msg.split('\n')[1:])
diff --git a/test/test_unit.py b/test/test_unit.py
index 70b001b..503aafe 100644
--- a/test/test_unit.py
+++ b/test/test_unit.py
@@ -2,8 +2,8 @@ import os
import tempfile
import yaml
import unittest
-import gg_scrapper
-from gg_scrapper import Group, Topic, Article # noqa
+import gg_scraper
+from gg_scraper import Group, Topic, Article # noqa
IN_URL = 'https://groups.google.com/forum/#!forum/jbrout'
ORIG_URL = 'http://groups.google.com/d/forum/jbrout'
@@ -13,11 +13,11 @@ EXP_URL = 'https://groups.google.com/forum/' + \
class TestGGScrapper(unittest.TestCase):
def test_URL_conversion(self):
- obs_URL = gg_scrapper.Group.unenscape_Google_bang_URL(IN_URL)
+ obs_URL = gg_scraper.Group.unenscape_Google_bang_URL(IN_URL)
self.assertEqual(obs_URL, EXP_URL)
def test_do_redirect(self):
- obs_URL = gg_scrapper.Group.do_redirect(ORIG_URL)
+ obs_URL = gg_scraper.Group.do_redirect(ORIG_URL)
self.assertEqual(obs_URL, EXP_URL)
@@ -31,7 +31,7 @@ class TestMBOX(unittest.TestCase):
'''Create a mbox file from (YAMLed) Group
'''
mbx_file = tempfile.NamedTemporaryFile('w', delete=False)
- mbx = gg_scrapper.MBOX(mbx_file.name)
+ mbx = gg_scraper.MBOX(mbx_file.name)
mbx.write_group(self.group)
with open('test/mbox.mbx') as exp_f:
@@ -55,8 +55,8 @@ class TestMBOX(unittest.TestCase):
class TestDemangle(unittest.TestCase):
def test_demangle(self):
self.maxDiff = None
- gg_scrapper.demangle('test/unmangled_address.cnf',
- 'test/mbox.mbx', 'unmangled.mbx')
+ gg_scraper.demangle('test/unmangled_address.cnf',
+ 'test/mbox.mbx', 'unmangled.mbx')
with open('unmangled.mbx') as obs_mbx_f:
with open('test/mbox_unmangled.mbx') as exp_mbx_f: