diff options
author | Matěj Cepl <mcepl@redhat.com> | 2014-01-05 09:25:14 +0100 |
---|---|---|
committer | Matěj Cepl <mcepl@redhat.com> | 2014-01-11 10:50:25 +0100 |
commit | c5c5b68dd2ed591e0ad411bf3dde4611cb39f18c (patch) | |
tree | 533014d5bc07f3a5432800d059dd8c33fe446b2e | |
parent | 2b7981e4f8ad425d73936751789c7609f5541a1b (diff) | |
download | gg_scraper-c5c5b68dd2ed591e0ad411bf3dde4611cb39f18c.tar.gz |
scrapper -> scraper (0.5)
Woops!
scrapper: a fighter or aggressive competitor, especially one always
ready or eager for a fight, argument, or contest: the best lightweight
scrapper in boxing; a rugged political scrapper.
That's not what I meant.
-rw-r--r-- | README.rst | 4 | ||||
-rwxr-xr-x | gg_scraper.py (renamed from gg_scrapper.py) | 2 | ||||
-rw-r--r-- | setup.py | 10 | ||||
-rw-r--r-- | test/group.yaml | 22 | ||||
-rw-r--r-- | test/test_functional.py | 12 | ||||
-rw-r--r-- | test/test_unit.py | 14 |
6 files changed, 32 insertions, 32 deletions
@@ -12,11 +12,11 @@ Current bugs are filled at my bugzilla_ and new ones can be reported via email (one of many of my addresses are available on my `Github page`_ ) .. _bugzilla: - https://luther.ceplovi.cz/bugzilla/buglist.cgi?quicksearch=product%3Agg_scrapper + https://luther.ceplovi.cz/bugzilla/buglist.cgi?quicksearch=product%3Agg_scraper .. _`Github page`: https://github.com/mcepl Of course pull requests are more than welcome in the same places as well. Currently all development is done with Python 3.3, but tests are run on Travis-CI for 2.7 and pypy as well. -.. image:: https://secure.travis-ci.org/mcepl/gg_scrapper.png +.. image:: https://secure.travis-ci.org/mcepl/gg_scraper.png :alt: Build Status diff --git a/gg_scrapper.py b/gg_scraper.py index 62f93bd..209b92c 100755 --- a/gg_scrapper.py +++ b/gg_scraper.py @@ -48,7 +48,7 @@ MANGLED_ADDR_RE = re.compile( r'([a-zA-Z0-9_.+-]+\.\.\.@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)', re.IGNORECASE) -__version__ = '0.4' +__version__ = '0.5' class Page(object): @@ -3,7 +3,7 @@ from __future__ import absolute_import, print_function, unicode_literals from distutils.core import setup, Command import unittest -import gg_scrapper +import gg_scraper class RunTests(Command): @@ -41,14 +41,14 @@ def get_long_description(): lines = open('README.rst').read().splitlines(False) return '\n' + '\n'.join(lines) + '\n' -setup(name='gg_scrapper', - version=gg_scrapper.__version__, +setup(name='gg_scraper', + version=gg_scraper.__version__, description='Download a Google Group to MBOX', long_description=get_long_description(), author='Matěj Cepl', author_email='mcepl@cepl.eu', - url='http://luther.ceplovi.cz/git/gg_scrapper.git', - scripts=['gg_scrapper.py'], + url='http://luther.ceplovi.cz/git/gg_scraper.git', + scripts=['gg_scraper.py'], keywords=['email', 'Google Groups', 'scrap', 'backup'], license='GNU GPL', classifiers=classifiers, diff --git a/test/group.yaml b/test/group.yaml index 59a33d4..82f0e31 100644 --- a/test/group.yaml +++ 
b/test/group.yaml @@ -1,10 +1,10 @@ -!!python/object:gg_scrapper.Group +!!python/object:gg_scraper.Group group_URL: https://groups.google.com/forum/#!forum/javascriptcz name: javascriptcz topics: -- !!python/object:gg_scrapper.Topic +- !!python/object:gg_scraper.Topic articles: - - !!python/object:gg_scrapper.Article {raw_message: "From scho...@schovi.cz Thu Jan\ + - !!python/object:gg_scraper.Article {raw_message: "From scho...@schovi.cz Thu Jan\ \ 2 16:12:04 2014\nReceived: by 10.224.192.193 with SMTP id dr1mr1092656qab.9.1306595926917;\n\ \ Sat, 28 May 2011 08:18:46 -0700 (PDT)\nX-BeenThere: javascriptcz@googlegroups.com\n\ Received: by 10.224.187.145 with SMTP id cw17ls698645qab.2.gmail; Sat, 28 May\n\ @@ -23,9 +23,9 @@ topics: \n", root: 'https://groups.google.com/forum/message/raw?msg=javascriptcz/5tbTMhlt4s0/t7QWA3IHsV0J'} name: "Zdroje, kter\xE9 byste nem\u011Bli minout." root: https://groups.google.com/forum/?_escaped_fragment_=topic/javascriptcz/5tbTMhlt4s0 -- !!python/object:gg_scrapper.Topic +- !!python/object:gg_scraper.Topic articles: - - !!python/object:gg_scrapper.Article {raw_message: "From richte...@gmail.com Thu\ + - !!python/object:gg_scraper.Article {raw_message: "From richte...@gmail.com Thu\ \ Jan 2 16:12:06 2014\nReceived: by 10.224.136.200 with SMTP id s8mr198006qat.21.1306398867334;\n\ \ Thu, 26 May 2011 01:34:27 -0700 (PDT)\nX-BeenThere: javascriptcz@googlegroups.com\n\ Received: by 10.224.187.145 with SMTP id cw17ls261575qab.2.gmail; Thu, 26 May\n\ @@ -46,7 +46,7 @@ topics: \ v=B9echny pages, panes, buttons, widgets, atd. 
tak=BEe t=\ny\nappky pak vypadaly\ \ v=B9echny stejn=EC (hnusn=EC) a bylo to t=EC=BEkop=E1dn=\n=FD.\n\nhttp://blog.sproutcore.com/announcing-sproutcore-2-0/\n\ \n", root: 'https://groups.google.com/forum/message/raw?msg=javascriptcz/-4cy0XTGGaU/pUoGBDjK_HcJ'} - - !!python/object:gg_scrapper.Article {raw_message: "From damn...@gmail.com Thu Jan\ + - !!python/object:gg_scraper.Article {raw_message: "From damn...@gmail.com Thu Jan\ \ 2 16:12:07 2014\nReceived: by 10.100.168.2 with SMTP id q2mr1217945ane.14.1306495789592;\n\ \ Fri, 27 May 2011 04:29:49 -0700 (PDT)\nX-BeenThere: javascriptcz@googlegroups.com\n\ Received: by 10.90.58.22 with SMTP id g22ls443766aga.2.gmail; Fri, 27 May 2011\n\ @@ -71,7 +71,7 @@ topics: \ pages, panes, buttons, widgets, atd. tak=BEe=\n ty\n> appky pak vypadaly v=B9echny\ \ stejn=EC (hnusn=EC) a bylo to t=EC=BEkop=E1d=\nn=FD.\n>\n> http://blog.sproutcore.com/announcing-sproutcore-2-0/\n\ \n", root: 'https://groups.google.com/forum/message/raw?msg=javascriptcz/-4cy0XTGGaU/Gxus9ddtp5wJ'} - - !!python/object:gg_scrapper.Article {raw_message: "From richte...@gmail.com Thu\ + - !!python/object:gg_scraper.Article {raw_message: "From richte...@gmail.com Thu\ \ Jan 2 16:12:07 2014\nReceived: by 10.224.9.144 with SMTP id l16mr693260qal.26.1306497361290;\n\ \ Fri, 27 May 2011 04:56:01 -0700 (PDT)\nX-BeenThere: javascriptcz@googlegroups.com\n\ Received: by 10.224.175.74 with SMTP id w10ls508844qaz.0.gmail; Fri, 27 May\n\ @@ -109,9 +109,9 @@ topics: \n", root: 'https://groups.google.com/forum/message/raw?msg=javascriptcz/-4cy0XTGGaU/IpL3eL0yancJ'} name: SproutCore root: https://groups.google.com/forum/?_escaped_fragment_=topic/javascriptcz/-4cy0XTGGaU -- !!python/object:gg_scrapper.Topic +- !!python/object:gg_scraper.Topic articles: - - !!python/object:gg_scrapper.Article {raw_message: "From richte...@gmail.com Thu\ + - !!python/object:gg_scraper.Article {raw_message: "From richte...@gmail.com Thu\ \ Jan 2 16:12:09 2014\nReceived: by 10.224.126.72 with 
SMTP id b8mr1923833qas.13.1306351509553;\n\ \ Wed, 25 May 2011 12:25:09 -0700 (PDT)\nX-BeenThere: javascriptcz@googlegroups.com\n\ Received: by 10.224.181.131 with SMTP id by3ls169717qab.3.gmail; Wed, 25 May\n\ @@ -127,9 +127,9 @@ topics: \ charset=ISO-8859-1\n\n\n#javascript.cz na irc.freenode.net\n\n", root: 'https://groups.google.com/forum/message/raw?msg=javascriptcz/KpLLZ7thax4/nxKqd5qBVTIJ'} name: "IRC kan\xE1l" root: https://groups.google.com/forum/?_escaped_fragment_=topic/javascriptcz/KpLLZ7thax4 -- !!python/object:gg_scrapper.Topic +- !!python/object:gg_scraper.Topic articles: - - !!python/object:gg_scrapper.Article {raw_message: "From richte...@gmail.com Thu\ + - !!python/object:gg_scraper.Article {raw_message: "From richte...@gmail.com Thu\ \ Jan 2 16:12:10 2014\nReceived: by 10.224.215.3 with SMTP id hc3mr1823425qab.4.1306325120198;\n\ \ Wed, 25 May 2011 05:05:20 -0700 (PDT)\nX-BeenThere: javascriptcz@googlegroups.com\n\ Received: by 10.224.138.148 with SMTP id a20ls76694qau.5.gmail; Wed, 25 May\n\ diff --git a/test/test_functional.py b/test/test_functional.py index ec76998..c8f5bf2 100644 --- a/test/test_functional.py +++ b/test/test_functional.py @@ -4,7 +4,7 @@ import logging import io import os.path import unittest -import gg_scrapper +import gg_scraper IN_URL = 'https://groups.google.com/forum/#!forum/jbrout' ORIG_URL = 'http://groups.google.com/d/forum/jbrout' @@ -17,16 +17,16 @@ ARTICLE_URL = 'https://groups.google.com/d/msg/jbrout' + \ class TestGGScrapperFunctional(unittest.TestCase): def test_collecting_topics(self): - page = gg_scrapper.Group(IN_URL) + page = gg_scraper.Group(IN_URL) topics = page.get_topics() logging.debug("number of topics = %d", len(topics)) self.assertGreater(len(topics), 0) def test_collecting_articles(self): logging.debug('topic = URL {}'.format(TOPIC_URL)) - topic = gg_scrapper.Topic(TOPIC_URL, - 'repo version incompatible with ' + - 'ubuntu 11.04 ?') + topic = gg_scraper.Topic(TOPIC_URL, + 'repo version incompatible 
with ' + + 'ubuntu 11.04 ?') articles = topic.get_articles() article_count = topic.get_count_articles() logging.debug('article_count = {0:d}'.format(article_count)) @@ -35,7 +35,7 @@ class TestGGScrapperFunctional(unittest.TestCase): def test_get_raw_article(self): self.maxDiff = None - article = gg_scrapper.Article(ARTICLE_URL) + article = gg_scraper.Article(ARTICLE_URL) rfc_msg = article.collect_message().replace('\r\n', '\n') rfc_msg = '\n'.join(rfc_msg.split('\n')[1:]) diff --git a/test/test_unit.py b/test/test_unit.py index 70b001b..503aafe 100644 --- a/test/test_unit.py +++ b/test/test_unit.py @@ -2,8 +2,8 @@ import os import tempfile import yaml import unittest -import gg_scrapper -from gg_scrapper import Group, Topic, Article # noqa +import gg_scraper +from gg_scraper import Group, Topic, Article # noqa IN_URL = 'https://groups.google.com/forum/#!forum/jbrout' ORIG_URL = 'http://groups.google.com/d/forum/jbrout' @@ -13,11 +13,11 @@ EXP_URL = 'https://groups.google.com/forum/' + \ class TestGGScrapper(unittest.TestCase): def test_URL_conversion(self): - obs_URL = gg_scrapper.Group.unenscape_Google_bang_URL(IN_URL) + obs_URL = gg_scraper.Group.unenscape_Google_bang_URL(IN_URL) self.assertEqual(obs_URL, EXP_URL) def test_do_redirect(self): - obs_URL = gg_scrapper.Group.do_redirect(ORIG_URL) + obs_URL = gg_scraper.Group.do_redirect(ORIG_URL) self.assertEqual(obs_URL, EXP_URL) @@ -31,7 +31,7 @@ class TestMBOX(unittest.TestCase): '''Create a mbox file from (YAMLed) Group ''' mbx_file = tempfile.NamedTemporaryFile('w', delete=False) - mbx = gg_scrapper.MBOX(mbx_file.name) + mbx = gg_scraper.MBOX(mbx_file.name) mbx.write_group(self.group) with open('test/mbox.mbx') as exp_f: @@ -55,8 +55,8 @@ class TestMBOX(unittest.TestCase): class TestDemangle(unittest.TestCase): def test_demangle(self): self.maxDiff = None - gg_scrapper.demangle('test/unmangled_address.cnf', - 'test/mbox.mbx', 'unmangled.mbx') + gg_scraper.demangle('test/unmangled_address.cnf', + 
'test/mbox.mbx', 'unmangled.mbx') with open('unmangled.mbx') as obs_mbx_f: with open('test/mbox_unmangled.mbx') as exp_mbx_f: |