aboutsummaryrefslogtreecommitdiffstats
path: root/test/test_functional.py
blob: ec769985dd49b73eb43b044cc635879637bf4e1e (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# -*- coding: utf-8 -*-

import logging
import io
import os.path
import unittest
import gg_scrapper

IN_URL = 'https://groups.google.com/forum/#!forum/jbrout'
ORIG_URL = 'http://groups.google.com/d/forum/jbrout'
EXP_URL = 'https://groups.google.com/forum/' + \
    '?_escaped_fragment_=forum/jbrout'
TOPIC_URL = 'https://groups.google.com/forum/#!topic/jbrout/xNwoVmC07KI'
ARTICLE_URL = 'https://groups.google.com/d/msg/jbrout' + \
    '/xNwoVmC07KI/OfpRHFscUkwJ'


class TestGGScrapperFunctional(unittest.TestCase):
    def test_collecting_topics(self):
        page = gg_scrapper.Group(IN_URL)
        topics = page.get_topics()
        logging.debug("number of topics = %d", len(topics))
        self.assertGreater(len(topics), 0)

    def test_collecting_articles(self):
        logging.debug('topic = URL {}'.format(TOPIC_URL))
        topic = gg_scrapper.Topic(TOPIC_URL,
                                  'repo version incompatible with ' +
                                  'ubuntu 11.04 ?')
        articles = topic.get_articles()
        article_count = topic.get_count_articles()
        logging.debug('article_count = {0:d}'.format(article_count))
        logging.debug('articles = len {0:d}'.format(len(articles)))
        self.assertEqual(len(articles), article_count)

    def test_get_raw_article(self):
        self.maxDiff = None
        article = gg_scrapper.Article(ARTICLE_URL)

        rfc_msg = article.collect_message().replace('\r\n', '\n')
        rfc_msg = '\n'.join(rfc_msg.split('\n')[1:])

        exp_file_name = os.path.join(os.path.dirname(__file__), 'message.eml')
        with io.open(exp_file_name, 'r', encoding='utf8') as exp_f:
            self.assertEqual(rfc_msg, exp_f.read())


if __name__ == '__main__':
    unittest.main()