aboutsummaryrefslogblamecommitdiffstats
path: root/test/test_functional.py
blob: 34b49c648d976287d1cb1f99c0fd66e04c849a71 (plain) (tree)
1
2
3
4
5
6
7
8
9


                       
         
              
    
                                
                   
                   
                 

                                                         
                                                     



                                                                        
                                                                  
                                                          
                               


                                                  









                                                         
 
                                     
                                       
                                  

                                          




                                           
                                       
                                                          


                                                                    

                                                  

                                                      



                                                       

                                   
                                                 
 

                                                    
                                                                         
                                          
 



                                                                
                                                                             

                                                         

                                               

 

                          
# -*- coding: utf-8 -*-

import logging
import io
import os.path
try:
    import unittest2 as unittest
except ImportError:
    import unittest
import gg_scraper

# URLs used as inputs by the functional tests below.

# Forum URL (hash-bang form) of the "jbrout" group.
IN_URL = 'https://groups.google.com/forum/#!forum/jbrout'
# URL in the "/group/" form; test_wrong_URL expects gg_scraper.Group to
# reject it with BadURLError.
BAD_URL = "http://groups.google.com/group/rdflib-dev"
# "/d/forum/" form of the jbrout group URL (not referenced in this file's
# visible tests).
ORIG_URL = 'http://groups.google.com/d/forum/jbrout'
# "_escaped_fragment_" form of the jbrout forum URL (not referenced in this
# file's visible tests).
EXP_URL = ('https://groups.google.com/forum/'
           '?_escaped_fragment_=forum/jbrout')
# A single topic inside the jbrout group.
TOPIC_URL = 'https://groups.google.com/forum/#!topic/jbrout/xNwoVmC07KI'
# Forum URL (hash-bang form) of the "django-oscar" group.
OSCAR_URL = 'https://groups.google.com/forum/#!forum/django-oscar'
# A single message inside the jbrout topic above.
ARTICLE_URL = ('https://groups.google.com/d/msg/jbrout'
               '/xNwoVmC07KI/OfpRHFscUkwJ')


class TestGGScrapperFunctional(unittest.TestCase):
    """Functional tests driving gg_scraper with real Google Groups URLs.

    NOTE(review): these tests presumably need network access, since they
    hand live groups.google.com URLs to gg_scraper -- confirm before running
    them in an offline environment.
    """

    @staticmethod
    def msg_wo_From(inmsg):
        """Return ``inmsg`` with its first line removed.

        Bytes input is decoded first when ``gg_scraper.py3k`` is true, and
        CRLF line endings are converted to LF before the first line is cut.
        The dropped line is presumably the mbox-style "From " separator (as
        the method name suggests); the code does not inspect its content.
        """
        needs_decoding = gg_scraper.py3k and isinstance(inmsg, bytes)
        text = inmsg.decode() if needs_decoding else inmsg
        normalised = text.replace('\r\n', '\n')
        # Everything after the first newline; empty when there is none.
        _first_line, _sep, remainder = normalised.partition('\n')
        return remainder

    @staticmethod
    def dired(x):
        """Return ``x`` joined onto the directory containing this file."""
        here = os.path.dirname(__file__)
        return os.path.join(here, x)

    def test_collecting_topics(self):
        # The jbrout group must yield at least one topic.
        topics = gg_scraper.Group(IN_URL).get_topics()
        self.assertGreater(len(topics), 0)

    def test_collecting_oscar_topics(self):
        # The django-oscar group must yield at least one topic.
        topics = gg_scraper.Group(OSCAR_URL).get_topics()
        self.assertGreater(len(topics), 0)

    def test_collecting_articles(self):
        # The number of collected articles must agree with the count the
        # topic itself reports.
        logging.debug('topic = URL {0}'.format(TOPIC_URL))
        topic_title = ('repo version incompatible with '
                       'ubuntu 11.04 ?')
        topic = gg_scraper.Topic(TOPIC_URL, topic_title)
        collected = topic.get_articles()
        reported_count = topic.get_count_articles()
        self.assertEqual(len(collected), reported_count)

    def test_wrong_URL(self):
        # Constructing a Group from BAD_URL must raise BadURLError.
        self.assertRaises(gg_scraper.BadURLError, gg_scraper.Group, BAD_URL)

    def test_get_raw_article(self):
        # The collected message (minus its first line) must equal the
        # stored fixture message.eml.
        self.maxDiff = None  # show the complete diff on failure
        article = gg_scraper.Article(ARTICLE_URL)
        fixture_path = self.dired('message.eml')

        with io.open(fixture_path, 'r', encoding='utf8') as exp_f:
            actual = self.msg_wo_From(article.collect_message())
            expected = exp_f.read()
            self.assertEqual(actual, expected)

    def test_py26_unicode_raw_article(self):
        # Same comparison as test_get_raw_article, for a raw-message URL
        # whose fixture is py26_unicode.eml.
        self.maxDiff = None  # show the complete diff on failure
        raw_url = ('https://groups.google.com/forum/message/raw?'
                   'msg=django-oscar/BbBiMWwolf0/gn-s0sFYEhkJ')
        raw_message = gg_scraper.Article(raw_url).collect_message()
        actual = self.msg_wo_From(raw_message)
        fixture_path = self.dired('py26_unicode.eml')
        with io.open(fixture_path, 'r', encoding='utf8') as exp_f:
            expected = exp_f.read()
            self.assertEqual(actual, expected)


# Run the test suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()