Пример #1
0
 def test_image_content(self):
     """Check that an 'image/png' head response yields an image article."""
     png_response = self.response2
     png_response.headers['content-type'] = 'image/png'
     # Every call to the patched head mock now returns the PNG response.
     self.head_patch.return_value = png_response

     owner_feed = Feed(id=1, user_id=1)
     builder = ClassicArticleBuilder(owner_feed, self.entry2)
     enhanced = builder.enhance()

     self.assertEqual(ArticleType.image, enhanced['article_type'])
Пример #2
0
 def test_match_light_parsing_ok(self):
     """Check link, comments and tags RedditArticleBuilder gets from CONTENT."""
     reddit_feed = Feed(feed_type=FeedType.reddit,
                        link='https://www.reddit.com/r/france/.rss')
     # Three scheme-less, label-less tags; the last assertion expects none
     # of them to be kept on the built article.
     entry_tags = [{'scheme': None, 'term': term, 'label': ''}
                   for term in ('to', 'be', 'removed')]
     raw_entry = {'content': [{'value': CONTENT}], 'tags': entry_tags}

     builder = RedditArticleBuilder(reddit_feed, raw_entry)

     expected_comments = ('https://www.reddit.com/r/Map_Porn/comments/5mxq4o/'
                          'map_of_irish_clans_in_times_of_henry_viii_1294/')
     self.assertEqual(builder.article['link'],
                      'https://supload.com/rJY-37gLe')
     self.assertEqual(builder.article['comments'], expected_comments)
     self.assertEqual(builder.article['tags'], set())
Пример #3
0
 def test_missing_title(self):
     """Check entry id, link and ownership of an article built from self.entry.

     NOTE(review): presumably the fixture entry has no title, as the test
     name suggests -- the fixture is not visible here, confirm.
     """
     self.head_patch.return_value = self.get_response('http:')

     builder = ClassicArticleBuilder(Feed(id=1, user_id=1), self.entry)
     enhanced = builder.enhance()

     self.assertEqual('http://www.pariszigzag.fr/?p=56413',
                      enhanced['entry_id'])
     expected_link = 'http:' + self.response_url
     self.assertEqual(expected_link, enhanced['link'])
     # Both ownership fields must carry the id 1 given to the Feed above.
     for owner_key in ('user_id', 'feed_id'):
         self.assertEqual(1, enhanced[owner_key])
Пример #4
0
 def test_image_content(self):
     """Check that an entry linking to a PNG yields one image article."""
     image_entry = self.entry2
     png_response = self.response2
     image_url = 'https://domain.tld/to-img.png'

     png_response.headers['content-type'] = 'image/png'
     # Drop the 'links' key and point both the entry and the canned response
     # at the same image URL.
     image_entry.pop('links')
     image_entry['link'] = image_url
     png_response.url = image_url
     self.head_patch.return_value = png_response

     builder = ClassicArticleBuilder(Feed(id=1, user_id=1), image_entry, {})
     # enhance() is consumed as an iterable here; materialise it to count.
     built = list(builder.enhance())

     self.assertEqual(1, len(built))
     only_article = built[0]
     self.assertEqual(ArticleType.image, only_article['article_type'])
Пример #5
0
 def setUp(self):
     """Create a classic feed with five recorded errors and a 304 mock response."""
     super().setUp()

     feed_fields = {
         'user_id': 1,
         'id': 1,
         'title': 'title',
         'description': 'description',
         'etag': '',
         'error_count': 5,
         'feed_type': FeedType.classic,
         'link': 'link',
     }
     self.feed = Feed(**feed_fields)

     # Canned response: status 304, no headers, no redirect history.
     response_attrs = {
         'text': 'text',
         'headers': {},
         'status_code': 304,
         'history': [],
     }
     self.resp = Mock(**response_attrs)
Пример #6
0
    def test_embedded_content(self):
        """Check the fields of the article built from self.entry2, typed embedded."""
        self.head_patch.return_value = self.response2

        builder = ClassicArticleBuilder(Feed(id=1, user_id=1), self.entry2)
        enhanced = builder.enhance()

        expected_title = "Ceci n'est pas Old Boy - Owlboy (suite) - Benzaie Live"
        self.assertEqual('yt:video:scbrjaqM3Oc', enhanced['entry_id'])
        # The link must be the URL carried by the canned head response.
        self.assertEqual(self.response2.url, enhanced['link'])
        self.assertEqual(expected_title, enhanced['title'])
        self.assertEqual(1, enhanced['user_id'])
        self.assertEqual(ArticleType.embedded, enhanced['article_type'])
        self.assertEqual(1, enhanced['feed_id'])
Пример #7
0
    def test_missing_scheme(self):
        """Check the build succeeds when the first three head calls raise MissingSchema."""
        final_response = self.get_response('http:')
        # Three MissingSchema failures, then the head mock returns the response.
        self.head_patch.side_effect = [MissingSchema] * 3 + [final_response]

        schemeless_entry = self.entry
        # Strip the first five characters (the scheme) for the test.
        schemeless_entry['link'] = schemeless_entry['link'][len('http:'):]

        builder = ClassicArticleBuilder(Feed(id=1, user_id=1),
                                        schemeless_entry)
        enhanced = builder.enhance()

        self.assertEqual(4, self.head_patch.call_count)
        # The last head call must have targeted the response's own URL.
        last_positional_args = self.head_patch.call_args[0]
        self.assertEqual(final_response.url, last_positional_args[0])
        self.assertEqual('http://www.pariszigzag.fr/?p=56413',
                         enhanced['entry_id'])
        self.assertEqual(final_response.url, enhanced['link'])
        self.assertEqual(1, enhanced['user_id'])
        self.assertEqual(1, enhanced['feed_id'])
Пример #8
0
    def test_missing_scheme(self):
        """Check link and title are built after three MissingSchema head failures."""
        final_response = self.get_response('http:')
        # Three MissingSchema failures, then the head mock returns the response.
        self.head_patch.side_effect = [MissingSchema] * 3 + [final_response]
        # The patched jarr_get mock hands back the same response.
        self.jarr_get_patch.return_value = final_response

        schemeless_entry = self.entry
        # Strip the first five characters (the scheme) for the test.
        schemeless_entry['link'] = schemeless_entry['link'][len('http:'):]

        builder = ClassicArticleBuilder(Feed(id=1, user_id=1),
                                        schemeless_entry)
        enhanced = builder.enhance()

        self.assertEqual(4, self.head_patch.call_count)
        # The last head call must have targeted the response's own URL.
        last_positional_args = self.head_patch.call_args[0]
        self.assertEqual(final_response.url, last_positional_args[0])
        self.assertEqual('http://www.pariszigzag.fr/?p=56413',
                         enhanced['entry_id'])
        self.assertEqual(final_response.url, enhanced['link'])
        self.assertEqual('Les plus belles boulangeries de Paris',
                         enhanced['title'])
        self.assertEqual(1, enhanced['user_id'])
        self.assertEqual(1, enhanced['feed_id'])
Пример #9
0
 def test_entry_parsing(self):
     """Check the link and comments KoreusArticleBuilder derives from an entry."""
     koreus_feed = Feed(link='https://feeds.feedburner.com/Koreus-articles')
     # The entry's own link is the comments URL; the builder is expected to
     # expose self.link as the article link instead.
     summary = {'value': CONTENT}
     raw_entry = {'summary_detail': summary, 'link': self.comments}

     builder = KoreusArticleBuilder(koreus_feed, raw_entry, {})

     self.assertEqual(builder.article['link'], self.link)
     self.assertEqual(builder.article['comments'], self.comments)
Пример #10
0
 def setUp(self):
     """Patch requests.head in the abstract builder module and create a feed."""
     abstract_module = 'jarr.crawler.article_builders.abstract.'
     # Keep the patcher on the instance, separately from the mock it produces.
     self._head_patch = patch(abstract_module + 'requests.head')
     mocked_head = self._head_patch.start()
     # By default the patched head call returns None.
     mocked_head.return_value = None
     self.head_patch = mocked_head

     self.feed = Feed(link='https://a.random.url/')