Exemplo n.º 1
0
    def test_serach_links_join_url(self):
        """Feed links from the chooseopera.html fixture are absolute URLs.

        The page is searched with the blog URL as its base, and the RSS and
        Atom links must come back, in that order, as full my.opera.com URLs.
        """
        base_url = "http://my.opera.com/chooseopera/blog/"
        expected = [
            'http://my.opera.com/chooseopera/xml/rss/blog/',
            'http://my.opera.com/chooseopera/xml/atom/blog/'
        ]

        html = get_data_file("chooseopera.html")
        found = feedutil.search_links_url(base_url, html)

        self.assertListEqual(found, expected)
Exemplo n.º 2
0
 def test_search_links_order(self):
     """Links are found when ``type`` and ``href`` come in a different order.

     The fixture page is encoded in UTF-8 (Russian).
     """
     expected = [
         'http://www.mk.ru/rss/news/index.xml',
         'http://www.mk.ru/rss/mk/index.xml'
     ]
     html = get_data_file("mk.ru.html")
     found = feedutil.search_links_url("http://www.mk.ru/", html)
     self.assertListEqual(found, expected)
Exemplo n.º 3
0
 def check_homepage_for_feeds(self, dryrun=False):
     """Search this object's homepages for feed links and register them.

     ``self.homepage`` and ``self.rss_homepage`` are each passed to
     ``search_links_url`` with ``depth=3``; falsy values are skipped.
     Every link found is printed.  Unless ``dryrun`` is true,
     ``OrganizationFeed.objects.get_or_create`` is called for each link,
     passing ``is_active=False`` as a default (presumably a Django manager,
     in which case the default only applies to newly created rows).

     ``self.save()`` is always called at the end, dry run or not.
     NOTE(review): nothing on ``self`` is modified in this method, so the
     save may be unnecessary -- confirm before removing.

     Args:
         dryrun: When true, only print the links; do not touch
             ``OrganizationFeed``.
     """
     for homepage in (self.homepage, self.rss_homepage):
         if not homepage:
             continue
         for link in search_links_url(url=homepage, depth=3):
             print('link:', link)
             if dryrun:
                 continue
             # Called for its side effect only; the returned value was
             # previously bound to an unused local.
             OrganizationFeed.objects.get_or_create(
                 organization=self,
                 feed_url=link,
                 defaults=dict(is_active=False))
     self.save()
Exemplo n.º 4
0
 def test_search_links_mixed(self):
     """A page mixing Atom and RSS link sentences yields every feed URL."""
     expected = [
         'http://www.elcomercio.pe/feed/portada.xml',
         'http://www.elcomercio.pe/feed/portada/politica.xml',
         'http://www.elcomercio.pe/feed/portada/lima.xml',
         'http://www.elcomercio.pe/feed/portada/peru.xml',
         'http://www.elcomercio.pe/feed/portada/mundo.xml',
         'http://www.elcomercio.pe/feed/portada/economia.xml',
         'http://www.elcomercio.pe/feed/portada/tecnologia.xml',
         'http://www.elcomercio.pe/feed/portada/deportes.xml',
         'http://www.elcomercio.pe/feed/portada/espectaculos.xml',
         'http://www.elcomercio.pe/feed/portada/ecologia.xml',
         'http://www.elcomercio.pe/feed/portada/opinion.xml'
     ]
     base_url = "http://www.elcomercio.pe/"
     html = get_data_file("elcomercio.pe.html")
     found = feedutil.search_links_url(base_url, html)
     self.assertListEqual(found, expected)
Exemplo n.º 5
0
 def test_search_alternate_links_double_function(self):
     """The smp.no fixture yields its four RSS service links, in order.

     An earlier variant of this check ran ``feedparser.parse`` on the page
     and passed the result to ``feedutil.search_alternate_links``, expecting
     the same four links.  It was disabled because feedparser no longer
     supports parsing raw HTML documents.
     """
     expected = [
         u'http://www.smp.no/?service=rss',
         u'http://www.smp.no/?service=rss&t=0',
         u'http://www.smp.no/nyheter/?service=rss',
         u'http://www.smp.no/kultur/?service=rss'
     ]
     html = get_data_file("smp.no.html")
     found = feedutil.search_links_url("http://www.smp.no/", html)
     self.assertListEqual(found, expected)
Exemplo n.º 6
0
 def test_search_link_malformed(self):
     """The feed link is found in a page SGML and other parsers fail on."""
     expected = ['http://feeds.feedburner.com/foxnews/latest']
     html = get_data_file("foxnews.com.html")
     found = feedutil.search_links_url("http://www.foxnews.com/", html)
     self.assertListEqual(found, expected)