def get_rssfeed_parsed(self, rssfeed_data, site_cookies_dict=None, user_agent=None):
    """Fetch and parse the RSS feed described by rssfeed_data.

    rssfeed_data:      A dictionary containing rss feed data as stored in the
                       YaRSS2 config (must contain "name" and "url").
    site_cookies_dict: A dictionary of cookie values to be used for this rssfeed.
    user_agent:        User-Agent string handed to feedparser.

    Returns None if feedparser raises an exception.
    """
    return_dict = {}
    rssfeeds_dict = {}
    cookie_header = {}
    return_dict["user_agent"] = user_agent
    if site_cookies_dict:
        cookie_header = http.get_cookie_header(site_cookies_dict)
        return_dict["cookie_header"] = cookie_header
    # Log cookie_header directly. The original passed the already-built header
    # back through http.get_cookie_header(), wrapping it a second time.
    self.log.info("Fetching RSS Feed: '%s' with Cookie: '%s' and User-agent: '%s'." %
                  (rssfeed_data["name"], cookie_header, user_agent))
    # Will abort after 10 seconds if server doesn't answer
    try:
        parsed_feed = feedparser.parse(rssfeed_data["url"], request_headers=cookie_header,
                                       agent=user_agent, timeout=10)
    except Exception as e:  # "except Exception, e" is Python-2-only syntax
        self.log.warn("Exception occured in feedparser: " + str(e))
        self.log.warn("Feedparser was called with url: '%s' using cookies: '%s' and User-agent: '%s'"
                      % (rssfeed_data["url"], cookie_header, user_agent))
        self.log.warn("Stacktrace:\n" + common.get_exception_string())
        return None
    # NOTE(review): the visible chunk ends here; the remainder of the function
    # (presumably filling return_dict from parsed_feed) is not shown.
def test_get_link(self):
    """get_link() must prefer the enclosure href, falling back to the plain link."""
    file_url = yarss2.util.common.get_resource(common.testdata_rssfeed_filename, path="tests/")
    from yarss2.lib.feedparser import feedparser
    feed = feedparser.parse(file_url)
    # Grab the first entry of the feed (None if the feed is empty).
    item = next(iter(feed["items"]), None)
    # With an enclosure present, its href is the link.
    self.assertEquals(self.rssfeedhandler.get_link(item), item.enclosures[0]["href"])
    # Strip the links; get_link must now return the regular link instead.
    del item["links"][:]
    self.assertEquals(self.rssfeedhandler.get_link(item), item["link"])
def test_feedparser_ampersant_in_url(self):
    """A bug in feedparser resulted in URL containing & when XML Parser
    was not available. This test disables XML Parser and verifies that
    the URL is correct.
    """
    file_path = common.get_resource("rss_with_ampersand_link.rss", path="tests")
    # This is the link in rss_with_ampersand_link.rss.
    # (Removed the unused local 'url' that duplicated this value.)
    expected = "http://hostname.com/Fetch?hash=2f21d4e59&digest=865178f9bc"
    # Disable XML Parser so the fallback parser is exercised
    feedparser._XML_AVAILABLE = 0
    parsed_feeds = feedparser.parse(file_path)
    for item in parsed_feeds['items']:
        self.assertEquals(expected, item["link"])
        break
def test_feedparser_ampersant_in_url(self):
    """A bug in feedparser resulted in URL containing & when XML Parser
    was not available. This test disables XML Parser and verifies that
    the URL is correct.
    """
    rss_file = common.get_resource("rss_with_ampersand_link.rss", path="tests")
    # The link contained in rss_with_ampersand_link.rss
    expected = "http://hostname.com/Fetch?hash=2f21d4e59&digest=865178f9bc"
    feedparser._XML_AVAILABLE = 0  # Disable XML Parser
    result = feedparser.parse(rss_file)
    # Only the first item is checked (mirrors the original loop-and-break).
    entries = result['items']
    if entries:
        self.assertEquals(expected, entries[0]["link"])
def test_feedparser_dates(self):
    """Verify that feedparser parses the special date format in the test feed."""
    import time  # hoisted out of the loop; was imported on every iteration
    file_url = yarss2.util.common.get_resource("rss_with_special_dates.rss", path="tests/data/")
    from yarss2.lib.feedparser import feedparser
    parsed_feed = feedparser.parse(file_url)
    for item in parsed_feed['items']:
        # Some RSS feeds do not have a proper timestamp
        # dict.has_key() was removed in Python 3; "in" works in both 2 and 3.
        if 'published_parsed' in item:
            published_parsed = item['published_parsed']
            test_val = time.struct_time((2014, 4, 10, 3, 44, 14, 3, 100, -1))
            self.assertEquals(test_val, published_parsed)
        # Only the first item is checked (unused local 'dt' removed).
        break
def test_get_size(self):
    # Verifies get_size() extraction of torrent sizes from the t1.rss test feed.
    # Each expected element is a (size_in_bytes, human-readable-string) pair.
    file_url = yarss2.util.common.get_resource("t1.rss", path="tests/data/feeds/")
    from yarss2.lib.feedparser import feedparser
    parsed_feed = feedparser.parse(file_url)
    size = self.rssfeedhandler.get_size(parsed_feed["items"][0])
    self.assertEquals(len(size), 1)
    self.assertEquals(size[0], (4541927915.52, u'4.23 GB'))
    size = self.rssfeedhandler.get_size(parsed_feed["items"][1])
    self.assertEquals(len(size), 1)
    self.assertEquals(size[0], (402349096.96, u'383.71 MB'))
    size = self.rssfeedhandler.get_size(parsed_feed["items"][2])
    self.assertEquals(len(size), 1)
    # NOTE(review): (857007476) is just the int 857007476, NOT a 1-tuple --
    # unlike the (bytes, string) pairs above. Confirm whether get_size really
    # returns a bare int here, or whether a u'...' element was dropped.
    self.assertEquals(size[0], (857007476))
    size = self.rssfeedhandler.get_size(parsed_feed["items"][3])
    self.assertEquals(len(size), 2)
    # NOTE(review): same concern -- (14353107637) is a bare int.
    self.assertEquals(size[0], (14353107637))
    self.assertEquals(size[1], (13529146982.4, u'12.6 GB'))
def get_rssfeed_parsed(self, rssfeed_data, site_cookies_dict=None):
    """Fetch and parse the RSS feed described by rssfeed_data.

    rssfeed_data:      A dictionary containing rss feed data as stored in the
                       YaRSS2 config (must contain "name" and "url").
    site_cookies_dict: A dictionary of cookie values to be used for this rssfeed.

    Returns None if feedparser raises an exception.
    """
    return_dict = {}
    rssfeeds_dict = {}
    cookie_header = {}
    if site_cookies_dict:
        cookie_header = http.get_cookie_header(site_cookies_dict)
        return_dict["cookie_header"] = cookie_header
    self.log.info("Fetching RSS Feed: '%s' with Cookie: '%s'." %
                  (rssfeed_data["name"], cookie_header))
    # Will abort after 10 seconds if server doesn't answer
    try:
        parsed_feed = feedparser.parse(rssfeed_data["url"], request_headers=cookie_header,
                                       agent=self.agent, timeout=10)
    except Exception as e:  # "except Exception, e" is Python-2-only syntax
        self.log.warn("Exception occured in feedparser:" + str(e))
        self.log.warn("Feedparser was called with url: '%s' and header: '%s'" %
                      (rssfeed_data["url"], cookie_header))
        self.log.warn("Stacktrace:" + common.get_exception_string())
        return None
    # NOTE(review): the visible chunk ends here; the remainder of the function
    # (presumably filling return_dict from parsed_feed) is not shown.