def process_feed(self, hdl):
    """Callback invoked by HeadDownloader when the HEAD download finishes.

    Decrements the in-flight download counter, locates the feed matching
    ``hdl.url``, and — if the server's ``Last-Modified`` header differs
    from the stored one (or none was stored) — downloads the feed body,
    refreshes its entries and records the new ``last-modified`` value.

    :param hdl: finished HeadDownloader; provides ``url``, ``error`` and
                ``info`` (response headers dict).
    """
    self.update_count_downloading(-1)

    # HEAD request failed: log and bail out.
    if hdl.error:
        self.logger.info("Couldn't retrieve header '%s'. Details:\n%s"
                         % (hdl.url, hdl.error))
        return

    # Find the feed this HEAD response belongs to. Without the `else`
    # guard an unmatched URL would silently update the *last* feed in
    # the list (or raise NameError on an empty list).
    for feed in self.feeds:
        if feed.url == hdl.url:
            break
    else:
        self.logger.info("No registered feed matches URL '%s'." % hdl.url)
        return

    feed.last_check = datetime.now()

    # The header may be absent; treat that as "assume modified".
    try:
        result_modified = (feed.last_modified != hdl.info["last-modified"])
    except KeyError:
        result_modified = True

    # Re-download only when we have no stored timestamp or it changed.
    if not feed.last_modified or result_modified:
        dl = Downloader(feed.url)
        dl.start()
        dl.join(TIMEOUT * 2)
        if dl.is_alive():
            # Thread still running after the grace period: give up.
            self.logger.info("Time out while getting feed '%s'." % (dl.url))
            return

        # Refresh entries and remember the server's timestamp.
        new_entries = self._check_entries(feed)
        feed.entries = new_entries
        try:
            feed.last_modified = hdl.info["last-modified"]
        except KeyError:
            # Server sent no last-modified header; keep the old value.
            pass
def check_feed(self, feedurl, check_feed):
    """Check that a feed exists and is well formed.

    Downloads ``feedurl``; optionally validates the raw body as XML
    before parsing it with feedparser.

    :param feedurl: URL of the feed to check.
    :param check_feed: bool — when True, validate the downloaded body
                       with ``FeedManager._valid_feed`` before parsing.
    :returns: ``(title, site_link)`` on success, ``None`` on any error.
    """
    # Download the feed body.
    # NOTE(review): unlike process_feed, this join() has no timeout —
    # a hung download blocks forever. Consider dl.join(TIMEOUT * 2).
    dl = Downloader(feedurl)
    dl.start()
    dl.join()

    # Any error while downloading.
    if dl.error:
        self.logger.info("Couldn't retrieve feed '%s'. Details:\n%s"
                         % (feedurl, dl.error))
        return None

    # Optionally check that the body is well-formed XML.
    if check_feed:
        body = dl.response.read()
        # Close the response regardless of the validation outcome
        # (the original leaked it on the success path).
        dl.response.close()
        if not FeedManager._valid_feed(body):
            self.logger.info("'%s' is not valid XML file!" % feedurl)
            return None

    try:
        # Parse the feed and normalise whitespace in its title.
        d = feedparser.parse(feedurl)
        title = re.sub(r"\s+", " ", d.feed.title)
        return title, d.feed.link
    except Exception:
        # feedparser raised or the expected attributes are missing —
        # most likely the URL doesn't point at a valid feed.
        s = "Error while adding '%s'. Probably not a valid XML" \
            " file. Check the URL, please." % feedurl
        self.logger.info(s)
        return None