示例#1
0
    def process_feed(self, hdl):
        """
        Callback to be called by HeadDownloader when the HEAD downloading
        has finished.

        'hdl' is the finished head downloader; its 'url', 'error' and 'info'
        attributes are read here.
        The feed in 'self.feeds' whose url equals 'hdl.url' gets its
        'last_check' updated. If that feed has no 'last_modified' value yet,
        or the "last-modified" header differs from the stored one (or is
        missing), a Downloader is run for the feed and its 'entries' and
        'last_modified' fields are refreshed.
        Nothing is done (besides logging) if the HEAD request failed, if no
        feed is registered for 'hdl.url', or if the download times out.
        Always returns None.
        """
        self.update_count_downloading(-1)
        if hdl.error:
            self.logger.info("Couldn't retrieve header '%s'. Details:\
                             \n%s" % (hdl.url, hdl.error))
            return

        # Look up the feed this header belongs to. The 'else' branch runs
        # only when the loop finishes without 'break', so an unknown URL (or
        # an empty feed list) is never applied to an unrelated feed.
        for feed in self.feeds:
            if feed.url == hdl.url:
                break
        else:
            self.logger.info("No feed registered for '%s'." % hdl.url)
            return

        feed.last_check = datetime.now()
        try:
            result_modified = (feed.last_modified != hdl.info["last-modified"])
        except KeyError:
            # last-modified header doesn't exist: treat the feed as changed
            result_modified = True
        # Check last_modified
        if not feed.last_modified or result_modified:
            # Download the Feed
            dl = Downloader(feed.url)
            dl.start()
            # Bounded wait, so a stalled download can't block this callback
            dl.join(TIMEOUT * 2)
            if dl.is_alive():
                self.logger.info("Time out while getting feed '%s'." % (dl.url))
                return
            # NOTE(review): dl.error is not inspected before refreshing the
            # entries -- confirm this is intended.

            # Check and get new entries
            new_entries = self._check_entries(feed)
            # Associate entries with feed and update 'last_modified' field
            feed.entries = new_entries
            try:
                feed.last_modified = hdl.info["last-modified"]
            except KeyError:
                # last-modified header doesn't exist
                pass
示例#2
0
    def check_feed(self, feedurl, check_feed):
        """
        Checks that feed exists and it's well formed.

        'feedurl' is the URL of the feed to download and parse.
        'check_feed' could be True or False; when it is true the downloaded
        content is also validated with FeedManager._valid_feed.
        Returns two variables if everything was OK: feed's title, site's url.
        Returns None if there was an error (download failure, invalid XML or
        a feed that could not be parsed); the reason is logged.
        """
        # Now the feed will be downloaded
        dl = Downloader(feedurl)
        dl.start()
        dl.join()

        # Any error while downloading
        if dl.error:
            self.logger.info("Couldn't retrieve feed '%s'. Details:\
                             \n%s" % (feedurl, dl.error))
            return None

        # Check well-formed XML
        if check_feed:
            try:
                well_formed = FeedManager._valid_feed(dl.response.read())
            finally:
                # Close the response on every path, not only on failure
                dl.response.close()
            if not well_formed:
                self.logger.info("'%s' is not valid XML file!" % feedurl)
                return None

        try:
            # Parse feed
            d = feedparser.parse(feedurl)
            # Clean title: collapse every run of whitespace to one space
            title = re.sub(r"\s+", " ", d.feed.title)
            return title, d.feed.link
        except Exception:
            # Any parsing problem (e.g. a missing title or link) is reported
            # as an invalid feed; KeyboardInterrupt/SystemExit still propagate
            s = "Error while adding '%s'. Probably not a valid XML" \
                " file. Check the URL, please." % feedurl
            self.logger.info(s)
            return None