Пример #1
0
    def update(self):
        """Download the feed to refresh the information.

        This does the actual work of pulling down the feed and if it changes
        updates the cached information about the feed and entries within it.
        """
        info = feedparser.parse(self.url,
                                etag=self.url_etag, modified=self.url_modified,
                                agent=self._planet.user_agent)
        if info.has_key("status"):
           self.url_status = str(info.status)
        elif info.has_key("entries") and len(info.entries)>0:
           self.url_status = str(200)
        elif info.bozo and info.bozo_exception.__class__.__name__=='Timeout':
           self.url_status = str(408)
        else:
           from pprint import pprint
           pprint({
               'error': 'Feedparser failed for some reason',
               'info': info
           })
           self.url_status = str(500)

        if self.url_status == '301' and \
           (info.has_key("entries") and len(info.entries)>0):
            log.warning("Feed has moved from <%s> to <%s>", self.url, info.url)
            try:
                os.link(cache.filename(self._planet.cache_directory, self.url),
                        cache.filename(self._planet.cache_directory, info.url))
            except:
                pass
            self.url = info.url
        elif self.url_status == '304':
            log.info("Feed %s unchanged", self.feed_information())
            return
        elif self.url_status == '410':
            log.info("Feed %s gone", self.feed_information())
            self.cache_write()
            return
        elif self.url_status == '408':
            log.warning("Feed %s timed out", self.feed_information())
            return
        elif int(self.url_status) >= 400:
            log.error("Error %s while updating feed %s",
                      self.url_status, self.feed_information())
            return
        else:
            log.info("Updating feed %s", self.feed_information())

        self.url_etag = info.has_key("etag") and info.etag or None
        self.url_modified = info.has_key("modified") and info.modified or None
        if self.url_etag is not None:
            log.debug("E-Tag: %s", self.url_etag)
        if self.url_modified is not None:
            log.debug("Last Modified: %s",
                      time.strftime(TIMEFMT_ISO, self.url_modified))

        self.update_info(info.feed)
        self.update_entries(info.entries)
        self.cache_write()
Пример #2
0
    def update(self):
        """Download the feed to refresh the information.

        This does the actual work of pulling down the feed and if it changes
        updates the cached information about the feed and entries within it.
        """
        info = feedparser.parse(self.url,
                                etag=self.url_etag, modified=self.url_modified,
                                agent=self._planet.user_agent)
        if info.has_key("status"):
           self.url_status = str(info.status)
        elif info.has_key("entries") and len(info.entries)>0:
           self.url_status = str(200)
        elif info.bozo and info.bozo_exception.__class__.__name__=='Timeout':
           self.url_status = str(408)
        else:
           self.url_status = str(500)

        if self.url_status == '301' and \
           (info.has_key("entries") and len(info.entries)>0):
            log.warning("Feed has moved from <%s> to <%s>", self.url, info.url)
            try:
                os.link(cache.filename(self._planet.cache_directory, self.url),
                        cache.filename(self._planet.cache_directory, info.url))
            except:
                pass
            self.url = info.url
        elif self.url_status == '304':
            log.info("Feed %s unchanged", self.feed_information())
            return
        elif self.url_status == '410':
            log.info("Feed %s gone", self.feed_information())
            self.cache_write()
            return
        elif self.url_status == '408':
            log.warning("Feed %s timed out", self.feed_information())
            return
        elif int(self.url_status) >= 400:
            log.error("Error %s while updating feed %s",
                      self.url_status, self.feed_information())
            return
        else:
            log.info("Updating feed %s", self.feed_information())

        self.url_etag = info.has_key("etag") and info.etag or None
        self.url_modified = info.has_key("modified") and info.modified or None
        if self.url_etag is not None:
            log.debug("E-Tag: %s", self.url_etag)
        if self.url_modified is not None:
            #log.debug("Last Modified: %s",time.strftime(TIMEFMT_ISO, self.url_modified))

            log.debug("Last Modified: %s", self.url_modified)

        self.update_info(info.feed)
        self.update_entries(info.entries)
        self.cache_write()
Пример #3
0
    def __init__(self, planet, url):
        if not os.path.isdir(planet.cache_directory):
            os.makedirs(planet.cache_directory)
        cache_filename = cache.filename(planet.cache_directory, url)
        cache_file = dbhash.open(cache_filename, "c", 0666)

        cache.CachedInfo.__init__(self, cache_file, url, root=1)

        self._items = {}
        self._planet = planet
        self._expired = []
        self.url = url
        # retain the original URL for error reporting
        self.configured_url = url
        self.url_etag = None
        self.url_status = None
        self.url_modified = None
        self.name = None
        self.updated = None
        self.last_updated = None
        self.filter = None
        self.exclude = None
        self.next_order = "0"
        self.relevant_tags = None
        self.cache_read()
        self.cache_read_entries()

        if planet.config.has_section(url):
            for option in planet.config.options(url):
                value = planet.config.get(url, option)
                self.set_as_string(option, value, cached=0)
Пример #4
0
    def __init__(self, planet, url):
        if not os.path.isdir(planet.cache_directory):
            os.makedirs(planet.cache_directory)
        cache_filename = cache.filename(planet.cache_directory, url)
        cache_file = dbhash.open(cache_filename, "c", 0666)

        cache.CachedInfo.__init__(self, cache_file, url, root=1)

        self._items = {}
        self._planet = planet
        self._expired = []
        self.url = url
        # retain the original URL for error reporting
        self.configured_url = url
        self.url_etag = None
        self.url_status = None
        self.url_modified = None
        self.name = None
        self.updated = None
        self.last_updated = None
        self.filter = None
        self.exclude = None
        self.next_order = "0"
        self.cache_read()
        self.cache_read_entries()

        if planet.config.has_section(url):
            for option in planet.config.options(url):
                value = planet.config.get(url, option)
                self.set_as_string(option, value, cached=0)
Пример #5
0
    def update(self):
        """Download the feed to refresh the information.

        This does the actual work of pulling down the feed and if it changes
        updates the cached information about the feed and entries within it.
        """
        info = feedparser.parse(self.url,
                                etag=self.url_etag,
                                modified=self.url_modified,
                                agent=self._planet.user_agent)
        if not info.has_key("status"):
            log.info("Updating feed <%s>", self.url)
        elif info.status == 301 or info.status == 302:
            log.warning("Feed has moved from <%s> to <%s>", self.url, info.url)
            os.link(cache.filename(self._planet.cache_directory, self.url),
                    cache.filename(self._planet.cache_directory, info.url))
            self.url = info.url
        elif info.status == 304:
            log.info("Feed <%s> unchanged", self.url)
            return
        elif info.status >= 400:
            log.error("Error %d while updating feed <%s>", info.status,
                      self.url)
            return
        else:
            log.info("Updating feed <%s>", self.url)

        self.url_etag = info.has_key("etag") and info.etag or None
        self.url_modified = info.has_key("modified") and info.modified or None
        if self.url_etag is not None:
            log.debug("E-Tag: %s", self.url_etag)
        if self.url_modified is not None:
            log.debug("Last Modified: %s",
                      time.strftime(TIMEFMT_ISO, self.url_modified))

        self.update_info(info.feed)
        self.update_entries(info.entries)
        self.cache_write()
Пример #6
0
    def __init__(self, planet, url):
        if not os.path.isdir(planet.cache_directory):
            os.makedirs(planet.cache_directory)
        cache_filename = cache.filename(planet.cache_directory, url)
        cache_file = dbhash.open(cache_filename, "c", 0666)

        cache.CachedInfo.__init__(self, cache_file, url, root=1)

        self._items = {}
        self._planet = planet
        self._expired = []
        self.url = url
        self.url_etag = None
        self.url_modified = None
        self.name = None
        self.updated = None
        self.last_updated = None
        self.next_order = "0"
        self.cache_read()
        self.cache_read_entries()
Пример #7
0
 def cache_basename(self):
     return cache.filename("", self._id)
Пример #8
0
 def cache_basename(self):
     return cache.filename('', self._id)