def __fetch_remote(self, unid, contents, format=None, time=None, digest=None): # {{{
    """
    Like ``__fetch``, but accept given contents, or fetch them from the
    remote ID.

    When `contents` is true it is taken as-is. Otherwise the remote ID is
    derived from `unid` and `format`, and the remote is queried through
    ``util.fetch_uriref``:

    - if the store already has info for `unid`, only an update is requested
      (using the recorded format, time and digest plus the Resource's etag);
      without an update the stored source values are reused;
    - if not, the source is fetched anew using the given `format`, `time`
      and `digest`.

    A new etag, if any, is saved on the Resource record. Afterwards contents,
    charset, digest and (if set) time are run through the ``__assert_*``
    checks.

    Side effects: sets ``self.__srcnfo``; may set ``self.__src`` and
    ``self.__doctree``.

    Raises AssertionError when `time` or `digest` is passed for a UNID that
    already has a record, or when the format does not match the recorded one.
    Nothing is returned in the visible code.
    """
    if contents:
        # XXX: Accept given data as-is
        # NOTE(review): `charset` is only bound on the fetch path below, so
        # the charset check at the end sees an unbound name when contents
        # are supplied -- confirm the intended handling.
        pass
    else:
        # Query server for contents and metadata
        source_id = self.__remote_id(unid, format)
        res = model.Resource.get_or_insert(
            hashlib.md5(source_id).hexdigest(), remote_id=source_id)
        self.__srcnfo = self.store.getinfo(self.alias, unid).next()
        etag = None
        if self.__srcnfo:
            # XXX: or allow local update, see assert format later on
            assert not time and not digest, \
                "Re-using exiting source."
            # Fetch updated only
            remote_update = util.fetch_uriref(
                source_id, self.__srcnfo.format, self.__srcnfo.time,
                res.etag, self.__srcnfo.digest)
            if remote_update:
                logger.info('Remote updated')
                contents, format, time, etag, digest, charset = \
                    remote_update
                # New contents: no doctree is carried over.
                doctree = None
                if format and self.__srcnfo.format:
                    assert format == self.__srcnfo.format, \
                        "Remote format changed."
            else:
                if format and self.__srcnfo.format:
                    assert format == self.__srcnfo.format, \
                        "Indicated format does not match record."
                if not self.__src:
                    self.__src = self.__srcnfo.parent()
                # No remote change: reuse what is on record.
                contents = self.__src.contents
                doctree = self.__src.doctree
                format = self.__srcnfo.format
                time = self.__srcnfo.time
                digest = self.__srcnfo.digest
                charset = self.__srcnfo.charset
            self.__doctree = doctree
        else:
            # Fetch new Source
            # Uses the module's `logger`, like the other log calls here.
            logger.info('Retrieving new remote content for %s. ', unid)
            contents, format, time, etag, digest, charset = \
                util.fetch_uriref(source_id, format, time, None, digest)
        if etag:
            # Remember the etag for the next conditional fetch.
            res.etag = etag
            res.put()
    self.__assert_contents(contents)
    self.__assert_charset(charset)
    self.__assert_digest(digest)
    if time:
        self.__assert_datetime(time)
def stat(self, unid, digest=None): # {{{
    """
    Determine whether UNID is in storage and up to date (flag 1), or needs
    (re)processing (flag 0), and return the result of ``__r_stat`` for that
    flag.

    Passing a `digest` for a known UNID decides the flag by comparing it to
    the recorded digest, without a remote request. Otherwise, if the alias
    has a ``remote_path``, the remote source is checked through
    ``util.fetch_uriref``; a true result there means new or updated remote
    content (flag 0). Without a ``remote_path`` the flag is 0.

    Raises ``exception.NotFound`` when the UNID has no record and the remote
    check yields nothing.

    Side effect: sets ``self.__srcnfo``.
    """
    self._initialize()
    self.__assert_alias()
    self.__srcnfo = self.store.getinfo(self.alias, unid).next()
    if self.__srcnfo and digest:
        # Compare the caller's digest with the recorded one.
        if digest != self.__srcnfo.digest:
            logger.info("%s invalidated by digest", unid)
            v = 0
        else:
            v = 1
    elif hasattr(self.alias, 'remote_path'):
        if self.__srcnfo:
            fmt = self.__srcnfo.format
        else:
            fmt = self.alias.default_source_format
        source_id = self.__remote_id(unid, fmt)
        logger.debug("Checking remote path for %s (%s)", unid, source_id)
        if self.__srcnfo: # Known remote UNID
            res = model.Resource.get_or_insert(
                hashlib.md5(source_id).hexdigest(), remote_id=source_id)
            src = util.fetch_uriref(
                source_id, fmt, self.__srcnfo.time, res.etag,
                self.__srcnfo.digest)
        else: # Check for existence of remote source
            src = util.fetch_uriref(source_id)
        if src:
            # Only the digest of the fetched tuple is used further on: it
            # replaces the `digest` argument that is handed to __r_stat.
            _contents, _format, _time, _etag, digest, _charset = src
            if self.__srcnfo:
                logger.info("Remote update (%s): %s",
                            self.alias.handle, source_id)
            else:
                logger.info("New remote source (%s): %s ",
                            self.alias.handle, source_id)
            v = 0
        elif self.__srcnfo:
            v = 1
        else:
            raise exception.NotFound(unid)
    else:
        v = 0
    return self.__r_stat(unid, digest, v)
def stat(self, unid, digest=None):
    """
    Return True when UNID is in storage and up to date, or False when it
    needs (re)processing.

    Passing a `digest` for a known UNID decides the outcome by comparing it
    to the recorded digest, without a remote request. Otherwise, if the
    alias has a ``remote_path``, the remote source is checked through
    ``fetch_uriref``; a true result there means new or updated remote
    content, so False is returned. Without a ``remote_path`` the result is
    False.

    Raises AssertionError when `unid` does not match ``ALIAS_re``, and
    ``exception.NotFound`` when the UNID has no record and the remote check
    yields nothing.
    """
    assert ALIAS_re.match(unid), "Invalid aliased ID: %s " % unid
    info = self.store.getinfo(self.alias, unid).next()

    if info and digest:
        # Compare the caller's digest with the recorded one.
        if digest != info.digest:
            logger.info("%s invalidated by digest", unid)
            return False
        return True

    if not hasattr(self.alias, 'remote_path'):
        return False

    source_id = self.__remote_id(unid)
    logger.info("Checking remote path for %s (%s)", unid, source_id)
    if info: # Known remote UNID
        res = model.Resource.get_or_insert(
            hashlib.md5(source_id).hexdigest(), remote_id=source_id)
        rst = fetch_uriref(source_id, info.time, res.etag, info.digest)
    else: # Check for existence of remote source
        rst = fetch_uriref(source_id)

    if rst:
        # New or updated remote content: report it as needing processing.
        # The statements that used to follow this return (storing the
        # fetched contents) could never run and have been removed.
        return False
    if info:
        return True
    raise exception.NotFound(unid)