def get_version(self, timestamp_or_version):
    """
    Return the content of the file in one specific version.

    The argument is interpreted as a version number when it is lower than
    10000 and as a unix timestamp otherwise. Loaded content is kept in
    C{self.content}, keyed by the content ID taken from the header and,
    for lookups by version, by the requested version number as well.

    @param timestamp_or_version: version number or unix timestamp of the
                                 version to retrieve
    @type timestamp_or_version: int or float
    @return: content of the file in the requested version / time
    @rtype: Content
    @raises TypeError: if the argument is neither int nor float
    @raises DocumentHistoryNotAvaliable: if no matching header exists
    """
    if not isinstance(timestamp_or_version, (int, float)):
        raise TypeError("timestamp_or_version must be float or integer")

    by_version = timestamp_or_version < 10000

    if by_version:
        # fast path: this version number has been served before
        if timestamp_or_version in self.content:
            return self.content[timestamp_or_version]
        header = self._headers.get_by_version(
            self.filename, timestamp_or_version, last_available=True)
        if header is None:
            raise DocumentHistoryNotAvaliable(
                "Version %s of document %s is"
                " not available." % (timestamp_or_version, self.filename))
    else:
        header = self._headers.get_by_time(
            self.filename, timestamp_or_version, last_available=True)
        if header is None:
            when = HTTPDateTime().from_timestamp(timestamp_or_version)
            raise DocumentHistoryNotAvaliable(
                "Version of document %s in time"
                " %s is not available."
                % (self.filename, when.to_httpheader_format()))

    # the same stored content may already be cached under its content ID
    stored_id = header['content']
    if stored_id in self.content:
        return self.content[stored_id]

    # cache miss: fetch from the file storage and remember the result
    loaded = Content(self._filesystem.get(stored_id))
    self.content[stored_id] = loaded
    if by_version:
        self.content[timestamp_or_version] = loaded
    return loaded
def last_checked(self, url):
    """
    Get the time of the most recent check of 'url'.

    If None is returned, the resource was NEVER checked. That should not
    happen often, because the resource is always checked at the beginning,
    in the constructor of MonitoredResource. WARNING: it is still possible
    that the header was not saved, for example because a timeout expired.

    @param url: url of the checked resource
    @returns: time of the last check, or None if there is no record
    """
    newest = self.get_by_time(url, time.time(), last_available=False)
    if newest is not None:
        return HTTPDateTime().from_timestamp(newest['timestamp'])
    return None
def last_checked(self, url):
    """
    Get time when 'url' was last checked.

    WARNING! A return value of None means that 'url' was never checked.
    That should not normally happen, as 'url' is always checked in the
    constructor of MonitoredResource, but the header may not have been
    saved because of an error, e.g. a timeout.

    @param url: url of resource checked
    @type url: string
    @returns: time of last check, or None when no check is recorded
    @rtype: HTTPDateTime
    """
    # ask for the header valid right now
    now = time.time()
    record = self.get_by_time(url, now, last_available=False)
    if record is None:
        # no header stored for this url (see the warning above)
        return None
    checked_at = record['timestamp']
    return HTTPDateTime().from_timestamp(checked_at)
def get_version(self, timestamp_or_version):
    """
    Get content of the file in specific version.

    Version can be specified by version number (convenience atop the GridFS
    API by MongoDB) or unix timestamp. Values lower than 10000 are treated
    as version numbers, everything else as a unix timestamp.

    Loaded content is cached in C{self.content}, keyed by the 'timestamp'
    field of the matching header and, for lookups by version, also by the
    requested version number.

    @param timestamp_or_version: version or timestamp of version which we
                                 want to retrieve.
    @type timestamp_or_version: int or float
    @return: content of the file in specified time/version
    @rtype: Content
    @raises TypeError: if timestamp_or_version is neither int nor float
    @raises DocumentHistoryNotAvaliable: if no such version in database
    """
    if not isinstance(timestamp_or_version, (int, float)):
        raise TypeError("timestamp_or_version must be float or integer")

    # --- lookup by version number ---
    if timestamp_or_version < 10000:
        # try to get content from cache by version
        if timestamp_or_version in self.content:
            return self.content[timestamp_or_version]

        h = self._headers.get_by_version(self.filename, timestamp_or_version,
                                         last_available=True)
        if h is None:
            raise DocumentHistoryNotAvaliable(
                "Version %s of document %s is"
                " not available." % (timestamp_or_version, self.filename))

        # try to get content from cache by the header's timestamp
        cache_key = h['timestamp']
        if cache_key in self.content:
            return self.content[cache_key]

        # otherwise load content from db and cache it under both keys
        g = self._filesystem.get_version(filename=self.filename,
                                         version=timestamp_or_version)
        r = self.content[cache_key] = self.content[timestamp_or_version] = Content(g)
        return r

    # --- lookup by timestamp ---
    h = self._headers.get_by_time(self.filename, timestamp_or_version,
                                  last_available=True)
    if h is None:
        t = HTTPDateTime().from_timestamp(timestamp_or_version)
        raise DocumentHistoryNotAvaliable(
            "h is none\nVersion of document %s in time"
            " %s is not available." % (self.filename, t.to_httpheader_format()))

    # try to get content from cache by the header's timestamp
    cache_key = h['timestamp']
    if cache_key in self.content:
        return self.content[cache_key]

    # Otherwise load content from db. The stored versions are probed one by
    # one starting at version=-1 and moving to -2, -3, ... until one is found
    # whose (shifted) upload date lies before the requested timestamp.
    # TODO: the right query might do the same with a single line of code.
    # NOTE(review): time_shift is meant as a time zone correction; it
    # presumably relies on what a default-constructed HTTPDateTime holds --
    # confirm.
    time_shift = -1 * HTTPDateTime().to_timestamp()
    i = -1
    while True:
        try:
            g = self._filesystem.get_version(filename=self.filename, version=i)  # GridOut
            upload_date = HTTPDateTime().from_gridfs_upload_date(g.upload_date).to_timestamp()
            if (upload_date + time_shift) < timestamp_or_version:  # correction for time zone!!!
                r = self.content[cache_key] = Content(g)  # cache it
                return r
        except Exception:
            # Expected here when the probed version does not exist (the
            # history is exhausted). Exception rather than a bare except, so
            # KeyboardInterrupt / SystemExit are not turned into a
            # "not available" error.
            # TODO: narrow to the specific storage exception.
            raise DocumentHistoryNotAvaliable(
                "Version of document %s in time"
                " %s is not available."
                % (self.filename,
                   HTTPDateTime().from_timestamp(timestamp_or_version).to_httpheader_format()))
        i -= 1