def _add_memento_links(self, url, full_prefix, memento_dt, memento_ts,
                       status_headers, is_timegate, is_proxy, coll=None):
    """Append the Memento related headers to ``status_headers``.

    A ``Link`` header is always added. A ``Memento-Datetime`` header is added
    when a memento datetime is available, and ``Vary: accept-datetime`` is
    added for timegate responses.

    :param str url: The url the links are generated for
    :param str full_prefix: Prefix used to build the memento url when not in proxy mode
    :param str|None memento_dt: The memento datetime; derived from ``memento_ts`` when falsy
    :param str|None memento_ts: The memento timestamp
    :param status_headers: Object whose ``headers`` list receives the new headers
    :param bool is_timegate: If true, a ``Vary: accept-datetime`` header is added
    :param bool is_proxy: If true, ``url`` itself is the memento url and no ``original`` link is emitted
    :param str|None coll: Passed through to ``MementoUtils.make_memento_link``
    :rtype: None
    """
    # derive the datetime from the timestamp only when none was supplied
    if memento_ts and not memento_dt:
        memento_dt = timestamp_to_http_date(memento_ts)

    # memento url + header
    memento_url = None
    if memento_dt:
        status_headers.headers.append(('Memento-Datetime', memento_dt))
        if is_proxy:
            memento_url = url
        else:
            memento_url = full_prefix + memento_ts + self.replay_mod + '/' + url

    timegate_url, timemap_url = self._get_timegate_timemap(url, full_prefix)

    # the 'original' relation is omitted in proxy mode
    link_parts = [] if is_proxy else [MementoUtils.make_link(url, 'original')]
    link_parts += [
        MementoUtils.make_link(timegate_url, 'timegate'),
        MementoUtils.make_link(timemap_url, 'timemap'),
    ]
    if memento_dt:
        link_parts.append(
            MementoUtils.make_memento_link(memento_url, 'memento', memento_dt, coll))

    status_headers.headers.append(('Link', ', '.join(link_parts)))

    if is_timegate:
        status_headers.headers.append(('Vary', 'accept-datetime'))
def make_timemap_memento_link(cls, cdx, datetime=None, rel='memento', end=',\n', memento_format=None):
    """Build the memento link string for one timemap entry.

    :param dict cdx: The cdx entry; ``url``, ``filename``, ``offset``,
        ``length`` and ``source-coll`` are read with ``get`` and
        ``timestamp`` is read by index when ``datetime`` is not given
    :param str|None datetime: The datetime; computed from ``cdx['timestamp']`` when falsy
    :param str rel: The rel type
    :param str end: String appended to the end of the created link string
    :param str|None memento_format: Passed through to ``make_memento_link``
    :return: The link string followed by ``end``
    :rtype: str
    """
    # without a url, fall back to a file:// url built from filename, offset and length
    target_url = cdx.get('url') or 'file://{0}:{1}:{2}'.format(
        cdx.get('filename'), cdx.get('offset'), cdx.get('length'))

    memento_dt = datetime or timestamp_to_http_date(cdx['timestamp'])

    link = cls.make_memento_link(
        target_url, rel, memento_dt, cdx.get('source-coll'), memento_format)
    return link + end
def _assert_memento(self, resp, url, ts, fmod, dt=''):
    """Assert that ``resp`` carries the expected memento headers and links.

    :param resp: Response whose ``headers`` are inspected
    :param url: Url used to build the expected links
    :param ts: Timestamp used in the expected memento link and Content-Location
    :param fmod: Modifier placed after the timestamp in the expected paths
    :param str dt: Expected Memento-Datetime value; computed from ``ts`` when falsy
    """
    expected_dt = dt if dt else timestamp_to_http_date(ts)
    found_links = self.get_links(resp)

    # memento datetime header
    assert MEMENTO_DATETIME in resp.headers
    assert resp.headers[MEMENTO_DATETIME] == expected_dt

    # memento link
    expected_memento = self.make_memento_link(url, ts, expected_dt, fmod)
    assert expected_memento in found_links

    # content location
    expected_path = '/pywb/{1}{0}/{2}'.format(fmod, ts, url)
    content_location = resp.headers['Content-Location']
    assert expected_path in content_location

    # content location part of memento link
    assert content_location in expected_memento

    # timegate link, built with an empty modifier
    assert self.make_timegate_link(url, '') in found_links

    # timemap link
    assert self.make_timemap_link(url) in found_links

    # original
    assert self.make_original_link(url) in found_links
def _assert_memento(self, resp, url, ts, fmod, dt=''):
    """Check the memento headers and link relations present on ``resp``.

    :param resp: Response whose ``headers`` are inspected
    :param url: Url used to build the expected links
    :param ts: Timestamp used in the expected memento link and Content-Location
    :param fmod: Modifier used in the expected memento, Content-Location and timegate values
    :param str dt: Expected Memento-Datetime value; computed from ``ts`` when falsy
    """
    if not dt:
        dt = timestamp_to_http_date(ts)

    response_links = self.get_links(resp)

    # memento datetime header
    assert MEMENTO_DATETIME in resp.headers
    assert resp.headers[MEMENTO_DATETIME] == dt

    # memento link
    wanted_memento = self.make_memento_link(url, ts, dt, fmod)
    assert wanted_memento in response_links

    # content location
    wanted_path = '/pywb/{1}{0}/{2}'.format(fmod, ts, url)
    location = resp.headers['Content-Location']
    assert wanted_path in location

    # content location part of memento link
    assert location in wanted_memento

    # timegate link, built with the same modifier
    wanted_timegate = self.make_timegate_link(url, fmod)
    assert wanted_timegate in response_links

    # timemap link
    wanted_timemap = self.make_timemap_link(url)
    assert wanted_timemap in response_links

    # original
    wanted_original = self.make_original_link(url)
    assert wanted_original in response_links
def make_timemap_memento_link(cls, cdx, datetime=None, rel='memento', end=',\n'):
    """Build the memento link string for one timemap entry.

    :param dict cdx: The cdx entry; ``url``, ``filename``, ``offset``,
        ``length`` and ``source-coll`` are read with ``get`` and
        ``timestamp`` is read by index when ``datetime`` is not given
    :param str|None datetime: The datetime; computed from ``cdx['timestamp']`` when falsy
    :param str rel: The rel type
    :param str end: String appended to the end of the created link string
    :return: The link string followed by ``end``
    :rtype: str
    """
    # without a url, fall back to a file:// url built from filename, offset and length
    target_url = cdx.get('url') or 'file://{0}:{1}:{2}'.format(
        cdx.get('filename'), cdx.get('offset'), cdx.get('length'))

    memento_dt = datetime or timestamp_to_http_date(cdx['timestamp'])

    link = cls.make_memento_link(target_url, rel, memento_dt, cdx.get('source-coll'))
    return link + end
def make_timemap_memento_link(cls, cdx, datetime=None, rel='memento', end=',\n'):
    """Create a memento link string for a timemap from a cdx entry.

    :param dict cdx: The cdx entry the link is generated from
    :param str|None datetime: The datetime; when falsy it is computed from ``cdx['timestamp']``
    :param str rel: The rel type
    :param str end: String appended after the generated link
    :return: The result of ``cls.make_memento_link`` with ``end`` appended
    :rtype: str
    """
    target = cdx.get('url')
    if not target:
        # no url in the cdx: build a file:// url from filename, offset and length
        location = (cdx.get('filename'), cdx.get('offset'), cdx.get('length'))
        target = 'file://{0}:{1}:{2}'.format(*location)

    when = datetime if datetime else timestamp_to_http_date(cdx['timestamp'])

    return cls.make_memento_link(target, rel, when, cdx.get('source-coll')) + end
def get_timegate_links(self, params, timestamp):
    """Fetch the ``Link`` header from the timegate for ``timestamp``.

    The timegate url is produced by ``res_template(self.timegate_url, params)``
    and queried with a HEAD request through ``self.sesh``, sending the
    timestamp as an ``Accept-Datetime`` header.

    :param params: Passed to ``res_template`` and ``self._get_headers``
    :param timestamp: Timestamp converted with ``timestamp_to_http_date``
    :return: The value of the response's ``Link`` header
    :raises NotFoundException: If building the headers or performing the
        request raises, or if the response has no (or an empty) ``Link`` header
    """
    timegate_url = res_template(self.timegate_url, params)
    accept_datetime = timestamp_to_http_date(timestamp)

    try:
        request_headers = self._get_headers(params)
        request_headers['Accept-Datetime'] = accept_datetime
        response = self.sesh.head(timegate_url, headers=request_headers)
        response.raise_for_status()
    except Exception as exc:
        # any failure while contacting the timegate is reported as not found
        self.logger.debug('FAILED: ' + str(exc))
        raise NotFoundException(timegate_url)

    link_header = response.headers.get('Link')
    if link_header:
        return link_header

    raise NotFoundException(timegate_url)
def _add_memento_links(self, url, full_prefix, memento_dt, memento_ts,
                       status_headers, is_timegate, is_proxy, coll=None,
                       pref_applied=None, mod=None, is_memento=True):
    """Append the Memento related headers to the supplied ``status_headers``.

    A ``Link`` header is always added. ``Memento-Datetime``,
    ``Preference-Applied`` and ``Vary`` are added depending on the arguments.

    :param str url: The url the links are generated for
    :param str full_prefix: Prefix used to build the memento url when not in proxy mode
    :param str|None memento_dt: The memento datetime; derived from ``memento_ts`` when falsy
    :param str|None memento_ts: The memento timestamp
    :param status_headers: Object whose ``headers`` list receives the new headers
    :param bool is_timegate: If true, ``accept-datetime`` is included in ``Vary``
    :param bool is_proxy: If true, ``url`` itself is the memento url and no ``original`` link is emitted
    :param str|None coll: Passed through to ``MementoUtils.make_memento_link``
    :param str|None pref_applied: If truthy, sent as ``Preference-Applied`` and ``Prefer`` is included in ``Vary``
    :param str|None mod: Modifier used instead of ``self.replay_mod`` when truthy;
        also passed to ``self._get_timegate_timemap``
    :param bool is_memento: If true (and a datetime is available), the ``Memento-Datetime`` header is added
    :rtype: None
    """
    replay_mod = mod or self.replay_mod

    # derive the datetime from the timestamp only when none was supplied
    if memento_ts and not memento_dt:
        memento_dt = timestamp_to_http_date(memento_ts)

    # memento url + header
    memento_url = None
    if memento_dt:
        if is_memento:
            status_headers.headers.append(('Memento-Datetime', memento_dt))

        if is_proxy:
            memento_url = url
        else:
            memento_url = full_prefix + memento_ts + replay_mod + '/' + url

    timegate_url, timemap_url = self._get_timegate_timemap(url, full_prefix, mod)

    # the 'original' relation is omitted in proxy mode
    link_parts = [] if is_proxy else [MementoUtils.make_link(url, 'original')]
    link_parts += [
        MementoUtils.make_link(timegate_url, 'timegate'),
        MementoUtils.make_link(timemap_url, 'timemap'),
    ]
    if memento_dt:
        link_parts.append(
            MementoUtils.make_memento_link(memento_url, 'memento', memento_dt, coll))

    status_headers.headers.append(('Link', ', '.join(link_parts)))

    # collect the Vary tokens; Preference-Applied is emitted before Vary
    vary_tokens = []
    if is_timegate:
        vary_tokens.append('accept-datetime')
    if pref_applied:
        vary_tokens.append('Prefer')
        status_headers.headers.append(('Preference-Applied', pref_applied))

    if vary_tokens:
        status_headers.headers.append(('Vary', ', '.join(vary_tokens)))