def showMementosForURIRs(urir):
    """List the captures indexed for a URI-R, or redirect to the only one.

    The URI-R is first passed through getCompleteURI; if
    ipwbConfig.isLocalHosty reports it as local, only the portion after the
    fourth '/' is kept. The replay index is then queried for CDXJ lines
    matching the URI-R.

    Returns:
        A 302 redirect to '/<datetime>/<uri>' when exactly one CDXJ line
        matches; otherwise a Response whose body is an HTML list of the
        matching captures (an empty body when nothing matches).
    """
    urir = getCompleteURI(urir)
    if ipwbConfig.isLocalHosty(urir):
        # Drop everything up to and including the fourth '/'
        urir = urir.split('/', 4)[4]

    # The SURT form is not used below; the call itself is retained
    s = surt.surt(urir, path_strip_trailing_slash_unless_empty=False)

    indexPath = ipwbConfig.getIPWBReplayIndexPath()
    print('Getting CDXJ Lines with the URI-R {0} from {1}'.format(
        urir, indexPath))
    matches = getCDXJLinesWithURIR(urir, indexPath)

    # A single capture needs no listing: send the client straight to it
    if len(matches) == 1:
        only = matches[0].split(' ', 2)
        target = '/{1}/{0}'.format(unsurt(only[0]), only[1])
        return redirect(target, code=302)

    if not matches:
        return Response('')

    opening = '<p>{0} capture(s) available:</p><ul>'.format(len(matches))
    items = []
    for entry in matches:
        fields = entry.split(' ', 2)
        dt14 = fields[1]
        dtrfc1123 = ipwbConfig.datetimeToRFC1123(fields[1])
        items.append('<li><a href="/{1}/{0}">{0} at {2}</a></li>'.format(
            unsurt(fields[0]), dt14, dtrfc1123))

    return Response(opening + ''.join(items) + '</ul>')
def generateTimeMapFromCDXJLines(cdxjLines, original, tmself):
    """Build a link-format TimeMap string from CDXJ index lines.

    Args:
        cdxjLines: iterable of CDXJ lines, each of the form
            '<surt> <datetime> <json>' (split on the first two spaces).
        original: SURT-form original URI; passed through unsurt().
        tmself: URI of this TimeMap; must contain 'timemap/', and the text
            before that marker prefixes every memento URI.

    Returns:
        The TimeMap as a string whose entries are separated by ',\\n', with
        no trailing separator.

    Raises:
        ValueError: if tmself lacks 'timemap/' or a line has fewer than
            three space-separated fields.
    """
    entries = [
        '<{0}>; rel="original"'.format(unsurt(original)),
        '<{0}>; rel="self"; type="application/link-format"'.format(tmself),
    ]

    # Everything ahead of 'timemap/' is reused as the memento URI prefix
    mementoPrefix = tmself[0:tmself.index('timemap/')]

    for cdxjLine in cdxjLines:
        surtURI, dt14, _unusedJSON = cdxjLine.split(' ', 2)
        dtRFC1123 = ipwbConfig.datetimeToRFC1123(dt14)
        entries.append(
            '<{0}{1}/{2}>; rel="memento"; datetime="{3}"'.format(
                mementoPrefix, dt14, unsurt(surtURI), dtRFC1123))

    # Joining yields the same text as appending ',\n' after every entry
    # and then trimming the final separator
    return ',\n'.join(entries)
def generateCDXJTimeMapFromCDXJLines(cdxjLines, original, tmself):
    """Build a CDXJ-format TimeMap string from CDXJ index lines.

    The output starts with '!context', '!id', '!keys' and two '!meta'
    directive lines, followed by one '<datetime> {json}' record per
    memento. Records are newline separated with no trailing newline.

    Args:
        cdxjLines: sequence of CDXJ lines, each of the form
            '<surt> <datetime> <json>' (split on the first two spaces).
        original: SURT-form original URI; passed through unsurt() and
            prefixed with 'http://'.
        tmself: URI of this TimeMap. When app.proxy is set it is replaced
            by the proxied form returned from getProxiedURIT. It must
            contain 'timemap/'.

    Returns:
        The CDXJ TimeMap as a single string.

    Raises:
        ValueError: if tmself lacks 'timemap/' or a line has fewer than
            three space-separated fields.
    """
    tmurl = getProxiedURIT(tmself)
    if app.proxy is not None:
        tmself = urlunsplit(tmurl)

    # unsurted URI will never have a scheme, add one
    originalURI = 'http://{0}'.format(unsurt(original))
    linkTMURI = tmself.replace('/timemap/cdxj/', '/timemap/link/')

    records = [
        '!context ["http://tools.ietf.org/html/rfc7089"]',
        '!id {{"uri": "{0}"}}'.format(tmself),
        '!keys ["memento_datetime_YYYYMMDDhhmmss"]',
        '!meta {{"original_uri": "{0}"}}'.format(originalURI),
        ('!meta {{"timemap_uri": {{'
         '"link_format": "{0}", '
         '"cdxj_format": "{1}"'
         '}}}}').format(linkTMURI, tmself),
    ]

    # Everything ahead of 'timemap/' is reused as the memento URI prefix
    hostAndPort = tmself[0:tmself.index('timemap/')]

    for i, line in enumerate(cdxjLines):
        surtURI, dt14, _unusedJSON = line.split(' ', 2)
        dtRFC1123 = ipwbConfig.datetimeToRFC1123(dt14)

        # Mark the boundary mementos; a lone memento is both first and last
        total = len(cdxjLines)
        if total == 1:
            firstLastStr = 'first last '
        elif i == 0:
            firstLastStr = 'first '
        elif i == total - 1:
            firstLastStr = 'last '
        else:
            firstLastStr = ''

        # The "datetime" member uses ':' so that each record is valid JSON
        # (it was previously emitted as "datetime"="...")
        records.append(('{1} {{'
                        '"uri": "{0}{1}/{2}", '
                        '"rel": "{3}memento", '
                        '"datetime": "{4}"}}').format(
            hostAndPort, dt14, unsurt(surtURI), firstLastStr, dtRFC1123))

    # Newline-separated, without a trailing LF
    return '\n'.join(records)
def generateLinkTimeMapFromCDXJLines(cdxjLines, original, tmself):
    """Build a link-format TimeMap string from CDXJ index lines.

    The output lists the original URI, this TimeMap (rel="self timemap"),
    the CDXJ-format TimeMap alternative, and then one entry per memento.
    Entries are separated by ',\\n' with no trailing separator.

    Args:
        cdxjLines: sequence of CDXJ lines, each of the form
            '<surt> <datetime> <json>' (split on the first two spaces).
        original: SURT-form original URI; passed through unsurt() and
            prefixed with 'http://'.
        tmself: URI of this TimeMap. When app.proxy is set it is replaced
            by the proxied form returned from getProxiedURIT.

    Returns:
        The link-format TimeMap as a single string.
    """
    proxiedParts = getProxiedURIT(tmself)
    if app.proxy is not None:
        tmself = urlunsplit(proxiedParts)

    # Blank the path component, then rebuild to get the memento URI prefix
    proxiedParts[2] = ''
    mementoPrefix = urlunsplit(proxiedParts) + '/'

    # unsurted URI will never have a scheme, add one
    originalURI = 'http://{0}'.format(unsurt(original))
    cdxjTMURI = tmself.replace('/timemap/link/', '/timemap/cdxj/')

    links = [
        '<{0}>; rel="original"'.format(originalURI),
        '<{0}>; rel="self timemap"; '
        'type="application/link-format"'.format(tmself),
        '<{0}>; rel="timemap"; '
        'type="application/cdxj+ors"'.format(cdxjTMURI),
    ]

    for position, cdxjLine in enumerate(cdxjLines):
        surtURI, dt14, _unusedJSON = cdxjLine.split(' ', 2)
        dtRFC1123 = ipwbConfig.datetimeToRFC1123(dt14)

        # Mark the boundary mementos; a lone memento is both first and last
        total = len(cdxjLines)
        if total == 1:
            relPrefix = 'first last '
        elif position == 0:
            relPrefix = 'first '
        elif position == total - 1:
            relPrefix = 'last '
        else:
            relPrefix = ''

        links.append(
            '<{0}{1}/{2}>; rel="{3}memento"; datetime="{4}"'.format(
                mementoPrefix, dt14, unsurt(surtURI), relPrefix, dtRFC1123))

    # Joining yields the same text as appending ',\n' after every entry
    # and then trimming the final separator
    return ',\n'.join(links)
def show_uri(path, datetime=None):
    """Replay the archived capture of a URI from IPFS.

    Flow, as implemented below:
      1. An empty path serves the web UI index; the local script names are
         served through getServiceWorker.
      2. If the IPFS daemon is not alive, a 503 response is returned.
      3. The SURT form of the URI (plus the datetime, when given) is looked
         up in the replay index; no match yields the "no mementos" page.
      4. Header and payload are fetched from IPFS using the last two
         segments of the record's 'locator', guarded by a 10 second
         SIGALRM timer.
      5. If the record names an encryption method, both are decrypted.
      6. Archived headers are copied onto the response (prefixed with
         'X-Archive-Orig-' unless Content-Type/Content-Encoding), the
         web UI script is injected before '</html>', and Memento-Datetime
         and Link headers are added.

    Args:
        path: requested URI-R (or a local script name / empty string).
        datetime: optional datetime string appended to the index search
            key; replaced by the matched record's datetime once found.

    Returns:
        A Response, or the tuple ('', 404) when the IPFS fetch is
        interrupted by the alarm.
    """
    global IPFS_API

    if len(path) == 0:
        return showWebUI('index.html')

    # TODO: Use a better approach to serve static contents
    # instead of using the same logic for every JS file as the SW script
    localScripts = [
        'serviceWorker.js', 'reconstructive.js', 'reconstructive-banner.js'
    ]
    if path in localScripts:
        return getServiceWorker(path)

    daemonAddress = '{0}:{1}'.format(IPFSAPI_IP, IPFSAPI_PORT)
    if not ipwbConfig.isDaemonAlive(daemonAddress):
        errStr = ('IPFS daemon not running. '
                  'Start it using $ ipfs daemon on the command-line '
                  ' or from the <a href="/">'
                  'IPWB replay homepage</a>.')
        return Response(errStr, status=503)

    path = getCompleteURI(path)
    cdxjLine = ''
    try:
        surtedURI = surt.surt(path,
                              path_strip_trailing_slash_unless_empty=False)
        indexPath = ipwbConfig.getIPWBReplayIndexPath()

        searchString = surtedURI
        if datetime is not None:
            searchString = surtedURI + ' ' + datetime

        cdxjLine = getCDXJLine_binarySearch(searchString, indexPath)
        print('CDXJ Line: {0}'.format(cdxjLine))
    except Exception:
        print(sys.exc_info()[0])
        respString = ('{0} not found :(' +
                      ' <a href="http://{1}:{2}">Go home</a>').format(
            path, IPWBREPLAY_IP, IPWBREPLAY_PORT)
        return Response(respString)

    if cdxjLine is None:  # Resource not found in archives
        return generateNoMementosInterface(path, datetime)

    cdxjParts = cdxjLine.split(" ", 2)
    jObj = json.loads(cdxjParts[2])
    datetime = cdxjParts[1]

    # The last two locator segments are the header and payload digests
    digests = jObj['locator'].split('/')

    class HashNotFoundError(Exception):
        pass

    def handler(signum, frame):
        raise HashNotFoundError()

    try:
        signal.signal(signal.SIGALRM, handler)
        signal.alarm(10)
        try:
            payload = IPFS_API.cat(digests[-1])
            header = IPFS_API.cat(digests[-2])
        finally:
            # Cancel the timer on every exit path; otherwise a failed fetch
            # leaves a pending SIGALRM that fires later in unrelated code
            signal.alarm(0)
    except ipfsapi.exceptions.TimeoutError:
        print("{0} not found at {1}".format(cdxjParts[0], digests[-1]))
        respString = ('{0} not found in IPFS :(' +
                      ' <a href="http://{1}:{2}">Go home</a>').format(
            path, IPWBREPLAY_IP, IPWBREPLAY_PORT)
        return Response(respString)
    except TypeError:
        # NOTE(review): execution continues past this handler even though
        # payload/header were not both assigned, so the code below will
        # raise on the unbound name - confirm the intended response here
        print('A type error occurred')
        print(traceback.format_exc())
        print(sys.exc_info()[0])
    except HashNotFoundError:
        print("Hashes not found")
        return '', 404
    except Exception:
        print('Unknown exception occurred while fetching from ipfs.')
        print(sys.exc_info()[0])
        sys.exit()

    if 'encryption_method' in jObj:
        keyString = None
        while keyString is None:
            if 'encryption_key' in jObj:
                keyString = jObj['encryption_key']
            else:
                # Adjacent literals form one prompt string; a comma between
                # them previously made this a tuple
                askForKey = ('Enter a path for file'
                             ' containing decryption key: \n> ')
                keyString = raw_input(askForKey)

        encryptionMethod = None
        if jObj['encryption_method'] == 'xor':
            encryptionMethod = XOR

        pKey = encryptionMethod.new(keyString)
        payload = pKey.decrypt(base64.b64decode(payload))
        hKey = encryptionMethod.new(keyString)
        header = hKey.decrypt(base64.b64decode(header))

    hLines = header.split('\n')
    hLines.pop(0)  # Discard the first header line

    resp = Response(payload)

    for hLine in hLines:
        k, v = hLine.split(': ', 1)

        if k.lower() == 'transfer-encoding' and v.lower() == 'chunked':
            try:
                unchunkedPayload = extractResponseFromChunkedData(payload)
            except Exception:
                continue  # Data may not have actually been chunked
            resp.set_data(unchunkedPayload)

        if k.lower() not in ["content-type", "content-encoding"]:
            k = "X-Archive-Orig-" + k

        resp.headers[k] = v

    # Add ipwb header for additional SW logic
    newPayload = resp.get_data()

    ipwbjsinject = """<script src="/webui/webui.js"></script> <script>injectIPWBJS()</script>"""

    newPayload = newPayload.replace('</html>', ipwbjsinject + '</html>')
    resp.set_data(newPayload)

    resp.headers['Memento-Datetime'] = ipwbConfig.datetimeToRFC1123(datetime)

    # Get TimeMap for Link response header
    respWithLinkHeader = getLinkHeaderAbbreviatedTimeMap(path, datetime)
    resp.headers['Link'] = respWithLinkHeader.replace('\n', ' ')

    return resp
def show_uri(path, datetime=None):
    """Replay the archived capture of a URI from IPFS.

    Flow, as implemented below:
      1. An empty path serves the web UI index; 'serviceWorker.js' is
         served through getServiceWorker.
      2. If the IPFS daemon is not alive, a 503 response is returned.
      3. The SURT form of the URI (plus the datetime, when given) is looked
         up in the replay index; no match yields a 404 page listing any
         other captures of the same URI-R.
      4. Header and payload are fetched from IPFS using the last two
         segments of the record's 'locator'.
      5. If the record names an encryption method, both are XOR-decrypted
         with the record's 'encryption_key'.
      6. Archived headers are copied onto the response (prefixed with
         'X-Archive-Orig-' unless Content-Type) and Memento-Datetime is
         added.

    Args:
        path: requested URI-R (or 'serviceWorker.js' / empty string).
        datetime: optional datetime string appended to the index search
            key; replaced by the matched record's datetime once found.

    Returns:
        A Response object.
    """
    global IPFS_API

    if len(path) == 0:
        return showWebUI('index.html')

    if path == 'serviceWorker.js':
        return getServiceWorker(path)

    daemonAddress = '{0}:{1}'.format(IPFSAPI_IP, IPFSAPI_PORT)
    if not ipwbConfig.isDaemonAlive(daemonAddress):
        errStr = ('IPFS daemon not running. '
                  'Start it using $ ipfs daemon on the command-line '
                  ' or from the <a href="/">'
                  'IPWB replay homepage</a>.')
        # Report the outage as 503 Service Unavailable rather than 200
        return Response(errStr, status=503)

    cdxjLine = ''
    try:
        surtedURI = surt.surt(
            path, path_strip_trailing_slash_unless_empty=False)
        indexPath = ipwbConfig.getIPWBReplayIndexPath()

        searchString = surtedURI
        if datetime is not None:
            searchString = surtedURI + ' ' + datetime

        cdxjLine = getCDXJLine_binarySearch(searchString, indexPath)
    except Exception:
        # Exception (not a bare except) so that KeyboardInterrupt and
        # SystemExit are not turned into a "not found" page
        print(sys.exc_info()[0])
        respString = ('{0} not found :(' +
                      ' <a href="http://{1}:{2}">Go home</a>').format(
            path, IPWBREPLAY_IP, IPWBREPLAY_PORT)
        return Response(respString)

    if cdxjLine is None:  # Resource not found in archives
        msg = '<h1>ERROR 404</h1>'
        msg += 'No capture found for {0} at {1}.'.format(path, datetime)
        linesWithSameURIR = getCDXJLinesWithURIR(path)

        if linesWithSameURIR:
            msg += '<p>{0} capture(s) available:</p><ul>'.format(
                len(linesWithSameURIR))
            for line in linesWithSameURIR:
                fields = line.split(' ', 2)
                msg += ('<li><a href="/{1}/{0}">{0} at {1}</a></li>'.format(
                    unsurt(fields[0]), fields[1]))
            msg += '</ul>'
        return Response(msg, status=404)

    cdxjParts = cdxjLine.split(" ", 2)
    jObj = json.loads(cdxjParts[2])
    datetime = cdxjParts[1]

    # The last two locator segments are the header and payload digests
    digests = jObj['locator'].split('/')

    try:
        payload = IPFS_API.cat(digests[-1], timeout=1)
        header = IPFS_API.cat(digests[-2])
    except ipfsapi.exceptions.TimeoutError:
        print("{0} not found at {1}".format(cdxjParts[0], digests[-1]))
        respString = ('{0} not found in IPFS :(' +
                      ' <a href="http://{1}:{2}">Go home</a>').format(
            path, IPWBREPLAY_IP, IPWBREPLAY_PORT)
        return Response(respString)
    except Exception:
        print(sys.exc_info()[0])
        print("general error")
        sys.exit()

    if 'encryption_method' in jObj:
        pKey = XOR.new(jObj['encryption_key'])
        payload = pKey.decrypt(base64.b64decode(payload))
        hKey = XOR.new(jObj['encryption_key'])
        header = hKey.decrypt(base64.b64decode(header))

    hLines = header.split('\n')
    hLines.pop(0)  # Discard the first header line

    resp = Response(payload)

    for hLine in hLines:
        k, v = hLine.split(': ', 1)

        if k.lower() == 'transfer-encoding' and v.lower() == 'chunked':
            resp.set_data(extractResponseFromChunkedData(payload))
        if k.lower() != "content-type":
            k = "X-Archive-Orig-" + k

        resp.headers[k] = v

    resp.headers['Memento-Datetime'] = ipwbConfig.datetimeToRFC1123(datetime)

    return resp