Exemplo n.º 1
0
def startReplay(warcFilename):
    """
    Launch replay in a separate process and feed it a CDXJ index of a WARC

    An empty placeholder index file is created in the system temp
    directory, replay.start is launched against it in a new process (kept
    in the module-level global ``p``), and after a fixed wait the WARC is
    indexed and the resulting CDXJ lines are written into that file.

    warcFilename -- file name of the WARC to index, looked up in the
                    ``../samples/warcs`` directory relative to this module
    """
    global p
    # Join path components rather than concatenating strings: with an empty
    # dirname the concatenated form produced a path rooted at '/'.
    pathOfWARC = os.path.join(os.path.dirname(__file__),
                              '..', 'samples', 'warcs', warcFilename)

    # mkstemp atomically creates a uniquely named, empty file, which serves
    # as the placeholder index for replay (no hand-rolled random name that
    # could collide with an existing file).
    fd, tempFilePath = tempfile.mkstemp(suffix='.cdxj')
    os.close(fd)

    p = Process(target=replay.start, args=[tempFilePath])
    p.start()
    # Fixed wait before indexing -- presumably gives replay time to start
    sleep(5)

    cdxjList = indexer.indexFileAt(pathOfWARC, quiet=True)
    cdxj = '\n'.join(cdxjList)

    # Overwrite the placeholder with the real index contents
    with open(tempFilePath, 'w') as f:
        f.write(cdxj)
Exemplo n.º 2
0
def test_push():
    """
    Read WARC, manipulate content to ensure uniqueness, push to IPFS
      WARC should result in two CDXJ entries with three space-delimited
      fields each: surt URI, datetime, JSON
      JSON should contain AT LEAST locator, mime_type, and status fields
    """
    newWARCPath = ipwbTest.createUniqueWARC()
    # use ipwb indexer to push
    cdxjList = indexer.indexFileAt(newWARCPath, quiet=True)
    # Join then split so that any entry spanning several lines is examined
    # line by line, as before
    cdxjLines = '\n'.join(cdxjList).split('\n')

    # Lines starting with '!' are metadata. Blank lines are skipped as well
    # so that an empty line fails the field check below instead of raising
    # IndexError while probing its first character.
    firstNonMetadataEntry = next(
        (line for line in cdxjLines if line and not line.startswith('!')),
        '')

    assert checkCDXJFields(firstNonMetadataEntry)
    # The third space-separated field is the JSON portion of the entry
    firstEntryLastField = firstNonMetadataEntry.split(' ', 2)[2]
    assert checkIPWBJSONFieldPresesence(firstEntryLastField)
Exemplo n.º 3
0
def test_warc_ipwbIndexerBrokenWARCRecord():
    """
    Index the broken.warc sample and expect exactly one CDXJ entry
    """
    # Join path components rather than concatenating strings: with an empty
    # dirname the concatenated form produced a path rooted at '/'.
    pathOfBrokenWARC = os.path.join(os.path.dirname(__file__),
                                    'samples', 'warcs', 'broken.warc')
    cdxjList = indexer.indexFileAt(pathOfBrokenWARC, quiet=True)
    cdxj = '\n'.join(cdxjList)
    assert ipwbTest.countCDXJEntries(cdxj) == 1
Exemplo n.º 4
0
def test_cdxj_warc_responseRecordCount():
    """
    Index a freshly created unique WARC and expect two CDXJ entries
    """
    warcPath = ipwbTest.createUniqueWARC()
    # Run the ipwb indexer over the new WARC (per the original note, this
    # is the step that pushes the content)
    entries = indexer.indexFileAt(warcPath, quiet=True)
    entryCount = ipwbTest.countCDXJEntries('\n'.join(entries))
    assert entryCount == 2