Exemplo n.º 1
0
    def test_cdxj_empty(self):
        """Indexing a zero-byte input stream must write nothing to the output."""
        cdx_out = StringIO()
        blank_stream = BytesIO()

        write_cdx_index(cdx_out, blank_stream, {"filename": "empty.warc.gz"})

        assert cdx_out.getvalue() == ""
Exemplo n.º 2
0
    def test_cdxj_middle_empty_records(self):
        """Index a stream where empty gzip records surround two copies of a WARC.

        The input is laid out as: empty, WARC, empty, empty, WARC. The index
        written for it is expected to contain exactly four lines.
        """
        # Raw bytes of a gzip record with an empty payload.
        blank_member = b"\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\x03\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00"

        fixture_path = os.path.join(TEST_DIR, "example.warc.gz")
        with open(fixture_path, "rb") as warc_file:
            warc_bytes = warc_file.read()

        # Build the whole stream up front; a BytesIO created from initial
        # bytes starts positioned at offset 0, ready for reading.
        stitched = BytesIO(
            b"".join(
                [blank_member, warc_bytes, blank_member, blank_member, warc_bytes]
            )
        )

        cdx_out = StringIO()
        write_cdx_index(cdx_out, stitched, {"filename": "empty.warc.gz"})

        index_lines = cdx_out.getvalue().rstrip().split("\n")

        assert len(index_lines) == 4, index_lines
Exemplo n.º 3
0
 def index_all(self, **opts):
     """Run write_cdx_index over TEST_DIR and return the text it wrote.

     TEST_DIR is passed as the sole entry of a path list; the keyword
     arguments are collected into a dict and forwarded as the options.
     """
     buffer = StringIO()
     write_cdx_index(buffer, [TEST_DIR], opts)
     return buffer.getvalue()
Exemplo n.º 4
0
 def index_file(self, filename, **opts):
     """Run write_cdx_index on one file under TEST_DIR and return the text it wrote.

     ``filename`` is joined onto TEST_DIR; the keyword arguments are
     collected into a dict and forwarded as the options.
     """
     target_path = os.path.join(TEST_DIR, filename)
     buffer = StringIO()
     write_cdx_index(buffer, target_path, opts)
     return buffer.getvalue()