Exemplo n.º 1
0
def expected_metadata_sorting():
    spec_schema = Schema(NestedField.required(1, "x", LongType.get()),
                         NestedField.required(2, "y", LongType.get()),
                         NestedField.required(3, "z", LongType.get()))

    spec = PartitionSpec \
        .builder_for(spec_schema) \
        .with_spec_id(5) \
        .build()

    random.seed(1234)
    previous_snapshot_id = int(time.time()) - random.randint(0, 3600)

    previous_snapshot = BaseSnapshot(ops, previous_snapshot_id, None,
                                     timestamp_millis=previous_snapshot_id,
                                     manifests=[GenericManifestFile(file=Files.local_input("file:/tmp/manfiest.1.avro"),
                                                                    spec_id=spec.spec_id)])

    current_snapshot_id = int(time.time())
    current_snapshot = BaseSnapshot(ops, current_snapshot_id, previous_snapshot_id,
                                    timestamp_millis=current_snapshot_id,
                                    manifests=[GenericManifestFile(file=Files.local_input("file:/tmp/manfiest.2.avro"),
                                                                   spec_id=spec.spec_id)])

    reversed_snapshot_log = list()
    metadata = TableMetadata(ops, None, "s3://bucket/test/location",
                             int(time.time()), 3, spec_schema, 5, [spec], {"property": "value"}, current_snapshot_id,
                             [previous_snapshot, current_snapshot], reversed_snapshot_log)

    reversed_snapshot_log.append(SnapshotLogEntry(current_snapshot.timestamp_millis, current_snapshot.snapshot_id))
    reversed_snapshot_log.append(SnapshotLogEntry(previous_snapshot.timestamp_millis, previous_snapshot.snapshot_id))

    return metadata
def test_compression_property(expected, prop):
    config = {ConfigProperties.COMPRESS_METADATA: prop}
    output_file = Files.local_output(
        TableMetadataParser.get_file_extension(config))
    TableMetadataParser.write(expected, output_file)
    assert prop == is_compressed(
        TableMetadataParser.get_file_extension(config))
    read = TableMetadataParser.read(
        None,
        HadoopInputFile.from_location(
            TableMetadataParser.get_file_extension(config), None))
    verify_metadata(read, expected)
Exemplo n.º 3
0
def missing_spec_list():
    schema = Schema(NestedField.required(1, "x", LongType.get()),
                    NestedField.required(2, "y", LongType.get()),
                    NestedField.required(3, "z", LongType.get()))

    spec = PartitionSpec.builder_for(schema).identity("x").with_spec_id(6).build()
    random.seed(1234)
    previous_snapshot_id = int(time.time()) - random.randint(0, 3600)

    previous_snapshot = BaseSnapshot(ops, previous_snapshot_id, None,
                                     timestamp_millis=previous_snapshot_id,
                                     manifests=[GenericManifestFile(file=Files.local_input("file:/tmp/manfiest.1.avro"),
                                                                    spec_id=spec.spec_id)])

    current_snapshot_id = int(time.time())
    current_snapshot = BaseSnapshot(ops, current_snapshot_id, previous_snapshot_id,
                                    timestamp_millis=current_snapshot_id,
                                    manifests=[GenericManifestFile(file=Files.local_input("file:/tmp/manfiest.2.avro"),
                                                                   spec_id=spec.spec_id)])
    return TableMetadata(ops, None, "s3://bucket/test/location", int(time.time()), 3, schema, 6,
                         (spec,), {"property": "value"}, current_snapshot_id, [previous_snapshot, current_snapshot],
                         [])
Exemplo n.º 4
0
 def new_metadata_file(self, filename):
     return Files.local_output(tempfile.mkstemp(prefix=filename))
Exemplo n.º 5
0
 def new_input_file(self, path):
     return Files.local_input(path)
Exemplo n.º 6
0
def snapshot_manifests():
    return (GenericManifestFile(
        file=Files.local_input("file:/tmp/manifest1.avro"), spec_id=0),
            GenericManifestFile(
                file=Files.local_input("file:/tmp/manifest2.avro"), spec_id=0))
Exemplo n.º 7
0
 def new_metadata_file(self, filename):
     return Files.local_output(os.path.join(self.metadata, filename))