def expected_metadata_sorting():
    """Build a ``TableMetadata`` whose snapshot log is filled in after construction.

    The table uses a three-column schema of required longs (``x``, ``y``,
    ``z``) and a partition spec with id 5 and no partition fields. It has two
    snapshots: the current one, whose id is the wall-clock time in whole
    seconds, and its parent, whose id is up to an hour earlier (the offset
    comes from ``random`` after seeding it with 1234). Each snapshot id is
    also passed as that snapshot's ``timestamp_millis``.

    Returns:
        The ``TableMetadata`` instance. It was constructed with a snapshot-log
        list that was still empty at that point; the current and then the
        parent entry were appended to that same list afterwards.
    """
    spec_schema = Schema(
        NestedField.required(1, "x", LongType.get()),
        NestedField.required(2, "y", LongType.get()),
        NestedField.required(3, "z", LongType.get()),
    )
    spec = (
        PartitionSpec.builder_for(spec_schema)
        .with_spec_id(5)
        .build()
    )

    def build_snapshot(snapshot_id, parent_id, manifest_location):
        # One manifest per snapshot; the snapshot id doubles as its timestamp.
        manifest = GenericManifestFile(
            file=Files.local_input(manifest_location),
            spec_id=spec.spec_id,
        )
        return BaseSnapshot(
            ops,
            snapshot_id,
            parent_id,
            timestamp_millis=snapshot_id,
            manifests=[manifest],
        )

    # Note: this reseeds the process-wide ``random`` generator.
    random.seed(1234)
    parent_id = int(time.time()) - random.randint(0, 3600)
    parent = build_snapshot(parent_id, None, "file:/tmp/manfiest.1.avro")

    head_id = int(time.time())
    head = build_snapshot(head_id, parent_id, "file:/tmp/manfiest.2.avro")

    # NOTE(review): the log is deliberately empty when handed to
    # TableMetadata and populated only afterwards - presumably to get past an
    # ordering check in the constructor; confirm before reordering.
    snapshot_log = []
    last_updated = int(time.time())
    table_metadata = TableMetadata(
        ops,
        None,
        "s3://bucket/test/location",
        last_updated,
        3,
        spec_schema,
        5,
        [spec],
        {"property": "value"},
        head_id,
        [parent, head],
        snapshot_log,
    )

    # Current snapshot first, parent second.
    for snapshot in (head, parent):
        snapshot_log.append(
            SnapshotLogEntry(snapshot.timestamp_millis, snapshot.snapshot_id)
        )
    return table_metadata
def test_compression_property(expected, prop):
    """Write and re-read ``expected`` with the compression property set to ``prop``.

    Args:
        expected: table metadata that is written out and then compared with
            what is read back.
        prop: value stored under ``ConfigProperties.COMPRESS_METADATA``; the
            test asserts it equals what ``is_compressed`` reports for the
            file extension derived from that config.
    """
    config = {ConfigProperties.COMPRESS_METADATA: prop}

    # The extension computed for this config is used directly as the file
    # location for both the write and the read below.
    write_location = TableMetadataParser.get_file_extension(config)
    destination = Files.local_output(write_location)
    TableMetadataParser.write(expected, destination)

    checked_extension = TableMetadataParser.get_file_extension(config)
    assert prop == is_compressed(checked_extension)

    read_location = TableMetadataParser.get_file_extension(config)
    source = HadoopInputFile.from_location(read_location, None)
    round_tripped = TableMetadataParser.read(None, source)

    verify_metadata(round_tripped, expected)
def missing_spec_list():
    """Build a ``TableMetadata`` with an identity-partitioned spec and an empty snapshot log.

    The schema has three required long columns (``x``, ``y``, ``z``). The
    partition spec has id 6 and a single identity partition on ``x``; it is
    passed to ``TableMetadata`` as a one-element tuple. Two snapshots are
    created: the current one, whose id is the wall-clock time in whole
    seconds, and its parent, whose id is up to an hour earlier (offset drawn
    from ``random`` after seeding it with 1234). Each snapshot id is also used
    as that snapshot's ``timestamp_millis``.

    Returns:
        The constructed ``TableMetadata``.
    """
    columns = [
        NestedField.required(field_id, column_name, LongType.get())
        for field_id, column_name in ((1, "x"), (2, "y"), (3, "z"))
    ]
    schema = Schema(*columns)

    spec_builder = PartitionSpec.builder_for(schema)
    spec = spec_builder.identity("x").with_spec_id(6).build()

    # Note: this reseeds the process-wide ``random`` generator.
    random.seed(1234)
    parent_id = int(time.time()) - random.randint(0, 3600)
    parent_manifests = [
        GenericManifestFile(
            file=Files.local_input("file:/tmp/manfiest.1.avro"),
            spec_id=spec.spec_id,
        )
    ]
    parent = BaseSnapshot(
        ops,
        parent_id,
        None,
        timestamp_millis=parent_id,
        manifests=parent_manifests,
    )

    head_id = int(time.time())
    head_manifests = [
        GenericManifestFile(
            file=Files.local_input("file:/tmp/manfiest.2.avro"),
            spec_id=spec.spec_id,
        )
    ]
    head = BaseSnapshot(
        ops,
        head_id,
        parent_id,
        timestamp_millis=head_id,
        manifests=head_manifests,
    )

    last_updated = int(time.time())
    return TableMetadata(
        ops,
        None,
        "s3://bucket/test/location",
        last_updated,
        3,
        schema,
        6,
        (spec,),
        {"property": "value"},
        head_id,
        [parent, head],
        [],
    )
def new_metadata_file(self, filename):
    """Return a local output file backed by a freshly created temporary file.

    Args:
        filename: used as the prefix of the temporary file's name.

    Returns:
        The result of ``Files.local_output`` for the temporary file's path.
    """
    # mkstemp returns an (open OS-level descriptor, absolute path) pair, not a
    # path. Only the path is a usable location, and the descriptor has to be
    # closed here or it stays open for the life of the process.
    descriptor, path = tempfile.mkstemp(prefix=filename)
    os.close(descriptor)

    # The (empty) temporary file already exists on disk at this point.
    return Files.local_output(path)
def new_input_file(self, path):
    """Return a local input file for ``path``.

    Args:
        path: location forwarded unchanged to ``Files.local_input``.

    Returns:
        Whatever ``Files.local_input`` returns for ``path``.
    """
    local_file = Files.local_input(path)
    return local_file
def snapshot_manifests():
    """Return a two-element tuple of ``GenericManifestFile`` objects.

    The entries wrap ``file:/tmp/manifest1.avro`` and
    ``file:/tmp/manifest2.avro`` (in that order) as local input files, and
    both are created with ``spec_id=0``.
    """
    locations = ("file:/tmp/manifest1.avro", "file:/tmp/manifest2.avro")
    return tuple(
        GenericManifestFile(file=Files.local_input(location), spec_id=0)
        for location in locations
    )
def new_metadata_file(self, filename):
    """Return a local output file for ``filename`` inside ``self.metadata``.

    Args:
        filename: name joined onto ``self.metadata`` with ``os.path.join``.

    Returns:
        Whatever ``Files.local_output`` returns for the joined path.
    """
    destination = os.path.join(self.metadata, filename)
    return Files.local_output(destination)