Exemplo n.º 1
0
def test_drop_tables_purge(client, current_ops, refresh_call, tmpdir):
    """Check that ``drop(..., purge=True)`` deletes every expected path once.

    Two mock snapshots are set up that both reference ``b-manifest.avro``
    (and therefore ``c.parquet`` / ``d.parquet``).  After the drop, the
    operations object must have recorded the ``"a.json"`` location, both
    snapshot files, all four manifests and all eight data files, with no
    path recorded more than once.
    """

    def manifests(*paths):
        # One MockManifest per path, in the order given.
        return [MockManifest(path) for path in paths]

    def reader(*data_files):
        # A fresh MockReader (with fresh entries) on every call, so the two
        # snapshots never share a reader instance for the common manifest.
        return MockReader([MockManifestEntry(name) for name in data_files])

    snap_a = MockSnapshot(
        location="snap-a.avro",
        manifests=manifests("a-manifest.avro", "b-manifest.avro"),
        manifest_to_entries={
            "a-manifest.avro": reader("a.parquet", "b.parquet"),
            "b-manifest.avro": reader("c.parquet", "d.parquet"),
        },
    )
    snap_b = MockSnapshot(
        location="snap-b.avro",
        manifests=manifests("b-manifest.avro", "c-manifest.avro", "d-manifest.avro"),
        manifest_to_entries={
            "b-manifest.avro": reader("c.parquet", "d.parquet"),
            "c-manifest.avro": reader("e.parquet", "f.parquet"),
            "d-manifest.avro": reader("g.parquet", "h.parquet"),
        },
    )

    ops = MockTableOperations(MockMetadata([snap_a, snap_b]), "a.json")
    current_ops.return_value = ops

    hms_table = MockHMSTable({
        "table_type": "ICEBERG",
        "partition_spec": [],
        "metadata_location": "s3://path/to/iceberg.metadata.json",
    })
    client.return_value.__enter__.return_value.get_table.return_value = hms_table

    hive_tables = HiveTables({
        "hive.metastore.uris": 'thrift://hms:port',
        "hive.metastore.warehouse.dir": tmpdir,
    })
    hive_tables.drop("test", "test_123", purge=True)

    deleted = ops.deleted
    assert len(deleted) == len(set(deleted)), "Paths should only be deleted once"

    expected_paths = (
        "a.json",
        "snap-a.avro", "snap-b.avro",
        "a-manifest.avro", "b-manifest.avro", "c-manifest.avro", "d-manifest.avro",
        "a.parquet", "b.parquet", "c.parquet", "d.parquet",
        "e.parquet", "f.parquet", "g.parquet", "h.parquet",
    )
    for path in expected_paths:
        assert path in deleted
Exemplo n.º 2
0
def test_drop_tables(client, metadata, refresh_call, tmpdir):
    """Check that ``drop(..., purge=False)`` forwards to the client's ``drop_table``.

    The mocked client returns an ICEBERG table from ``get_table``; after the
    drop, ``drop_table`` must have been last called with the database name,
    the table name and ``deleteData=False``.
    """
    # The object yielded by ``with client(...) as c`` in the code under test.
    hms_client = client.return_value.__enter__.return_value

    table_parameters = {
        "table_type": "ICEBERG",
        "partition_spec": [],
        "metadata_location": "s3://path/to/iceberg.metadata.json",
    }
    hms_client.get_table.return_value = MockHMSTable(table_parameters)

    hive_conf = {
        "hive.metastore.uris": 'thrift://hms:port',
        "hive.metastore.warehouse.dir": tmpdir,
    }
    HiveTables(hive_conf).drop("test", "test_123", purge=False)

    hms_client.drop_table.assert_called_with("test", "test_123", deleteData=False)