Пример #1
0
def test_drop_tables_purge(client, current_ops, refresh_call, tmpdir):
    mock_snapshots = [MockSnapshot(location="snap-a.avro",
                                   manifests=[
                                       MockManifest("a-manifest.avro"),
                                       MockManifest("b-manifest.avro")],
                                   manifest_to_entries={
                                       "a-manifest.avro": MockReader(
                                           [MockManifestEntry("a.parquet"),
                                            MockManifestEntry("b.parquet")]),
                                       "b-manifest.avro": MockReader(
                                           [MockManifestEntry("c.parquet"),
                                            MockManifestEntry("d.parquet")])
                                   }),
                      MockSnapshot(location="snap-b.avro",
                                   manifests=[
                                       MockManifest("b-manifest.avro"),
                                       MockManifest("c-manifest.avro"),
                                       MockManifest("d-manifest.avro")],
                                   manifest_to_entries={
                                       "b-manifest.avro": MockReader(
                                           [MockManifestEntry("c.parquet"),
                                            MockManifestEntry("d.parquet")]),
                                       "c-manifest.avro": MockReader(
                                           [MockManifestEntry("e.parquet"),
                                            MockManifestEntry("f.parquet")]),
                                       "d-manifest.avro": MockReader(
                                           [MockManifestEntry("g.parquet"),
                                            MockManifestEntry("h.parquet")])
                                   })
                      ]
    ops = MockTableOperations(MockMetadata(mock_snapshots), "a.json")
    current_ops.return_value = ops

    parameters = {"table_type": "ICEBERG",
                  "partition_spec": [],
                  "metadata_location": "s3://path/to/iceberg.metadata.json"}
    client.return_value.__enter__.return_value.get_table.return_value = MockHMSTable(parameters)

    conf = {"hive.metastore.uris": 'thrift://hms:port',
            "hive.metastore.warehouse.dir": tmpdir}
    tables = HiveTables(conf)
    tables.drop("test", "test_123", purge=True)

    assert len(ops.deleted) == len(set(ops.deleted)), "Paths should only be deleted once"
    assert "a.json" in ops.deleted
    assert "snap-a.avro" in ops.deleted
    assert "snap-b.avro" in ops.deleted
    assert "a-manifest.avro" in ops.deleted
    assert "b-manifest.avro" in ops.deleted
    assert "c-manifest.avro" in ops.deleted
    assert "d-manifest.avro" in ops.deleted
    assert "a.parquet" in ops.deleted
    assert "b.parquet" in ops.deleted
    assert "c.parquet" in ops.deleted
    assert "d.parquet" in ops.deleted
    assert "e.parquet" in ops.deleted
    assert "f.parquet" in ops.deleted
    assert "g.parquet" in ops.deleted
    assert "h.parquet" in ops.deleted
Пример #2
0
def test_load_tables_check_no_location(client, current_call, refresh_call):
    parameters = {"table_type": "ICEBERG", "partition_spec": []}

    client.return_value.__enter__.return_value.get_table.return_value = MockHMSTable(
        parameters)

    conf = {"hive.metastore.uris": 'thrift://hms:port'}
    tables = HiveTables(conf)
    with raises(RuntimeError):
        tables.load("test.test_123")
Пример #3
0
def test_load_tables_check_missing_iceberg_type(client, current_call, refresh_call):
    parameters = {"partition_spec": [],
                  "metadata_location": "s3://path/to/iceberg.metdata.json"}

    client.return_value.__enter__.return_value.get_table.return_value = MockHMSTable(parameters)

    conf = {"hive.metastore.uris": 'thrift://hms:port'}
    tables = HiveTables(conf)
    with raises(RuntimeError):
        tables.load("test.test_123")
Пример #4
0
def test_load_tables_check_valid_props(client, current_call, refresh_call):
    parameters = {"table_type": "ICEBERG",
                  "partition_spec": [],
                  "metadata_location": "s3://path/to/iceberg.metadata.json"}

    client.return_value.__enter__.return_value.get_table.return_value = MockHMSTable(parameters)

    conf = {"hive.metastore.uris": 'thrift://hms:port'}
    tables = HiveTables(conf)
    tables.load("test.test_123")
Пример #5
0
def test_drop_tables(client, metadata, refresh_call, tmpdir):

    parameters = {"table_type": "ICEBERG",
                  "partition_spec": [],
                  "metadata_location": "s3://path/to/iceberg.metadata.json"}

    client.return_value.__enter__.return_value.get_table.return_value = MockHMSTable(parameters)
    conf = {"hive.metastore.uris": 'thrift://hms:port',
            "hive.metastore.warehouse.dir": tmpdir}
    tables = HiveTables(conf)
    tables.drop("test", "test_123", purge=False)
    client.return_value.__enter__.return_value.drop_table.assert_called_with("test", "test_123", deleteData=False)
Пример #6
0
def test_create_tables(client, current_call, base_scan_schema, base_scan_partition, tmpdir):

    client.return_value.__enter__.return_value.get_table.side_effect = NoSuchObjectException()
    current_call.return_value = None
    client.return_value.__enter__.return_value.lock.return_value = LockResponse("x", LockState.WAITING)
    client.return_value.__enter__.return_value.check_lock.return_value = LockResponse("x", LockState.ACQUIRED)
    tbl = client.return_value.__enter__.return_value.create_table.call_args_list
    conf = {"hive.metastore.uris": 'thrift://hms:port',
            "hive.metastore.warehouse.dir": tmpdir}
    tables = HiveTables(conf)
    tables.create(base_scan_schema, "test.test_123", base_scan_partition)

    client.return_value.__enter__.return_value.get_table.return_value = MockHMSTable(tbl[0].args[0].parameters)
    client.return_value.__enter__.return_value.get_table.side_effect = None
    current_call.return_value = tbl[0].args[0].parameters['metadata_location']

    tables.load("test.test_123")