Exemplo n.º 1
0
        def list_files_in_table(
            self,
            table: Table,
            *,
            predicateHints: Optional[Sequence[str]] = None,
            limitHint: Optional[int] = None,
        ) -> ListFilesInTableResponse:
            """Stub that returns a fixed fifteen-column schema and no files.

            Asserts that the requested table is
            ``Table("table_name", "share_name", "schema_name")``. The
            ``predicateHints`` and ``limitHint`` arguments are accepted but
            not used.

            Returns:
                A response with ``protocol=None``, the canned metadata and an
                empty ``add_files`` list.
            """
            expected_table = Table("table_name", "share_name", "schema_name")
            assert table == expected_table

            # Columns "a" through "o", all nullable: primitives first, then an
            # array, a struct and a map.
            schema = (
                '{"fields":['
                '{"metadata":{},"name":"a","nullable":true,"type":"boolean"},'
                '{"metadata":{},"name":"b","nullable":true,"type":"byte"},'
                '{"metadata":{},"name":"c","nullable":true,"type":"short"},'
                '{"metadata":{},"name":"d","nullable":true,"type":"integer"},'
                '{"metadata":{},"name":"e","nullable":true,"type":"long"},'
                '{"metadata":{},"name":"f","nullable":true,"type":"float"},'
                '{"metadata":{},"name":"g","nullable":true,"type":"double"},'
                '{"metadata":{},"name":"h","nullable":true,"type":"decimal(5,2)"},'
                '{"metadata":{},"name":"i","nullable":true,"type":"string"},'
                '{"metadata":{},"name":"j","nullable":true,"type":"binary"},'
                '{"metadata":{},"name":"k","nullable":true,"type":"timestamp"},'
                '{"metadata":{},"name":"l","nullable":true,"type":"date"},'
                '{"metadata":{},"name":"m","nullable":true,"type":{"type":"array",'
                '"elementType":"string","containsNull":true}},'
                '{"metadata":{},"name":"n","nullable":true,"type":{"type":"struct","fields":'
                '[{"name":"foo","type":"string","nullable":true,"metadata":{}},'
                '{"name":"bar","type":"integer","nullable":true,"metadata":{}}]}},'
                '{"metadata":{},"name":"o","nullable":true,"type":{"type":"map",'
                '"keyType":"string","valueType":"integer","valueContainsNull":true}}'
                '],"type":"struct"}'
            )
            canned_metadata = Metadata(schema_string=schema)

            return ListFilesInTableResponse(
                protocol=None,
                metadata=canned_metadata,
                add_files=[],
            )
Exemplo n.º 2
0
        def list_files_in_table(
            self,
            table: Table,
            *,
            predicateHints: Optional[Sequence[str]] = None,
            limitHint: Optional[int] = None,
        ) -> ListFilesInTableResponse:
            """Stub that returns a two-column schema and two parquet files.

            Asserts that the requested table is
            ``Table("table_name", "share_name", "schema_name")``. The
            ``predicateHints`` and ``limitHint`` arguments are accepted but
            not used.

            Returns:
                A response with ``protocol=None``, the canned metadata and two
                ``AddFile`` entries whose URLs point at ``pdf1.parquet`` and
                ``pdf2.parquet`` under ``tmp_path`` (taken from the enclosing
                scope).
            """
            expected_table = Table("table_name", "share_name", "schema_name")
            assert table == expected_table

            schema = (
                '{"fields":['
                '{"metadata":{},"name":"a","nullable":true,"type":"long"},'
                '{"metadata":{},"name":"b","nullable":true,"type":"string"}'
                '],"type":"struct"}'
            )
            canned_metadata = Metadata(schema_string=schema)

            # (file id, value of partition column "b") for each file.
            file_specs = (("pdf1", "x"), ("pdf2", "y"))
            files = [
                AddFile(
                    url=str(tmp_path / f"{file_id}.parquet"),
                    id=file_id,
                    partition_values={"b": partition_value},
                    size=0,
                    stats="",
                )
                for file_id, partition_value in file_specs
            ]

            return ListFilesInTableResponse(
                protocol=None,
                metadata=canned_metadata,
                add_files=files,
            )
Exemplo n.º 3
0
    def list_files_in_table(
        self,
        table: Table,
        *,
        predicateHints: Optional[Sequence[str]] = None,
        limitHint: Optional[int] = None,
    ) -> ListFilesInTableResponse:
        """Query a table and parse the line-oriented response.

        Calls ``self._post_internal`` with the table's ``/query`` path and a
        payload containing only the hints that are not ``None``.

        Args:
            table: Supplies the ``share``, ``schema`` and ``name`` path parts.
            predicateHints: Sent as ``"predicateHints"`` when given.
            limitHint: Sent as ``"limitHint"`` when given.

        Returns:
            A response built from the lines yielded by the request: the first
            line's ``"protocol"`` entry, the second line's ``"metaData"``
            entry, and the ``"file"`` entry of every remaining line.
        """
        optional_hints = (
            ("predicateHints", predicateHints),
            ("limitHint", limitHint),
        )
        payload: Dict = {
            key: value for key, value in optional_hints if value is not None
        }
        endpoint = f"/shares/{table.share}/schemas/{table.schema}/tables/{table.name}/query"

        with self._post_internal(endpoint, data=payload) as lines:
            # The first two lines are consumed and decoded before anything is
            # converted; every line after that describes one file.
            first_record = json.loads(next(lines))
            second_record = json.loads(next(lines))

            protocol = Protocol.from_json(first_record["protocol"])
            metadata = Metadata.from_json(second_record["metaData"])
            files = []
            for raw_line in lines:
                files.append(AddFile.from_json(json.loads(raw_line)["file"]))

            return ListFilesInTableResponse(
                protocol=protocol,
                metadata=metadata,
                add_files=files,
            )
Exemplo n.º 4
0
 def query_table_metadata(self, table: Table) -> QueryTableMetadataResponse:
     """Fetch a table's protocol and metadata.

     Calls ``self._get_internal`` with the table's ``/metadata`` path and
     decodes the first two lines it yields as JSON.

     Args:
         table: Supplies the ``share``, ``schema`` and ``name`` path parts.

     Returns:
         A response built from the first line's ``"protocol"`` entry and the
         second line's ``"metaData"`` entry.
     """
     endpoint = f"/shares/{table.share}/schemas/{table.schema}/tables/{table.name}/metadata"

     with self._get_internal(endpoint) as lines:
         # Both lines are consumed and decoded before either is converted.
         first_record = json.loads(next(lines))
         second_record = json.loads(next(lines))

         protocol = Protocol.from_json(first_record["protocol"])
         metadata = Metadata.from_json(second_record["metaData"])
         return QueryTableMetadataResponse(protocol=protocol, metadata=metadata)
Exemplo n.º 5
0
def test_list_files_in_table_partitioned_different_schemas(
        rest_client: DataSharingRestClient):
    """Check the file listing for ``table3`` in ``share1`` / ``default``.

    Verifies the protocol, the three-column metadata partitioned by ``date``,
    and the three returned files. File URLs are taken from the response
    itself, so they are not checked here.
    """
    table = Table(name="table3", share="share1", schema="default")
    response = rest_client.list_files_in_table(table)

    assert response.protocol == Protocol(min_reader_version=1)

    expected_schema = (
        '{"type":"struct","fields":['
        '{"name":"eventTime","type":"timestamp","nullable":true,"metadata":{}},'
        '{"name":"date","type":"date","nullable":true,"metadata":{}},'
        '{"name":"type","type":"string","nullable":true,"metadata":{}}'
        "]}"
    )
    expected_metadata = Metadata(
        id="7ba6d727-a578-4234-a138-953f790b427c",
        format=Format(provider="parquet", options={}),
        schema_string=expected_schema,
        partition_columns=["date"],
    )
    assert response.metadata == expected_metadata

    # The first two files carry stats for "type"; the third has stats for
    # "eventTime" only.
    bar_stats = (
        r'{"numRecords":1,'
        r'"minValues":{"eventTime":"2021-04-28T23:36:51.945Z","type":"bar"},'
        r'"maxValues":{"eventTime":"2021-04-28T23:36:51.945Z","type":"bar"},'
        r'"nullCount":{"eventTime":0,"type":0}}'
    )
    foo_stats = (
        r'{"numRecords":1,'
        r'"minValues":{"eventTime":"2021-04-28T23:36:47.599Z","type":"foo"},'
        r'"maxValues":{"eventTime":"2021-04-28T23:36:47.599Z","type":"foo"},'
        r'"nullCount":{"eventTime":0,"type":0}}'
    )
    untyped_stats = (
        r'{"numRecords":1,'
        r'"minValues":{"eventTime":"2021-04-28T23:35:53.156Z"},'
        r'"maxValues":{"eventTime":"2021-04-28T23:35:53.156Z"},'
        r'"nullCount":{"eventTime":0}}'
    )
    assert response.add_files == [
        AddFile(
            url=response.add_files[0].url,
            id="db213271abffec6fd6c7fc2aad9d4b3f",
            partition_values={"date": "2021-04-28"},
            size=778,
            stats=bar_stats,
        ),
        AddFile(
            url=response.add_files[1].url,
            id="f1f8be229d8b18eb6d6a34255f2d7089",
            partition_values={"date": "2021-04-28"},
            size=778,
            stats=foo_stats,
        ),
        AddFile(
            url=response.add_files[2].url,
            id="a892a55d770ee70b34ffb2ebf7dc2fd0",
            partition_values={"date": "2021-04-28"},
            size=573,
            stats=untyped_stats,
        ),
    ]
Exemplo n.º 6
0
def test_query_table_metadata_partitioned(rest_client: DataSharingRestClient):
    """Check the metadata returned for ``table2`` in ``share2`` / ``default``.

    Verifies the protocol and a two-column schema partitioned by ``date``.
    """
    table = Table(name="table2", share="share2", schema="default")
    response = rest_client.query_table_metadata(table)

    assert response.protocol == Protocol(min_reader_version=1)

    expected_schema = (
        '{"type":"struct","fields":['
        '{"name":"eventTime","type":"timestamp","nullable":true,"metadata":{}},'
        '{"name":"date","type":"date","nullable":true,"metadata":{}}'
        "]}"
    )
    expected_metadata = Metadata(
        id="f8d5c169-3d01-4ca3-ad9e-7dc3355aedb2",
        format=Format(provider="parquet", options={}),
        schema_string=expected_schema,
        partition_columns=["date"],
    )
    assert response.metadata == expected_metadata
Exemplo n.º 7
0
def test_metadata():
    """Check that ``Metadata.from_json`` parses a JSON document.

    The document's ``schemaString`` value is itself JSON with its quotes
    backslash-escaped; the parsed metadata is expected to hold that schema
    with the escapes removed. The expected value uses a default-constructed
    ``Format()``.
    """
    escaped_schema = (
        r"{\"type\":\"struct\",\"fields\":["
        r"{\"name\":\"_1\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},"
        r"{\"name\":\"_2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}"
    )
    # Named `document` rather than `json` so the conventional module name is
    # not shadowed inside this test.
    document = f"""
        {{
            "id" : "testId",
            "format" : {{
                "provider" : "parquet",
                "options" : {{}}
            }},
            "schemaString" : "{escaped_schema}",
            "partitionColumns" : []
        }}
        """
    parsed = Metadata.from_json(document)

    unescaped_schema = escaped_schema.replace(r"\"", '"')
    expected = Metadata(
        id="testId",
        format=Format(),
        schema_string=unescaped_schema,
        partition_columns=[],
    )
    assert parsed == expected
Exemplo n.º 8
0
def test_query_table_metadata_non_partitioned(
        rest_client: DataSharingRestClient):
    """Check the metadata returned for ``table1`` in ``share1`` / ``default``.

    Verifies the protocol and a two-column schema with no partition columns.
    """
    table = Table(name="table1", share="share1", schema="default")
    response = rest_client.query_table_metadata(table)

    assert response.protocol == Protocol(min_reader_version=1)

    expected_schema = (
        '{"type":"struct","fields":['
        '{"name":"eventTime","type":"timestamp","nullable":true,"metadata":{}},'
        '{"name":"date","type":"date","nullable":true,"metadata":{}}'
        "]}"
    )
    expected_metadata = Metadata(
        id="ed96aa41-1d81-4b7f-8fb5-846878b4b0cf",
        format=Format(provider="parquet", options={}),
        schema_string=expected_schema,
        partition_columns=[],
    )
    assert response.metadata == expected_metadata
Exemplo n.º 9
0
def test_query_table_metadata_partitioned_different_schemas(
        rest_client: DataSharingRestClient):
    """Check the metadata returned for ``table3`` in ``share1`` / ``default``.

    Verifies the protocol and a three-column schema partitioned by ``date``.
    """
    table = Table(name="table3", share="share1", schema="default")
    response = rest_client.query_table_metadata(table)

    assert response.protocol == Protocol(min_reader_version=1)

    expected_schema = (
        '{"type":"struct","fields":['
        '{"name":"eventTime","type":"timestamp","nullable":true,"metadata":{}},'
        '{"name":"date","type":"date","nullable":true,"metadata":{}},'
        '{"name":"type","type":"string","nullable":true,"metadata":{}}'
        "]}"
    )
    expected_metadata = Metadata(
        id="7ba6d727-a578-4234-a138-953f790b427c",
        format=Format(provider="parquet", options={}),
        schema_string=expected_schema,
        partition_columns=["date"],
    )
    assert response.metadata == expected_metadata
Exemplo n.º 10
0
def test_list_files_in_table_non_partitioned(
        rest_client: DataSharingRestClient):
    """Check the file listing for ``table1`` in ``share1`` / ``default``.

    The request passes a single predicate hint. Verifies the protocol, the
    two-column metadata with no partition columns, and the two returned
    files. File URLs are taken from the response itself, so they are not
    checked here.
    """
    table = Table(name="table1", share="share1", schema="default")
    response = rest_client.list_files_in_table(
        table,
        predicateHints=["date = '2021-01-31'"],
    )

    assert response.protocol == Protocol(min_reader_version=1)

    expected_schema = (
        '{"type":"struct","fields":['
        '{"name":"eventTime","type":"timestamp","nullable":true,"metadata":{}},'
        '{"name":"date","type":"date","nullable":true,"metadata":{}}'
        "]}"
    )
    expected_metadata = Metadata(
        id="ed96aa41-1d81-4b7f-8fb5-846878b4b0cf",
        format=Format(provider="parquet", options={}),
        schema_string=expected_schema,
        partition_columns=[],
    )
    assert response.metadata == expected_metadata

    first_stats = (
        r'{"numRecords":1,'
        r'"minValues":{"eventTime":"2021-04-28T06:32:22.421Z","date":"2021-04-28"},'
        r'"maxValues":{"eventTime":"2021-04-28T06:32:22.421Z","date":"2021-04-28"},'
        r'"nullCount":{"eventTime":0,"date":0}}'
    )
    second_stats = (
        r'{"numRecords":1,'
        r'"minValues":{"eventTime":"2021-04-28T06:32:02.070Z","date":"2021-04-28"},'
        r'"maxValues":{"eventTime":"2021-04-28T06:32:02.070Z","date":"2021-04-28"},'
        r'"nullCount":{"eventTime":0,"date":0}}'
    )
    assert response.add_files == [
        AddFile(
            url=response.add_files[0].url,
            id="061cb3683a467066995f8cdaabd8667d",
            partition_values={},
            size=781,
            stats=first_stats,
        ),
        AddFile(
            url=response.add_files[1].url,
            id="e268cbf70dbaa6143e7e9fa3e2d3b00e",
            partition_values={},
            size=781,
            stats=second_stats,
        ),
    ]
Exemplo n.º 11
0
def test_list_files_in_table_partitioned(rest_client: DataSharingRestClient):
    """Check the file listing for ``table2`` in ``share2`` / ``default``.

    The request passes a predicate hint and a limit hint of 123. Verifies the
    protocol, the two-column metadata partitioned by ``date``, and the two
    returned files. File URLs are taken from the response itself, so they are
    not checked here.
    """
    table = Table(name="table2", share="share2", schema="default")
    response = rest_client.list_files_in_table(
        table,
        predicateHints=["date = '2021-01-31'"],
        limitHint=123,
    )

    assert response.protocol == Protocol(min_reader_version=1)

    expected_schema = (
        '{"type":"struct","fields":['
        '{"name":"eventTime","type":"timestamp","nullable":true,"metadata":{}},'
        '{"name":"date","type":"date","nullable":true,"metadata":{}}'
        "]}"
    )
    expected_metadata = Metadata(
        id="f8d5c169-3d01-4ca3-ad9e-7dc3355aedb2",
        format=Format(provider="parquet", options={}),
        schema_string=expected_schema,
        partition_columns=["date"],
    )
    assert response.metadata == expected_metadata

    first_stats = (
        r'{"numRecords":1,'
        r'"minValues":{"eventTime":"2021-04-28T23:33:57.955Z"},'
        r'"maxValues":{"eventTime":"2021-04-28T23:33:57.955Z"},'
        r'"nullCount":{"eventTime":0}}'
    )
    second_stats = (
        r'{"numRecords":1,'
        r'"minValues":{"eventTime":"2021-04-28T23:33:48.719Z"},'
        r'"maxValues":{"eventTime":"2021-04-28T23:33:48.719Z"},'
        r'"nullCount":{"eventTime":0}}'
    )
    assert response.add_files == [
        AddFile(
            url=response.add_files[0].url,
            id="9f1a49539c5cffe1ea7f9e055d5c003c",
            partition_values={"date": "2021-04-28"},
            size=573,
            stats=first_stats,
        ),
        AddFile(
            url=response.add_files[1].url,
            id="cd2209b32f5ed5305922dd50f5908a75",
            partition_values={"date": "2021-04-28"},
            size=573,
            stats=second_stats,
        ),
    ]