Exemplo n.º 1
0
    def test_gets_max_denies_text(self):
        """Check that the max lookup rejects string and bool partitions.

        Sets up data through ``self.mock_publish`` with one ``string`` and one
        ``bool`` partition, then expects
        ``fetch_parq.get_max_partition_value`` to raise ``ValueError`` for
        each of them.
        """
        key = "safekeyprefixname/safedatasetname"
        bucket = "safebucketname"
        part_types = {"string_col": "string", "bool_col": "bool"}
        # Only the returned bucket is used below; the remaining results are
        # still unpacked so the four-value return shape stays checked.
        bucket, _df, _partitions, _published_files = self.mock_publish(
            bucket=bucket, key=key, partition_types=part_types)

        # The call must raise, so its result is deliberately not bound.
        for partition in part_types:
            with pytest.raises(ValueError):
                fetch_parq.get_max_partition_value(
                    bucket=bucket, key=key, partition=partition)
Exemplo n.º 2
0
def test_gets_max_denies_text():
    """Check that the max lookup rejects string and bool partitions.

    Builds a grouped dataframe with a ``string`` column, a ``bool`` column
    and an extra ``int`` column named ``metrics``, writes it through
    ``setup_partitioned_parquet`` partitioned on the first two, and expects
    ``fetch_parq.get_max_partition_value`` to raise ``ValueError`` for each
    partition.
    """
    key = "safekeyprefixname/safedatasetname"
    bucket = "safebucketname"
    part_types = {"string_col": "string", "bool_col": "bool"}
    # The dataframe carries one extra, non-partition column.
    col_types = {**part_types, "metrics": "int"}
    df = setup_grouped_dataframe(count=10, columns=col_types)
    # The returned paths are not needed by this test.
    bucket, _parquet_paths = setup_partitioned_parquet(
        dataframe=df, bucket=bucket, key=key, partition_data_types=part_types)

    # The call must raise, so its result is deliberately not bound.
    for partition in part_types:
        with pytest.raises(ValueError):
            fetch_parq.get_max_partition_value(
                bucket=bucket, key=key, partition=partition)
Exemplo n.º 3
0
    def test_gets_max(self):
        """The max fetched for ``int_col`` must equal the dataframe's max.

        Data is set up through ``self.mock_publish`` with an ``int`` and a
        ``float`` partition; only the ``int`` partition is queried.
        """
        dataset_key = "safekeyprefixname/safedatasetname"
        target_partition = "int_col"
        bucket, df, _partitions, _published_files = self.mock_publish(
            bucket="safebucketname",
            key=dataset_key,
            partition_types={"int_col": "int", "float_col": "float"})

        fetched_max = fetch_parq.get_max_partition_value(
            bucket=bucket, key=dataset_key, partition=target_partition)

        # The dataframe column is the source of truth for the expected max.
        assert df[target_partition].max() == fetched_max
Exemplo n.º 4
0
def test_gets_max():
    """The max fetched for ``int_col`` must equal the dataframe's max.

    The dataframe has an ``int`` and a ``float`` column, but only
    ``int_col`` is passed as a partition when the parquet data is set up.
    """
    dataset_key = "safekeyprefixname/safedatasetname"
    target_partition = "int_col"

    df = setup_grouped_dataframe(
        count=10, columns={"int_col": "int", "float_col": "float"})
    bucket, _parquet_paths = setup_partitioned_parquet(
        dataframe=df,
        bucket="safebucketname",
        key=dataset_key,
        partition_data_types={"int_col": "int"})

    fetched_max = fetch_parq.get_max_partition_value(
        bucket=bucket, key=dataset_key, partition=target_partition)

    # The dataframe column is the source of truth for the expected max.
    assert df[target_partition].max() == fetched_max
Exemplo n.º 5
0
 def get_max_partition_value(self, bucket: str, key: str, partition: str) -> any:
     """Forward the lookup to the non-method ``get_max_partition_value``.

     The three arguments are passed through unchanged, as keywords, and the
     callee's result is returned as-is.

     NOTE(review): the return annotation is the builtin ``any`` function,
     not ``typing.Any``; it is kept unchanged here and should be confirmed
     and fixed together with the file's imports.
     """
     forwarded = dict(bucket=bucket, key=key, partition=partition)
     return get_max_partition_value(**forwarded)