def test_raises_multiple_seeds(self, cube, function_store):
    store_data(
        cube=cube,
        function_store=function_store,
        df=pd.DataFrame({"x": [0], "y": [0], "p": [0], "q": [0]}),
        name=cube.seed_dataset,
        metadata={KTK_CUBE_METADATA_KEY_IS_SEED: True},
    )
    store_data(
        cube=cube,
        function_store=function_store,
        df=pd.DataFrame({"x": [0], "p": [0], "q": [0], "i1": [0]}),
        name="enrich",
        metadata={KTK_CUBE_METADATA_KEY_IS_SEED: True},
    )
    with pytest.raises(ValueError) as exc:
        discover_cube(cube.uuid_prefix, function_store)
    assert (
        str(exc.value)
        == 'Found multiple possible seed datasets for cube "cube": enrich, myseed'
    )
def test_raises_no_seed(self, cube, function_store):
    store_data(
        cube=cube,
        function_store=function_store,
        df=pd.DataFrame({"x": [0], "y": [0], "p": [0], "q": [0], "i1": [0]}),
        name=cube.seed_dataset,
        metadata={},
    )
    with pytest.raises(ValueError) as exc:
        discover_cube(cube.uuid_prefix, function_store)
    assert str(exc.value) == 'Could not find seed dataset for cube "cube".'
def test_raises_partition_keys_missing_old_metadata(self, cube, function_store):
    store_data(
        cube=cube,
        function_store=function_store,
        df=pd.DataFrame({"x": [0], "y": [0], "p": [0], "q": [0]}),
        name=cube.seed_dataset,
        partition_on=None,
        new_ktk_cube_metadata=False,
    )
    with pytest.raises(ValueError) as exc:
        discover_cube(cube.uuid_prefix, function_store)
    assert str(exc.value) == 'Seed dataset ("myseed") has no partition keys.'
def test_raises_partition_keys_impossible(self, cube, function_store):
    store_data(
        cube=cube,
        function_store=function_store,
        df=pd.DataFrame({"x": [0], "y": [0], "p": [0], "q": [0]}),
        name=cube.seed_dataset,
        partition_on=[],
    )
    with pytest.raises(ValueError) as exc:
        discover_cube(cube.uuid_prefix, function_store)
    assert (
        str(exc.value) == 'Seed dataset "myseed" has missing partition columns: p, q'
    )
def test_raises_dimension_columns_missing(self, cube, function_store):
    store_data(
        cube=cube,
        function_store=function_store,
        df=pd.DataFrame({"x": [0], "y": [0], "p": [0], "q": [0]}),
        name=cube.seed_dataset,
        metadata={KTK_CUBE_METADATA_KEY_IS_SEED: True},
    )
    with pytest.raises(ValueError) as exc:
        discover_cube(cube.uuid_prefix, function_store)
    assert (
        str(exc.value)
        == 'Could not recover dimension columns from seed dataset ("myseed") of cube "cube".'
    )
def test_raises_timestamp_col_is_not_ktk_cube_ts(self, cube, function_store):
    store_data(
        cube=cube,
        function_store=function_store,
        df=pd.DataFrame(
            {"x": [0], "y": [0], "p": [0], "q": [0], "ts": [pd.Timestamp("2000")]}
        ),
        partition_on=["p", "q", "ts"],
        name=cube.seed_dataset,
        new_ktk_cube_metadata=False,
    )
    with pytest.raises(
        NotImplementedError,
        match="Can only read old cubes if the timestamp column is 'KLEE_TS', but 'ts' was detected.",
    ):
        discover_cube(cube.uuid_prefix, function_store)
def test_partition_keys_no_nonseed_other(self, cube, function_store):
    store_data(
        cube=cube,
        function_store=function_store,
        df=pd.DataFrame({"x": [0], "y": [0], "p": [0], "q": [0]}),
        name=cube.seed_dataset,
        partition_on=["p", "q"],
    )
    store_data(
        cube=cube,
        function_store=function_store,
        df=pd.DataFrame({"x": [0], "y": [0], "i1": [0], "v1": [0]}),
        name="enrich",
        partition_on=[],
    )
    cube_actual, datasets = discover_cube(cube.uuid_prefix, function_store)
    assert cube_actual == cube
    assert set(datasets.keys()) == {cube.seed_dataset, "enrich"}
    ds_seed = datasets[cube.seed_dataset]
    assert ds_seed.primary_indices_loaded
    ds_enrich = datasets["enrich"]
    assert (not ds_enrich.partition_keys) or ds_enrich.primary_indices_loaded
def test_multiple(self, cube, function_store):
    store_data(
        cube=cube,
        function_store=function_store,
        df=pd.DataFrame({"x": [0], "y": [0], "p": [0], "q": [0]}),
        name=cube.seed_dataset,
    )
    store_data(
        cube=cube,
        function_store=function_store,
        df=pd.DataFrame({"x": [0], "p": [0], "q": [0], "i1": [0]}),
        name="enrich",
    )
    cube_actual, datasets = discover_cube(cube.uuid_prefix, function_store)
    assert cube_actual == cube
    assert set(datasets.keys()) == {cube.seed_dataset, "enrich"}
    ds_seed = datasets[cube.seed_dataset]
    assert ds_seed.primary_indices_loaded
    ds_enrich = datasets["enrich"]
    assert ds_enrich.primary_indices_loaded
def get_cube(store, uuid_prefix):
    """
    Get cube from store.

    Parameters
    ----------
    store: Union[Callable[[], simplekv.KeyValueStore], simplekv.KeyValueStore]
        KV store.
    uuid_prefix: str
        Dataset UUID prefix.

    Returns
    -------
    cube: Cube
        Cube specification.
    datasets: Dict[str, kartothek.core.dataset.DatasetMetadata]
        All discovered datasets.

    Raises
    ------
    click.UsageError
        In case the cube was not found.
    """
    try:
        return discover_cube(uuid_prefix, store)
    except ValueError as e:
        raise click.UsageError("Could not load cube: {e}".format(e=e))
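# For illustration only: a minimal sketch of how get_cube() might be wired into a
# click command. The command name, option names, and the storefact-based store
# factory used here are assumptions for this example, not part of the original code.
import click
from storefact import get_store_from_url


@click.command("info")
@click.option("--store", "store_url", required=True, help="Store URL, e.g. 'hfs:///data'.")
@click.option("--uuid-prefix", required=True, help="Dataset UUID prefix of the cube.")
def info(store_url, uuid_prefix):
    # get_cube() converts a missing or broken cube into click.UsageError, so the
    # CLI exits with a readable message instead of a traceback.
    cube, datasets = get_cube(get_store_from_url(store_url), uuid_prefix)
    click.echo("Cube: {}".format(cube.uuid_prefix))
    click.echo("Datasets: {}".format(", ".join(sorted(datasets))))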
def test_timestamp_col_compat(self, cube, function_store):
    """
    Test that cubes are still readable after timestamp removal.
    """
    metadata_dimension_columns_old = "klee_dimension_columns"
    metadata_is_seed_old = "klee_is_seed"
    metadata_partition_columns_old = "klee_partition_columns"
    metadata_timestamp_column_old = "klee_timestamp_column"
    timestamp_column_old = "KLEE_TS"
    store_data(
        cube=cube,
        function_store=function_store,
        df=pd.DataFrame(
            {
                "x": [0],
                "y": [0],
                "p": [0],
                "q": [0],
                timestamp_column_old: [pd.Timestamp("2000")],
                "i1": [0],
                "a": [0],
            }
        ),
        partition_on=["p", "q", timestamp_column_old],
        name=cube.seed_dataset,
        metadata={
            metadata_dimension_columns_old: cube.dimension_columns,
            metadata_is_seed_old: True,
            metadata_partition_columns_old: cube.partition_columns,
            metadata_timestamp_column_old: timestamp_column_old,
        },
    )
    store_data(
        cube=cube,
        function_store=function_store,
        df=pd.DataFrame(
            {
                "x": [0],
                "y": [0],
                "p": [0],
                "q": [0],
                timestamp_column_old: [pd.Timestamp("2000")],
                "b": [0],
            }
        ),
        partition_on=["p", "q", timestamp_column_old],
        name="enrich",
        metadata={
            metadata_dimension_columns_old: cube.dimension_columns,
            metadata_is_seed_old: False,
            metadata_partition_columns_old: cube.partition_columns,
            metadata_timestamp_column_old: timestamp_column_old,
        },
    )
    cube_discovered, datasets_discovered = discover_cube(
        cube.uuid_prefix, function_store
    )
    assert cube == cube_discovered
    assert set(datasets_discovered.keys()) == {cube.seed_dataset, "enrich"}
def test_reads_suppress_index(self, cube, function_store):
    cube = cube.copy(suppress_index_on=cube.dimension_columns)
    store_data(
        cube=cube,
        function_store=function_store,
        df=pd.DataFrame({"x": [0], "y": [0], "p": [0], "q": [0], "i1": [0]}),
        name=cube.seed_dataset,
    )
    cube_actual, datasets = discover_cube(cube.uuid_prefix, function_store)
    assert cube_actual == cube
def test_reads_suppress_index_default(self, cube, function_store):
    # Test that reading also works for old metadata that does not contain the
    # suppress_index_on key.
    store_data(
        cube=cube,
        function_store=function_store,
        df=pd.DataFrame({"x": [0], "y": [0], "p": [0], "q": [0], "i1": [0]}),
        name=cube.seed_dataset,
        write_suppress_index_on=False,
    )
    cube_actual, datasets = discover_cube(cube.uuid_prefix, function_store)
    assert cube_actual == cube
def test_seed_only(self, cube, function_store):
    store_data(
        cube=cube,
        function_store=function_store,
        df=pd.DataFrame({"x": [0], "y": [0], "p": [0], "q": [0], "i1": [0]}),
        name=cube.seed_dataset,
    )
    cube_actual, datasets = discover_cube(cube.uuid_prefix, function_store)
    assert cube_actual == cube
    assert set(datasets.keys()) == {cube.seed_dataset}
    ds = datasets[cube.seed_dataset]
    assert ds.primary_indices_loaded
def test_raises_partition_keys_impossible_old_metadata(self, cube, function_store):
    store_data(
        cube=cube,
        function_store=function_store,
        df=pd.DataFrame(
            {
                "x": [0],
                "y": [0],
                "p": [0],
                "q": [0],
                "KLEE_TS": [pd.Timestamp("2000")],
            }
        ),
        partition_on=["KLEE_TS"],
        name=cube.seed_dataset,
        new_ktk_cube_metadata=False,
    )
    with pytest.raises(ValueError) as exc:
        discover_cube(cube.uuid_prefix, function_store)
    assert (
        str(exc.value)
        == 'Seed dataset ("myseed") has only a single partition key (KLEE_TS) but should have at least 2.'
    )
def test_without_partition_timestamp_metadata(self, cube, function_store):
    # Test that discovery of a cube still works without the "KLEE_TS" column and
    # the KTK_CUBE_METADATA_PARTITION_COLUMNS metadata key.
    store_data(
        cube=cube,
        function_store=function_store,
        df=pd.DataFrame(
            {
                "x": [0],
                "y": [0],
                "p": [0],
                "q": [0],
                "KLEE_TS": [pd.Timestamp("2000")],
                "i1": [0],
            }
        ),
        partition_on=["p", "q", "KLEE_TS"],
        name=cube.seed_dataset,
        new_ktk_cube_metadata=False,
    )
    cube_actual, datasets = discover_cube(cube.uuid_prefix, function_store)
    assert cube_actual == cube
    assert set(datasets.keys()) == {cube.seed_dataset}