Пример #1
0
def test_3rd_party_registry_store() -> None:
    """
    Test running apply on third party registry stores
    """
    runner = CliRunner()
    # Check with incorrect built-in provider name (no dots)
    with setup_third_party_registry_store_repo("feast123") as repo_path:
        return_code, output = runner.run_with_output(["apply"], cwd=repo_path)
        assertpy.assert_that(return_code).is_equal_to(1)
        assertpy.assert_that(output).contains(
            b'Registry store class name should end with "RegistryStore"')
    # Check with incorrect third-party registry store name (with dots)
    with setup_third_party_registry_store_repo(
            "feast_foo.RegistryStore") as repo_path:
        return_code, output = runner.run_with_output(["apply"], cwd=repo_path)
        assertpy.assert_that(return_code).is_equal_to(1)
        assertpy.assert_that(output).contains(
            b"Could not import RegistryStore module 'feast_foo'")
    # Check with incorrect third-party registry store name (with dots)
    with setup_third_party_registry_store_repo(
            "foo.FooRegistryStore") as repo_path:
        return_code, output = runner.run_with_output(["apply"], cwd=repo_path)
        assertpy.assert_that(return_code).is_equal_to(1)
        assertpy.assert_that(output).contains(
            b"Could not import RegistryStore 'FooRegistryStore' from module 'foo'"
        )
    # Check with correct third-party registry store name
    with setup_third_party_registry_store_repo(
            "foo.registry_store.FooRegistryStore") as repo_path:
        return_code, output = runner.run_with_output(["apply"], cwd=repo_path)
        assertpy.assert_that(return_code).is_equal_to(0)
Пример #2
0
def test_3rd_party_providers() -> None:
    """
    Test running apply on third party providers
    """
    runner = CliRunner()
    # Check with incorrect built-in provider name (no dots)
    with setup_third_party_provider_repo("feast123") as repo_path:
        return_code, output = runner.run_with_output(["apply"], cwd=repo_path)
        assertpy.assert_that(return_code).is_equal_to(1)
        assertpy.assert_that(output).contains(
            b"Provider 'feast123' is not implemented")
    # Check with incorrect third-party provider name (with dots)
    with setup_third_party_provider_repo("feast_foo.Provider") as repo_path:
        return_code, output = runner.run_with_output(["apply"], cwd=repo_path)
        assertpy.assert_that(return_code).is_equal_to(1)
        assertpy.assert_that(output).contains(
            b"Could not import Provider module 'feast_foo'")
    # Check with incorrect third-party provider name (with dots)
    with setup_third_party_provider_repo("foo.FooProvider") as repo_path:
        return_code, output = runner.run_with_output(["apply"], cwd=repo_path)
        assertpy.assert_that(return_code).is_equal_to(1)
        assertpy.assert_that(output).contains(
            b"Could not import Provider 'FooProvider' from module 'foo'")
    # Check with correct third-party provider name
    with setup_third_party_provider_repo(
            "foo.provider.FooProvider") as repo_path:
        return_code, output = runner.run_with_output(["apply"], cwd=repo_path)
        assertpy.assert_that(return_code).is_equal_to(0)
Пример #3
0
def test_cli_apply_duplicated_featureview_names() -> None:
    """
    Test apply feature views with duplicated names and single py file in a feature repo using CLI
    """

    with tempfile.TemporaryDirectory(
    ) as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:
        runner = CliRunner()
        # Construct an example repo in a temporary dir
        repo_path = Path(repo_dir_name)
        data_path = Path(data_dir_name)

        repo_config = repo_path / "feature_store.yaml"

        repo_config.write_text(
            dedent(f"""
        project: foo
        registry: {data_path / "registry.db"}
        provider: local
        online_store:
            path: {data_path / "online_store.db"}
        """))

        repo_example = repo_path / "example.py"
        repo_example.write_text(
            get_example_repo(
                "example_feature_repo_with_duplicated_featureview_names.py"))
        rc, output = runner.run_with_output(["apply"], cwd=repo_path)

        assert (rc != 0
                and b"Please ensure that all feature view names are unique"
                in output)
Пример #4
0
def test_missing_bq_source_fail() -> None:
    project_id = "".join(
        random.choice(string.ascii_lowercase + string.digits)
        for _ in range(10))
    runner = CliRunner()
    with tempfile.TemporaryDirectory(
    ) as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:

        repo_path = Path(repo_dir_name)
        data_path = Path(data_dir_name)

        repo_config = repo_path / "feature_store.yaml"

        repo_config.write_text(
            dedent(f"""
        project: {project_id}
        registry: {data_path / "registry.db"}
        provider: gcp
        """))

        repo_example = repo_path / "example.py"
        repo_example.write_text(
            get_example_repo("example_feature_repo_with_missing_bq_source.py"))

        returncode, output = runner.run_with_output(["apply"], cwd=repo_path)
        assert returncode == 1
        assert b"DataSourceNotFoundException" in output
def test_cli_apply_imported_featureview_with_duplication() -> None:
    """
    Test apply feature views with duplicated names and single py file in a feature repo using CLI
    """

    with tempfile.TemporaryDirectory() as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:
        runner = CliRunner()
        # Construct an example repo in a temporary dir
        repo_path = Path(repo_dir_name)
        data_path = Path(data_dir_name)

        repo_config = repo_path / "feature_store.yaml"

        repo_config.write_text(
            dedent(
                f"""
        project: foo
        registry: {data_path / "registry.db"}
        provider: local
        online_store:
            path: {data_path / "online_store.db"}
        """
            )
        )

        repo_example = repo_path / "example.py"
        repo_example.write_text(get_example_repo("example_feature_repo_2.py"))
        repo_example_2 = repo_path / "example_2.py"
        repo_example_2.write_text(
            "from datetime import timedelta\n"
            "from example import driver_hourly_stats, driver_hourly_stats_view\n"
            "from feast import FeatureService, FeatureView\n"
            "a_feature_service = FeatureService(\n"
            "   name='driver_locations_service',\n"
            "   features=[driver_hourly_stats_view],\n"
            ")\n"
            "driver_hourly_stats_view_2 = FeatureView(\n"
            "   name='driver_hourly_stats',\n"
            "   entities=['driver_id'],\n"
            "   ttl=timedelta(days=1),\n"
            "   online=True,\n"
            "   batch_source=driver_hourly_stats,\n"
            "   tags={'dummy': 'true'})\n"
        )

        rc, output = runner.run_with_output(["apply"], cwd=repo_path)

        assert rc != 0
        assert (
            b"More than one feature view with name driver_hourly_stats found." in output
        )
def test_cli_apply_imported_featureview() -> None:
    """
    Test apply feature views with duplicated names and single py file in a feature repo using CLI
    """

    with tempfile.TemporaryDirectory() as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:
        runner = CliRunner()
        # Construct an example repo in a temporary dir
        repo_path = Path(repo_dir_name)
        data_path = Path(data_dir_name)

        repo_config = repo_path / "feature_store.yaml"

        repo_config.write_text(
            dedent(
                f"""
        project: foo
        registry: {data_path / "registry.db"}
        provider: local
        online_store:
            path: {data_path / "online_store.db"}
        """
            )
        )

        repo_example = repo_path / "example.py"
        repo_example.write_text(get_example_repo("example_feature_repo_2.py"))
        repo_example_2 = repo_path / "example_2.py"
        repo_example_2.write_text(
            "from example import driver_hourly_stats_view\n"
            "from feast import FeatureService\n"
            "a_feature_service = FeatureService(\n"
            "   name='driver_locations_service',\n"
            "   features=[driver_hourly_stats_view],\n"
            ")\n"
        )

        rc, output = runner.run_with_output(["apply"], cwd=repo_path)

        assert rc == 0
        assert b"Created feature service driver_locations_service" in output
Пример #7
0
def run_simple_apply_test(example_repo_file_name: str, expected_error: bytes):
    with tempfile.TemporaryDirectory(
    ) as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:
        runner = CliRunner()
        # Construct an example repo in a temporary dir
        repo_path = Path(repo_dir_name)
        data_path = Path(data_dir_name)

        repo_config = repo_path / "feature_store.yaml"

        repo_config.write_text(
            dedent(f"""
        project: foo
        registry: {data_path / "registry.db"}
        provider: local
        online_store:
            path: {data_path / "online_store.db"}
        """))

        repo_example = repo_path / "example.py"
        repo_example.write_text(get_example_repo(example_repo_file_name))
        rc, output = runner.run_with_output(["apply"], cwd=repo_path)

        assert rc != 0 and expected_error in output
Пример #8
0
def test_e2e_local() -> None:
    """
    A more comprehensive than "basic" test, using local provider.

    1. Create a repo.
    2. Apply
    3. Ingest some data to online store from parquet
    4. Read from the online store to make sure it made it there.
    """

    runner = CliRunner()
    with tempfile.TemporaryDirectory() as data_dir:

        # Generate some test data in parquet format.
        end_date = datetime.now().replace(microsecond=0, second=0, minute=0)
        start_date = end_date - timedelta(days=15)

        driver_entities = [1001, 1002, 1003, 1004, 1005]
        driver_df = driver_data.create_driver_hourly_stats_df(
            driver_entities, start_date, end_date
        )
        driver_stats_path = os.path.join(data_dir, "driver_stats.parquet")
        driver_df.to_parquet(path=driver_stats_path, allow_truncated_timestamps=True)

        global_df = driver_data.create_global_daily_stats_df(start_date, end_date)
        global_stats_path = os.path.join(data_dir, "global_stats.parquet")
        global_df.to_parquet(path=global_stats_path, allow_truncated_timestamps=True)

        # Note that runner takes care of running apply/teardown for us here.
        # We patch python code in example_feature_repo_2.py to set the path to Parquet files.
        with runner.local_repo(
            get_example_repo("example_feature_repo_2.py")
            .replace("%PARQUET_PATH%", driver_stats_path)
            .replace("%PARQUET_PATH_GLOBAL%", global_stats_path),
            "file",
        ) as store:

            assert store.repo_path is not None

            # feast materialize
            r = runner.run(
                [
                    "materialize",
                    start_date.isoformat(),
                    (end_date - timedelta(days=7)).isoformat(),
                ],
                cwd=Path(store.repo_path),
            )

            assert r.returncode == 0

            _assert_online_features(store, driver_df, end_date - timedelta(days=7))

            # feast materialize-incremental
            r = runner.run(
                ["materialize-incremental", end_date.isoformat()],
                cwd=Path(store.repo_path),
            )

            assert r.returncode == 0

            _assert_online_features(store, driver_df, end_date)

        # Test a failure case when the parquet file doesn't include a join key
        with runner.local_repo(
            get_example_repo("example_feature_repo_with_entity_join_key.py").replace(
                "%PARQUET_PATH%", driver_stats_path
            ),
            "file",
        ) as store:

            assert store.repo_path is not None

            # feast materialize
            returncode, output = runner.run_with_output(
                [
                    "materialize",
                    start_date.isoformat(),
                    (end_date - timedelta(days=7)).isoformat(),
                ],
                cwd=Path(store.repo_path),
            )

            assert returncode != 0
            assert "feast.errors.FeastJoinKeysDuringMaterialization" in str(output)