示例#1
0
def prep_local_fs_and_fv() -> Iterator[Tuple[FeatureStore, FeatureView]]:
    """Yield a local-provider FeatureStore and a parquet-backed feature view.

    The frame returned by ``create_dataset()`` is written to a parquet file,
    wrapped in a ``FileSource``, turned into a feature view with
    ``get_feature_view`` and applied to a freshly configured store.

    Yields:
        A ``(FeatureStore, FeatureView)`` pair. The temporary directories
        holding the parquet file, the registry and the online store are
        removed when the generator finishes or is closed.
    """
    # The parquet file is placed in a TemporaryDirectory instead of a
    # NamedTemporaryFile: closing a NamedTemporaryFile deletes it, so a file
    # re-created afterwards under the same name would never be cleaned up.
    with tempfile.TemporaryDirectory() as source_dir_name:
        parquet_path = str(Path(source_dir_name) / "dataset.parquet")
        df = create_dataset()
        df.to_parquet(parquet_path)
        file_source = FileSource(
            file_format=ParquetFormat(),
            file_url=f"file://{parquet_path}",
            event_timestamp_column="ts",
            created_timestamp_column="created_ts",
            date_partition_column="",
            field_mapping={
                "ts_1": "ts",
                "id": "driver_id"
            },
        )
        fv = get_feature_view(file_source)
        with tempfile.TemporaryDirectory(
        ) as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:
            config = RepoConfig(
                registry=str(Path(repo_dir_name) / "registry.db"),
                # A random suffix gives every invocation its own project name.
                project=
                f"test_bq_correctness_{str(uuid.uuid4()).replace('-', '')}",
                provider="local",
                online_store=OnlineStoreConfig(local=LocalOnlineStoreConfig(
                    path=str(Path(data_dir_name) / "online_store.db"))),
            )
            fs = FeatureStore(config=config)
            fs.apply([fv])

            yield fs, fv
示例#2
0
def prep_dynamodb_fs_and_fv() -> Iterator[Tuple[FeatureStore, FeatureView]]:
    """Yield an AWS-provider FeatureStore (DynamoDB online store) and a feature view.

    The frame returned by ``create_dataset()`` is written to a parquet file,
    wrapped in a ``FileSource`` and turned into a feature view with
    ``get_feature_view``. The view and a ``driver`` entity are applied to a
    store configured with a DynamoDB online store and a file offline store.

    Yields:
        A ``(FeatureStore, FeatureView)`` pair. The temporary directories
        holding the parquet file and the registry are removed when the
        generator finishes or is closed.
    """
    # The parquet file is placed in a TemporaryDirectory instead of a
    # NamedTemporaryFile: closing a NamedTemporaryFile deletes it, so a file
    # re-created afterwards under the same name would never be cleaned up.
    with tempfile.TemporaryDirectory() as source_dir_name:
        parquet_path = str(Path(source_dir_name) / "dataset.parquet")
        df = create_dataset()
        df.to_parquet(parquet_path)
        file_source = FileSource(
            file_format=ParquetFormat(),
            file_url=f"file://{parquet_path}",
            event_timestamp_column="ts",
            created_timestamp_column="created_ts",
            date_partition_column="",
            field_mapping={"ts_1": "ts", "id": "driver_id"},
        )
        fv = get_feature_view(file_source)
        e = Entity(
            name="driver",
            description="id for driver",
            join_key="driver_id",
            value_type=ValueType.INT32,
        )
        with tempfile.TemporaryDirectory() as repo_dir_name:
            config = RepoConfig(
                registry=str(Path(repo_dir_name) / "registry.db"),
                # A random suffix gives every invocation its own project name.
                project=f"test_bq_correctness_{str(uuid.uuid4()).replace('-', '')}",
                provider="aws",
                online_store=DynamoDBOnlineStoreConfig(region="us-west-2"),
                offline_store=FileOfflineStoreConfig(),
            )
            fs = FeatureStore(config=config)
            fs.apply([fv, e])

            yield fs, fv
示例#3
0
def test_usage_on(dummy_exporter, enabling_toggle):
    """Apply one entity and check the three records collected by the exporter.

    After ``apply`` the exporter must hold exactly three records whose
    ``entrypoint`` values are, in order, the registry read, the registry
    update and ``FeatureStore.apply``.
    """
    _reload_feast()
    # Imported only after the reload call above, exactly as ordered originally.
    from feast.feature_store import FeatureStore

    with tempfile.TemporaryDirectory() as temp_dir:
        repo_config = RepoConfig(
            registry=os.path.join(temp_dir, "registry.db"),
            project="fake_project",
            provider="local",
            online_store=SqliteOnlineStoreConfig(
                path=os.path.join(temp_dir, "online.db")),
        )
        test_feature_store = FeatureStore(config=repo_config)
        driver_car = Entity(
            name="driver_car_id",
            description="Car driver id",
            value_type=ValueType.STRING,
            tags={"team": "matchmaking"},
        )

        test_feature_store.apply([driver_car])

        expected_entrypoints = [
            "feast.infra.local.LocalRegistryStore.get_registry_proto",
            "feast.infra.local.LocalRegistryStore.update_registry_proto",
            "feast.feature_store.FeatureStore.apply",
        ]
        assert len(dummy_exporter) == 3
        # Each expected pair must be a subset of the record at that position.
        for position, entrypoint in enumerate(expected_entrypoints):
            expected_items = {"entrypoint": entrypoint}.items()
            assert expected_items <= dummy_exporter[position].items()
示例#4
0
文件: cli.py 项目: qooba/feast
def feature_view_list(ctx: click.Context):
    """
    List all feature views
    """
    repo = ctx.obj["CHDIR"]
    cli_check_repo(repo)
    store = FeatureStore(repo_path=str(repo))

    # Regular, request and on-demand feature views are reported together,
    # in that order.
    all_views = [
        *store.list_feature_views(),
        *store.list_request_feature_views(),
        *store.list_on_demand_feature_views(),
    ]

    rows = []
    for view in all_views:
        entity_names = set()
        if isinstance(view, FeatureView):
            entity_names.update(view.entities)
        elif isinstance(view, OnDemandFeatureView):
            # Collect entities from those inputs that are plain FeatureViews.
            for source_view in view.inputs.values():
                if isinstance(source_view, FeatureView):
                    entity_names.update(source_view.entities)
        # Views for which no entities were collected show "n/a" instead.
        entity_cell = entity_names if entity_names else "n/a"
        rows.append([view.name, entity_cell, type(view).__name__])

    from tabulate import tabulate

    rendered = tabulate(rows, headers=["NAME", "ENTITIES", "TYPE"],
                        tablefmt="plain")
    print(rendered)
示例#5
0
文件: cli.py 项目: qooba/feast
def serve_transformations_command(ctx: click.Context, port: int):
    """[Experimental] Start the feature consumption server locally on a given port."""
    # The repo directory comes from the CLI context under the "CHDIR" key and
    # is checked before a store is built from it.
    repo = ctx.obj["CHDIR"]
    cli_check_repo(repo)
    store = FeatureStore(repo_path=str(repo))

    store.serve_transformations(port)
示例#6
0
def test_non_local_feature_repo() -> None:
    """
    Apply and tear down a sample repo through the CLI runner.

    A feature_store.yaml and a copy of example_feature_repo_1.py are written
    into a temporary directory. Both CLI invocations must exit with code 0 and
    the applied store must list exactly three feature views.
    """
    cli = CliRunner()
    with tempfile.TemporaryDirectory() as repo_dir_name:
        repo_path = Path(repo_dir_name)

        # Repo configuration; the text goes through dedent before being written.
        yaml_text = dedent("""
        project: foo
        registry: data/registry.db
        provider: local
        online_store:
            path: data/online_store.db
        """)
        (repo_path / "feature_store.yaml").write_text(yaml_text)

        # Feature definitions are copied from the example file next to this test.
        example_source = Path(__file__).parent / "example_feature_repo_1.py"
        (repo_path / "example.py").write_text(example_source.read_text())

        apply_result = cli.run(["apply"], cwd=repo_path)
        assertpy.assert_that(apply_result.returncode).is_equal_to(0)

        fs = FeatureStore(repo_path=str(repo_path))
        assertpy.assert_that(fs.list_feature_views()).is_length(3)

        teardown_result = cli.run(["teardown"], cwd=repo_path)
        assertpy.assert_that(teardown_result.returncode).is_equal_to(0)
示例#7
0
文件: cli.py 项目: feast-dev/feast
def serve_command(ctx: click.Context, host: str, port: int,
                  no_access_log: bool):
    """Start a feature server locally on a given port."""
    repo_dir = ctx.obj["CHDIR"]
    cli_check_repo(repo_dir)
    # Build the store from the checked repo directory, then hand over to serve().
    feature_store = FeatureStore(repo_path=str(repo_dir))
    feature_store.serve(host, port, no_access_log)
    def test_bigquery_query_to_datastore_correctness(self):
        """Materialize a query-based BigQuery source and check the online value.

        Three rows are loaded into a new BigQuery table, a feature view reading
        them through a ``SELECT *`` query is applied and materialized over the
        last five minutes, and the online value for ``driver_id`` 1 must be
        within 1e-6 of 0.3.
        """
        # Dataset: ids 1, 2, 1; the first row is two minutes older than the rest.
        now = pd.Timestamp.now(tz="UTC").round("ms")
        frame = pd.DataFrame.from_dict({
            "id": [1, 2, 1],
            "value": [0.1, 0.2, 0.3],
            "ts_1": [now - timedelta(minutes=2), now, now],
            "created_ts": [now, now, now],
        })

        # Load the frame into a table whose name carries the current epoch second.
        load_config = bigquery.LoadJobConfig()
        table_id = f"{self.gcp_project}.{self.bigquery_dataset}.query_correctness_{int(time.time())}"
        query = f"SELECT * FROM `{table_id}`"
        load_job = self.client.load_table_from_dataframe(
            frame, table_id, job_config=load_config)
        load_job.result()

        # Feature view over the query, renaming ts_1 -> ts and id -> driver_id.
        query_source = BigQuerySource(
            event_timestamp_column="ts",
            created_timestamp_column="created_ts",
            field_mapping={
                "ts_1": "ts",
                "id": "driver_id"
            },
            date_partition_column="",
            query=query,
        )
        fv = FeatureView(
            name="test_bq_query_correctness",
            entities=["driver_id"],
            features=[Feature("value", ValueType.FLOAT)],
            ttl=timedelta(minutes=5),
            input=query_source,
        )
        fs = FeatureStore(config=RepoConfig(
            metadata_store="./metadata.db",
            project=f"test_bq_query_correctness_{int(time.time())}",
            provider="gcp",
        ))
        fs.apply([fv])

        # Materialize the window [now - 5 min, now].
        window_start = datetime.utcnow() - timedelta(minutes=5)
        window_end = datetime.utcnow() - timedelta(minutes=0)
        fs.materialize([fv.name], window_start, window_end)

        # Check the online value for driver 1.
        feature_ref = f"{fv.name}:value"
        response_dict = fs.get_online_features(
            [feature_ref], [{"driver_id": 1}]).to_dict()
        assert abs(response_dict[feature_ref][0] - 0.3) < 1e-6
示例#9
0
def feature_store_with_s3_registry():
    """Return a FeatureStore on the "aws" provider whose registry path is an S3 URL.

    The registry URL embeds the current time in milliseconds. The online store
    is a DynamoDB config for us-west-2 and the offline store is a file config.
    """
    registry_url = f"s3://feast-integration-tests/registries/{int(time.time() * 1000)}/registry.db"
    repo_config = RepoConfig(
        registry=registry_url,
        project="default",
        provider="aws",
        online_store=DynamoDBOnlineStoreConfig(region="us-west-2"),
        offline_store=FileOfflineStoreConfig(),
    )
    return FeatureStore(config=repo_config)
示例#10
0
def test_apply_remote_repo():
    """Return a local-provider FeatureStore backed by two fresh temporary files.

    One temporary file path is used as the registry and the other as the
    SQLite online store path.

    Returns:
        The configured ``FeatureStore``.
    """
    import os

    registry_fd, registry_path = mkstemp()
    online_store_fd, online_store_path = mkstemp()
    # mkstemp() returns an already-open OS-level descriptor; only the paths
    # are needed here, so close both descriptors instead of leaking them.
    os.close(registry_fd)
    os.close(online_store_fd)
    return FeatureStore(config=RepoConfig(
        registry=registry_path,
        project="default",
        provider="local",
        online_store=SqliteOnlineStoreConfig(path=online_store_path),
    ))
示例#11
0
def feature_store_with_local_registry():
    """Return a local-provider FeatureStore backed by two fresh temporary files.

    One temporary file path is used as the registry and the other as the
    SQLite online store path.

    Returns:
        The configured ``FeatureStore``.
    """
    import os

    registry_fd, registry_path = mkstemp()
    online_store_fd, online_store_path = mkstemp()
    # mkstemp() returns an already-open OS-level descriptor; only the paths
    # are needed here, so close both descriptors instead of leaking them.
    os.close(registry_fd)
    os.close(online_store_fd)
    return FeatureStore(config=RepoConfig(
        registry=registry_path,
        project="default",
        provider="local",
        online_store=SqliteOnlineStoreConfig(path=online_store_path),
    ))
示例#12
0
 def feature_store_with_local_registry(self):
     """Return a local-provider FeatureStore backed by two fresh temporary files.

     One temporary file path is passed as ``metadata_store`` and the other as
     the path of the local online store.

     Returns:
         The configured ``FeatureStore``.
     """
     import os

     registry_fd, registry_path = mkstemp()
     online_store_fd, online_store_path = mkstemp()
     # mkstemp() returns an already-open OS-level descriptor; only the paths
     # are needed here, so close both descriptors instead of leaking them.
     os.close(registry_fd)
     os.close(online_store_fd)
     return FeatureStore(config=RepoConfig(
         metadata_store=registry_path,
         project="default",
         provider="local",
         online_store=OnlineStoreConfig(local=LocalOnlineStoreConfig(
             path=online_store_path)),
     ))
示例#13
0
def test_exception_usage_off(dummy_exporter, enabling_toggle):
    """With the toggle's ``__bool__`` forced to False, a failing init exports nothing.

    Constructing a FeatureStore on a non-existent directory must raise
    ``OSError`` and leave ``dummy_exporter`` empty.
    """
    enabling_toggle.__bool__.return_value = False

    _reload_feast()
    # Imported only after the reload call above, exactly as ordered originally.
    from feast.feature_store import FeatureStore

    missing_repo = "/tmp/non_existent_directory"
    with pytest.raises(OSError):
        FeatureStore(missing_repo)

    assert not dummy_exporter
def prep_bq_fs_and_fv(
    bq_source_type: str, ) -> Iterator[Tuple[FeatureStore, FeatureView]]:
    """Yield a GCP FeatureStore and a feature view backed by a new BigQuery table.

    The frame from ``create_dataset()`` is loaded into a uniquely named table
    in the ``test_ingestion`` dataset. A ``BigQuerySource`` over it is turned
    into a feature view with ``driver_feature_view`` and applied, together
    with a ``driver`` entity, to a store using a Datastore online store.

    Args:
        bq_source_type: ``"table"`` to reference the loaded table through
            ``table_ref``, ``"query"`` to reference it through a ``SELECT *``
            query. For any other value both are passed as ``None``.

    Yields:
        A ``(FeatureStore, FeatureView)`` pair. ``fs.teardown()`` runs once
        the consumer is done, including when an exception is thrown into the
        generator at the yield or the generator is closed.
    """
    client = bigquery.Client()
    gcp_project = client.project
    bigquery_dataset = "test_ingestion"
    dataset = bigquery.Dataset(f"{gcp_project}.{bigquery_dataset}")
    client.create_dataset(dataset, exists_ok=True)
    dataset.default_table_expiration_ms = (1000 * 60 * 60 * 24 * 14
                                           )  # 2 weeks in milliseconds
    client.update_dataset(dataset, ["default_table_expiration_ms"])

    df = create_dataset()

    job_config = bigquery.LoadJobConfig()
    # time_ns() already returns an int; it keeps table names distinct per call.
    table_ref = f"{gcp_project}.{bigquery_dataset}.{bq_source_type}_correctness_{time.time_ns()}"
    query = f"SELECT * FROM `{table_ref}`"
    job = client.load_table_from_dataframe(df,
                                           table_ref,
                                           job_config=job_config)
    job.result()

    bigquery_source = BigQuerySource(
        table_ref=table_ref if bq_source_type == "table" else None,
        query=query if bq_source_type == "query" else None,
        event_timestamp_column="ts",
        created_timestamp_column="created_ts",
        date_partition_column="",
        field_mapping={
            "ts_1": "ts",
            "id": "driver_id"
        },
    )

    fv = driver_feature_view(bigquery_source)
    e = Entity(
        name="driver",
        description="id for driver",
        join_key="driver_id",
        value_type=ValueType.INT32,
    )
    with tempfile.TemporaryDirectory() as repo_dir_name:
        config = RepoConfig(
            registry=str(Path(repo_dir_name) / "registry.db"),
            project=f"test_bq_correctness_{str(uuid.uuid4()).replace('-', '')}",
            provider="gcp",
            online_store=DatastoreOnlineStoreConfig(
                namespace="integration_test"),
        )
        fs = FeatureStore(config=config)
        fs.apply([fv, e])

        # try/finally so the applied infrastructure is torn down even when the
        # consumer fails; a bare statement after the yield would be skipped.
        try:
            yield fs, fv
        finally:
            fs.teardown()
示例#15
0
文件: cli.py 项目: qooba/feast
def disable_alpha_features(ctx: click.Context):
    """
    Disables all alpha features
    """
    repo_dir = ctx.obj["CHDIR"]
    cli_check_repo(repo_dir)
    repo_path = str(repo_dir)
    feature_store = FeatureStore(repo_path=repo_path)

    # Clear the flags on the loaded config, then write the config back to
    # the repo path.
    feature_store.config.flags = None
    feature_store.config.write_to_path(Path(repo_path))
示例#16
0
def test_exception_usage_on(dummy_exporter, enabling_toggle):
    """A failing FeatureStore init must export exactly one record describing it.

    The single record has to contain the ``__init__`` entrypoint and the repr
    of ``FileNotFoundError(2, "No such file or directory")``.
    """
    _reload_feast()
    # Imported only after the reload call above, exactly as ordered originally.
    from feast.feature_store import FeatureStore

    missing_repo = "/tmp/non_existent_directory"
    with pytest.raises(OSError):
        FeatureStore(missing_repo)

    assert len(dummy_exporter) == 1
    expected_record = {
        "entrypoint": "feast.feature_store.FeatureStore.__init__",
        "exception": repr(FileNotFoundError(2, "No such file or directory")),
    }
    # The expected pairs must be a subset of the exported record.
    assert expected_record.items() <= dummy_exporter[0].items()
示例#17
0
def _prepare_registry_and_repo(repo_config, repo_path):
    """Build a store from ``repo_config`` and parse the repo at ``repo_path``.

    Prints a message and exits with status 1 when the store's project name
    fails ``is_valid_name``.

    Returns:
        A ``(project, registry, repo, store)`` tuple, where ``repo`` is the
        result of ``parse_repo(repo_path)``.
    """
    store = FeatureStore(config=repo_config)
    project = store.project
    if not is_valid_name(project):
        message = (
            f"{project} is not valid. Project name should only have "
            f"alphanumerical values and underscores but not start with an underscore."
        )
        print(message)
        sys.exit(1)
    registry = store.registry
    # Bytecode writing is switched off before the repo is parsed.
    sys.dont_write_bytecode = True
    return project, registry, parse_repo(repo_path), store
示例#18
0
def feature_view_list():
    """
    List all feature views
    """
    cli_check_repo(Path.cwd())
    store = FeatureStore(repo_path=str(Path.cwd()))
    # One row per feature view: its name and its entities.
    rows = [[view.name, view.entities] for view in store.list_feature_views()]

    from tabulate import tabulate

    rendered = tabulate(rows, headers=["NAME", "ENTITIES"], tablefmt="plain")
    print(rendered)
示例#19
0
def benchmark_writes():
    """Write 14 days of driver stats to the online store on the "gcp" provider.

    A randomly named project is created in a temporary directory, a driver
    entity and an hourly-stats feature view are applied, the generated rows
    are converted to protos and written through the provider with a tqdm
    progress bar, and the provider infrastructure is torn down afterwards.
    """
    alphabet = string.ascii_lowercase + string.digits
    project_id = "test" + "".join(random.choice(alphabet) for _ in range(10))

    with tempfile.TemporaryDirectory() as temp_dir:
        repo_config = RepoConfig(
            registry=os.path.join(temp_dir, "registry.db"),
            project=project_id,
            provider="gcp",
        )
        store = FeatureStore(config=repo_config)

        # This is just to set data source to something, we're not reading from parquet source here.
        parquet_path = os.path.join(temp_dir, "data.parquet")

        driver = Entity(name="driver_id", value_type=ValueType.INT64)
        stats_source = create_driver_hourly_stats_source(parquet_path=parquet_path)
        stats_view = create_driver_hourly_stats_feature_view(stats_source)
        store.apply([stats_view, driver])

        provider = store._get_provider()

        # Generate rows for ids 0..99 over the 14 days ending now.
        end_date = datetime.utcnow()
        start_date = end_date - timedelta(days=14)
        driver_ids = list(range(100))
        data = create_driver_hourly_stats_df(driver_ids, start_date, end_date)

        # Show the data for reference
        print(data)
        arrow_table = pa.Table.from_pandas(data)
        proto_data = _convert_arrow_to_proto(arrow_table, stats_view, ["driver_id"])

        # Write it
        with tqdm(total=len(proto_data)) as progress:
            provider.online_write_batch(
                project=store.project,
                table=stats_view,
                data=proto_data,
                progress=progress.update,
            )

        # Tear down using whatever the registry currently lists.
        listed_views = store.list_feature_views()
        listed_entities = store.list_entities()
        provider.teardown_infra(
            store.project, tables=listed_views, entities=listed_entities
        )
示例#20
0
def feature_view_list(ctx: click.Context):
    """
    List all feature views
    """
    repo_dir = ctx.obj["CHDIR"]
    cli_check_repo(repo_dir)
    store = FeatureStore(repo_path=str(repo_dir))
    # One row per feature view: its name and its entities.
    rows = [[view.name, view.entities] for view in store.list_feature_views()]

    from tabulate import tabulate

    rendered = tabulate(rows, headers=["NAME", "ENTITIES"], tablefmt="plain")
    print(rendered)
示例#21
0
文件: cli.py 项目: qooba/feast
def enable_alpha_features(ctx: click.Context):
    """
    Enables all alpha features
    """
    repo_dir = ctx.obj["CHDIR"]
    cli_check_repo(repo_dir)
    repo_path = str(repo_dir)
    feature_store = FeatureStore(repo_path=repo_path)

    # Start from an empty mapping when the config carries no flags yet.
    if feature_store.config.flags is None:
        feature_store.config.flags = {}
    # Switch on every known flag, then write the config back to the repo path.
    for known_flag in flags.FLAG_NAMES:
        feature_store.config.flags[known_flag] = True
    feature_store.config.write_to_path(Path(repo_path))
示例#22
0
文件: cli.py 项目: qooba/feast
def on_demand_feature_view_list(ctx: click.Context):
    """
    [Experimental] List all on demand feature views
    """
    repo_dir = ctx.obj["CHDIR"]
    cli_check_repo(repo_dir)
    store = FeatureStore(repo_path=str(repo_dir))
    # One single-column row per on demand feature view.
    rows = [[view.name] for view in store.list_on_demand_feature_views()]

    from tabulate import tabulate

    rendered = tabulate(rows, headers=["NAME"], tablefmt="plain")
    print(rendered)
示例#23
0
文件: cli.py 项目: qooba/feast
def endpoint(ctx: click.Context):
    """
    Display feature server endpoints.
    """
    repo_dir = ctx.obj["CHDIR"]
    cli_check_repo(repo_dir)
    store = FeatureStore(repo_path=str(repo_dir))
    server_endpoint = store.get_feature_server_endpoint()

    # Guard clause: nothing to display when the store reports no endpoint.
    if server_endpoint is None:
        _logger.info("There is no active feature server.")
        return

    _logger.info(
        f"Feature server endpoint: {Style.BRIGHT + Fore.GREEN}{server_endpoint}{Style.RESET_ALL}"
    )
示例#24
0
文件: cli.py 项目: danielsiwiec/feast
def materialize_incremental_command(end_ts: str, views: List[str]):
    """
    Run an incremental materialization job to ingest new data into the online store. Feast will read
    all data from the previously ingested point to END_TS from the offline store and write it to the
    online store. If you don't specify feature view names using --views, all registered Feature
    Views will be incrementally materialized.

    END_TS should be in ISO 8601 format, e.g. '2021-07-16T19:20:01'
    """
    cli_check_repo(Path.cwd())
    store = FeatureStore(repo_path=str(Path.cwd()))
    # An empty selection is passed on as None.
    selected_views = views or None
    end_date = datetime.fromisoformat(end_ts)
    store.materialize_incremental(
        feature_views=selected_views,
        end_date=end_date,
    )
示例#25
0
文件: cli.py 项目: tleyden/feast
def materialize_command(repo_path: str, start_ts: str, end_ts: str, views: List[str]):
    """
    Run a (non-incremental) materialization job to ingest data into the online store. Feast
    will read all data between START_TS and END_TS from the offline store and write it to the
    online store. If you don't specify feature view names using --views, all registered Feature
    Views will be materialized.

    START_TS and END_TS should be in ISO 8601 format, e.g. '2021-07-16T19:20:01'
    """
    store = FeatureStore(repo_path=repo_path)
    store.materialize(
        # An empty selection is passed on as None.
        feature_views=None if not views else views,
        # Both parsed timestamps get their tzinfo replaced with `utc`.
        start_date=datetime.fromisoformat(start_ts).replace(tzinfo=utc),
        end_date=datetime.fromisoformat(end_ts).replace(tzinfo=utc),
    )
示例#26
0
def test_basic() -> None:
    """Apply a sample repo twice on the "aws" provider, run a read/write check, tear down.

    The repo config (DynamoDB online store, Redshift offline store) and the
    example feature definitions are written to temporary directories. Every
    CLI invocation must exit with code 0.
    """
    alphabet = string.ascii_lowercase + string.digits
    project_id = "".join(random.choice(alphabet) for _ in range(10))
    cli = CliRunner()
    with tempfile.TemporaryDirectory(
    ) as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:

        repo_path = Path(repo_dir_name)
        data_path = Path(data_dir_name)

        # The registry lives in the separate data directory.
        yaml_text = dedent(f"""
            project: {project_id}
            registry: {data_path / "registry.db"}
            provider: aws
            online_store:
                type: dynamodb
                region: us-west-2
            offline_store:
              type: redshift
              cluster_id: feast-integration-tests
              region: us-west-2
              user: admin
              database: feast
              s3_staging_location: s3://feast-integration-tests/redshift
              iam_role: arn:aws:iam::402087665549:role/redshift_s3_access_role
            """)
        (repo_path / "feature_store.yaml").write_text(yaml_text)

        example_text = get_example_repo("example_feature_repo_1.py")
        (repo_path / "example.py").write_text(example_text)

        first_apply = cli.run(["apply"], cwd=repo_path)
        assert first_apply.returncode == 0

        # Doing another apply should be a no op, and should not cause errors
        second_apply = cli.run(["apply"], cwd=repo_path)
        assert second_apply.returncode == 0

        store = FeatureStore(repo_path=str(repo_path), config=None)
        basic_rw_test(store, view_name="driver_locations")

        teardown_result = cli.run(["teardown"], cwd=repo_path)
        assert teardown_result.returncode == 0
示例#27
0
文件: cli.py 项目: qooba/feast
def disable_alpha_feature(ctx: click.Context, name: str):
    """
    Disables an alpha feature
    """
    # Unknown flag names are rejected before the repo is touched.
    if name not in flags.FLAG_NAMES:
        raise ValueError(f"Flag name, {name}, not valid.")

    repo_dir = ctx.obj["CHDIR"]
    cli_check_repo(repo_dir)
    repo_path = str(repo_dir)
    feature_store = FeatureStore(repo_path=repo_path)

    # Only a flag already present in the config is switched off and persisted;
    # otherwise nothing is written.
    if feature_store.config.flags is not None and name in feature_store.config.flags:
        feature_store.config.flags[name] = False
        feature_store.config.write_to_path(Path(repo_path))
示例#28
0
文件: cli.py 项目: qooba/feast
def list_alpha_features(ctx: click.Context):
    """
    Lists all alpha features
    """
    repo_dir = ctx.obj["CHDIR"]
    cli_check_repo(repo_dir)
    repo_path = str(repo_dir)
    feature_store = FeatureStore(repo_path=repo_path)

    # Work on a copy so the module-level flag names stay untouched; the
    # FLAG_ALPHA_FEATURES_NAME entry itself is not listed.
    flags_to_show = flags.FLAG_NAMES.copy()
    flags_to_show.remove(flags.FLAG_ALPHA_FEATURES_NAME)

    print("Alpha features:")
    for flag in flags_to_show:
        if flags_helper.feature_flag_enabled(feature_store.config, flag):
            state = "enabled"
        else:
            state = "disabled"
        print(f"{flag}: {state}")
示例#29
0
def entity_list():
    """
    List all entities
    """
    cli_check_repo(Path.cwd())
    store = FeatureStore(repo_path=str(Path.cwd()))
    # One row per entity: name, description and value type.
    rows = [[entity.name, entity.description, entity.value_type]
            for entity in store.list_entities()]

    from tabulate import tabulate

    rendered = tabulate(rows,
                        headers=["NAME", "DESCRIPTION", "TYPE"],
                        tablefmt="plain")
    print(rendered)
示例#30
0
def entity_list(ctx: click.Context):
    """
    List all entities
    """
    repo_dir = ctx.obj["CHDIR"]
    cli_check_repo(repo_dir)
    store = FeatureStore(repo_path=str(repo_dir))
    # One row per entity: name, description and value type.
    rows = [[entity.name, entity.description, entity.value_type]
            for entity in store.list_entities()]

    from tabulate import tabulate

    rendered = tabulate(rows,
                        headers=["NAME", "DESCRIPTION", "TYPE"],
                        tablefmt="plain")
    print(rendered)