Пример #1
0
def run_materialization_test(fs: FeatureStore, fv: FeatureView) -> None:
    now = datetime.utcnow()
    # Run materialize()
    # use both tz-naive & tz-aware timestamps to test that they're both correctly handled
    start_date = (now - timedelta(hours=5)).replace(tzinfo=utc)
    end_date = now - timedelta(hours=2)
    fs.materialize([fv.name], start_date, end_date)

    # check result of materialize()
    response_dict = fs.get_online_features([f"{fv.name}:value"],
                                           [{
                                               "driver_id": 1
                                           }]).to_dict()
    assert abs(response_dict[f"{fv.name}__value"][0] - 0.3) < 1e-6

    # check prior value for materialize_incremental()
    response_dict = fs.get_online_features([f"{fv.name}:value"],
                                           [{
                                               "driver_id": 3
                                           }]).to_dict()
    assert abs(response_dict[f"{fv.name}__value"][0] - 4) < 1e-6

    # run materialize_incremental()
    fs.materialize_incremental(
        [fv.name],
        now - timedelta(seconds=0),
    )

    # check result of materialize_incremental()
    response_dict = fs.get_online_features([f"{fv.name}:value"],
                                           [{
                                               "driver_id": 3
                                           }]).to_dict()
    assert abs(response_dict[f"{fv.name}__value"][0] - 5) < 1e-6
Пример #2
0
def run_offline_online_store_consistency_test(
    fs: FeatureStore, fv: FeatureView
) -> None:
    now = datetime.utcnow()
    # Run materialize()
    # use both tz-naive & tz-aware timestamps to test that they're both correctly handled
    start_date = (now - timedelta(hours=5)).replace(tzinfo=utc)
    end_date = now - timedelta(hours=2)
    fs.materialize(feature_views=[fv.name], start_date=start_date, end_date=end_date)

    # check result of materialize()
    check_offline_and_online_features(
        fs=fs, fv=fv, driver_id=1, event_timestamp=end_date, expected_value=0.3
    )

    check_offline_and_online_features(
        fs=fs, fv=fv, driver_id=2, event_timestamp=end_date, expected_value=None
    )

    # check prior value for materialize_incremental()
    check_offline_and_online_features(
        fs=fs, fv=fv, driver_id=3, event_timestamp=end_date, expected_value=4
    )

    # run materialize_incremental()
    fs.materialize_incremental(feature_views=[fv.name], end_date=now)

    # check result of materialize_incremental()
    check_offline_and_online_features(
        fs=fs, fv=fv, driver_id=3, event_timestamp=now, expected_value=5
    )
    def test_bigquery_query_to_datastore_correctness(self):
        # create dataset
        ts = pd.Timestamp.now(tz="UTC").round("ms")
        data = {
            "id": [1, 2, 1],
            "value": [0.1, 0.2, 0.3],
            "ts_1": [ts - timedelta(minutes=2), ts, ts],
            "created_ts": [ts, ts, ts],
        }
        df = pd.DataFrame.from_dict(data)

        # load dataset into BigQuery
        job_config = bigquery.LoadJobConfig()
        table_id = f"{self.gcp_project}.{self.bigquery_dataset}.query_correctness_{int(time.time())}"
        query = f"SELECT * FROM `{table_id}`"
        job = self.client.load_table_from_dataframe(df,
                                                    table_id,
                                                    job_config=job_config)
        job.result()

        # create FeatureView
        fv = FeatureView(
            name="test_bq_query_correctness",
            entities=["driver_id"],
            features=[Feature("value", ValueType.FLOAT)],
            ttl=timedelta(minutes=5),
            input=BigQuerySource(
                event_timestamp_column="ts",
                created_timestamp_column="created_ts",
                field_mapping={
                    "ts_1": "ts",
                    "id": "driver_id"
                },
                date_partition_column="",
                query=query,
            ),
        )
        config = RepoConfig(
            metadata_store="./metadata.db",
            project=f"test_bq_query_correctness_{int(time.time())}",
            provider="gcp",
        )
        fs = FeatureStore(config=config)
        fs.apply([fv])

        # run materialize()
        fs.materialize(
            [fv.name],
            datetime.utcnow() - timedelta(minutes=5),
            datetime.utcnow() - timedelta(minutes=0),
        )

        # check result of materialize()
        response_dict = fs.get_online_features([f"{fv.name}:value"],
                                               [{
                                                   "driver_id": 1
                                               }]).to_dict()
        assert abs(response_dict[f"{fv.name}:value"][0] - 0.3) < 1e-6
Пример #4
0
def materialize_command(repo_path: str, start_ts: str, end_ts: str, views: List[str]):
    """
    Run a (non-incremental) materialization job to ingest data into the online store. Feast
    will read all data between START_TS and END_TS from the offline store and write it to the
    online store. If you don't specify feature view names using --views, all registred Feature
    Views will be materialized.

    START_TS and END_TS should be in ISO 8601 format, e.g. '2021-07-16T19:20:01'
    """
    store = FeatureStore(repo_path=repo_path)
    store.materialize(
        feature_views=None if not views else views,
        start_date=datetime.fromisoformat(start_ts).replace(tzinfo=utc),
        end_date=datetime.fromisoformat(end_ts).replace(tzinfo=utc),
    )
Пример #5
0
def materialize_command(ctx: click.Context, start_ts: str, end_ts: str,
                        views: List[str]):
    """
    Run a (non-incremental) materialization job to ingest data into the online store. Feast
    will read all data between START_TS and END_TS from the offline store and write it to the
    online store. If you don't specify feature view names using --views, all registered Feature
    Views will be materialized.

    START_TS and END_TS should be in ISO 8601 format, e.g. '2021-07-16T19:20:01'
    """
    repo = ctx.obj["CHDIR"]
    cli_check_repo(repo)
    store = FeatureStore(repo_path=str(repo))
    store.materialize(
        feature_views=None if not views else views,
        start_date=utils.make_tzaware(datetime.fromisoformat(start_ts)),
        end_date=utils.make_tzaware(datetime.fromisoformat(end_ts)),
    )
Пример #6
0
    def test_bigquery_ingestion_correctness(self):
        # create dataset
        ts = pd.Timestamp.now(tz="UTC").round("ms")
        checked_value = (
            random.random()
        )  # random value so test doesn't still work if no values written to online store
        data = {
            "id": [1, 2, 1],
            "value": [0.1, 0.2, checked_value],
            "ts_1": [ts - timedelta(minutes=2), ts, ts],
            "created_ts": [ts, ts, ts],
        }
        df = pd.DataFrame.from_dict(data)

        # load dataset into BigQuery
        job_config = bigquery.LoadJobConfig()
        table_id = (
            f"{self.gcp_project}.{self.bigquery_dataset}.correctness_{int(time.time())}"
        )
        job = self.client.load_table_from_dataframe(df,
                                                    table_id,
                                                    job_config=job_config)
        job.result()

        # create FeatureView
        fv = FeatureView(
            name="test_bq_correctness",
            entities=["driver_id"],
            features=[Feature("value", ValueType.FLOAT)],
            ttl=timedelta(minutes=5),
            input=BigQuerySource(
                event_timestamp_column="ts",
                table_ref=table_id,
                created_timestamp_column="created_ts",
                field_mapping={
                    "ts_1": "ts",
                    "id": "driver_id"
                },
                date_partition_column="",
            ),
        )
        config = RepoConfig(
            metadata_store="./metadata.db",
            project="default",
            provider="gcp",
            online_store=OnlineStoreConfig(
                local=LocalOnlineStoreConfig("online_store.db")),
        )
        fs = FeatureStore(config=config)
        fs.apply([fv])

        # run materialize()
        fs.materialize(
            ["test_bq_correctness"],
            datetime.utcnow() - timedelta(minutes=5),
            datetime.utcnow() - timedelta(minutes=0),
        )

        # check result of materialize()
        entity_key = EntityKeyProto(entity_names=["driver_id"],
                                    entity_values=[ValueProto(int64_val=1)])
        t, val = fs._get_provider().online_read("default", fv, entity_key)
        assert abs(val["value"].double_val - checked_value) < 1e-6