Exemplo n.º 1
0
def run_materialization_test(fs: FeatureStore, fv: FeatureView) -> None:
    now = datetime.utcnow()
    # Run materialize()
    # use both tz-naive & tz-aware timestamps to test that they're both correctly handled
    start_date = (now - timedelta(hours=5)).replace(tzinfo=utc)
    end_date = now - timedelta(hours=2)
    fs.materialize([fv.name], start_date, end_date)

    # check result of materialize()
    response_dict = fs.get_online_features([f"{fv.name}:value"],
                                           [{
                                               "driver_id": 1
                                           }]).to_dict()
    assert abs(response_dict[f"{fv.name}__value"][0] - 0.3) < 1e-6

    # check prior value for materialize_incremental()
    response_dict = fs.get_online_features([f"{fv.name}:value"],
                                           [{
                                               "driver_id": 3
                                           }]).to_dict()
    assert abs(response_dict[f"{fv.name}__value"][0] - 4) < 1e-6

    # run materialize_incremental()
    fs.materialize_incremental(
        [fv.name],
        now - timedelta(seconds=0),
    )

    # check result of materialize_incremental()
    response_dict = fs.get_online_features([f"{fv.name}:value"],
                                           [{
                                               "driver_id": 3
                                           }]).to_dict()
    assert abs(response_dict[f"{fv.name}__value"][0] - 5) < 1e-6
Exemplo n.º 2
0
def check_offline_and_online_features(
    fs: FeatureStore,
    fv: FeatureView,
    driver_id: int,
    event_timestamp: datetime,
    expected_value: Optional[float],
) -> None:
    # Check online store
    response_dict = fs.get_online_features(
        [f"{fv.name}:value"], [{"driver": driver_id}]
    ).to_dict()

    if expected_value:
        assert abs(response_dict[f"{fv.name}__value"][0] - expected_value) < 1e-6
    else:
        assert response_dict[f"{fv.name}__value"][0] is None

    # Check offline store
    df = fs.get_historical_features(
        entity_df=pd.DataFrame.from_dict(
            {"driver_id": [driver_id], "event_timestamp": [event_timestamp]}
        ),
        feature_refs=[f"{fv.name}:value"],
    ).to_df()

    if expected_value:
        assert abs(df.to_dict()[f"{fv.name}__value"][0] - expected_value) < 1e-6
    else:
        df = df.where(pd.notnull(df), None)
        assert df.to_dict()[f"{fv.name}__value"][0] is None
    def test_bigquery_query_to_datastore_correctness(self):
        # create dataset
        ts = pd.Timestamp.now(tz="UTC").round("ms")
        data = {
            "id": [1, 2, 1],
            "value": [0.1, 0.2, 0.3],
            "ts_1": [ts - timedelta(minutes=2), ts, ts],
            "created_ts": [ts, ts, ts],
        }
        df = pd.DataFrame.from_dict(data)

        # load dataset into BigQuery
        job_config = bigquery.LoadJobConfig()
        table_id = f"{self.gcp_project}.{self.bigquery_dataset}.query_correctness_{int(time.time())}"
        query = f"SELECT * FROM `{table_id}`"
        job = self.client.load_table_from_dataframe(df,
                                                    table_id,
                                                    job_config=job_config)
        job.result()

        # create FeatureView
        fv = FeatureView(
            name="test_bq_query_correctness",
            entities=["driver_id"],
            features=[Feature("value", ValueType.FLOAT)],
            ttl=timedelta(minutes=5),
            input=BigQuerySource(
                event_timestamp_column="ts",
                created_timestamp_column="created_ts",
                field_mapping={
                    "ts_1": "ts",
                    "id": "driver_id"
                },
                date_partition_column="",
                query=query,
            ),
        )
        config = RepoConfig(
            metadata_store="./metadata.db",
            project=f"test_bq_query_correctness_{int(time.time())}",
            provider="gcp",
        )
        fs = FeatureStore(config=config)
        fs.apply([fv])

        # run materialize()
        fs.materialize(
            [fv.name],
            datetime.utcnow() - timedelta(minutes=5),
            datetime.utcnow() - timedelta(minutes=0),
        )

        # check result of materialize()
        response_dict = fs.get_online_features([f"{fv.name}:value"],
                                               [{
                                                   "driver_id": 1
                                               }]).to_dict()
        assert abs(response_dict[f"{fv.name}:value"][0] - 0.3) < 1e-6
def check_offline_and_online_features(
    fs: FeatureStore,
    fv: FeatureView,
    driver_id: int,
    event_timestamp: datetime,
    expected_value: Optional[float],
    full_feature_names: bool,
) -> None:
    # Check online store
    response_dict = fs.get_online_features(
        [f"{fv.name}:value"],
        [{
            "driver": driver_id
        }],
        full_feature_names=full_feature_names,
    ).to_dict()

    if full_feature_names:
        if expected_value:
            assert abs(response_dict[f"{fv.name}__value"][0] -
                       expected_value) < 1e-6
        else:
            assert response_dict[f"{fv.name}__value"][0] is None
    else:
        if expected_value:
            assert abs(response_dict["value"][0] - expected_value) < 1e-6
        else:
            assert response_dict["value"][0] is None

    # Check offline store
    df = fs.get_historical_features(
        entity_df=pd.DataFrame.from_dict({
            "driver_id": [driver_id],
            "event_timestamp": [event_timestamp]
        }),
        features=[f"{fv.name}:value"],
        full_feature_names=full_feature_names,
    ).to_df()

    if full_feature_names:
        if expected_value:
            assert abs(df.to_dict()[f"{fv.name}__value"][0] -
                       expected_value) < 1e-6
        else:
            assert math.isnan(df.to_dict()[f"{fv.name}__value"][0])
    else:
        if expected_value:
            assert abs(df.to_dict()["value"][0] - expected_value) < 1e-6
        else:
            assert math.isnan(df.to_dict()["value"][0])