def run_materialization_test(fs: FeatureStore, fv: FeatureView) -> None:
    """Materialize ``fv`` and verify the values served by the online store.

    The test first materializes the window from five hours ago to two hours
    ago and checks the online values for drivers 1 and 3, then runs an
    incremental materialization up to "now" and checks that the value for
    driver 3 has moved forward.

    Args:
        fs: Feature store used to materialize and to read online features.
        fv: Feature view under test; it is read here through ``fv.name`` and
            its ``value`` feature.
    """

    def online_value(driver_id):
        """Return the first online ``value`` returned for ``driver_id``."""
        response = fs.get_online_features(
            [f"{fv.name}:value"], [{"driver_id": driver_id}]
        )
        return response.to_dict()[f"{fv.name}__value"][0]

    now = datetime.utcnow()

    # Deliberately mix a tz-aware start with a tz-naive end so that both
    # kinds of timestamp are exercised by materialize().
    window_start = (now - timedelta(hours=5)).replace(tzinfo=utc)
    window_end = now - timedelta(hours=2)
    fs.materialize([fv.name], window_start, window_end)

    # Result of the bounded materialize() call.
    assert abs(online_value(1) - 0.3) < 1e-6

    # Value served for driver 3 before the incremental run.
    assert abs(online_value(3) - 4) < 1e-6

    # Bring the online store up to the current time.
    fs.materialize_incremental([fv.name], now)

    # Value served for driver 3 after the incremental run.
    assert abs(online_value(3) - 5) < 1e-6
def run_offline_online_store_consistency_test(
    fs: FeatureStore, fv: FeatureView
) -> None:
    """Check offline and online values after materializing ``fv``.

    A bounded materialization (five hours ago to two hours ago) is followed
    by checks for drivers 1, 2 and 3 at the end of that window; an
    incremental materialization up to "now" is then followed by a final
    check for driver 3.

    Args:
        fs: Feature store that runs the materialization jobs.
        fv: Feature view whose values are checked.
    """
    now = datetime.utcnow()

    # Deliberately mix a tz-aware start with a tz-naive end so that both
    # kinds of timestamp are exercised by materialize().
    window_start = (now - timedelta(hours=5)).replace(tzinfo=utc)
    window_end = now - timedelta(hours=2)
    fs.materialize(
        feature_views=[fv.name], start_date=window_start, end_date=window_end
    )

    # Expected values per driver once the bounded window is materialized.
    # Driver 3's value here is the baseline for the incremental run below.
    expected_after_materialize = ((1, 0.3), (2, None), (3, 4))
    for driver_id, expected_value in expected_after_materialize:
        check_offline_and_online_features(
            fs=fs,
            fv=fv,
            driver_id=driver_id,
            event_timestamp=window_end,
            expected_value=expected_value,
        )

    # Incremental run up to the current time, then re-check driver 3.
    fs.materialize_incremental(feature_views=[fv.name], end_date=now)
    check_offline_and_online_features(
        fs=fs, fv=fv, driver_id=3, event_timestamp=now, expected_value=5
    )
def test_bigquery_query_to_datastore_correctness(self):
    """Materialize a query-backed BigQuery source and read the value back.

    Loads a three-row frame into a freshly named BigQuery table, registers a
    feature view whose source is a ``SELECT *`` over that table, materializes
    the last five minutes and asserts the online value for driver 1.
    """
    # Build the dataset: driver 1 has two rows, the newer one carrying 0.3.
    ts = pd.Timestamp.now(tz="UTC").round("ms")
    rows = {
        "id": [1, 2, 1],
        "value": [0.1, 0.2, 0.3],
        "ts_1": [ts - timedelta(minutes=2), ts, ts],
        "created_ts": [ts, ts, ts],
    }
    df = pd.DataFrame.from_dict(rows)

    # Load the dataset into a table whose name is suffixed with the current
    # epoch second, and wait for the load job to finish.
    job_config = bigquery.LoadJobConfig()
    table_id = f"{self.gcp_project}.{self.bigquery_dataset}.query_correctness_{int(time.time())}"
    query = f"SELECT * FROM `{table_id}`"
    load_job = self.client.load_table_from_dataframe(
        df, table_id, job_config=job_config
    )
    load_job.result()

    # Feature view reading through the query instead of a table reference.
    source = BigQuerySource(
        event_timestamp_column="ts",
        created_timestamp_column="created_ts",
        field_mapping={"ts_1": "ts", "id": "driver_id"},
        date_partition_column="",
        query=query,
    )
    fv = FeatureView(
        name="test_bq_query_correctness",
        entities=["driver_id"],
        features=[Feature("value", ValueType.FLOAT)],
        ttl=timedelta(minutes=5),
        input=source,
    )

    # The project name also carries an epoch-second suffix.
    config = RepoConfig(
        metadata_store="./metadata.db",
        project=f"test_bq_query_correctness_{int(time.time())}",
        provider="gcp",
    )
    fs = FeatureStore(config=config)
    fs.apply([fv])

    # Materialize the trailing five-minute window.
    window_start = datetime.utcnow() - timedelta(minutes=5)
    window_end = datetime.utcnow()
    fs.materialize([fv.name], window_start, window_end)

    # Read driver 1 back from the online store and compare with 0.3.
    online = fs.get_online_features([f"{fv.name}:value"], [{"driver_id": 1}])
    served = online.to_dict()[f"{fv.name}:value"][0]
    assert abs(served - 0.3) < 1e-6
def materialize_command(repo_path: str, start_ts: str, end_ts: str, views: List[str]):
    """
    Run a (non-incremental) materialization job to ingest data into the online store. Feast
    will read all data between START_TS and END_TS from the offline store and write it to the
    online store. If you don't specify feature view names using --views, all registered Feature
    Views will be materialized.

    START_TS and END_TS should be in ISO 8601 format, e.g. '2021-07-16T19:20:01'.
    Timestamps without a UTC offset are interpreted as UTC.
    """
    # Developer notes (kept out of the docstring, which may be shown to users):
    #   repo_path -- passed straight to FeatureStore(repo_path=...).
    #   views     -- an empty/falsy value is forwarded as None.
    #   Raises ValueError (from datetime.fromisoformat) on a malformed timestamp.

    def _parse_utc(ts: str) -> datetime:
        """Parse an ISO 8601 string and return it as a UTC-aware datetime."""
        parsed = datetime.fromisoformat(ts)
        if parsed.utcoffset() is None:
            # Naive input: treat the wall-clock value as already being UTC.
            return parsed.replace(tzinfo=utc)
        # Aware input: convert to UTC. Overwriting tzinfo here would relabel
        # e.g. '...T12:00:00+02:00' as 12:00 UTC and shift the window.
        return parsed.astimezone(utc)

    store = FeatureStore(repo_path=repo_path)
    store.materialize(
        feature_views=None if not views else views,
        start_date=_parse_utc(start_ts),
        end_date=_parse_utc(end_ts),
    )
def materialize_command(ctx: click.Context, start_ts: str, end_ts: str, views: List[str]):
    """
    Run a (non-incremental) materialization job to ingest data into the online store. Feast
    will read all data between START_TS and END_TS from the offline store and write it to the
    online store. If you don't specify feature view names using --views, all registered Feature
    Views will be materialized.

    START_TS and END_TS should be in ISO 8601 format, e.g. '2021-07-16T19:20:01'
    """
    # The repository location comes from the click context; it is checked
    # with cli_check_repo before the store is opened on it.
    repo_dir = ctx.obj["CHDIR"]
    cli_check_repo(repo_dir)
    feature_store = FeatureStore(repo_path=str(repo_dir))

    # Parse both bounds and pass them through utils.make_tzaware
    # (presumably attaches a timezone to naive values -- confirm in utils).
    window_start = utils.make_tzaware(datetime.fromisoformat(start_ts))
    window_end = utils.make_tzaware(datetime.fromisoformat(end_ts))

    # An empty selection is forwarded as None rather than an empty sequence.
    feature_store.materialize(
        feature_views=views or None,
        start_date=window_start,
        end_date=window_end,
    )
def test_bigquery_ingestion_correctness(self):
    """Materialize a table-backed BigQuery source and read the value back.

    Loads a three-row frame into a freshly named BigQuery table, registers a
    feature view over that table, materializes the last five minutes and
    reads driver 1 directly through the provider's ``online_read``.
    """
    project = "default"

    # Build the dataset. The checked value is random so the test cannot
    # pass on stale data when nothing is written to the online store.
    ts = pd.Timestamp.now(tz="UTC").round("ms")
    checked_value = random.random()
    rows = {
        "id": [1, 2, 1],
        "value": [0.1, 0.2, checked_value],
        "ts_1": [ts - timedelta(minutes=2), ts, ts],
        "created_ts": [ts, ts, ts],
    }
    df = pd.DataFrame.from_dict(rows)

    # Load the dataset into a table whose name is suffixed with the current
    # epoch second, and wait for the load job to finish.
    job_config = bigquery.LoadJobConfig()
    table_id = (
        f"{self.gcp_project}.{self.bigquery_dataset}.correctness_{int(time.time())}"
    )
    load_job = self.client.load_table_from_dataframe(
        df, table_id, job_config=job_config
    )
    load_job.result()

    # Feature view reading from the table that was just loaded.
    source = BigQuerySource(
        event_timestamp_column="ts",
        table_ref=table_id,
        created_timestamp_column="created_ts",
        field_mapping={"ts_1": "ts", "id": "driver_id"},
        date_partition_column="",
    )
    fv = FeatureView(
        name="test_bq_correctness",
        entities=["driver_id"],
        features=[Feature("value", ValueType.FLOAT)],
        ttl=timedelta(minutes=5),
        input=source,
    )

    # GCP provider with a local online store file.
    online_store = OnlineStoreConfig(local=LocalOnlineStoreConfig("online_store.db"))
    config = RepoConfig(
        metadata_store="./metadata.db",
        project=project,
        provider="gcp",
        online_store=online_store,
    )
    fs = FeatureStore(config=config)
    fs.apply([fv])

    # Materialize the trailing five-minute window.
    window_start = datetime.utcnow() - timedelta(minutes=5)
    window_end = datetime.utcnow()
    fs.materialize(["test_bq_correctness"], window_start, window_end)

    # Read driver 1 straight from the provider and compare with the random
    # value written above; the first element of the result is not used.
    entity_key = EntityKeyProto(
        entity_names=["driver_id"],
        entity_values=[ValueProto(int64_val=1)],
    )
    _, stored = fs._get_provider().online_read(project, fv, entity_key)
    assert abs(stored["value"].double_val - checked_value) < 1e-6