def test_ingest_into_bq(
    feast_client: Client,
    customer_entity: Entity,
    driver_entity: Entity,
    bq_dataframe: pd.DataFrame,
    bq_dataset: str,
    pytestconfig,
):
    """Ingest a dataframe into a BigQuery-backed feature table and read it back.

    Registers the two entities and a feature table whose batch source is a
    freshly named BigQuery staging table, ingests ``bq_dataframe`` through the
    Feast client, waits for the table to become visible, and asserts that the
    rows queried from BigQuery equal the ingested dataframe.

    Args:
        feast_client: Feast client used to apply the specs and ingest the data.
        customer_entity: Entity spec applied before the feature table.
        driver_entity: Entity spec applied before the feature table.
        bq_dataframe: Data to ingest; also the expected query result.
        bq_dataset: Name of the BigQuery dataset that holds the staging table.
        pytestconfig: Pytest config object; supplies the ``bq_project`` option.
    """
    bq_project = pytestconfig.getoption("bq_project")
    # Use %S (zero-padded seconds): the lowercase %s directive is not a
    # documented Python format code and its output is platform-dependent.
    bq_table_id = f"bq_staging_{datetime.now():%Y%m%d%H%M%S}"

    ft = FeatureTable(
        name="basic_featuretable",
        entities=["driver_id", "customer_id"],
        features=[
            Feature(name="dev_feature_float", dtype=ValueType.FLOAT),
            Feature(name="dev_feature_string", dtype=ValueType.STRING),
        ],
        max_age=Duration(seconds=3600),
        batch_source=BigQuerySource(
            table_ref=f"{bq_project}:{bq_dataset}.{bq_table_id}",
            event_timestamp_column="datetime",
            created_timestamp_column="timestamp",
        ),
    )

    # ApplyEntity
    feast_client.apply(customer_entity)
    feast_client.apply(driver_entity)

    # ApplyFeatureTable
    feast_client.apply(ft)
    feast_client.ingest(ft, bq_dataframe, timeout=120)

    bq_client = bigquery.Client(project=bq_project)

    # Poll BQ for table until the table has been created
    def try_get_table():
        """Return ``(table, True)`` once the table exists, else ``(None, False)``."""
        try:
            table = bq_client.get_table(
                bigquery.TableReference(
                    bigquery.DatasetReference(bq_project, bq_dataset), bq_table_id
                )
            )
        except NotFound:
            return None, False
        else:
            return table, True

    wait_retry_backoff(
        retry_fn=try_get_table,
        timeout_secs=30,
        timeout_msg="Timed out trying to get bigquery table",
    )

    # Read everything back and compare against what was ingested.
    query_string = f"SELECT * FROM `{bq_project}.{bq_dataset}.{bq_table_id}`"
    job = bq_client.query(query_string)
    query_df = job.to_dataframe()

    assert_frame_equal(query_df, bq_dataframe)
def test_ingest(
    client: Client,
    customer_entity: Entity,
    driver_entity: Entity,
    bq_featuretable: FeatureTable,
    bq_dataset: pd.DataFrame,
    bq_table_id: str,
):
    """Ingest a dataframe into BigQuery via the client and verify the contents.

    Applies the entities and the feature table, ingests ``bq_dataset``, waits
    for the destination table to become visible, and asserts that the rows
    queried from BigQuery equal the ingested dataframe. The table is deleted
    afterwards whether or not the test passes.

    Args:
        client: Feast client used to apply the specs and ingest the data.
        customer_entity: Entity spec applied before the feature table.
        driver_entity: Entity spec applied before the feature table.
        bq_featuretable: Feature table to register and ingest into.
        bq_dataset: Dataframe to ingest; also the expected query result.
        bq_table_id: Table reference containing exactly one ``:`` separating
            the project from the rest (the split below requires it).
    """
    from google.api_core.exceptions import NotFound
    from google.cloud import bigquery

    gcp_project, _ = bq_table_id.split(":")
    # From here on use the dotted form, which is what the query and the
    # get_table/delete_table calls below are given.
    bq_table_id = bq_table_id.replace(":", ".")
    expected_table_name = bq_table_id.split(".")[-1]

    # Create the BigQuery client first so cleanup is possible even when a
    # later step (apply, ingest, polling, comparison) fails.
    bq_client = bigquery.Client(project=gcp_project)

    try:
        # ApplyEntity
        client.apply_entity(customer_entity)
        client.apply_entity(driver_entity)

        # ApplyFeatureTable
        client.apply_feature_table(bq_featuretable)
        client.ingest(bq_featuretable, bq_dataset, timeout=120)

        # Poll BQ for table until the table has been created
        def try_get_table():
            """Return ``(table_or_None, exists)`` for the destination table."""
            try:
                table_resp = bq_client.get_table(bq_table_id)
            except NotFound:
                return None, False
            table_exist = bool(
                table_resp and table_resp.table_id == expected_table_name
            )
            return table_resp, table_exist

        wait_retry_backoff(
            retry_fn=try_get_table,
            timeout_secs=30,
            timeout_msg="Timed out trying to get bigquery table",
        )

        query_string = f"SELECT * FROM `{bq_table_id}`"
        job = bq_client.query(query_string)
        query_df = job.to_dataframe()

        assert_frame_equal(query_df, bq_dataset)
    finally:
        # Always remove the table so a failed run does not leak it;
        # not_found_ok covers the case where it was never created.
        bq_client.delete_table(bq_table_id, not_found_ok=True)