def test_apply_feature_set_success(self, client):
    """Applying two feature sets registers both and preserves their schemas."""
    # Build the first feature set: two scalar features plus an entity.
    first_set = FeatureSet("my-feature-set-1")
    first_set.add(Feature(name="fs1-my-feature-1", dtype=ValueType.INT64))
    first_set.add(Feature(name="fs1-my-feature-2", dtype=ValueType.STRING))
    first_set.add(Entity(name="fs1-my-entity-1", dtype=ValueType.INT64))

    # Build the second feature set with list-typed features.
    second_set = FeatureSet("my-feature-set-2")
    second_set.add(Feature(name="fs2-my-feature-1", dtype=ValueType.STRING_LIST))
    second_set.add(Feature(name="fs2-my-feature-2", dtype=ValueType.BYTES_LIST))
    second_set.add(Entity(name="fs2-my-entity-1", dtype=ValueType.INT64))

    # Register both feature sets with Feast Core.
    client.apply(first_set)
    client.apply(second_set)

    # Listing should return both sets with names and dtypes intact.
    feature_sets = client.list_feature_sets()
    assert len(feature_sets) == 2
    assert feature_sets[0].name == "my-feature-set-1"
    assert feature_sets[0].features[0].name == "fs1-my-feature-1"
    assert feature_sets[0].features[0].dtype == ValueType.INT64
    assert feature_sets[1].features[1].dtype == ValueType.BYTES_LIST
def test_list_entities_and_features(client):
    """Features can be filtered by project, entity and label combinations."""
    customer_entity = Entity("customer_id", ValueType.INT64)
    driver_entity = Entity("driver_id", ValueType.INT64)

    customer_feature_rating = Feature(
        name="rating", dtype=ValueType.FLOAT, labels={"key1": "val1"})
    customer_feature_cost = Feature(name="cost", dtype=ValueType.FLOAT)
    driver_feature_rating = Feature(name="rating", dtype=ValueType.FLOAT)
    driver_feature_cost = Feature(
        name="cost", dtype=ValueType.FLOAT, labels={"key1": "val1"})

    # Expected results keyed by feature reference. Dict literals rather than
    # dict([...]) around a list of pairs (flake8-comprehensions C406).
    filter_by_project_entity_labels_expected = {
        "customer:rating": customer_feature_rating,
    }
    filter_by_project_entity_expected = {
        "driver:cost": driver_feature_cost,
        "driver:rating": driver_feature_rating,
    }
    filter_by_project_labels_expected = {
        "customer:rating": customer_feature_rating,
        "driver:cost": driver_feature_cost,
    }

    customer_fs = FeatureSet(
        "customer",
        features=[customer_feature_rating, customer_feature_cost],
        entities=[customer_entity],
        max_age=Duration(seconds=100),
    )
    driver_fs = FeatureSet(
        "driver",
        features=[driver_feature_rating, driver_feature_cost],
        entities=[driver_entity],
        max_age=Duration(seconds=100),
    )

    client.set_project(PROJECT_NAME)
    client.apply(customer_fs)
    client.apply(driver_fs)

    # Test for listing of features
    # Case 1: Filter by: project, entities and labels
    filter_by_project_entity_labels_actual = client.list_features_by_ref(
        project=PROJECT_NAME, entities=["customer_id"], labels={"key1": "val1"})
    # Case 2: Filter by: project, entities
    filter_by_project_entity_actual = client.list_features_by_ref(
        project=PROJECT_NAME, entities=["driver_id"])
    # Case 3: Filter by: project, labels
    filter_by_project_labels_actual = client.list_features_by_ref(
        project=PROJECT_NAME, labels={"key1": "val1"})

    # Only the feature references (dict keys) are compared here.
    assert set(filter_by_project_entity_labels_expected) == set(
        filter_by_project_entity_labels_actual)
    assert set(filter_by_project_entity_expected) == set(
        filter_by_project_entity_actual)
    assert set(filter_by_project_labels_expected) == set(
        filter_by_project_labels_actual)
def test_multiple_featureset_joins(client):
    """Batch retrieval joins rows from two feature sets on their entities."""
    fs1 = FeatureSet(
        "feature_set_1",
        features=[Feature("feature_value", ValueType.STRING)],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )
    fs2 = FeatureSet(
        "feature_set_2",
        features=[Feature("other_feature_value", ValueType.INT64)],
        entities=[Entity("other_entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )

    # Apply each set, then re-fetch it so we hold the registered version.
    client.apply(fs1)
    time.sleep(10)
    fs1 = client.get_feature_set(name="feature_set_1", version=1)

    client.apply(fs2)
    time.sleep(10)
    fs2 = client.get_feature_set(name="feature_set_2", version=1)

    N_ROWS = 10
    time_offset = datetime.utcnow().replace(tzinfo=pytz.utc)

    features_1_df = pd.DataFrame({
        "datetime": [time_offset] * N_ROWS,
        "entity_id": list(range(N_ROWS)),
        "feature_value": [str(i) for i in range(N_ROWS)],
    })
    client.ingest(fs1, features_1_df)

    features_2_df = pd.DataFrame({
        "datetime": [time_offset] * N_ROWS,
        "other_entity_id": list(range(N_ROWS)),
        "other_feature_value": list(range(N_ROWS)),
    })
    client.ingest(fs2, features_2_df)

    # Entity rows pair entity_id i with other_entity_id (N_ROWS - 1 - i).
    entity_df = pd.DataFrame({
        "datetime": [time_offset] * N_ROWS,
        "entity_id": list(range(N_ROWS)),
        "other_entity_id": [N_ROWS - 1 - i for i in range(N_ROWS)],
    })

    feature_retrieval_job = client.get_batch_features(
        entity_rows=entity_df,
        feature_ids=[
            "feature_set_1:1:feature_value",
            "feature_set_2:1:other_feature_value",
        ],
    )
    output = feature_retrieval_job.to_dataframe()
    print(output.head())

    # Each joined row must carry the feature values of its own entity.
    assert output["entity_id"].to_list() == [
        int(v) for v in output["feature_set_1_v1_feature_value"].to_list()
    ]
    assert (output["other_entity_id"].to_list()
            == output["feature_set_2_v1_other_feature_value"].to_list())
def test_unequal_feature_set_based_on_labels():
    """Feature set equality must take labels into account."""
    fs1 = FeatureSet("my-feature-set")
    fs2 = FeatureSet("my-feature-set")
    # Identical name, no labels: equal.
    assert fs1 == fs2
    fs1.set_label("k1", "v1")
    fs2.set_label("k1", "v1")
    # Identical labels: still equal.
    assert fs1 == fs2
    fs2.set_label("k1", "unequal")
    # Differing label values: not equal. Use != directly rather than
    # `not ... ==` (PEP 8: use the direct comparison operator).
    assert fs1 != fs2
def test_basic_register_feature_set_success(client):
    """Feature sets register correctly with and without a project/labels."""
    # --- Register feature sets without an explicit project ---
    cust_trans_fs_expected = FeatureSet.from_yaml(
        f"{DIR_PATH}/basic/cust_trans_fs.yaml")
    driver_fs_expected = FeatureSet.from_yaml(
        f"{DIR_PATH}/basic/driver_fs.yaml")
    client.apply(cust_trans_fs_expected)
    client.apply(driver_fs_expected)

    assert client.get_feature_set("customer_transactions") == cust_trans_fs_expected
    assert client.get_feature_set("driver") == driver_fs_expected

    # --- Register a feature set under a named project ---
    cust_trans_fs_expected = FeatureSet.from_yaml(
        f"{DIR_PATH}/basic/cust_trans_fs.yaml")
    client.set_project(PROJECT_NAME)
    client.apply(cust_trans_fs_expected)
    cust_trans_fs_actual = client.get_feature_set(
        "customer_transactions", project=PROJECT_NAME)
    assert cust_trans_fs_actual == cust_trans_fs_expected

    # --- Register feature sets with labels and filter on them ---
    driver_unlabelled_fs = FeatureSet(
        "driver_unlabelled",
        features=[
            Feature("rating", ValueType.FLOAT),
            Feature("cost", ValueType.FLOAT),
        ],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )
    driver_labeled_fs_expected = FeatureSet(
        "driver_labeled",
        features=[
            Feature("rating", ValueType.FLOAT),
            Feature("cost", ValueType.FLOAT),
        ],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
        labels={"key1": "val1"},
    )
    client.set_project(PROJECT_NAME)
    client.apply(driver_unlabelled_fs)
    client.apply(driver_labeled_fs_expected)

    # Only the labelled feature set should match the label filter.
    matching = client.list_feature_sets(
        project=PROJECT_NAME, labels={"key1": "val1"})
    assert matching[0] == driver_labeled_fs_expected

    # reset client's project for other tests
    client.set_project()
def test_feature_set_ingest_success(self, dataframe, client, mocker):
    """Ingestion by feature-set name succeeds once core reports STATUS_READY."""
    client.set_project("project1")
    driver_fs = FeatureSet(
        "driver-feature-set",
        source=KafkaSource(brokers="kafka:9092", topic="test"))
    driver_fs.add(Feature(name="feature_1", dtype=ValueType.FLOAT))
    driver_fs.add(Feature(name="feature_2", dtype=ValueType.STRING))
    driver_fs.add(Feature(name="feature_3", dtype=ValueType.INT64))
    driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64))

    # Register with Feast core
    client.apply(driver_fs)
    driver_fs = driver_fs.to_proto()
    # Mark the set READY so ingest() does not block waiting on core.
    driver_fs.meta.status = FeatureSetStatusProto.STATUS_READY

    # Have the stubbed core return the READY proto on lookup.
    mocker.patch.object(
        client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(feature_set=driver_fs),
    )

    # Need to create a mock producer
    with patch("feast.client.get_producer") as mocked_queue:
        # Ingest data into Feast
        client.ingest("driver-feature-set", dataframe)
def test_update_featureset_update_featureset_and_ingest_second_subset(
        client, update_featureset_dataframe):
    """After a schema update, the second column subset ingests and reads back."""
    subset_columns = [
        "datetime",
        "entity_id",
        "update_feature1",
        "update_feature3",
        "update_feature4",
    ]
    subset_df = update_featureset_dataframe.iloc[5:][subset_columns]
    update_fs = FeatureSet(
        "update_fs",
        entities=[Entity(name="entity_id", dtype=ValueType.INT64)],
        max_age=Duration(seconds=432000),
    )
    update_fs.infer_fields_from_df(subset_df)
    client.apply(update_fs)

    # We keep retrying this ingestion until all values make it into the buffer.
    # This is a necessary step because bigquery streaming caches table schemas
    # and as a result, rows may be lost.
    while True:
        ingestion_id = client.ingest(feature_set=update_fs, source=subset_df)
        time.sleep(15)  # wait for rows to get written to bq
        rows_ingested = get_rows_ingested(client, update_fs, ingestion_id)
        if rows_ingested == len(subset_df):
            print(
                f"Number of rows successfully ingested: {rows_ingested}. Continuing."
            )
            break
        print(
            f"Number of rows successfully ingested: {rows_ingested}. Retrying ingestion."
        )
        time.sleep(30)

    def check():
        # Retrieve the same rows back and compare each updated feature column.
        feature_retrieval_job = client.get_batch_features(
            entity_rows=update_featureset_dataframe[["datetime",
                                                     "entity_id"]].iloc[5:],
            feature_refs=[
                "update_feature1",
                "update_feature3",
                "update_feature4",
            ],
            project=PROJECT_NAME,
        )
        output = feature_retrieval_job.to_dataframe(
            timeout_sec=180).sort_values(by=["entity_id"])
        print(output.head())

        assert output["update_feature1"].to_list(
        ) == subset_df["update_feature1"].to_list()
        assert output["update_feature3"].to_list(
        ) == subset_df["update_feature3"].to_list()
        assert output["update_feature4"].to_list(
        ) == subset_df["update_feature4"].to_list()

        clean_up_remote_files(feature_retrieval_job.get_avro_files())

    wait_for(check, timedelta(minutes=5))
def test_feature_set_ingest_success(self, dataframe, client, mocker):
    """Ingesting a dataframe by feature-set name works with a stubbed producer."""
    driver_fs = FeatureSet("driver-feature-set")
    driver_fs.add(Feature(name="feature_1", dtype=ValueType.FLOAT))
    driver_fs.add(Feature(name="feature_2", dtype=ValueType.STRING))
    driver_fs.add(Feature(name="feature_3", dtype=ValueType.INT64))
    driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64))
    driver_fs.source = KafkaSource(topic="feature-topic", brokers="127.0.0.1")

    # Stub out the Kafka producer so no real broker is needed.
    client._message_producer = MagicMock()
    client._message_producer.produce = MagicMock()

    # Register with Feast core
    client.apply(driver_fs)

    # Have the stubbed core echo the registered proto back on lookup.
    mocker.patch.object(
        client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(
            feature_set=driver_fs.to_proto()),
    )

    # Ingest data into Feast
    client.ingest("driver-feature-set", dataframe=dataframe)
def test_order_by_creation_time(client):
    """When rows share a timestamp, the later ingestion wins retrieval."""
    proc_time_fs = FeatureSet(
        "processing_time",
        features=[Feature("feature_value", ValueType.STRING)],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )
    client.apply(proc_time_fs)
    time.sleep(10)
    proc_time_fs = client.get_feature_set(name="processing_time", version=1)

    time_offset = datetime.utcnow().replace(tzinfo=pytz.utc)
    N_ROWS = 10
    entity_ids = list(range(N_ROWS))

    # Two batches share the same event timestamp but differ in value.
    incorrect_df = pd.DataFrame({
        "datetime": [time_offset] * N_ROWS,
        "entity_id": entity_ids,
        "feature_value": ["WRONG"] * N_ROWS,
    })
    correct_df = pd.DataFrame({
        "datetime": [time_offset] * N_ROWS,
        "entity_id": entity_ids,
        "feature_value": ["CORRECT"] * N_ROWS,
    })

    client.ingest(proc_time_fs, incorrect_df)
    time.sleep(10)
    client.ingest(proc_time_fs, correct_df)

    feature_retrieval_job = client.get_batch_features(
        entity_rows=incorrect_df[["datetime", "entity_id"]],
        feature_ids=["processing_time:1:feature_value"],
    )
    output = feature_retrieval_job.to_dataframe()
    print(output.head())

    # The later ("CORRECT") batch must shadow the earlier one.
    expected = ["CORRECT"] * N_ROWS
    assert output["processing_time_v1_feature_value"].to_list() == expected
def test_feature_set_ingest_fail_if_pending(self, dataframe, exception,
                                            test_client, mocker):
    """ingest() must raise when the feature set never leaves STATUS_PENDING."""
    with pytest.raises(exception):
        test_client.set_project("project1")
        driver_fs = FeatureSet(
            "driver-feature-set",
            source=KafkaSource(brokers="kafka:9092", topic="test"),
        )
        driver_fs.add(Feature(name="feature_1", dtype=ValueType.FLOAT))
        driver_fs.add(Feature(name="feature_2", dtype=ValueType.STRING))
        driver_fs.add(Feature(name="feature_3", dtype=ValueType.INT64))
        driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64))

        # Register with Feast core
        test_client.apply(driver_fs)
        driver_fs = driver_fs.to_proto()
        # Keep the set PENDING so ingest() times out waiting for READY.
        driver_fs.meta.status = FeatureSetStatusProto.STATUS_PENDING

        mocker.patch.object(
            test_client._core_service_stub,
            "GetFeatureSet",
            return_value=GetFeatureSetResponse(feature_set=driver_fs),
        )

        # Need to create a mock producer
        with patch("feast.client.get_producer"):
            # Ingest data into Feast; short timeout forces the failure fast.
            test_client.ingest("driver-feature-set", dataframe, timeout=1)
def test_feature_set_types_success(self, client, dataframe, mocker):
    """Every supported value type can be registered and ingested."""
    # (name, dtype) pairs covering scalar and list value types.
    typed_features = [
        ("float_feature", ValueType.FLOAT),
        ("int64_feature", ValueType.INT64),
        ("int32_feature", ValueType.INT32),
        ("string_feature", ValueType.STRING),
        ("bytes_feature", ValueType.BYTES),
        ("bool_feature", ValueType.BOOL),
        ("double_feature", ValueType.DOUBLE),
        ("float_list_feature", ValueType.FLOAT_LIST),
        ("int64_list_feature", ValueType.INT64_LIST),
        ("int32_list_feature", ValueType.INT32_LIST),
        ("string_list_feature", ValueType.STRING_LIST),
        ("bytes_list_feature", ValueType.BYTES_LIST),
        ("bool_list_feature", ValueType.BOOL_LIST),
        ("double_list_feature", ValueType.DOUBLE_LIST),
    ]
    all_types_fs = FeatureSet(
        name="all_types",
        entities=[Entity(name="user_id", dtype=ValueType.INT64)],
        features=[Feature(name=n, dtype=t) for n, t in typed_features],
        max_age=Duration(seconds=3600),
    )

    # Register with Feast core
    client.apply(all_types_fs)

    # Stub core so lookup returns the registered proto.
    mocker.patch.object(
        client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(feature_set=all_types_fs.to_proto()),
    )

    # Ingest data into Feast
    client.ingest(all_types_fs, dataframe=dataframe)
def test_register_feature_set(self, sqlite_store):
    """A feature set spec can be registered with the sqlite store."""
    fs = FeatureSet("my-feature-set")
    fs.add(Feature(name="my-feature-1", dtype=ValueType.INT64))
    fs.add(Feature(name="my-feature-2", dtype=ValueType.INT64))
    fs.add(Entity(name="my-entity-1", dtype=ValueType.INT64))
    fs._version = 1
    feature_set_spec_proto = fs.to_proto().spec

    # Registration should complete without raising.
    sqlite_store.register_feature_set(feature_set_spec_proto)

    feature_row = FeatureRowProto.FeatureRow(
        feature_set="feature_set_1",
        event_timestamp=Timestamp(),
        fields=[
            FieldProto.Field(
                name="feature_1", value=ValueProto.Value(float_val=1.2)),
            FieldProto.Field(
                name="feature_2", value=ValueProto.Value(float_val=1.2)),
            FieldProto.Field(
                name="feature_3", value=ValueProto.Value(float_val=1.2)),
        ],
    )
    # TODO: re-enable once upsert_feature_row is supported for sqlite.
    # sqlite_store.upsert_feature_row(feature_set_proto, feature_row)

    # Replaces the former `assert True` placeholder (which asserted nothing)
    # with checks that the constructed row is well-formed.
    assert feature_row.feature_set == "feature_set_1"
    assert len(feature_row.fields) == 3
def test_feature_set_ingest_throws_exception_if_kafka_down(
        self, dataframe, test_client, exception, mocker):
    """ingest() must surface an error when the Kafka broker is unreachable."""
    test_client.set_project("project1")
    driver_fs = FeatureSet(
        "driver-feature-set",
        # Deliberately points at a port where no broker is listening.
        source=KafkaSource(brokers="localhost:4412", topic="test"),
    )
    driver_fs.add(Feature(name="feature_1", dtype=ValueType.FLOAT))
    driver_fs.add(Feature(name="feature_2", dtype=ValueType.STRING))
    driver_fs.add(Feature(name="feature_3", dtype=ValueType.INT64))
    driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64))

    # Register with Feast core
    test_client.apply(driver_fs)
    driver_fs = driver_fs.to_proto()
    # READY status so the failure comes from Kafka, not from core polling.
    driver_fs.meta.status = FeatureSetStatusProto.STATUS_READY

    mocker.patch.object(
        test_client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(feature_set=driver_fs),
    )

    with pytest.raises(exception):
        test_client.ingest("driver-feature-set", dataframe)
def test_update_featureset_apply_featureset_and_ingest_first_subset(
        client, update_featureset_dataframe):
    """The first column subset ingests and reads back unchanged."""
    subset_columns = [
        "datetime", "entity_id", "update_feature1", "update_feature2"
    ]
    first_half = update_featureset_dataframe.iloc[:5][subset_columns]

    update_fs = FeatureSet(
        "update_fs",
        entities=[Entity(name="entity_id", dtype=ValueType.INT64)],
        max_age=Duration(seconds=432000),
    )
    update_fs.infer_fields_from_df(first_half)
    client.apply(update_fs)

    client.ingest(feature_set=update_fs, source=first_half)
    time.sleep(15)

    retrieval_job = client.get_batch_features(
        entity_rows=update_featureset_dataframe[["datetime",
                                                 "entity_id"]].iloc[:5],
        feature_refs=[
            f"{PROJECT_NAME}/update_feature1",
            f"{PROJECT_NAME}/update_feature2",
        ],
    )
    output = retrieval_job.to_dataframe().sort_values(by=["entity_id"])
    print(output.head())

    # Retrieved values must match the ingested subset column-for-column.
    for column in ("update_feature1", "update_feature2"):
        assert output[column].to_list() == first_half[column].to_list()
def create(name):
    """
    Create a feature set
    """
    core_url = feast_config.get_config_property_or_fail("core_url")
    feast_client = Client(core_url=core_url)  # type: Client
    feast_client.apply(FeatureSet(name=name))
def test_from_feature_set(self):
    """A FeatureSetRef mirrors the name, project and version of its source."""
    source_set = FeatureSet("test", "test")
    source_set.version = 2

    ref = FeatureSetRef.from_feature_set(source_set)

    assert ref.name == "test"
    assert ref.project == "test"
    assert ref.version == 2
def test_update_from_source_success(self, dataframe):
    """update_from_dataset infers features and applies the column mapping."""
    fs = FeatureSet("driver-feature-set")
    entity_mapping = {
        "entity_id": Entity(name="entity", dtype=ValueType.INT64),
    }
    fs.update_from_dataset(dataframe, column_mapping=entity_mapping)

    # Three features inferred; the second keeps its source column name.
    assert len(fs.features) == 3
    assert fs.features[1].name == "feature_2"
def test_feature_class_contains_labels():
    """Labels set on a Feature survive being added to a FeatureSet."""
    fs = FeatureSet("my-feature-set", labels={"key1": "val1", "key2": "val2"})
    labelled_feature = Feature(
        name="my-feature-1",
        dtype=ValueType.INT64,
        labels={"feature_key1": "feature_val1"},
    )
    fs.add(labelled_feature)

    stored_labels = fs.features[0].labels
    assert "feature_key1" in stored_labels.keys()
    assert stored_labels["feature_key1"] == "feature_val1"
def test_register_feature_set_with_labels(self, core_service_stub):
    """Labels attached to a feature set spec round-trip through core."""
    feature_set_name = "test_feature_set_labels"
    spec_proto = FeatureSet(feature_set_name, PROJECT_NAME).to_proto()
    spec_proto.spec.labels[self.LABEL_KEY] = self.LABEL_VALUE

    self.apply_feature_set(core_service_stub, spec_proto)
    retrieved = self.get_feature_set(core_service_stub, feature_set_name,
                                     PROJECT_NAME)

    assert self.LABEL_KEY in retrieved.spec.labels
    assert retrieved.spec.labels[self.LABEL_KEY] == self.LABEL_VALUE
def test_apply_all_featuresets(client):
    """Register every feature set used by the rest of the test suite."""
    client.set_project(PROJECT_NAME)

    # These sets share the "entity_id" entity and hold one STRING feature each.
    for set_name, feature_name in [
        ("file_feature_set", "feature_value1"),
        ("gcs_feature_set", "feature_value2"),
        ("processing_time", "feature_value3"),
        ("additional_columns", "feature_value4"),
        ("historical", "feature_value5"),
        ("feature_set_1", "feature_value6"),
    ]:
        client.apply(
            FeatureSet(
                set_name,
                features=[Feature(feature_name, ValueType.STRING)],
                entities=[Entity("entity_id", ValueType.INT64)],
                max_age=Duration(seconds=100),
            ))

    # feature_set_2 uses its own entity and an INT64 feature.
    client.apply(
        FeatureSet(
            "feature_set_2",
            features=[Feature("other_feature_value7", ValueType.INT64)],
            entities=[Entity("other_entity_id", ValueType.INT64)],
            max_age=Duration(seconds=100),
        ))
def test_register_feature_with_labels(self, core_service_stub):
    """Labels attached to an individual feature round-trip through core."""
    feature_set_name = "test_feature_labels"
    spec_proto = FeatureSet(
        feature_set_name,
        PROJECT_NAME,
        features=[Feature("rating", ValueType.INT64)],
    ).to_proto()
    spec_proto.spec.features[0].labels[self.LABEL_KEY] = self.LABEL_VALUE

    self.apply_feature_set(core_service_stub, spec_proto)
    retrieved_set = self.get_feature_set(core_service_stub, feature_set_name,
                                         PROJECT_NAME)
    retrieved_feature = retrieved_set.spec.features[0]

    assert self.LABEL_KEY in retrieved_feature.labels
    assert retrieved_feature.labels[self.LABEL_KEY] == self.LABEL_VALUE
def feature_stats_feature_set(client):
    """Register and return the feature set used by feature-statistics tests."""
    stats_features = [
        Feature("strings", ValueType.STRING),
        Feature("ints", ValueType.INT64),
        Feature("floats", ValueType.FLOAT),
    ]
    fv_fs = FeatureSet(
        "feature_stats",
        features=stats_features,
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )
    client.apply(fv_fs)
    return fv_fs
def test_feature_set_ingest_failure(self, client, dataframe, exception):
    """Ingesting a malformed dataframe raises the expected exception."""
    with pytest.raises(exception):
        # Infer the schema from the (invalid) dataset, register, then ingest.
        driver_fs = FeatureSet("driver-feature-set")
        driver_fs.infer_fields_from_df(dataframe)
        client.apply(driver_fs)
        client.ingest(driver_fs, dataframe=dataframe)
def test_get_batch_features_with_gs_path(client, gcs_path):
    """Batch retrieval accepts a gs:// glob of avro files as entity rows."""
    gcs_fs1 = FeatureSet(
        "gcs_feature_set",
        features=[Feature("feature_value", ValueType.STRING)],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )
    client.apply(gcs_fs1)
    gcs_fs1 = client.get_feature_set(name="gcs_feature_set", version=1)

    N_ROWS = 10
    time_offset = datetime.utcnow().replace(tzinfo=pytz.utc)
    features_1_df = pd.DataFrame({
        "datetime": [time_offset] * N_ROWS,
        "entity_id": [i for i in range(N_ROWS)],
        "feature_value": [f"{i}" for i in range(N_ROWS)],
    })
    client.ingest(gcs_fs1, features_1_df)

    # Rename column (datetime -> event_timestamp)
    features_1_df = features_1_df.rename(
        columns={"datetime": "event_timestamp"})

    # Output file to local
    file_name = "gcs_feature_set.avro"
    to_avro(df=features_1_df, file_path_or_buffer=file_name)

    uri = urlparse(gcs_path)
    bucket = uri.hostname
    ts = int(time.time())
    # NOTE(review): ts is appended directly after the stripped path with no
    # "/" separator; this only lines up with the f"{gcs_path}{ts}/*" glob
    # below if gcs_path has no trailing slash — confirm against the fixture.
    remote_path = str(uri.path).strip("/") + f"{ts}/{file_name}"

    # Upload file to gcs
    storage_client = storage.Client(project=None)
    bucket = storage_client.get_bucket(bucket)
    blob = bucket.blob(remote_path)
    blob.upload_from_filename(file_name)

    feature_retrieval_job = client.get_batch_features(
        entity_rows=f"{gcs_path}{ts}/*",
        feature_ids=["gcs_feature_set:1:feature_value"])
    output = feature_retrieval_job.to_dataframe()
    print(output.head())

    # Each returned row must echo its own entity id in the feature column.
    assert output["entity_id"].to_list() == [
        int(i) for i in output["gcs_feature_set_v1_feature_value"].to_list()
    ]
def test_basic_ingest_retrieval_str(client):
    """Ingestion by feature-set name resolves against the current project."""
    # Set to another project to test ingestion based on current project context
    client.set_project(PROJECT_NAME + "_NS1")
    customer_fs = FeatureSet(
        name="cust_fs",
        features=[
            Feature(name="cust_rating", dtype=ValueType.INT64),
            Feature(name="cust_cost", dtype=ValueType.FLOAT),
        ],
        entities=[Entity("cust_id", ValueType.INT64)],
        max_age=Duration(seconds=3600),
    )
    client.apply(customer_fs)

    N_ROWS = 2
    time_offset = datetime.utcnow().replace(tzinfo=pytz.utc)
    cust_df = pd.DataFrame({
        "datetime": [time_offset] * N_ROWS,
        "cust_id": list(range(N_ROWS)),
        "cust_rating": list(range(N_ROWS)),
        "cust_cost": [i + 0.5 for i in range(N_ROWS)],
    })
    client.ingest("cust_fs", cust_df, timeout=600)
    time.sleep(15)

    online_request_entity = [{"cust_id": 0}, {"cust_id": 1}]
    online_request_features = ["cust_rating", "cust_cost"]

    def try_get_features():
        response = client.get_online_features(
            entity_rows=online_request_entity,
            feature_refs=online_request_features)
        return response, True

    # Retry with backoff until the store serves the freshly ingested rows.
    online_features_actual = wait_retry_backoff(
        retry_fn=try_get_features,
        timeout_secs=90,
        timeout_msg="Timed out trying to get online feature values",
    )

    online_features_expected = {
        "cust_id": [0, 1],
        "cust_rating": [0, 1],
        "cust_cost": [0.5, 1.5],
    }
    assert online_features_actual.to_dict() == online_features_expected
def test_feature_set_ingest_failure(self, client, dataframe, exception):
    """Ingestion failures propagate to the caller as the expected exception."""
    with pytest.raises(exception):
        # Create feature set
        driver_fs = FeatureSet("driver-feature-set")
        driver_fs.source = KafkaSource(topic="feature-topic",
                                       brokers="fake.broker.com")
        # Stub the producer so no real broker is contacted.
        client._message_producer = MagicMock()
        client._message_producer.produce = MagicMock()

        # Update based on dataset
        driver_fs.infer_fields_from_df(dataframe)

        # Register with Feast core
        client.apply(driver_fs)

        # Ingest data into Feast
        client.ingest(driver_fs, dataframe=dataframe)
def test_import_tfx_schema(self):
    """Importing a TFX schema populates presence/shape/domain on all fields."""
    tests_folder = pathlib.Path(__file__).parent
    schema_path = (tests_folder / "data" / "tensorflow_metadata" /
                   "bikeshare_schema.json")
    # Path.read_text() closes the file; the previous open(...).read() leaked
    # the file handle (ResourceWarning).
    test_input_schema = schema_pb2.Schema()
    json_format.Parse(schema_path.read_text(), test_input_schema)

    feature_set = FeatureSet(
        name="bikeshare",
        entities=[Entity(name="station_id", dtype=ValueType.INT64)],
        features=[
            Feature(name="name", dtype=ValueType.STRING),
            Feature(name="status", dtype=ValueType.STRING),
            Feature(name="latitude", dtype=ValueType.FLOAT),
            Feature(name="longitude", dtype=ValueType.FLOAT),
            Feature(name="location", dtype=ValueType.STRING),
        ],
    )

    # Before update: nothing populated yet.
    for entity in feature_set.entities:
        assert entity.presence is None
        assert entity.shape is None
    for feature in feature_set.features:
        assert feature.presence is None
        assert feature.shape is None
        assert feature.string_domain is None
        assert feature.float_domain is None
        assert feature.int_domain is None

    feature_set.import_tfx_schema(test_input_schema)

    # After update: every field carries presence/shape, and the populated
    # domain kind matches the feature's value type.
    for entity in feature_set.entities:
        assert entity.presence is not None
        assert entity.shape is not None
    for feature in feature_set.features:
        assert feature.presence is not None
        assert feature.shape is not None
        if feature.name in ["location", "name", "status"]:
            assert feature.string_domain is not None
        elif feature.name in ["latitude", "longitude"]:
            assert feature.float_domain is not None
        elif feature.name in ["station_id"]:
            assert feature.int_domain is not None
def test_feature_set_types_success(self, test_client, dataframe, mocker):
    """Every supported value type can be registered and ingested."""
    test_client.set_project("project1")
    all_types_fs = FeatureSet(
        name="all_types",
        entities=[Entity(name="user_id", dtype=ValueType.INT64)],
        features=[
            Feature(name="float_feature", dtype=ValueType.FLOAT),
            Feature(name="int64_feature", dtype=ValueType.INT64),
            Feature(name="int32_feature", dtype=ValueType.INT32),
            Feature(name="string_feature", dtype=ValueType.STRING),
            Feature(name="bytes_feature", dtype=ValueType.BYTES),
            Feature(name="bool_feature", dtype=ValueType.BOOL),
            Feature(name="double_feature", dtype=ValueType.DOUBLE),
            Feature(name="float_list_feature", dtype=ValueType.FLOAT_LIST),
            Feature(name="int64_list_feature", dtype=ValueType.INT64_LIST),
            Feature(name="int32_list_feature", dtype=ValueType.INT32_LIST),
            Feature(name="string_list_feature", dtype=ValueType.STRING_LIST),
            Feature(name="bytes_list_feature", dtype=ValueType.BYTES_LIST),
            # Feature(name="bool_list_feature",
            #         dtype=ValueType.BOOL_LIST),
            # TODO: Add support for this type again
            # https://github.com/feast-dev/feast/issues/341
            Feature(name="double_list_feature", dtype=ValueType.DOUBLE_LIST),
        ],
        max_age=Duration(seconds=3600),
    )

    # Register with Feast core
    test_client.apply(all_types_fs)

    # Stub core so lookup returns the registered proto.
    mocker.patch.object(
        test_client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(
            feature_set=all_types_fs.to_proto()),
    )

    # Need to create a mock producer
    with patch("feast.client.get_producer"):
        # Ingest data into Feast
        test_client.ingest(all_types_fs, dataframe)
def test_feature_set_ingest_success(self, dataframe, client, mocker):
    """A registered feature set accepts a dataframe ingest by name."""
    feature_specs = [
        ("feature_1", ValueType.FLOAT),
        ("feature_2", ValueType.STRING),
        ("feature_3", ValueType.INT64),
    ]
    driver_fs = FeatureSet("driver-feature-set")
    for feature_name, feature_dtype in feature_specs:
        driver_fs.add(Feature(name=feature_name, dtype=feature_dtype))
    driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64))

    # Register with Feast core, then have the stub echo the proto back.
    client.apply(driver_fs)
    mocker.patch.object(
        client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(feature_set=driver_fs.to_proto()),
    )

    # Ingest data into Feast
    client.ingest("driver-feature-set", dataframe=dataframe)
def test_feature_set_import_export_yaml(self):
    """A feature set survives a YAML export/import round trip."""
    test_feature_set = FeatureSet(
        name="bikeshare",
        entities=[Entity(name="station_id", dtype=ValueType.INT64)],
        features=[
            Feature(name="name", dtype=ValueType.STRING),
            Feature(name="longitude", dtype=ValueType.FLOAT),
            Feature(name="location", dtype=ValueType.STRING),
        ],
    )

    # Serialize to YAML, then rebuild a feature set from that string.
    round_tripped = FeatureSet.from_yaml(test_feature_set.to_yaml())

    # The reconstructed feature set must equal the original.
    assert test_feature_set == round_tripped