def test_add_remove_features_success(self):
    """Dropping one of two added features leaves only the other one."""
    feature_set = FeatureSet("my-feature-set")
    for feature_name in ("my-feature-1", "my-feature-2"):
        feature_set.add(Feature(name=feature_name, dtype=ValueType.INT64))

    feature_set.drop(name="my-feature-1")

    # Checked separately so a failure shows which expectation broke.
    assert len(feature_set.features) == 1
    assert feature_set.features[0].name == "my-feature-2"
def test_basic_retrieve_online_entity_listform(client, list_entity_dataframe):
    """Retrieve online features for an entity whose value is a list.

    Case 1 sends the same entity row twice -- once as native numpy values and
    once wrapped in a ``Value`` proto -- and expects both responses to match
    the same expected dictionary. Case 2 expects a ``ValueError`` when the
    entity list mixes element types.
    """
    # Case 1: Features retrieval with entity in list format check
    district_fs = FeatureSet(
        name="district",
        features=[
            Feature(name="district_rating", dtype=ValueType.INT64),
            Feature(name="district_cost", dtype=ValueType.FLOAT),
            Feature(
                name="district_past_transactions_int",
                dtype=ValueType.INT64_LIST,
            ),
            Feature(
                name="district_past_transactions_double",
                dtype=ValueType.DOUBLE_LIST,
            ),
            Feature(
                name="district_past_transactions_float",
                dtype=ValueType.FLOAT_LIST,
            ),
            Feature(
                name="district_past_transactions_string",
                dtype=ValueType.STRING_LIST,
            ),
            Feature(
                name="district_past_transactions_bool",
                dtype=ValueType.BOOL_LIST,
            ),
        ],
        entities=[Entity("district_ids", dtype=ValueType.INT64_LIST)],
        max_age=Duration(seconds=3600),
    )

    client.set_project(PROJECT_NAME)
    client.apply(district_fs)
    # Re-read the feature set from the client before ingesting into it.
    district_fs = client.get_feature_set(name="district")
    client.ingest(district_fs, list_entity_dataframe, timeout=600)
    time.sleep(15)

    feature_refs = [
        "district_rating",
        "district_cost",
        "district_past_transactions_int",
        "district_past_transactions_double",
        "district_past_transactions_float",
        "district_past_transactions_string",
        "district_past_transactions_bool",
    ]
    native_entity_rows = [
        {"district_ids": [np.int64(1), np.int64(2), np.int64(3)]}
    ]
    proto_entity_rows = [
        {"district_ids": Value(int64_list_val=Int64List(val=[1, 2, 3]))}
    ]

    def make_fetcher(entity_rows):
        """Build a retry callback that fetches features for ``entity_rows``."""

        def fetch():
            response = client.get_online_features(
                entity_rows=entity_rows, feature_refs=feature_refs
            )
            # The second element is always True: any response is accepted.
            return response, True

        return fetch

    responses = [
        wait_retry_backoff(
            retry_fn=make_fetcher(entity_rows),
            timeout_secs=90,
            timeout_msg="Timed out trying to get online feature values",
        )
        for entity_rows in (native_entity_rows, proto_entity_rows)
    ]

    expected = {
        "district_ids": [[np.int64(1), np.int64(2), np.int64(3)]],
        "district_rating": [1],
        "district_cost": [1.5],
        "district_past_transactions_int": [[1, 3]],
        "district_past_transactions_double": [[1.5, 3.0]],
        "district_past_transactions_float": [[1.5, 3.0]],
        "district_past_transactions_string": [["first_1", "second_1"]],
        "district_past_transactions_bool": [[True, False]],
    }
    for response in responses:
        assert response.to_dict() == expected

    # Case 2: Features retrieval with entity in list format check with mixed types
    mixed_entity_rows = [{"district_ids": [np.int64(1), np.int64(2), True]}]
    with pytest.raises(ValueError) as excinfo:
        client.get_online_features(
            entity_rows=mixed_entity_rows, feature_refs=feature_refs
        )

    assert (
        "List value type for field district_ids is inconsistent. ValueType.INT64 different from ValueType.BOOL."
        in str(excinfo.value)
    )
def test_feature_set_ingest_success(self, dataframe, client, mocker):
    """Apply a feature set, stub ``GetFeatureSet``, then ingest a dataframe."""
    driver_fs = FeatureSet("driver-feature-set")
    fields = (
        Feature(name="feature_1", dtype=ValueType.FLOAT),
        Feature(name="feature_2", dtype=ValueType.STRING),
        Feature(name="feature_3", dtype=ValueType.INT64),
        Entity(name="entity_id", dtype=ValueType.INT64),
    )
    for field in fields:
        driver_fs.add(field)

    # Register with Feast core
    client.apply(driver_fs)

    # Make the core stub answer GetFeatureSet with the feature set built above.
    mocker.patch.object(
        client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(feature_set=driver_fs.to_proto()),
    )

    # Ingest data into Feast
    client.ingest("driver-feature-set", dataframe=dataframe)
def test_feature_set_without_labels_empty_dict():
    """Labels of a feature set built without any equal an empty OrderedDict."""
    feature_set = FeatureSet("my-feature-set")

    no_labels = OrderedDict()
    assert feature_set.labels == no_labels
    assert len(feature_set.labels) == 0
def test_basic_retrieve_online_entity_nonlistform(
    client, nonlist_entity_dataframe, list_entity_dataframe
):
    """Retrieve online features for several scalar-valued entity rows.

    Case 1 requests two entity rows, once as native ints and once as ``Value``
    protos, and expects both responses to match the same expected dictionary.
    Case 2 expects a ``TypeError`` when the rows carry mixed value types for
    the same entity key.
    """
    # Case 1: Feature retrieval with multiple entities retrieval check
    customer_fs = FeatureSet(
        name="customer2",
        features=[
            Feature(name="customer2_rating", dtype=ValueType.INT64),
            Feature(name="customer2_cost", dtype=ValueType.FLOAT),
            Feature(
                name="customer2_past_transactions_int",
                dtype=ValueType.INT64_LIST,
            ),
            Feature(
                name="customer2_past_transactions_double",
                dtype=ValueType.DOUBLE_LIST,
            ),
            Feature(
                name="customer2_past_transactions_float",
                dtype=ValueType.FLOAT_LIST,
            ),
            Feature(
                name="customer2_past_transactions_string",
                dtype=ValueType.STRING_LIST,
            ),
            Feature(
                name="customer2_past_transactions_bool",
                dtype=ValueType.BOOL_LIST,
            ),
        ],
        entities=[Entity("customer_id2", ValueType.INT64)],
        max_age=Duration(seconds=3600),
    )

    client.set_project(PROJECT_NAME)
    client.apply(customer_fs)
    # Re-read the feature set from the client before ingesting into it.
    customer_fs = client.get_feature_set(name="customer2")
    client.ingest(customer_fs, nonlist_entity_dataframe, timeout=600)
    time.sleep(15)

    feature_refs = [
        "customer2_rating",
        "customer2_cost",
        "customer2_past_transactions_int",
        "customer2_past_transactions_double",
        "customer2_past_transactions_float",
        "customer2_past_transactions_string",
        "customer2_past_transactions_bool",
    ]
    native_entity_rows = [{"customer_id2": 0}, {"customer_id2": 1}]
    proto_entity_rows = [
        {"customer_id2": Value(int64_val=0)},
        {"customer_id2": Value(int64_val=1)},
    ]

    def fetch_with_native_rows():
        """Fetch using plain ints; any response is accepted."""
        response = client.get_online_features(
            entity_rows=native_entity_rows, feature_refs=feature_refs
        )
        # NOTE: unlike the proto-based fetch below, this response is not
        # validated with check_online_response before being accepted.
        return response, True

    def fetch_with_proto_rows():
        """Fetch using Value protos; accepted once the response checks out."""
        response = client.get_online_features(
            entity_rows=proto_entity_rows, feature_refs=feature_refs
        )
        response_ok = check_online_response(
            feature_refs, nonlist_entity_dataframe, response
        )
        return response, response_ok

    native_response = wait_retry_backoff(
        retry_fn=fetch_with_native_rows,
        timeout_secs=90,
        timeout_msg="Timed out trying to get online feature values",
    )
    proto_response = wait_retry_backoff(
        retry_fn=fetch_with_proto_rows,
        timeout_secs=90,
        timeout_msg="Timed out trying to get online feature values",
    )

    expected = {
        "customer_id2": [0, 1],
        "customer2_rating": [0, 1],
        "customer2_cost": [0.5, 1.5],
        "customer2_past_transactions_int": [[0, 2], [1, 3]],
        "customer2_past_transactions_double": [[0.5, 2.0], [1.5, 3.0]],
        "customer2_past_transactions_float": [[0.5, 2.0], [1.5, 3.0]],
        "customer2_past_transactions_string": [
            ["first_0", "second_0"],
            ["first_1", "second_1"],
        ],
        "customer2_past_transactions_bool": [[True, False], [True, False]],
    }

    assert native_response.to_dict() == expected
    assert proto_response.to_dict() == expected

    # Case 2: Feature retrieval with multiple entities retrieval check with mixed types
    mixed_entity_rows = [{"customer_id": 0}, {"customer_id": "error_pls"}]
    with pytest.raises(TypeError) as excinfo:
        client.get_online_features(
            entity_rows=mixed_entity_rows, feature_refs=feature_refs
        )

    assert (
        "Input entity customer_id has mixed types, ValueType.STRING and ValueType.INT64. That is not allowed."
        in str(excinfo.value)
    )
def test_feature_set_ingest_success(self, dataframe, client, mocker):
    """Ingest into a Kafka-sourced feature set with ``KafkaProducer`` patched."""
    kafka_source = KafkaSource(brokers="kafka:9092", topic="test")
    driver_fs = FeatureSet("driver-feature-set", source=kafka_source)
    fields = (
        Feature(name="feature_1", dtype=ValueType.FLOAT),
        Feature(name="feature_2", dtype=ValueType.STRING),
        Feature(name="feature_3", dtype=ValueType.INT64),
        Entity(name="entity_id", dtype=ValueType.INT64),
    )
    for field in fields:
        driver_fs.add(field)

    # Register with Feast core
    client.apply(driver_fs)

    # Make the core stub answer GetFeatureSet with the feature set built above.
    mocker.patch.object(
        client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(feature_set=driver_fs.to_proto()),
    )

    # Need to create a mock producer
    with patch("feast.loaders.ingest.KafkaProducer"):
        # Ingest data into Feast
        client.ingest("driver-feature-set", dataframe)
def test_feature_set_ingest_fail_if_pending(
    self, dataframe, exception, test_client, mocker
):
    """Ingesting must raise ``exception`` while the feature set is PENDING.

    The core stub is patched so ``GetFeatureSet`` always returns the feature
    set with ``STATUS_PENDING``; ``ingest`` is then called with ``timeout=1``
    and is expected to raise ``exception``.
    """
    test_client.set_project("project1")
    driver_fs = FeatureSet(
        "driver-feature-set",
        source=KafkaSource(brokers="kafka:9092", topic="test"),
    )
    driver_fs.add(Feature(name="feature_1", dtype=ValueType.FLOAT))
    driver_fs.add(Feature(name="feature_2", dtype=ValueType.STRING))
    driver_fs.add(Feature(name="feature_3", dtype=ValueType.INT64))
    driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64))

    # Register with Feast core
    test_client.apply(driver_fs)

    # Use a separate name for the proto instead of rebinding ``driver_fs``.
    pending_proto = driver_fs.to_proto()
    pending_proto.meta.status = FeatureSetStatusProto.STATUS_PENDING

    mocker.patch.object(
        test_client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(feature_set=pending_proto),
    )

    # Need to create a mock producer
    with patch("feast.client.get_producer"):
        # Only the ingest call may raise: if setup fails with the same
        # exception type the test must error out, not pass.
        with pytest.raises(exception):
            # Ingest data into Feast
            test_client.ingest("driver-feature-set", dataframe, timeout=1)
def test_from_feature_set(self):
    """A ref built from a feature set reports "test" as name and project."""
    shared_value = "test"
    source_feature_set = FeatureSet(shared_value, shared_value)

    ref = FeatureSetRef.from_feature_set(source_feature_set)

    assert ref.name == shared_value
    assert ref.project == shared_value
def feature_sets(self) -> List[FeatureSet]:
    """
    Getter for the IngestJob's feature sets

    Returns:
        A new list with one ``FeatureSet`` per entry of
        ``self.proto.feature_sets``, in the same order.
    """
    # Convert each feature set proto into its native object.
    return list(map(FeatureSet.from_proto, self.proto.feature_sets))
def test_remove_labels_invalid_key_failure():
    """Removing a key that was never set raises ``KeyError``."""
    feature_set = FeatureSet("my-feature-set")
    feature_set.set_label("k1", "v1")

    # "key1" differs from the only label present ("k1").
    missing_key = "key1"
    with pytest.raises(KeyError):
        feature_set.remove_label(missing_key)
def test_unequal_feature_set_other_has_no_labels():
    """Two same-named feature sets stop comparing equal once one has a label."""
    labelled, unlabelled = (FeatureSet("my-feature-set") for _ in range(2))

    # Before any label is set the two compare equal.
    assert labelled == unlabelled

    labelled.set_label("k1", "v1")
    assert not (labelled == unlabelled)
def test_remove_labels_empty_failure():
    """Removing a label when none were ever set raises ``KeyError``."""
    unlabelled = FeatureSet("my-feature-set")

    with pytest.raises(KeyError):
        unlabelled.remove_label("key1")
def test_set_labels_overwrites_existing():
    """Setting the same label key twice keeps the most recent value."""
    feature_set = FeatureSet("my-feature-set")
    for label_value in ("v1", "v2"):
        feature_set.set_label("k1", label_value)

    assert feature_set.labels["k1"] == "v2"
def test_set_label_feature_set():
    """A label stored with ``set_label`` is readable through ``labels``."""
    label_key, label_value = "k1", "v1"
    feature_set = FeatureSet("my-feature-set")

    feature_set.set_label(label_key, label_value)

    assert feature_set.labels[label_key] == label_value
def test_remove_feature_failure(self):
    """Dropping a feature that was never added raises ``ValueError``."""
    # Construct outside the raises block so a ValueError from the constructor
    # surfaces as a test error instead of satisfying the expectation.
    fs = FeatureSet("my-feature-set")

    with pytest.raises(ValueError):
        fs.drop(name="my-feature-1")
def test_feature_set_ingest_success(self, dataframe, client, mocker):
    """Ingest a dataframe with the client's message producer mocked out."""
    driver_fs = FeatureSet("driver-feature-set")
    fields = (
        Feature(name="feature_1", dtype=ValueType.FLOAT),
        Feature(name="feature_2", dtype=ValueType.STRING),
        Feature(name="feature_3", dtype=ValueType.INT64),
        Entity(name="entity_id", dtype=ValueType.INT64),
    )
    for field in fields:
        driver_fs.add(field)
    driver_fs.source = KafkaSource(topic="feature-topic", brokers="127.0.0.1")

    # Swap the producer (and its produce method) for mocks.
    client._message_producer = MagicMock()
    client._message_producer.produce = MagicMock()

    # Register with Feast core
    client.apply(driver_fs)

    # Make the core stub answer GetFeatureSet with the feature set built above.
    mocker.patch.object(
        client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(feature_set=driver_fs.to_proto()),
    )

    # Ingest data into Feast
    client.ingest("driver-feature-set", dataframe=dataframe)
def test_update_from_source_failure(self):
    """Inferring fields from an empty dataframe must raise."""
    # Setup stays outside the raises block: with a catch-all ``Exception``
    # expectation, any setup failure would otherwise make the test pass.
    df = pd.DataFrame()
    fs = FeatureSet("driver-feature-set")

    with pytest.raises(Exception):
        fs.infer_fields_from_df(df)
def test_batch_apply_all_featuresets(client):
    """Apply a fixed batch of single-feature feature sets to ``PROJECT_NAME``.

    Each feature set has exactly one feature and one INT64 entity; the table
    below lists them in the order they are applied.
    """
    client.set_project(PROJECT_NAME)

    # (feature set name, feature name, feature dtype, entity name, max age secs)
    specs = [
        ("file_feature_set", "feature_value1", ValueType.STRING, "entity_id", 100),
        ("gcs_feature_set", "feature_value2", ValueType.STRING, "entity_id", 100),
        ("processing_time", "feature_value3", ValueType.STRING, "entity_id", 100),
        ("additional_columns", "feature_value4", ValueType.STRING, "entity_id", 100),
        ("historical", "feature_value5", ValueType.STRING, "entity_id", 100),
        ("feature_set_1", "feature_value6", ValueType.STRING, "entity_id", 100),
        (
            "feature_set_2",
            "other_feature_value7",
            ValueType.INT64,
            "other_entity_id",
            100,
        ),
        ("no_max_age", "feature_value8", ValueType.INT64, "entity_id", 0),
    ]

    for fs_name, feature_name, feature_dtype, entity_name, max_age_secs in specs:
        client.apply(
            FeatureSet(
                fs_name,
                features=[Feature(feature_name, feature_dtype)],
                entities=[Entity(entity_name, ValueType.INT64)],
                max_age=Duration(seconds=max_age_secs),
            )
        )
def test_feature_set_ingest_throws_exception_if_kafka_down(
    self, dataframe, test_client, exception, mocker
):
    """Ingesting must raise ``exception`` for the configured Kafka source.

    The core stub is patched to report the feature set as ``STATUS_READY``;
    the source points at ``localhost:4412`` (presumably nothing is listening
    there, per the test name -- confirm against the test environment).
    """
    test_client.set_project("project1")

    kafka_source = KafkaSource(brokers="localhost:4412", topic="test")
    driver_fs = FeatureSet("driver-feature-set", source=kafka_source)
    fields = (
        Feature(name="feature_1", dtype=ValueType.FLOAT),
        Feature(name="feature_2", dtype=ValueType.STRING),
        Feature(name="feature_3", dtype=ValueType.INT64),
        Entity(name="entity_id", dtype=ValueType.INT64),
    )
    for field in fields:
        driver_fs.add(field)

    # Register with Feast core
    test_client.apply(driver_fs)

    ready_proto = driver_fs.to_proto()
    ready_proto.meta.status = FeatureSetStatusProto.STATUS_READY
    mocker.patch.object(
        test_client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(feature_set=ready_proto),
    )

    with pytest.raises(exception):
        test_client.ingest("driver-feature-set", dataframe, timeout=1)
def test_list_entities_and_features(client):
    """List features by ref, filtering on project, entities and labels.

    Each assertion compares ``set(expected)`` with ``set(actual)``; for the
    expected dicts that means only the reference keys are compared.
    """
    customer_entity = Entity("customer_id", ValueType.INT64)
    driver_entity = Entity("driver_id", ValueType.INT64)

    customer_feature_rating = Feature(
        name="rating", dtype=ValueType.FLOAT, labels={"key1": "val1"}
    )
    customer_feature_cost = Feature(name="cost", dtype=ValueType.FLOAT)
    driver_feature_rating = Feature(name="rating", dtype=ValueType.FLOAT)
    driver_feature_cost = Feature(
        name="cost", dtype=ValueType.FLOAT, labels={"key1": "val1"}
    )

    expected_by_entity_and_labels = {
        "customer:rating": customer_feature_rating,
    }
    expected_by_entity = {
        "driver:cost": driver_feature_cost,
        "driver:rating": driver_feature_rating,
    }
    expected_by_labels = {
        "customer:rating": customer_feature_rating,
        "driver:cost": driver_feature_cost,
    }

    customer_fs = FeatureSet(
        "customer",
        features=[customer_feature_rating, customer_feature_cost],
        entities=[customer_entity],
        max_age=Duration(seconds=100),
    )
    driver_fs = FeatureSet(
        "driver",
        features=[driver_feature_rating, driver_feature_cost],
        entities=[driver_entity],
        max_age=Duration(seconds=100),
    )

    client.set_project(PROJECT_NAME)
    for feature_set in (customer_fs, driver_fs):
        client.apply(feature_set)

    # Test for listing of features
    # Case 1: Filter by: project, entities and labels
    actual_by_entity_and_labels = client.list_features_by_ref(
        project=PROJECT_NAME,
        entities=["customer_id"],
        labels={"key1": "val1"},
    )

    # Case 2: Filter by: project, entities
    actual_by_entity = client.list_features_by_ref(
        project=PROJECT_NAME, entities=["driver_id"]
    )

    # Case 3: Filter by: project, labels
    actual_by_labels = client.list_features_by_ref(
        project=PROJECT_NAME, labels={"key1": "val1"}
    )

    assert set(expected_by_entity_and_labels) == set(actual_by_entity_and_labels)
    assert set(expected_by_entity) == set(actual_by_entity)
    assert set(expected_by_labels) == set(actual_by_labels)
def test_apply_feature_set_success(self, test_client):
    """Apply two feature sets and verify them through ``list_feature_sets``."""
    test_client.set_project("project1")

    # Create Feature Sets
    fs1 = FeatureSet("my-feature-set-1")
    fs1.add(Feature(name="fs1-my-feature-1", dtype=ValueType.INT64))
    fs1.add(Feature(name="fs1-my-feature-2", dtype=ValueType.STRING))
    fs1.add(Entity(name="fs1-my-entity-1", dtype=ValueType.INT64))

    fs2 = FeatureSet("my-feature-set-2")
    fs2.add(Feature(name="fs2-my-feature-1", dtype=ValueType.STRING_LIST))
    fs2.add(Feature(name="fs2-my-feature-2", dtype=ValueType.BYTES_LIST))
    fs2.add(Entity(name="fs2-my-entity-1", dtype=ValueType.INT64))

    # Register Feature Set with Core
    for feature_set in (fs1, fs2):
        test_client.apply(feature_set)

    # List Feature Sets
    feature_sets = test_client.list_feature_sets()

    # One expectation per assert, in the original evaluation order.
    assert len(feature_sets) == 2
    assert feature_sets[0].name == "my-feature-set-1"
    assert feature_sets[0].features[0].name == "fs1-my-feature-1"
    assert feature_sets[0].features[0].dtype == ValueType.INT64
    assert feature_sets[1].features[1].dtype == ValueType.BYTES_LIST
def test_feature_set_class_contains_labels():
    """Labels passed to the constructor are exposed through ``labels``."""
    feature_set = FeatureSet(
        "my-feature-set", labels={"key1": "val1", "key2": "val2"}
    )

    for label_key, label_value in (("key1", "val1"), ("key2", "val2")):
        assert label_key in feature_set.labels.keys()
        assert feature_set.labels[label_key] == label_value