def test_feature_set_ingest_throws_exception_if_kafka_down(
    self, dataframe, test_client, exception, mocker
):
    """Expect ``exception`` from ``ingest`` when no Kafka producer is patched.

    The feature set is registered with a Kafka source at ``localhost:4412``
    and the stubbed ``GetFeatureSet`` response reports ``STATUS_READY``.
    Unlike the sibling ingest tests, ``feast.client.get_producer`` is not
    patched here; judging by the test name the broker is presumably
    unreachable, which is what should make ``ingest`` raise.
    """
    test_client.set_project("project1")

    kafka_source = KafkaSource(brokers="localhost:4412", topic="test")
    feature_set = FeatureSet("driver-feature-set", source=kafka_source)

    feature_columns = (
        ("feature_1", ValueType.FLOAT),
        ("feature_2", ValueType.STRING),
        ("feature_3", ValueType.INT64),
    )
    for column_name, column_type in feature_columns:
        feature_set.add(Feature(name=column_name, dtype=column_type))
    feature_set.add(Entity(name="entity_id", dtype=ValueType.INT64))

    # Register with Feast core
    test_client.apply(feature_set)

    # Serve a READY copy of the feature set from the core stub.
    ready_proto = feature_set.to_proto()
    ready_proto.meta.status = FeatureSetStatusProto.STATUS_READY
    mocker.patch.object(
        test_client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(feature_set=ready_proto),
    )

    with pytest.raises(exception):
        test_client.ingest("driver-feature-set", dataframe)
def test_feature_set_ingest_fail_if_pending(
    self, dataframe, exception, test_client, mocker
):
    """Expect ``exception`` from ``ingest`` while the feature set is pending.

    The stubbed ``GetFeatureSet`` response reports ``STATUS_PENDING`` and
    ``ingest`` is called with ``timeout=1``.

    Only the ``ingest`` call sits inside ``pytest.raises``. Setup steps
    (``set_project``, ``apply``, mocking) run outside it so that a setup
    failure surfaces as a test error instead of being mistaken for the
    expected exception.
    """
    test_client.set_project("project1")

    driver_fs = FeatureSet(
        "driver-feature-set",
        source=KafkaSource(brokers="kafka:9092", topic="test"),
    )
    driver_fs.add(Feature(name="feature_1", dtype=ValueType.FLOAT))
    driver_fs.add(Feature(name="feature_2", dtype=ValueType.STRING))
    driver_fs.add(Feature(name="feature_3", dtype=ValueType.INT64))
    driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64))

    # Register with Feast core
    test_client.apply(driver_fs)

    # Serve a PENDING copy of the feature set from the core stub.
    pending_proto = driver_fs.to_proto()
    pending_proto.meta.status = FeatureSetStatusProto.STATUS_PENDING
    mocker.patch.object(
        test_client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(feature_set=pending_proto),
    )

    # Need to create a mock producer
    with patch("feast.client.get_producer"):
        # Ingest data into Feast; this is the only call expected to raise.
        with pytest.raises(exception):
            test_client.ingest("driver-feature-set", dataframe, timeout=1)
def test_feature_set_ingest_success(self, dataframe, client, mocker):
    """Ingest a dataframe into a READY feature set with the producer patched.

    ``feast.client.get_producer`` is patched for the duration of the
    ingest call. The stubbed ``GetFeatureSet`` response reports
    ``STATUS_READY``. There is no explicit assertion: the test passes when
    ``ingest`` returns without raising.
    """
    client.set_project("project1")

    driver_fs = FeatureSet(
        "driver-feature-set",
        source=KafkaSource(brokers="kafka:9092", topic="test"),
    )
    driver_fs.add(Feature(name="feature_1", dtype=ValueType.FLOAT))
    driver_fs.add(Feature(name="feature_2", dtype=ValueType.STRING))
    driver_fs.add(Feature(name="feature_3", dtype=ValueType.INT64))
    driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64))

    # Register with Feast core
    client.apply(driver_fs)

    # Keep the proto under its own name instead of rebinding ``driver_fs``.
    ready_proto = driver_fs.to_proto()
    ready_proto.meta.status = FeatureSetStatusProto.STATUS_READY
    mocker.patch.object(
        client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(feature_set=ready_proto),
    )

    # Need to create a mock producer (the mock itself is never inspected,
    # so it is not bound to a name).
    with patch("feast.client.get_producer"):
        # Ingest data into Feast
        client.ingest("driver-feature-set", dataframe)
def test_feature_set_ingest_success(self, dataframe, client, mocker):
    """Ingest a dataframe through ``client`` with its message producer mocked.

    The client's ``_message_producer`` (and its ``produce`` attribute) are
    replaced by ``MagicMock`` objects, and ``GetFeatureSet`` on the core
    stub returns the locally built feature set. There is no explicit
    assertion: the test passes when ``ingest`` returns without raising.
    """
    feature_set = FeatureSet("driver-feature-set")

    feature_columns = (
        ("feature_1", ValueType.FLOAT),
        ("feature_2", ValueType.STRING),
        ("feature_3", ValueType.INT64),
    )
    for column_name, column_type in feature_columns:
        feature_set.add(Feature(name=column_name, dtype=column_type))
    feature_set.add(Entity(name="entity_id", dtype=ValueType.INT64))

    feature_set.source = KafkaSource(topic="feature-topic", brokers="127.0.0.1")

    # Swap the client's producer for mocks before anything is sent.
    client._message_producer = MagicMock()
    client._message_producer.produce = MagicMock()

    # Register with Feast core
    client.apply(feature_set)

    stubbed_response = GetFeatureSetResponse(feature_set=feature_set.to_proto())
    mocker.patch.object(
        client._core_service_stub,
        "GetFeatureSet",
        return_value=stubbed_response,
    )

    # Ingest data into Feast
    client.ingest("driver-feature-set", dataframe=dataframe)
def test_sources_deduplicate_ingest_jobs(client):
    """Check that feature sets sharing a source share one running ingest job.

    Steps:
      1. Stop every existing ingest job and wait for each to be ABORTED.
      2. Apply two feature sets with the same Kafka source and wait for
         exactly one RUNNING job.
      3. Point one feature set at a different topic and wait for two
         RUNNING jobs.
      4. Point it back at the shared source and wait for one RUNNING job.

    While waiting, the number of running jobs must stay within the bounds
    given for that step. The waits have no timeout of their own.
    """
    source = KafkaSource("localhost:9092", "feast-features")
    alt_source = KafkaSource("localhost:9092", "feast-data")

    def get_running_jobs():
        return [
            job
            for job in client.list_ingest_jobs()
            if job.status == IngestionJobStatus.RUNNING
        ]

    def wait_for_running_jobs(expected, lower, upper):
        # Take ONE snapshot per iteration so the bound check and the exit
        # condition look at the same job list (avoids a race between two
        # separate listings and halves the number of list calls).
        while True:
            running_count = len(get_running_jobs())
            if running_count == expected:
                return
            assert lower <= running_count <= upper
            time.sleep(1)

    # stop all ingest jobs
    ingest_jobs = client.list_ingest_jobs()
    for ingest_job in ingest_jobs:
        client.stop_ingest_job(ingest_job)
    for ingest_job in ingest_jobs:
        ingest_job.wait(IngestionJobStatus.ABORTED)

    # register multiple featuresets with the same source
    # only one ingest job should be spawned due to ingest job deduplication
    cust_trans_fs = FeatureSet.from_yaml(f"{DIR_PATH}/basic/cust_trans_fs.yaml")
    driver_fs = FeatureSet.from_yaml(f"{DIR_PATH}/basic/driver_fs.yaml")
    cust_trans_fs.source, driver_fs.source = source, source
    client.apply(cust_trans_fs)
    client.apply(driver_fs)
    wait_for_running_jobs(expected=1, lower=0, upper=1)

    # update feature sets with different sources, should spawn 2 ingest jobs
    driver_fs.source = alt_source
    client.apply(driver_fs)
    wait_for_running_jobs(expected=2, lower=1, upper=2)

    # update feature sets with same source again, should spawn only 1 ingest job
    driver_fs.source = source
    client.apply(driver_fs)
    wait_for_running_jobs(expected=1, lower=1, upper=2)
def test_feature_set_ingest_failure(self, client, dataframe, exception):
    """Expect ``exception`` somewhere in the infer/apply/ingest sequence.

    The whole sequence runs inside ``pytest.raises``, so the expected
    exception may come from schema inference, registration, or ingestion.
    The client's message producer is replaced by mocks beforehand.
    """
    with pytest.raises(exception):
        # Create feature set
        feature_set = FeatureSet("driver-feature-set")
        kafka_source = KafkaSource(topic="feature-topic", brokers="fake.broker.com")
        feature_set.source = kafka_source

        # Swap the client's producer for mocks.
        client._message_producer = MagicMock()
        client._message_producer.produce = MagicMock()

        # Update based on dataset
        feature_set.infer_fields_from_df(dataframe)

        # Register with Feast core
        client.apply(feature_set)

        # Ingest data into Feast
        client.ingest(feature_set, dataframe=dataframe)
def test_feature_set_types_success(self, client, dataframe, mocker):
    """Ingest a dataframe into a feature set declaring one feature per value type.

    The feature set has a single INT64 entity (``user_id``), fourteen
    features covering scalar and list value types, and a one-hour
    ``max_age``. The client's message producer is mocked and
    ``GetFeatureSet`` returns the locally built feature set. There is no
    explicit assertion: the test passes when ``ingest`` does not raise.
    """
    typed_columns = (
        ("float_feature", ValueType.FLOAT),
        ("int64_feature", ValueType.INT64),
        ("int32_feature", ValueType.INT32),
        ("string_feature", ValueType.STRING),
        ("bytes_feature", ValueType.BYTES),
        ("bool_feature", ValueType.BOOL),
        ("double_feature", ValueType.DOUBLE),
        ("float_list_feature", ValueType.FLOAT_LIST),
        ("int64_list_feature", ValueType.INT64_LIST),
        ("int32_list_feature", ValueType.INT32_LIST),
        ("string_list_feature", ValueType.STRING_LIST),
        ("bytes_list_feature", ValueType.BYTES_LIST),
        ("bool_list_feature", ValueType.BOOL_LIST),
        ("double_list_feature", ValueType.DOUBLE_LIST),
    )
    feature_set = FeatureSet(
        name="all_types",
        entities=[Entity(name="user_id", dtype=ValueType.INT64)],
        features=[
            Feature(name=column_name, dtype=column_type)
            for column_name, column_type in typed_columns
        ],
        max_age=Duration(seconds=3600),
    )
    feature_set.source = KafkaSource(topic="feature-topic", brokers="127.0.0.1")

    # Swap the client's producer for mocks before anything is sent.
    client._message_producer = MagicMock()
    client._message_producer.produce = MagicMock()

    # Register with Feast core
    client.apply(feature_set)

    stubbed_response = GetFeatureSetResponse(feature_set=feature_set.to_proto())
    mocker.patch.object(
        client._core_service_stub,
        "GetFeatureSet",
        return_value=stubbed_response,
    )

    # Ingest data into Feast
    client.ingest(feature_set, dataframe=dataframe)
def test_feature_set_ingest_success(self, dataframe, client):
    """Ingest via ``FeatureSet.ingest`` and verify the mocked producer was used.

    The feature set's ``_message_producer`` and its ``send`` attribute are
    replaced by ``MagicMock`` objects; after ingestion, ``send`` must have
    been called at least once.
    """
    feature_set = FeatureSet("driver-feature-set")

    feature_columns = (
        ("feature_1", ValueType.FLOAT),
        ("feature_2", ValueType.STRING),
        ("feature_3", ValueType.INT64),
    )
    for column_name, column_type in feature_columns:
        feature_set.add(Feature(name=column_name, dtype=column_type))
    feature_set.add(Entity(name="entity_id", dtype=ValueType.INT64))

    feature_set.source = KafkaSource(topic="feature-topic", brokers="127.0.0.1")

    # Swap the feature set's producer for mocks.
    feature_set._message_producer = MagicMock()
    feature_set._message_producer.send = MagicMock()

    # Register with Feast core
    client.apply(feature_set)

    # Ingest data into Feast
    feature_set.ingest(dataframe=dataframe)

    # Make sure message producer is called
    feature_set._message_producer.send.assert_called()
def test_feature_set_types_success(self, client, dataframe):
    """Ingest via ``FeatureSet.ingest`` with one feature per value type.

    The feature set has a single INT64 entity (``user_id``), fourteen
    features covering scalar and list value types, and a one-hour
    ``max_age``. Its message producer is mocked, and after ingestion the
    mocked ``send`` must have been called at least once.
    """
    typed_columns = (
        ("float_feature", ValueType.FLOAT),
        ("int64_feature", ValueType.INT64),
        ("int32_feature", ValueType.INT32),
        ("string_feature", ValueType.STRING),
        ("bytes_feature", ValueType.BYTES),
        ("bool_feature", ValueType.BOOL),
        ("double_feature", ValueType.DOUBLE),
        ("float_list_feature", ValueType.FLOAT_LIST),
        ("int64_list_feature", ValueType.INT64_LIST),
        ("int32_list_feature", ValueType.INT32_LIST),
        ("string_list_feature", ValueType.STRING_LIST),
        ("bytes_list_feature", ValueType.BYTES_LIST),
        ("bool_list_feature", ValueType.BOOL_LIST),
        ("double_list_feature", ValueType.DOUBLE_LIST),
    )
    feature_set = FeatureSet(
        name="all_types",
        entities=[Entity(name="user_id", dtype=ValueType.INT64)],
        features=[
            Feature(name=column_name, dtype=column_type)
            for column_name, column_type in typed_columns
        ],
        max_age=Duration(seconds=3600),
    )
    feature_set.source = KafkaSource(topic="feature-topic", brokers="127.0.0.1")

    # Swap the feature set's producer for mocks.
    feature_set._message_producer = MagicMock()
    feature_set._message_producer.send = MagicMock()

    # Register with Feast core
    client.apply(feature_set)

    # Ingest data into Feast
    feature_set.ingest(dataframe=dataframe)

    # Make sure message producer is called
    feature_set._message_producer.send.assert_called()
def test_feature_set_ingest_failure(self, client, dataframe, exception):
    """Expect ``exception`` somewhere in the update/apply/ingest sequence.

    The whole sequence runs inside ``pytest.raises``, so the expected
    exception may come from ``update_from_dataset``, registration, or
    ingestion. The feature set's message producer is replaced by mocks
    beforehand.
    """
    with pytest.raises(exception):
        # Create feature set
        feature_set = FeatureSet("driver-feature-set")
        kafka_source = KafkaSource(topic="feature-topic", brokers="fake.broker.com")
        feature_set.source = kafka_source

        # Swap the feature set's producer for mocks.
        feature_set._message_producer = MagicMock()
        feature_set._message_producer.send = MagicMock()

        # Update based on dataset
        entity_mapping = {
            "entity_id": Entity(name="entity", dtype=ValueType.INT64),
        }
        feature_set.update_from_dataset(dataframe, column_mapping=entity_mapping)

        # Register with Feast core
        client.apply(feature_set)

        # Ingest data into Feast
        feature_set.ingest(dataframe=dataframe)