def test_to_dataframe_local_file_staging_should_pass(
    self, retrieve_job, avro_data_path, mocker
):
    """Check that a finished job staged as a local file loads into a DataFrame.

    ``GetJob`` on the job's serving stub is patched to report a completed
    download job whose only Avro file is the path supplied by the
    ``avro_data_path`` fixture; the resulting frame must match
    ``TEST_DATA_FRAME``.
    """
    finished_job = BatchRetrievalJob(
        id="123",
        type=JobType.JOB_TYPE_DOWNLOAD,
        status=JobStatus.JOB_STATUS_DONE,
        file_uris=[f"file://{avro_data_path}"],
        data_format=DataFormat.DATA_FORMAT_AVRO,
    )
    mocker.patch.object(
        retrieve_job.serving_stub,
        "GetJob",
        return_value=GetJobResponse(job=finished_job),
    )

    loaded_frame = retrieve_job.to_dataframe()

    # check_like=True makes the comparison ignore index/column ordering.
    assert_frame_equal(TEST_DATA_FRAME, loaded_frame, check_like=True)
@mock_s3
def test_to_dataframe_s3_file_staging_should_pass(
    self, retrieve_job, avro_data_path, mocker
):
    """Check that a finished job staged on S3 loads into a DataFrame.

    The Avro file from the ``avro_data_path`` fixture is uploaded to
    ``BUCKET`` and ``GetJob`` is patched to report a completed download job
    pointing at that ``s3://`` URI; the resulting frame must match
    ``TEST_DATA_FRAME``.
    """
    # @mock_s3 keeps every boto3 call below inside moto's in-memory S3;
    # without it the test would try to create a bucket on real AWS.
    s3_client = boto3.client("s3")
    target = "test_proj/test_features.avro"
    s3_client.create_bucket(Bucket=BUCKET)
    with open(avro_data_path, "rb") as data:
        s3_client.upload_fileobj(data, BUCKET, target)

    finished_job = BatchRetrievalJob(
        id="123",
        type=JobType.JOB_TYPE_DOWNLOAD,
        status=JobStatus.JOB_STATUS_DONE,
        file_uris=[f"s3://{BUCKET}/{target}"],
        data_format=DataFormat.DATA_FORMAT_AVRO,
    )
    mocker.patch.object(
        retrieve_job.serving_stub,
        "GetJob",
        return_value=GetJobResponse(job=finished_job),
    )

    retrieved_df = retrieve_job.to_dataframe()

    # check_like=True makes the comparison ignore index/column ordering.
    assert_frame_equal(TEST_DATA_FRAME, retrieved_df, check_like=True)
def test_get_batch_features(self, mocked_client, mocker):
    """Run ``get_batch_features`` against patched Core and Serving stubs.

    Core's ``GetFeatureSet`` returns a ready ``driver`` feature set, while
    Serving's ``GetBatchFeatures`` and ``GetJob`` both report a finished
    download job backed by a local Avro file, and ``GetFeastServingInfo``
    reports a batch serving type with a local staging directory. The test
    asserts the returned job's id and status and that the ``driver_id``
    column read back equals the one written.
    """
    mocked_client._serving_service_stub = Serving.ServingServiceStub(
        grpc.insecure_channel("")
    )
    mocked_client._core_service_stub = Core.CoreServiceStub(
        grpc.insecure_channel("")
    )

    mocker.patch.object(
        mocked_client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(
            feature_set=FeatureSetProto(
                spec=FeatureSetSpecProto(
                    name="driver",
                    project="driver_project",
                    entities=[
                        EntitySpecProto(
                            name="driver",
                            value_type=ValueProto.ValueType.INT64,
                        ),
                        EntitySpecProto(
                            name="transaction",
                            value_type=ValueProto.ValueType.INT64,
                        ),
                    ],
                    features=[
                        FeatureSpecProto(
                            name="driver_id",
                            value_type=ValueProto.ValueType.FLOAT,
                        ),
                        FeatureSpecProto(
                            name="driver_name",
                            value_type=ValueProto.ValueType.STRING,
                        ),
                    ],
                ),
                meta=FeatureSetMetaProto(
                    status=FeatureSetStatusProto.STATUS_READY
                ),
            )
        ),
    )

    expected_dataframe = pd.DataFrame(
        {
            "datetime": [datetime.utcnow() for _ in range(3)],
            "driver": [1001, 1002, 1003],
            "transaction": [1001, 1002, 1003],
            "driver_id": [1001, 1002, 1003],
        }
    )

    # tempfile.mktemp() only hands back a name and is deprecated because
    # another process can claim it first; create the file for real and keep
    # it (delete=False) so pandavro can write to the path afterwards.
    with tempfile.NamedTemporaryFile(delete=False) as staged_file:
        final_results = staged_file.name
    pandavro.to_avro(file_path_or_buffer=final_results, df=expected_dataframe)

    # Both GetBatchFeatures and GetJob report the same finished job.
    finished_job = BatchRetrievalJob(
        id="123",
        type=JobType.JOB_TYPE_DOWNLOAD,
        status=JobStatus.JOB_STATUS_DONE,
        file_uris=[f"file://{final_results}"],
        data_format=DataFormat.DATA_FORMAT_AVRO,
    )
    mocker.patch.object(
        mocked_client._serving_service_stub,
        "GetBatchFeatures",
        return_value=GetBatchFeaturesResponse(job=finished_job),
    )
    mocker.patch.object(
        mocked_client._serving_service_stub,
        "GetJob",
        return_value=GetJobResponse(job=finished_job),
    )
    mocker.patch.object(
        mocked_client._serving_service_stub,
        "GetFeastServingInfo",
        return_value=GetFeastServingInfoResponse(
            job_staging_location=f"file://{tempfile.mkdtemp()}/",
            type=FeastServingType.FEAST_SERVING_TYPE_BATCH,
        ),
    )

    mocked_client.set_project("project1")

    # pd.datetime was removed in pandas 2.0; use the datetime class directly.
    entity_rows = pd.DataFrame(
        {
            "datetime": [
                datetime.now(tz=timezone("Asia/Singapore")) for _ in range(3)
            ],
            "driver": [1001, 1002, 1003],
            "transaction": [1001, 1002, 1003],
        }
    )

    # TODO: Abstract away GCS client and GCP dependency
    # NOTE: Feast Serving does not allow for feature references
    # that specify the same feature in the same request.
    with patch("google.cloud.storage.Client"):
        response = mocked_client.get_batch_features(
            entity_rows=entity_rows,
            feature_refs=["driver:driver_id", "driver_id"],
            project="driver_project",
        )  # Type: GetBatchFeaturesResponse

    # Separate asserts so a failure names the condition that broke.
    assert response.id == "123"
    assert response.status == JobStatus.JOB_STATUS_DONE

    actual_dataframe = response.to_dataframe()

    assert actual_dataframe[["driver_id"]].equals(
        expected_dataframe[["driver_id"]]
    )
class TestRetrievalJob:
    """Tests for ``RetrievalJob.to_dataframe`` with a patched ``GetJob`` call."""

    @fixture
    def retrieve_job(self):
        """Return a ``RetrievalJob`` wrapping a running download job with id "123"."""
        serving_service_stub = Serving.ServingServiceStub(
            grpc.insecure_channel("")
        )
        job_proto = JobProto(
            id="123",
            type=JobType.JOB_TYPE_DOWNLOAD,
            status=JobStatus.JOB_STATUS_RUNNING,
        )
        return RetrievalJob(job_proto, serving_service_stub)

    @fixture
    def avro_data_path(self):
        """Write ``TEST_DATA_FRAME`` to a temporary Avro file and return its path."""
        # tempfile.mktemp() only hands back a name and is deprecated because
        # another process can claim it first; create the file for real and
        # keep it (delete=False) so pandavro can write to the path afterwards.
        with tempfile.NamedTemporaryFile(delete=False) as staged_file:
            final_results = staged_file.name
        pandavro.to_avro(file_path_or_buffer=final_results, df=TEST_DATA_FRAME)
        return final_results

    def test_to_dataframe_local_file_staging_should_pass(
        self, retrieve_job, avro_data_path, mocker
    ):
        """A finished job staged as a local file loads into the expected frame."""
        mocker.patch.object(
            retrieve_job.serving_stub,
            "GetJob",
            return_value=GetJobResponse(
                job=BatchRetrievalJob(
                    id="123",
                    type=JobType.JOB_TYPE_DOWNLOAD,
                    status=JobStatus.JOB_STATUS_DONE,
                    file_uris=[f"file://{avro_data_path}"],
                    data_format=DataFormat.DATA_FORMAT_AVRO,
                )
            ),
        )
        retrieved_df = retrieve_job.to_dataframe()
        # check_like=True makes the comparison ignore index/column ordering.
        assert_frame_equal(TEST_DATA_FRAME, retrieved_df, check_like=True)

    @mock_s3
    def test_to_dataframe_s3_file_staging_should_pass(
        self, retrieve_job, avro_data_path, mocker
    ):
        """A finished job staged on (mocked) S3 loads into the expected frame."""
        s3_client = boto3.client("s3")
        target = "test_proj/test_features.avro"
        s3_client.create_bucket(Bucket=BUCKET)
        with open(avro_data_path, "rb") as data:
            s3_client.upload_fileobj(data, BUCKET, target)

        mocker.patch.object(
            retrieve_job.serving_stub,
            "GetJob",
            return_value=GetJobResponse(
                job=BatchRetrievalJob(
                    id="123",
                    type=JobType.JOB_TYPE_DOWNLOAD,
                    status=JobStatus.JOB_STATUS_DONE,
                    file_uris=[f"s3://{BUCKET}/{target}"],
                    data_format=DataFormat.DATA_FORMAT_AVRO,
                )
            ),
        )
        retrieved_df = retrieve_job.to_dataframe()
        # check_like=True makes the comparison ignore index/column ordering.
        assert_frame_equal(TEST_DATA_FRAME, retrieved_df, check_like=True)

    @pytest.mark.parametrize(
        "job_proto,exception",
        [
            (
                GetJobResponse(
                    job=BatchRetrievalJob(
                        id="123",
                        type=JobType.JOB_TYPE_DOWNLOAD,
                        status=JobStatus.JOB_STATUS_DONE,
                        data_format=DataFormat.DATA_FORMAT_AVRO,
                        error="Testing job failure",
                    )
                ),
                Exception,
            ),
            (
                GetJobResponse(
                    job=BatchRetrievalJob(
                        id="123",
                        type=JobType.JOB_TYPE_DOWNLOAD,
                        status=JobStatus.JOB_STATUS_DONE,
                        data_format=DataFormat.DATA_FORMAT_INVALID,
                    )
                ),
                Exception,
            ),
        ],
        ids=["when_retrieve_job_fails", "when_data_format_is_not_avro"],
    )
    def test_to_dataframe_s3_file_staging_should_raise(
        self, retrieve_job, mocker, job_proto, exception
    ):
        """``to_dataframe`` raises when ``GetJob`` returns each parametrized response."""
        mocker.patch.object(
            retrieve_job.serving_stub,
            "GetJob",
            return_value=job_proto,
        )
        with raises(exception):
            retrieve_job.to_dataframe()
def test_get_batch_features(self, mock_client, mocker):
    """Run ``get_batch_features`` against patched Core and Serving stubs.

    Core's ``GetFeatureSet`` returns the ``customer_fs`` feature set spec,
    while Serving's ``GetBatchFeatures`` and ``GetJob`` both report a
    finished download job backed by a local Avro file, and
    ``GetFeastServingInfo`` reports a batch serving type with a local
    staging directory. The test asserts the returned job's id and status and
    that both feature columns read back equal the ones written.
    """
    mock_client._serving_service_stub = Serving.ServingServiceStub(
        grpc.insecure_channel("")
    )
    mock_client._core_service_stub = Core.CoreServiceStub(
        grpc.insecure_channel("")
    )

    mocker.patch.object(
        mock_client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(
            feature_set=FeatureSetSpec(
                name="customer_fs",
                version=1,
                entities=[
                    EntitySpec(
                        name="customer", value_type=ValueProto.ValueType.INT64
                    ),
                    EntitySpec(
                        name="transaction",
                        value_type=ValueProto.ValueType.INT64,
                    ),
                ],
                features=[
                    FeatureSpec(
                        name="customer_feature_1",
                        value_type=ValueProto.ValueType.FLOAT,
                    ),
                    FeatureSpec(
                        name="customer_feature_2",
                        value_type=ValueProto.ValueType.STRING,
                    ),
                ],
            )
        ),
    )

    feature_columns = [
        "customer_fs:1:customer_feature_1",
        "customer_fs:1:customer_feature_2",
    ]

    expected_dataframe = pd.DataFrame(
        {
            "datetime": [datetime.utcnow() for _ in range(3)],
            "customer": [1001, 1002, 1003],
            "transaction": [1001, 1002, 1003],
            "customer_fs:1:customer_feature_1": [1001, 1002, 1003],
            "customer_fs:1:customer_feature_2": [1001, 1002, 1003],
        }
    )

    # tempfile.mktemp() only hands back a name and is deprecated because
    # another process can claim it first; create the file for real and keep
    # it (delete=False) so to_avro can write to the path afterwards.
    with tempfile.NamedTemporaryFile(delete=False) as staged_file:
        final_results = staged_file.name
    to_avro(file_path_or_buffer=final_results, df=expected_dataframe)

    # Both GetBatchFeatures and GetJob report the same finished job.
    finished_job = BatchFeaturesJob(
        id="123",
        type=JobType.JOB_TYPE_DOWNLOAD,
        status=JobStatus.JOB_STATUS_DONE,
        file_uris=[f"file://{final_results}"],
        data_format=DataFormat.DATA_FORMAT_AVRO,
    )
    mocker.patch.object(
        mock_client._serving_service_stub,
        "GetBatchFeatures",
        return_value=GetBatchFeaturesResponse(job=finished_job),
    )
    mocker.patch.object(
        mock_client._serving_service_stub,
        "GetJob",
        return_value=GetJobResponse(job=finished_job),
    )
    mocker.patch.object(
        mock_client._serving_service_stub,
        "GetFeastServingInfo",
        return_value=GetFeastServingInfoResponse(
            job_staging_location=f"file://{tempfile.mkdtemp()}/",
            type=FeastServingType.FEAST_SERVING_TYPE_BATCH,
        ),
    )

    # pd.datetime was removed in pandas 2.0; use the datetime class directly.
    entity_rows = pd.DataFrame(
        {
            "datetime": [
                datetime.now(tz=timezone("Asia/Singapore")) for _ in range(3)
            ],
            "customer": [1001, 1002, 1003],
            "transaction": [1001, 1002, 1003],
        }
    )

    response = mock_client.get_batch_features(
        entity_rows=entity_rows,
        feature_ids=[
            "customer_fs:1:customer_feature_1",
            "customer_fs:1:customer_feature_2",
        ],
    )  # type: Job

    # Separate asserts so a failure names the condition that broke.
    assert response.id == "123"
    assert response.status == JobStatus.JOB_STATUS_DONE

    actual_dataframe = response.to_dataframe()

    assert actual_dataframe[feature_columns].equals(
        expected_dataframe[feature_columns]
    )