Exemplo n.º 1
0
    def test_feature_set_ingest_fail_if_pending(self, dataframe, exception,
                                                test_client, mocker):
        with pytest.raises(exception):
            test_client.set_project("project1")
            driver_fs = FeatureSet(
                "driver-feature-set",
                source=KafkaSource(brokers="kafka:9092", topic="test"),
            )
            driver_fs.add(Feature(name="feature_1", dtype=ValueType.FLOAT))
            driver_fs.add(Feature(name="feature_2", dtype=ValueType.STRING))
            driver_fs.add(Feature(name="feature_3", dtype=ValueType.INT64))
            driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64))

            # Register with Feast core
            test_client.apply(driver_fs)
            driver_fs = driver_fs.to_proto()
            driver_fs.meta.status = FeatureSetStatusProto.STATUS_PENDING

            mocker.patch.object(
                test_client._core_service_stub,
                "GetFeatureSet",
                return_value=GetFeatureSetResponse(feature_set=driver_fs),
            )

            # Need to create a mock producer
            with patch("feast.client.get_producer"):
                # Ingest data into Feast
                test_client.ingest("driver-feature-set", dataframe, timeout=1)
Exemplo n.º 2
0
    def test_feature_set_ingest_success(self, dataframe, client, mocker):
        client.set_project("project1")
        driver_fs = FeatureSet("driver-feature-set",
                               source=KafkaSource(brokers="kafka:9092",
                                                  topic="test"))
        driver_fs.add(Feature(name="feature_1", dtype=ValueType.FLOAT))
        driver_fs.add(Feature(name="feature_2", dtype=ValueType.STRING))
        driver_fs.add(Feature(name="feature_3", dtype=ValueType.INT64))
        driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64))

        # Register with Feast core
        client.apply(driver_fs)
        driver_fs = driver_fs.to_proto()
        driver_fs.meta.status = FeatureSetStatusProto.STATUS_READY

        mocker.patch.object(
            client._core_service_stub,
            "GetFeatureSet",
            return_value=GetFeatureSetResponse(feature_set=driver_fs),
        )

        # Need to create a mock producer
        with patch("feast.client.get_producer") as mocked_queue:
            # Ingest data into Feast
            client.ingest("driver-feature-set", dataframe)
Exemplo n.º 3
0
    def test_feature_set_ingest_throws_exception_if_kafka_down(
            self, dataframe, test_client, exception, mocker):

        test_client.set_project("project1")
        driver_fs = FeatureSet(
            "driver-feature-set",
            source=KafkaSource(brokers="localhost:4412", topic="test"),
        )
        driver_fs.add(Feature(name="feature_1", dtype=ValueType.FLOAT))
        driver_fs.add(Feature(name="feature_2", dtype=ValueType.STRING))
        driver_fs.add(Feature(name="feature_3", dtype=ValueType.INT64))
        driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64))

        # Register with Feast core
        test_client.apply(driver_fs)
        driver_fs = driver_fs.to_proto()
        driver_fs.meta.status = FeatureSetStatusProto.STATUS_READY

        mocker.patch.object(
            test_client._core_service_stub,
            "GetFeatureSet",
            return_value=GetFeatureSetResponse(feature_set=driver_fs),
        )

        with pytest.raises(exception):
            test_client.ingest("driver-feature-set", dataframe)
Exemplo n.º 4
0
    def test_feature_set_types_success(self, client, dataframe, mocker):

        all_types_fs = FeatureSet(
            name="all_types",
            entities=[Entity(name="user_id", dtype=ValueType.INT64)],
            features=[
                Feature(name="float_feature", dtype=ValueType.FLOAT),
                Feature(name="int64_feature", dtype=ValueType.INT64),
                Feature(name="int32_feature", dtype=ValueType.INT32),
                Feature(name="string_feature", dtype=ValueType.STRING),
                Feature(name="bytes_feature", dtype=ValueType.BYTES),
                Feature(name="bool_feature", dtype=ValueType.BOOL),
                Feature(name="double_feature", dtype=ValueType.DOUBLE),
                Feature(name="float_list_feature", dtype=ValueType.FLOAT_LIST),
                Feature(name="int64_list_feature", dtype=ValueType.INT64_LIST),
                Feature(name="int32_list_feature", dtype=ValueType.INT32_LIST),
                Feature(name="string_list_feature", dtype=ValueType.STRING_LIST),
                Feature(name="bytes_list_feature", dtype=ValueType.BYTES_LIST),
                Feature(name="bool_list_feature", dtype=ValueType.BOOL_LIST),
                Feature(name="double_list_feature", dtype=ValueType.DOUBLE_LIST),
            ],
            max_age=Duration(seconds=3600),
        )

        # Register with Feast core
        client.apply(all_types_fs)

        mocker.patch.object(
            client._core_service_stub,
            "GetFeatureSet",
            return_value=GetFeatureSetResponse(feature_set=all_types_fs.to_proto()),
        )

        # Ingest data into Feast
        client.ingest(all_types_fs, dataframe=dataframe)
Exemplo n.º 5
0
    def test_feature_set_ingest_success(self, dataframe, client, mocker):

        driver_fs = FeatureSet("driver-feature-set")
        driver_fs.add(Feature(name="feature_1", dtype=ValueType.FLOAT))
        driver_fs.add(Feature(name="feature_2", dtype=ValueType.STRING))
        driver_fs.add(Feature(name="feature_3", dtype=ValueType.INT64))
        driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64))

        driver_fs.source = KafkaSource(topic="feature-topic",
                                       brokers="127.0.0.1")

        client._message_producer = MagicMock()
        client._message_producer.produce = MagicMock()

        # Register with Feast core
        client.apply(driver_fs)

        mocker.patch.object(
            client._core_service_stub,
            "GetFeatureSet",
            return_value=GetFeatureSetResponse(
                feature_set=driver_fs.to_proto()),
        )

        # Ingest data into Feast
        client.ingest("driver-feature-set", dataframe=dataframe)
Exemplo n.º 6
0
    def test_get_feature_set(self, mocked_client, mocker):
        mocked_client._core_service_stub = Core.CoreServiceStub(
            grpc.insecure_channel("")
        )

        from google.protobuf.duration_pb2 import Duration

        mocker.patch.object(
            mocked_client._core_service_stub,
            "GetFeatureSet",
            return_value=GetFeatureSetResponse(
                feature_set=FeatureSetProto(
                    spec=FeatureSetSpecProto(
                        name="my_feature_set",
                        max_age=Duration(seconds=3600),
                        labels={"key1": "val1", "key2": "val2"},
                        features=[
                            FeatureSpecProto(
                                name="my_feature_1",
                                value_type=ValueProto.ValueType.FLOAT,
                            ),
                            FeatureSpecProto(
                                name="my_feature_2",
                                value_type=ValueProto.ValueType.FLOAT,
                            ),
                        ],
                        entities=[
                            EntitySpecProto(
                                name="my_entity_1",
                                value_type=ValueProto.ValueType.INT64,
                            )
                        ],
                        source=Source(
                            type=SourceType.KAFKA,
                            kafka_source_config=KafkaSourceConfig(
                                bootstrap_servers="localhost:9092", topic="topic"
                            ),
                        ),
                    ),
                    meta=FeatureSetMetaProto(),
                )
            ),
        )
        mocked_client.set_project("my_project")
        feature_set = mocked_client.get_feature_set("my_feature_set")

        assert (
            feature_set.name == "my_feature_set"
            and "key1" in feature_set.labels
            and feature_set.labels["key1"] == "val1"
            and "key2" in feature_set.labels
            and feature_set.labels["key2"] == "val2"
            and feature_set.fields["my_feature_1"].name == "my_feature_1"
            and feature_set.fields["my_feature_1"].dtype == ValueType.FLOAT
            and feature_set.fields["my_entity_1"].name == "my_entity_1"
            and feature_set.fields["my_entity_1"].dtype == ValueType.INT64
            and len(feature_set.features) == 2
            and len(feature_set.entities) == 1
        )
Exemplo n.º 7
0
    def test_get_feature_set(self, mock_client, mocker):
        mock_client._core_service_stub = Core.CoreServiceStub(grpc.insecure_channel(""))

        from google.protobuf.duration_pb2 import Duration

        mocker.patch.object(
            mock_client._core_service_stub,
            "GetFeatureSet",
            return_value=GetFeatureSetResponse(
                feature_set=FeatureSetSpec(
                    name="my_feature_set",
                    version=2,
                    max_age=Duration(seconds=3600),
                    features=[
                        FeatureSpec(
                            name="my_feature_1", value_type=ValueProto.ValueType.FLOAT
                        ),
                        FeatureSpec(
                            name="my_feature_2", value_type=ValueProto.ValueType.FLOAT
                        ),
                    ],
                    entities=[
                        EntitySpec(
                            name="my_entity_1", value_type=ValueProto.ValueType.INT64
                        )
                    ],
                    source=Source(
                        type=SourceType.KAFKA,
                        kafka_source_config=KafkaSourceConfig(
                            bootstrap_servers="localhost:9092", topic="topic"
                        ),
                    ),
                )
            ),
        )

        feature_set = mock_client.get_feature_set("my_feature_set", version=2)

        assert (
            feature_set.name == "my_feature_set"
            and feature_set.version == 2
            and feature_set.fields["my_feature_1"].name == "my_feature_1"
            and feature_set.fields["my_feature_1"].dtype == ValueType.FLOAT
            and feature_set.fields["my_entity_1"].name == "my_entity_1"
            and feature_set.fields["my_entity_1"].dtype == ValueType.INT64
            and len(feature_set.features) == 2
            and len(feature_set.entities) == 1
        )
Exemplo n.º 8
0
    def test_feature_set_types_success(self, test_client, dataframe, mocker):

        test_client.set_project("project1")

        all_types_fs = FeatureSet(
            name="all_types",
            entities=[Entity(name="user_id", dtype=ValueType.INT64)],
            features=[
                Feature(name="float_feature", dtype=ValueType.FLOAT),
                Feature(name="int64_feature", dtype=ValueType.INT64),
                Feature(name="int32_feature", dtype=ValueType.INT32),
                Feature(name="string_feature", dtype=ValueType.STRING),
                Feature(name="bytes_feature", dtype=ValueType.BYTES),
                Feature(name="bool_feature", dtype=ValueType.BOOL),
                Feature(name="double_feature", dtype=ValueType.DOUBLE),
                Feature(name="float_list_feature", dtype=ValueType.FLOAT_LIST),
                Feature(name="int64_list_feature", dtype=ValueType.INT64_LIST),
                Feature(name="int32_list_feature", dtype=ValueType.INT32_LIST),
                Feature(name="string_list_feature",
                        dtype=ValueType.STRING_LIST),
                Feature(name="bytes_list_feature", dtype=ValueType.BYTES_LIST),
                # Feature(name="bool_list_feature",
                # dtype=ValueType.BOOL_LIST), # TODO: Add support for this
                #  type again https://github.com/feast-dev/feast/issues/341
                Feature(name="double_list_feature",
                        dtype=ValueType.DOUBLE_LIST),
            ],
            max_age=Duration(seconds=3600),
        )

        # Register with Feast core
        test_client.apply(all_types_fs)

        mocker.patch.object(
            test_client._core_service_stub,
            "GetFeatureSet",
            return_value=GetFeatureSetResponse(
                feature_set=all_types_fs.to_proto()),
        )

        # Need to create a mock producer
        with patch("feast.client.get_producer"):
            # Ingest data into Feast
            test_client.ingest(all_types_fs, dataframe)
Exemplo n.º 9
0
    def test_feature_set_ingest_success(self, dataframe, client, mocker):

        driver_fs = FeatureSet("driver-feature-set")
        driver_fs.add(Feature(name="feature_1", dtype=ValueType.FLOAT))
        driver_fs.add(Feature(name="feature_2", dtype=ValueType.STRING))
        driver_fs.add(Feature(name="feature_3", dtype=ValueType.INT64))
        driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64))

        # Register with Feast core
        client.apply(driver_fs)

        mocker.patch.object(
            client._core_service_stub,
            "GetFeatureSet",
            return_value=GetFeatureSetResponse(feature_set=driver_fs.to_proto()),
        )

        # Ingest data into Feast
        client.ingest("driver-feature-set", dataframe=dataframe)
Exemplo n.º 10
0
    def test_get_batch_features(self, mocked_client, mocker):

        mocked_client._serving_service_stub = Serving.ServingServiceStub(
            grpc.insecure_channel(""))
        mocked_client._core_service_stub = Core.CoreServiceStub(
            grpc.insecure_channel(""))

        mocker.patch.object(
            mocked_client._core_service_stub,
            "GetFeatureSet",
            return_value=GetFeatureSetResponse(feature_set=FeatureSetProto(
                spec=FeatureSetSpecProto(
                    name="driver",
                    project="driver_project",
                    entities=[
                        EntitySpecProto(name="driver",
                                        value_type=ValueProto.ValueType.INT64),
                        EntitySpecProto(
                            name="transaction",
                            value_type=ValueProto.ValueType.INT64,
                        ),
                    ],
                    features=[
                        FeatureSpecProto(
                            name="driver_id",
                            value_type=ValueProto.ValueType.FLOAT,
                        ),
                        FeatureSpecProto(
                            name="driver_name",
                            value_type=ValueProto.ValueType.STRING,
                        ),
                    ],
                ),
                meta=FeatureSetMetaProto(
                    status=FeatureSetStatusProto.STATUS_READY),
            )),
        )

        expected_dataframe = pd.DataFrame({
            "datetime": [datetime.utcnow() for _ in range(3)],
            "driver": [1001, 1002, 1003],
            "transaction": [1001, 1002, 1003],
            "driver_id": [1001, 1002, 1003],
        })

        final_results = tempfile.mktemp()
        pandavro.to_avro(file_path_or_buffer=final_results,
                         df=expected_dataframe)

        mocker.patch.object(
            mocked_client._serving_service_stub,
            "GetBatchFeatures",
            return_value=GetBatchFeaturesResponse(job=BatchRetrievalJob(
                id="123",
                type=JobType.JOB_TYPE_DOWNLOAD,
                status=JobStatus.JOB_STATUS_DONE,
                file_uris=[f"file://{final_results}"],
                data_format=DataFormat.DATA_FORMAT_AVRO,
            )),
        )

        mocker.patch.object(
            mocked_client._serving_service_stub,
            "GetJob",
            return_value=GetJobResponse(job=BatchRetrievalJob(
                id="123",
                type=JobType.JOB_TYPE_DOWNLOAD,
                status=JobStatus.JOB_STATUS_DONE,
                file_uris=[f"file://{final_results}"],
                data_format=DataFormat.DATA_FORMAT_AVRO,
            )),
        )

        mocker.patch.object(
            mocked_client._serving_service_stub,
            "GetFeastServingInfo",
            return_value=GetFeastServingInfoResponse(
                job_staging_location=f"file://{tempfile.mkdtemp()}/",
                type=FeastServingType.FEAST_SERVING_TYPE_BATCH,
            ),
        )

        mocked_client.set_project("project1")
        # TODO: Abstract away GCS client and GCP dependency
        # NOTE: Feast Serving does not allow for feature references
        # that specify the same feature in the same request.
        with patch("google.cloud.storage.Client"):
            response = mocked_client.get_batch_features(
                entity_rows=pd.DataFrame({
                    "datetime": [
                        pd.datetime.now(tz=timezone("Asia/Singapore"))
                        for _ in range(3)
                    ],
                    "driver": [1001, 1002, 1003],
                    "transaction": [1001, 1002, 1003],
                }),
                feature_refs=["driver:driver_id", "driver_id"],
                project="driver_project",
            )  # Type: GetBatchFeaturesResponse

        assert response.id == "123" and response.status == JobStatus.JOB_STATUS_DONE

        actual_dataframe = response.to_dataframe()

        assert actual_dataframe[["driver_id"
                                 ]].equals(expected_dataframe[["driver_id"]])
Exemplo n.º 11
0
    def test_get_batch_features(self, mock_client, mocker):

        mock_client._serving_service_stub = Serving.ServingServiceStub(
            grpc.insecure_channel("")
        )
        mock_client._core_service_stub = Core.CoreServiceStub(grpc.insecure_channel(""))

        mocker.patch.object(
            mock_client._core_service_stub,
            "GetFeatureSet",
            return_value=GetFeatureSetResponse(
                feature_set=FeatureSetSpec(
                    name="customer_fs",
                    version=1,
                    entities=[
                        EntitySpec(
                            name="customer", value_type=ValueProto.ValueType.INT64
                        ),
                        EntitySpec(
                            name="transaction", value_type=ValueProto.ValueType.INT64
                        ),
                    ],
                    features=[
                        FeatureSpec(
                            name="customer_feature_1",
                            value_type=ValueProto.ValueType.FLOAT,
                        ),
                        FeatureSpec(
                            name="customer_feature_2",
                            value_type=ValueProto.ValueType.STRING,
                        ),
                    ],
                )
            ),
        )

        expected_dataframe = pd.DataFrame(
            {
                "datetime": [datetime.utcnow() for _ in range(3)],
                "customer": [1001, 1002, 1003],
                "transaction": [1001, 1002, 1003],
                "customer_fs:1:customer_feature_1": [1001, 1002, 1003],
                "customer_fs:1:customer_feature_2": [1001, 1002, 1003],
            }
        )

        final_results = tempfile.mktemp()
        to_avro(file_path_or_buffer=final_results, df=expected_dataframe)

        mocker.patch.object(
            mock_client._serving_service_stub,
            "GetBatchFeatures",
            return_value=GetBatchFeaturesResponse(
                job=BatchFeaturesJob(
                    id="123",
                    type=JobType.JOB_TYPE_DOWNLOAD,
                    status=JobStatus.JOB_STATUS_DONE,
                    file_uris=[f"file://{final_results}"],
                    data_format=DataFormat.DATA_FORMAT_AVRO,
                )
            ),
        )

        mocker.patch.object(
            mock_client._serving_service_stub,
            "GetJob",
            return_value=GetJobResponse(
                job=BatchFeaturesJob(
                    id="123",
                    type=JobType.JOB_TYPE_DOWNLOAD,
                    status=JobStatus.JOB_STATUS_DONE,
                    file_uris=[f"file://{final_results}"],
                    data_format=DataFormat.DATA_FORMAT_AVRO,
                )
            ),
        )

        mocker.patch.object(
            mock_client._serving_service_stub,
            "GetFeastServingInfo",
            return_value=GetFeastServingInfoResponse(
                job_staging_location=f"file://{tempfile.mkdtemp()}/",
                type=FeastServingType.FEAST_SERVING_TYPE_BATCH,
            ),
        )

        response = mock_client.get_batch_features(
            entity_rows=pd.DataFrame(
                {
                    "datetime": [
                        pd.datetime.now(tz=timezone("Asia/Singapore")) for _ in range(3)
                    ],
                    "customer": [1001, 1002, 1003],
                    "transaction": [1001, 1002, 1003],
                }
            ),
            feature_ids=[
                "customer_fs:1:customer_feature_1",
                "customer_fs:1:customer_feature_2",
            ],
        )  # type: Job

        assert response.id == "123" and response.status == JobStatus.JOB_STATUS_DONE

        actual_dataframe = response.to_dataframe()

        assert actual_dataframe[
            ["customer_fs:1:customer_feature_1", "customer_fs:1:customer_feature_2"]
        ].equals(
            expected_dataframe[
                ["customer_fs:1:customer_feature_1", "customer_fs:1:customer_feature_2"]
            ]
        )