Пример #1
0
    def test_feature_set_ingest_throws_exception_if_kafka_down(
            self, dataframe, test_client, exception, mocker):
        """Ingesting into a READY feature set must raise ``exception``.

        The feature set's Kafka source points at ``localhost:4412`` and no
        producer is mocked in this test, so the ingest call is expected to
        fail (presumably because nothing listens on that port -- confirm
        against the test environment).
        """
        test_client.set_project("project1")

        kafka_source = KafkaSource(brokers="localhost:4412", topic="test")
        feature_set = FeatureSet("driver-feature-set", source=kafka_source)
        for field in (
                Feature(name="feature_1", dtype=ValueType.FLOAT),
                Feature(name="feature_2", dtype=ValueType.STRING),
                Feature(name="feature_3", dtype=ValueType.INT64),
                Entity(name="entity_id", dtype=ValueType.INT64),
        ):
            feature_set.add(field)

        # Register with Feast core
        test_client.apply(feature_set)

        # Make the core stub answer with the proto form, flagged as READY.
        ready_proto = feature_set.to_proto()
        ready_proto.meta.status = FeatureSetStatusProto.STATUS_READY
        mocker.patch.object(
            test_client._core_service_stub,
            "GetFeatureSet",
            return_value=GetFeatureSetResponse(feature_set=ready_proto),
        )

        with pytest.raises(exception):
            test_client.ingest("driver-feature-set", dataframe)
Пример #2
0
    def test_apply_feature_set_success(self, client):
        """Apply two feature sets and verify what the client lists back."""
        # Build the first feature set: two features and one entity.
        first_set = FeatureSet("my-feature-set-1")
        for field in (
                Feature(name="fs1-my-feature-1", dtype=ValueType.INT64),
                Feature(name="fs1-my-feature-2", dtype=ValueType.STRING),
                Entity(name="fs1-my-entity-1", dtype=ValueType.INT64),
        ):
            first_set.add(field)

        # Build the second feature set with list-typed features.
        second_set = FeatureSet("my-feature-set-2")
        for field in (
                Feature(name="fs2-my-feature-1", dtype=ValueType.STRING_LIST),
                Feature(name="fs2-my-feature-2", dtype=ValueType.BYTES_LIST),
                Entity(name="fs2-my-entity-1", dtype=ValueType.INT64),
        ):
            second_set.add(field)

        # Register both feature sets with Core, in declaration order.
        for feature_set in (first_set, second_set):
            client.apply(feature_set)

        listed = client.list_feature_sets()

        # One assertion per expectation so a failure names the broken check.
        assert len(listed) == 2
        assert listed[0].name == "my-feature-set-1"
        assert listed[0].features[0].name == "fs1-my-feature-1"
        assert listed[0].features[0].dtype == ValueType.INT64
        assert listed[1].features[1].dtype == ValueType.BYTES_LIST
Пример #3
0
    def test_feature_set_ingest_fail_if_pending(self, dataframe, exception,
                                                test_client, mocker):
        """Ingesting into a feature set that is still PENDING must raise.

        The core stub is patched to report the feature set with
        ``STATUS_PENDING`` and the Kafka producer factory is mocked out, so
        the ``ingest`` call (given ``timeout=1``) is the only statement
        expected to raise ``exception``.
        """
        test_client.set_project("project1")
        driver_fs = FeatureSet(
            "driver-feature-set",
            source=KafkaSource(brokers="kafka:9092", topic="test"),
        )
        driver_fs.add(Feature(name="feature_1", dtype=ValueType.FLOAT))
        driver_fs.add(Feature(name="feature_2", dtype=ValueType.STRING))
        driver_fs.add(Feature(name="feature_3", dtype=ValueType.INT64))
        driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64))

        # Register with Feast core
        test_client.apply(driver_fs)
        driver_fs = driver_fs.to_proto()
        driver_fs.meta.status = FeatureSetStatusProto.STATUS_PENDING

        mocker.patch.object(
            test_client._core_service_stub,
            "GetFeatureSet",
            return_value=GetFeatureSetResponse(feature_set=driver_fs),
        )

        # Need to create a mock producer
        with patch("feast.client.get_producer"):
            # Keep the setup outside pytest.raises: an error of the expected
            # type raised while building or registering the feature set must
            # fail the test instead of silently satisfying it.
            with pytest.raises(exception):
                # Ingest data into Feast
                test_client.ingest("driver-feature-set", dataframe, timeout=1)
Пример #4
0
    def test_feature_set_ingest_success(self, dataframe, client, mocker):
        """Ingest a dataframe into a READY feature set with a mocked producer.

        The test passes when ``client.ingest`` returns without raising; it
        makes no assertions on the produced messages.
        """
        client.set_project("project1")
        driver_fs = FeatureSet("driver-feature-set",
                               source=KafkaSource(brokers="kafka:9092",
                                                  topic="test"))
        driver_fs.add(Feature(name="feature_1", dtype=ValueType.FLOAT))
        driver_fs.add(Feature(name="feature_2", dtype=ValueType.STRING))
        driver_fs.add(Feature(name="feature_3", dtype=ValueType.INT64))
        driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64))

        # Register with Feast core
        client.apply(driver_fs)
        driver_fs = driver_fs.to_proto()
        driver_fs.meta.status = FeatureSetStatusProto.STATUS_READY

        mocker.patch.object(
            client._core_service_stub,
            "GetFeatureSet",
            return_value=GetFeatureSetResponse(feature_set=driver_fs),
        )

        # Need to create a mock producer; the mock itself is never inspected,
        # so it is not bound to a name.
        with patch("feast.client.get_producer"):
            # Ingest data into Feast
            client.ingest("driver-feature-set", dataframe)
Пример #5
0
    def test_feature_set_ingest_success(self, dataframe, client, mocker):
        """Ingest a dataframe through a client whose producer is a mock.

        The test passes when ``client.ingest`` returns without raising.
        """
        feature_set = FeatureSet("driver-feature-set")
        for field in (
                Feature(name="feature_1", dtype=ValueType.FLOAT),
                Feature(name="feature_2", dtype=ValueType.STRING),
                Feature(name="feature_3", dtype=ValueType.INT64),
                Entity(name="entity_id", dtype=ValueType.INT64),
        ):
            feature_set.add(field)

        feature_set.source = KafkaSource(
            topic="feature-topic",
            brokers="127.0.0.1",
        )

        # Swap the client's message producer for a mock.
        client._message_producer = MagicMock()
        client._message_producer.produce = MagicMock()

        # Register with Feast core
        client.apply(feature_set)

        # Have the core stub hand back this exact feature set definition.
        stubbed_response = GetFeatureSetResponse(
            feature_set=feature_set.to_proto())
        mocker.patch.object(
            client._core_service_stub,
            "GetFeatureSet",
            return_value=stubbed_response,
        )

        # Ingest data into Feast
        client.ingest("driver-feature-set", dataframe=dataframe)
Пример #6
0
def test_unequal_feature_based_on_labels():
    """Feature equality must take the ``labels`` mapping into account."""

    def make_feature(**extra):
        # All features share name and dtype; only the labels vary.
        return Feature(name="feature-1", dtype=ValueType.INT64, **extra)

    labelled = make_feature(labels={"k1": "v1"})
    same_labels = make_feature(labels={"k1": "v1"})
    unlabelled = make_feature()
    different_value = make_feature(labels={"k1": "notv1"})

    # Identical labels compare equal ...
    assert labelled == same_labels
    # ... while missing labels or a different label value do not.
    assert labelled != unlabelled
    assert labelled != different_value
Пример #7
0
    def test_import_tfx_schema(self):
        """Importing a TFX schema must populate presence, shape and domains.

        Before the import every entity and feature has no presence, shape or
        domain set; after ``import_tfx_schema`` each has presence and shape,
        and string/float features carry the matching domain.
        """
        schema_path = (pathlib.Path(__file__).parent / "data" /
                       "tensorflow_metadata" / "bikeshare_schema.json")
        # Path.read_text() closes the file, unlike a bare open(...).read().
        test_input_schema_json = schema_path.read_text()
        test_input_schema = schema_pb2.Schema()
        json_format.Parse(test_input_schema_json, test_input_schema)

        feature_set = FeatureSet(
            name="bikeshare",
            entities=[Entity(name="station_id", dtype=ValueType.INT64)],
            features=[
                Feature(name="name", dtype=ValueType.STRING),
                Feature(name="status", dtype=ValueType.STRING),
                Feature(name="latitude", dtype=ValueType.FLOAT),
                Feature(name="longitude", dtype=ValueType.FLOAT),
                Feature(name="location", dtype=ValueType.STRING),
            ],
        )

        # Before update
        for entity in feature_set.entities:
            assert entity.presence is None
            assert entity.shape is None
        for feature in feature_set.features:
            assert feature.presence is None
            assert feature.shape is None
            assert feature.string_domain is None
            assert feature.float_domain is None
            assert feature.int_domain is None

        feature_set.import_tfx_schema(test_input_schema)

        # After update
        for entity in feature_set.entities:
            assert entity.presence is not None
            assert entity.shape is not None
        for feature in feature_set.features:
            assert feature.presence is not None
            assert feature.shape is not None
            if feature.name in ["location", "name", "status"]:
                assert feature.string_domain is not None
            elif feature.name in ["latitude", "longitude"]:
                assert feature.float_domain is not None
            elif feature.name in ["station_id"]:
                # NOTE(review): "station_id" is declared as an entity above,
                # so this branch does not appear reachable from the feature
                # loop -- confirm whether the check belongs in the entity loop.
                assert feature.int_domain is not None
Пример #8
0
    def test_feature_set_ingest_success(self, dataframe, client, mocker):
        """Ingest a dataframe into a feature set registered with the client.

        The test passes when ``client.ingest`` returns without raising.
        """
        feature_set = FeatureSet("driver-feature-set")
        for field in (
                Feature(name="feature_1", dtype=ValueType.FLOAT),
                Feature(name="feature_2", dtype=ValueType.STRING),
                Feature(name="feature_3", dtype=ValueType.INT64),
                Entity(name="entity_id", dtype=ValueType.INT64),
        ):
            feature_set.add(field)

        # Register with Feast core
        client.apply(feature_set)

        # Have the core stub hand back this exact feature set definition.
        stubbed_response = GetFeatureSetResponse(
            feature_set=feature_set.to_proto())
        mocker.patch.object(
            client._core_service_stub,
            "GetFeatureSet",
            return_value=stubbed_response,
        )

        # Ingest data into Feast
        client.ingest("driver-feature-set", dataframe=dataframe)
Пример #9
0
def test_feature_class_contains_labels():
    """Labels given to a Feature are readable once it is added to a set."""
    feature_set = FeatureSet(
        "my-feature-set",
        labels={"key1": "val1", "key2": "val2"},
    )
    labelled_feature = Feature(
        name="my-feature-1",
        dtype=ValueType.INT64,
        labels={"feature_key1": "feature_val1"},
    )
    feature_set.add(labelled_feature)

    stored_labels = feature_set.features[0].labels
    assert "feature_key1" in stored_labels.keys()
    assert stored_labels["feature_key1"] == "feature_val1"
Пример #10
0
    def test_feature_set_import_export_yaml(self):
        """A feature set must survive a ``to_yaml`` / ``from_yaml`` round trip."""
        station_entity = Entity(name="station_id", dtype=ValueType.INT64)
        bikeshare_features = [
            Feature(name="name", dtype=ValueType.STRING),
            Feature(name="longitude", dtype=ValueType.FLOAT),
            Feature(name="location", dtype=ValueType.STRING),
        ]
        original = FeatureSet(
            name="bikeshare",
            entities=[station_entity],
            features=bikeshare_features,
        )

        # Serialise to a YAML string, then rebuild a feature set from it.
        restored = FeatureSet.from_yaml(original.to_yaml())

        # The rebuilt feature set must compare equal to the original.
        assert original == restored
Пример #11
0
    def test_feature_set_ingest_success(self, dataframe, client):
        """Ingest via the feature set itself and check the producer was used."""
        feature_set = FeatureSet("driver-feature-set")
        for field in (
                Feature(name="feature_1", dtype=ValueType.FLOAT),
                Feature(name="feature_2", dtype=ValueType.STRING),
                Feature(name="feature_3", dtype=ValueType.INT64),
                Entity(name="entity_id", dtype=ValueType.INT64),
        ):
            feature_set.add(field)

        feature_set.source = KafkaSource(
            topic="feature-topic",
            brokers="127.0.0.1",
        )
        # Replace the feature set's message producer with a mock.
        feature_set._message_producer = MagicMock()
        feature_set._message_producer.send = MagicMock()

        # Register with Feast core
        client.apply(feature_set)

        # Ingest data into Feast
        feature_set.ingest(dataframe=dataframe)

        # The mocked producer's send() must have been invoked at least once.
        feature_set._message_producer.send.assert_called()
Пример #12
0
 def test_add_features_from_df_success(
     self,
     dataframe,
     feature_count,
     entity_count,
     discard_unused_fields,
     features,
     entities,
 ):
     """Infer fields from ``dataframe`` and check the resulting field counts.

     The feature set starts with one placeholder feature and one placeholder
     entity; the expected counts are supplied by the caller.
     """
     placeholder_features = [Feature(name="dummy_f1", dtype=ValueType.INT64)]
     placeholder_entities = [
         Entity(name="dummy_entity_1", dtype=ValueType.INT64)
     ]
     feature_set = FeatureSet(
         name="my_feature_set",
         features=placeholder_features,
         entities=placeholder_entities,
     )

     feature_set.infer_fields_from_df(
         dataframe,
         discard_unused_fields=discard_unused_fields,
         features=features,
         entities=entities,
     )

     actual_feature_count = len(feature_set.features)
     actual_entity_count = len(feature_set.entities)
     assert actual_feature_count == feature_count
     assert actual_entity_count == entity_count
Пример #13
0
    def test_feature_set_types_success(self, test_client, dataframe, mocker):
        """Ingest a dataframe into a feature set covering many value types.

        The test passes when ``test_client.ingest`` returns without raising;
        the Kafka producer factory is patched out.
        """
        test_client.set_project("project1")

        # (feature name, value type) pairs, in registration order.
        typed_columns = [
            ("float_feature", ValueType.FLOAT),
            ("int64_feature", ValueType.INT64),
            ("int32_feature", ValueType.INT32),
            ("string_feature", ValueType.STRING),
            ("bytes_feature", ValueType.BYTES),
            ("bool_feature", ValueType.BOOL),
            ("double_feature", ValueType.DOUBLE),
            ("float_list_feature", ValueType.FLOAT_LIST),
            ("int64_list_feature", ValueType.INT64_LIST),
            ("int32_list_feature", ValueType.INT32_LIST),
            ("string_list_feature", ValueType.STRING_LIST),
            ("bytes_list_feature", ValueType.BYTES_LIST),
            # TODO: Re-add ("bool_list_feature", ValueType.BOOL_LIST) once
            #  this type is supported again:
            #  https://github.com/feast-dev/feast/issues/341
            ("double_list_feature", ValueType.DOUBLE_LIST),
        ]

        all_types_fs = FeatureSet(
            name="all_types",
            entities=[Entity(name="user_id", dtype=ValueType.INT64)],
            features=[
                Feature(name=column, dtype=value_type)
                for column, value_type in typed_columns
            ],
            max_age=Duration(seconds=3600),
        )

        # Register with Feast core
        test_client.apply(all_types_fs)

        # Have the core stub hand back this exact feature set definition.
        stubbed_response = GetFeatureSetResponse(
            feature_set=all_types_fs.to_proto())
        mocker.patch.object(
            test_client._core_service_stub,
            "GetFeatureSet",
            return_value=stubbed_response,
        )

        # Need to create a mock producer
        with patch("feast.client.get_producer"):
            # Ingest data into Feast
            test_client.ingest(all_types_fs, dataframe)
Пример #14
0
class TestFeatureSet:
    """Tests for adding, dropping and inferring fields on ``FeatureSet``."""

    @pytest.fixture(scope="function")
    def server(self):
        """Run a gRPC server hosting ``CoreServicer`` on port 50051 per test."""
        server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
        Core.add_CoreServiceServicer_to_server(CoreServicer(), server)
        server.add_insecure_port("[::]:50051")
        server.start()
        yield server
        # Teardown: stop the server with a grace argument of 0.
        server.stop(0)

    @pytest.fixture
    def client(self, server):
        """Return a ``Client`` for ``localhost:50051``.

        Requesting the ``server`` fixture ensures it has been started first.
        """
        return Client(core_url="localhost:50051")

    def test_add_remove_features_success(self):
        """Dropping one of two added features leaves only the other."""
        fs = FeatureSet("my-feature-set")
        fs.add(Feature(name="my-feature-1", dtype=ValueType.INT64))
        fs.add(Feature(name="my-feature-2", dtype=ValueType.INT64))
        fs.drop(name="my-feature-1")
        # Separate asserts so a failure names the broken expectation.
        assert len(fs.features) == 1
        assert fs.features[0].name == "my-feature-2"

    def test_remove_feature_failure(self):
        """Dropping a feature that was never added raises ``ValueError``."""
        fs = FeatureSet("my-feature-set")
        # Only the drop() call may satisfy the expectation; a ValueError from
        # the constructor must fail the test instead of passing it.
        with pytest.raises(ValueError):
            fs.drop(name="my-feature-1")

    def test_update_from_source_failure(self):
        """Inferring fields from an empty dataframe raises."""
        df = pd.DataFrame()
        fs = FeatureSet("driver-feature-set")
        # Keep setup outside the context manager so that only the inference
        # call itself can satisfy the expectation.
        with pytest.raises(Exception):
            fs.infer_fields_from_df(df)

    @pytest.mark.parametrize(
        "dataframe,feature_count,entity_count,discard_unused_fields,features,entities",
        [
            (
                dataframes.GOOD,
                3,
                1,
                True,
                [],
                [Entity(name="entity_id", dtype=ValueType.INT64)],
            ),
            (
                dataframes.GOOD_FIVE_FEATURES,
                5,
                1,
                True,
                [],
                [Entity(name="entity_id", dtype=ValueType.INT64)],
            ),
            (
                dataframes.GOOD_FIVE_FEATURES,
                6,
                1,
                True,
                [Feature(name="feature_6", dtype=ValueType.INT64)],
                [Entity(name="entity_id", dtype=ValueType.INT64)],
            ),
            (
                dataframes.GOOD_FIVE_FEATURES_TWO_ENTITIES,
                5,
                2,
                True,
                [],
                [
                    Entity(name="entity_1_id", dtype=ValueType.INT64),
                    Entity(name="entity_2_id", dtype=ValueType.INT64),
                ],
            ),
            (
                dataframes.GOOD_FIVE_FEATURES_TWO_ENTITIES,
                6,
                3,
                False,
                [],
                [
                    Entity(name="entity_1_id", dtype=ValueType.INT64),
                    Entity(name="entity_2_id", dtype=ValueType.INT64),
                ],
            ),
            (
                dataframes.NO_FEATURES,
                0,
                1,
                True,
                [],
                [Entity(name="entity_id", dtype=ValueType.INT64)],
            ),
            (
                # A dataframe holding only a timezone-aware datetime column.
                # datetime.now(tz) replaces the deprecated
                # utcnow().replace(tzinfo=...) and yields the same aware value.
                pd.DataFrame({
                    "datetime": [
                        datetime.now(pytz.utc)
                        for _ in range(3)
                    ]
                }),
                0,
                0,
                True,
                [],
                [],
            ),
        ],
        ids=[
            "Test small dataframe update with hardcoded entity",
            "Test larger dataframe update with hardcoded entity",
            "Test larger dataframe update with hardcoded entity and feature",
            "Test larger dataframe update with two hardcoded entities and discarding of existing fields",
            "Test larger dataframe update with two hardcoded entities and retention of existing fields",
            "Test dataframe with no featuresdataframe",
            "Test empty dataframe",
        ],
    )
    def test_add_features_from_df_success(
        self,
        dataframe,
        feature_count,
        entity_count,
        discard_unused_fields,
        features,
        entities,
    ):
        """Infer fields from ``dataframe`` and check the resulting counts.

        The feature set starts with one placeholder feature and one
        placeholder entity; each parametrized case supplies the expected
        number of features and entities after inference.
        """
        my_feature_set = FeatureSet(
            name="my_feature_set",
            features=[Feature(name="dummy_f1", dtype=ValueType.INT64)],
            entities=[Entity(name="dummy_entity_1", dtype=ValueType.INT64)],
        )
        my_feature_set.infer_fields_from_df(
            dataframe,
            discard_unused_fields=discard_unused_fields,
            features=features,
            entities=entities,
        )
        assert len(my_feature_set.features) == feature_count
        assert len(my_feature_set.entities) == entity_count
Пример #15
0
def test_unequal_feature_other_has_no_labels():
    """A labelled feature must not equal an otherwise identical unlabelled one."""
    shared_fields = dict(name="feature-1", dtype=ValueType.INT64)
    labelled = Feature(labels={"k1": "v1"}, **shared_fields)
    unlabelled = Feature(**shared_fields)
    assert labelled != unlabelled
Пример #16
0
    def test_feature_set_types_success(self, client, dataframe):
        """Ingest many value types via the feature set's own ``ingest``.

        The feature set's message producer is replaced by a mock, and the
        test asserts that its ``send`` method was called.
        """
        # (feature name, value type) pairs, in registration order.
        typed_columns = [
            ("float_feature", ValueType.FLOAT),
            ("int64_feature", ValueType.INT64),
            ("int32_feature", ValueType.INT32),
            ("string_feature", ValueType.STRING),
            ("bytes_feature", ValueType.BYTES),
            ("bool_feature", ValueType.BOOL),
            ("double_feature", ValueType.DOUBLE),
            ("float_list_feature", ValueType.FLOAT_LIST),
            ("int64_list_feature", ValueType.INT64_LIST),
            ("int32_list_feature", ValueType.INT32_LIST),
            ("string_list_feature", ValueType.STRING_LIST),
            ("bytes_list_feature", ValueType.BYTES_LIST),
            ("bool_list_feature", ValueType.BOOL_LIST),
            ("double_list_feature", ValueType.DOUBLE_LIST),
        ]

        all_types_fs = FeatureSet(
            name="all_types",
            entities=[Entity(name="user_id", dtype=ValueType.INT64)],
            features=[
                Feature(name=column, dtype=value_type)
                for column, value_type in typed_columns
            ],
            max_age=Duration(seconds=3600),
        )

        all_types_fs.source = KafkaSource(
            topic="feature-topic",
            brokers="127.0.0.1",
        )
        # Replace the feature set's message producer with a mock.
        all_types_fs._message_producer = MagicMock()
        all_types_fs._message_producer.send = MagicMock()

        # Register with Feast core
        client.apply(all_types_fs)

        # Ingest data into Feast
        all_types_fs.ingest(dataframe=dataframe)

        # The mocked producer's send() must have been invoked at least once.
        all_types_fs._message_producer.send.assert_called()
Пример #17
0
    def test_feature_set_types_success(self, client, dataframe, mocker):
        """Ingest a dataframe into a feature set covering many value types.

        The test passes when ``client.ingest`` returns without raising.
        """
        # (feature name, value type) pairs, in registration order.
        typed_columns = [
            ("float_feature", ValueType.FLOAT),
            ("int64_feature", ValueType.INT64),
            ("int32_feature", ValueType.INT32),
            ("string_feature", ValueType.STRING),
            ("bytes_feature", ValueType.BYTES),
            ("bool_feature", ValueType.BOOL),
            ("double_feature", ValueType.DOUBLE),
            ("float_list_feature", ValueType.FLOAT_LIST),
            ("int64_list_feature", ValueType.INT64_LIST),
            ("int32_list_feature", ValueType.INT32_LIST),
            ("string_list_feature", ValueType.STRING_LIST),
            ("bytes_list_feature", ValueType.BYTES_LIST),
            ("bool_list_feature", ValueType.BOOL_LIST),
            ("double_list_feature", ValueType.DOUBLE_LIST),
        ]

        all_types_fs = FeatureSet(
            name="all_types",
            entities=[Entity(name="user_id", dtype=ValueType.INT64)],
            features=[
                Feature(name=column, dtype=value_type)
                for column, value_type in typed_columns
            ],
            max_age=Duration(seconds=3600),
        )

        # Register with Feast core
        client.apply(all_types_fs)

        # Have the core stub hand back this exact feature set definition.
        stubbed_response = GetFeatureSetResponse(
            feature_set=all_types_fs.to_proto())
        mocker.patch.object(
            client._core_service_stub,
            "GetFeatureSet",
            return_value=stubbed_response,
        )

        # Ingest data into Feast
        client.ingest(all_types_fs, dataframe=dataframe)
Пример #18
0
class TestFeatureSet:
    """Tests for field management, TFX schema and YAML support of ``FeatureSet``."""

    @pytest.fixture(scope="function")
    def server(self):
        """Run a gRPC server hosting ``CoreServicer`` on port 50051 per test."""
        server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
        Core.add_CoreServiceServicer_to_server(CoreServicer(), server)
        server.add_insecure_port("[::]:50051")
        server.start()
        yield server
        # Teardown: stop the server with a grace argument of 0.
        server.stop(0)

    @pytest.fixture
    def client(self, server):
        """Return a ``Client`` for ``localhost:50051``.

        Requesting the ``server`` fixture ensures it has been started first.
        """
        return Client(core_url="localhost:50051")

    def test_add_remove_features_success(self):
        """Dropping one of two added features leaves only the other."""
        fs = FeatureSet("my-feature-set")
        fs.add(Feature(name="my-feature-1", dtype=ValueType.INT64))
        fs.add(Feature(name="my-feature-2", dtype=ValueType.INT64))
        fs.drop(name="my-feature-1")
        # Separate asserts so a failure names the broken expectation.
        assert len(fs.features) == 1
        assert fs.features[0].name == "my-feature-2"

    def test_remove_feature_failure(self):
        """Dropping a feature that was never added raises ``ValueError``."""
        fs = FeatureSet("my-feature-set")
        # Only the drop() call may satisfy the expectation; a ValueError from
        # the constructor must fail the test instead of passing it.
        with pytest.raises(ValueError):
            fs.drop(name="my-feature-1")

    def test_update_from_source_failure(self):
        """Inferring fields from an empty dataframe raises."""
        df = pd.DataFrame()
        fs = FeatureSet("driver-feature-set")
        # Keep setup outside the context manager so that only the inference
        # call itself can satisfy the expectation.
        with pytest.raises(Exception):
            fs.infer_fields_from_df(df)

    @pytest.mark.parametrize(
        "dataframe,feature_count,entity_count,discard_unused_fields,features,entities",
        [
            (
                dataframes.GOOD,
                3,
                1,
                True,
                [],
                [Entity(name="entity_id", dtype=ValueType.INT64)],
            ),
            (
                dataframes.GOOD_FIVE_FEATURES,
                5,
                1,
                True,
                [],
                [Entity(name="entity_id", dtype=ValueType.INT64)],
            ),
            (
                dataframes.GOOD_FIVE_FEATURES,
                6,
                1,
                True,
                [Feature(name="feature_6", dtype=ValueType.INT64)],
                [Entity(name="entity_id", dtype=ValueType.INT64)],
            ),
            (
                dataframes.GOOD_FIVE_FEATURES_TWO_ENTITIES,
                5,
                2,
                True,
                [],
                [
                    Entity(name="entity_1_id", dtype=ValueType.INT64),
                    Entity(name="entity_2_id", dtype=ValueType.INT64),
                ],
            ),
            (
                dataframes.GOOD_FIVE_FEATURES_TWO_ENTITIES,
                6,
                3,
                False,
                [],
                [
                    Entity(name="entity_1_id", dtype=ValueType.INT64),
                    Entity(name="entity_2_id", dtype=ValueType.INT64),
                ],
            ),
            (
                dataframes.NO_FEATURES,
                0,
                1,
                True,
                [],
                [Entity(name="entity_id", dtype=ValueType.INT64)],
            ),
            (
                # A dataframe holding only a timezone-aware datetime column.
                # datetime.now(tz) replaces the deprecated
                # utcnow().replace(tzinfo=...) and yields the same aware value.
                pd.DataFrame({
                    "datetime": [
                        datetime.now(pytz.utc)
                        for _ in range(3)
                    ]
                }),
                0,
                0,
                True,
                [],
                [],
            ),
        ],
        ids=[
            "Test small dataframe update with hardcoded entity",
            "Test larger dataframe update with hardcoded entity",
            "Test larger dataframe update with hardcoded entity and feature",
            "Test larger dataframe update with two hardcoded entities and discarding of existing fields",
            "Test larger dataframe update with two hardcoded entities and retention of existing fields",
            "Test dataframe with no featuresdataframe",
            "Test empty dataframe",
        ],
    )
    def test_add_features_from_df_success(
        self,
        dataframe,
        feature_count,
        entity_count,
        discard_unused_fields,
        features,
        entities,
    ):
        """Infer fields from ``dataframe`` and check the resulting counts.

        The feature set starts with one placeholder feature and one
        placeholder entity; each parametrized case supplies the expected
        number of features and entities after inference.
        """
        my_feature_set = FeatureSet(
            name="my_feature_set",
            features=[Feature(name="dummy_f1", dtype=ValueType.INT64)],
            entities=[Entity(name="dummy_entity_1", dtype=ValueType.INT64)],
        )
        my_feature_set.infer_fields_from_df(
            dataframe,
            discard_unused_fields=discard_unused_fields,
            features=features,
            entities=entities,
        )
        assert len(my_feature_set.features) == feature_count
        assert len(my_feature_set.entities) == entity_count

    def test_import_tfx_schema(self):
        """Importing a TFX schema must populate presence, shape and domains.

        Before the import every entity and feature has no presence, shape or
        domain set; after ``import_tfx_schema`` each feature has presence and
        shape, and string/float features carry the matching domain.
        """
        tests_folder = pathlib.Path(__file__).parent
        schema_path = (tests_folder / "data" / "tensorflow_metadata" /
                       "bikeshare_schema.json")
        # Path.read_text() closes the file, unlike a bare open(...).read().
        test_input_schema_json = schema_path.read_text()
        test_input_schema = schema_pb2.Schema()
        json_format.Parse(test_input_schema_json, test_input_schema)

        feature_set = FeatureSet(
            name="bikeshare",
            entities=[Entity(name="station_id", dtype=ValueType.INT64)],
            features=[
                Feature(name="name", dtype=ValueType.STRING),
                Feature(name="status", dtype=ValueType.STRING),
                Feature(name="latitude", dtype=ValueType.FLOAT),
                Feature(name="longitude", dtype=ValueType.FLOAT),
                Feature(name="location", dtype=ValueType.STRING),
            ],
        )

        # Before update
        for entity in feature_set.entities:
            assert entity.presence is None
            assert entity.shape is None
        for feature in feature_set.features:
            assert feature.presence is None
            assert feature.shape is None
            assert feature.string_domain is None
            assert feature.float_domain is None
            assert feature.int_domain is None

        feature_set.import_tfx_schema(test_input_schema)

        # After update
        for feature in feature_set.features:
            assert feature.presence is not None
            assert feature.shape is not None
            if feature.name in ["location", "name", "status"]:
                assert feature.string_domain is not None
            elif feature.name in ["latitude", "longitude"]:
                assert feature.float_domain is not None
            elif feature.name in ["station_id"]:
                # NOTE(review): "station_id" is declared as an entity above,
                # so this branch does not appear reachable from the feature
                # loop -- confirm whether entities should be checked too.
                assert feature.int_domain is not None

    def test_export_tfx_schema(self):
        """The exported TFX schema must match the stored expected schema.

        The feature set is loaded from YAML, exported, and compared feature
        by feature against the expected schema via serialized bytes.
        """
        tests_folder = pathlib.Path(__file__).parent
        metadata_folder = tests_folder / "data" / "tensorflow_metadata"
        test_input_feature_set = FeatureSet.from_yaml(
            str(metadata_folder / "bikeshare_feature_set.yaml"))

        # Path.read_text() closes the file, unlike a bare open(...).read().
        expected_schema_json = (
            metadata_folder / "bikeshare_schema.json").read_text()
        expected_schema = schema_pb2.Schema()
        json_format.Parse(expected_schema_json, expected_schema)
        _make_tfx_schema_domain_info_inline(expected_schema)

        actual_schema = test_input_feature_set.export_tfx_schema()

        # Check the lengths first: zip() would silently ignore extra features.
        assert len(actual_schema.feature) == len(expected_schema.feature)
        for actual, expected in zip(actual_schema.feature,
                                    expected_schema.feature):
            assert actual.SerializeToString() == expected.SerializeToString()

    def test_feature_set_import_export_yaml(self):
        """A feature set must survive a ``to_yaml`` / ``from_yaml`` round trip."""

        test_feature_set = FeatureSet(
            name="bikeshare",
            entities=[Entity(name="station_id", dtype=ValueType.INT64)],
            features=[
                Feature(name="name", dtype=ValueType.STRING),
                Feature(name="longitude", dtype=ValueType.FLOAT),
                Feature(name="location", dtype=ValueType.STRING),
            ],
        )

        # Create a string YAML representation of the feature set
        string_yaml = test_feature_set.to_yaml()

        # Create a new feature set object from the YAML string
        actual_feature_set_from_string = FeatureSet.from_yaml(string_yaml)

        # Ensure equality is upheld to original feature set
        assert test_feature_set == actual_feature_set_from_string
Пример #19
0
 def test_add_remove_features_success(self):
     """Dropping one of two added features leaves only the other."""
     feature_set = FeatureSet("my-feature-set")
     for feature_name in ("my-feature-1", "my-feature-2"):
         feature_set.add(Feature(name=feature_name, dtype=ValueType.INT64))

     feature_set.drop(name="my-feature-1")

     # Exactly one feature remains, and it is the one that was not dropped.
     assert len(feature_set.features) == 1
     assert feature_set.features[0].name == "my-feature-2"
Пример #20
0
def test_feature_without_labels_empty_dict():
    """A Feature built without labels exposes an empty ``OrderedDict``."""
    unlabelled = Feature("my feature", dtype=ValueType.INT64)
    labels = unlabelled.labels
    assert labels == OrderedDict()
    assert len(labels) == 0
    assert len(f.labels) == 0