Пример #1
0
 def test_add_remove_features_success(self):
     """Adding two features and dropping the first leaves only the second."""
     feature_set = FeatureSet("my-feature-set")
     for feature_name in ("my-feature-1", "my-feature-2"):
         feature_set.add(Feature(name=feature_name, dtype=ValueType.INT64))

     feature_set.drop(name="my-feature-1")

     # Separate assertions so a failure points at the exact expectation
     assert len(feature_set.features) == 1
     assert feature_set.features[0].name == "my-feature-2"
def test_basic_retrieve_online_entity_listform(client, list_entity_dataframe):
    """Check online retrieval for a feature set whose entity is a list type.

    Case 1 applies and ingests the ``district`` feature set, then requests
    the same features twice -- once with the entity passed as a Python list
    of ``np.int64`` values and once as a ``Value`` proto holding an
    ``Int64List`` -- and expects both responses to convert to the same
    dictionary.

    Case 2 expects ``get_online_features`` to raise ``ValueError`` when the
    entity list mixes ``np.int64`` and ``bool`` elements.
    """
    # Case 1: Features retrieval with entity in list format check
    district_features = [
        Feature(name="district_rating", dtype=ValueType.INT64),
        Feature(name="district_cost", dtype=ValueType.FLOAT),
        Feature(name="district_past_transactions_int",
                dtype=ValueType.INT64_LIST),
        Feature(name="district_past_transactions_double",
                dtype=ValueType.DOUBLE_LIST),
        Feature(name="district_past_transactions_float",
                dtype=ValueType.FLOAT_LIST),
        Feature(name="district_past_transactions_string",
                dtype=ValueType.STRING_LIST),
        Feature(name="district_past_transactions_bool",
                dtype=ValueType.BOOL_LIST),
    ]
    district_entities = [Entity("district_ids", dtype=ValueType.INT64_LIST)]
    local_fs = FeatureSet(
        name="district",
        features=district_features,
        entities=district_entities,
        max_age=Duration(seconds=3600),
    )

    client.set_project(PROJECT_NAME)
    client.apply(local_fs)

    # Ingest against the feature set as returned by the client after apply
    registered_fs = client.get_feature_set(name="district")
    client.ingest(registered_fs, list_entity_dataframe, timeout=600)
    time.sleep(15)

    feature_refs = [
        "district_rating",
        "district_cost",
        "district_past_transactions_int",
        "district_past_transactions_double",
        "district_past_transactions_float",
        "district_past_transactions_string",
        "district_past_transactions_bool",
    ]
    # The same entity expressed two ways: native list and Value proto
    native_entity_rows = [{
        "district_ids": [np.int64(1), np.int64(2), np.int64(3)]
    }]
    proto_entity_rows = [{
        "district_ids": Value(int64_list_val=Int64List(val=[1, 2, 3]))
    }]

    def make_fetcher(entity_rows):
        # Build the callable handed to wait_retry_backoff; it returns the
        # response together with a flag that is always True here.
        def fetch():
            response = client.get_online_features(entity_rows=entity_rows,
                                                  feature_refs=feature_refs)
            return response, True

        return fetch

    timeout_message = "Timed out trying to get online feature values"

    native_response = wait_retry_backoff(
        retry_fn=make_fetcher(native_entity_rows),
        timeout_secs=90,
        timeout_msg=timeout_message,
    )

    proto_response = wait_retry_backoff(
        retry_fn=make_fetcher(proto_entity_rows),
        timeout_secs=90,
        timeout_msg=timeout_message,
    )

    expected_features = {
        "district_ids": [[np.int64(1), np.int64(2), np.int64(3)]],
        "district_rating": [1],
        "district_cost": [1.5],
        "district_past_transactions_int": [[1, 3]],
        "district_past_transactions_double": [[1.5, 3.0]],
        "district_past_transactions_float": [[1.5, 3.0]],
        "district_past_transactions_string": [["first_1", "second_1"]],
        "district_past_transactions_bool": [[True, False]],
    }

    assert native_response.to_dict() == expected_features
    assert proto_response.to_dict() == expected_features

    # Case 2: Features retrieval with entity in list format check with mixed types
    with pytest.raises(ValueError) as excinfo:
        mixed_entity_rows = [{
            "district_ids": [np.int64(1), np.int64(2), True]
        }]
        client.get_online_features(entity_rows=mixed_entity_rows,
                                   feature_refs=feature_refs)

    expected_error = (
        "List value type for field district_ids is inconsistent. ValueType.INT64 different from ValueType.BOOL."
    )
    assert expected_error in str(excinfo.value)
Пример #3
0
    def test_feature_set_ingest_success(self, dataframe, client, mocker):
        """Apply a driver feature set and ingest ``dataframe`` by its name.

        ``GetFeatureSet`` on the core service stub is patched to return the
        locally built feature set, so the ingest call resolves
        "driver-feature-set" to that definition.
        """
        driver_fs = FeatureSet("driver-feature-set")
        fields = (
            Feature(name="feature_1", dtype=ValueType.FLOAT),
            Feature(name="feature_2", dtype=ValueType.STRING),
            Feature(name="feature_3", dtype=ValueType.INT64),
            Entity(name="entity_id", dtype=ValueType.INT64),
        )
        for field in fields:
            driver_fs.add(field)

        # Register with Feast core
        client.apply(driver_fs)

        stubbed_response = GetFeatureSetResponse(
            feature_set=driver_fs.to_proto())
        mocker.patch.object(
            client._core_service_stub,
            "GetFeatureSet",
            return_value=stubbed_response,
        )

        # Ingest data into Feast
        client.ingest("driver-feature-set", dataframe=dataframe)
Пример #4
0
def test_feature_set_without_labels_empty_dict():
    """Labels of a FeatureSet built without any equal an empty OrderedDict."""
    unlabelled = FeatureSet("my-feature-set")
    assert unlabelled.labels == OrderedDict()
    assert len(unlabelled.labels) == 0
def test_basic_retrieve_online_entity_nonlistform(client,
                                                  nonlist_entity_dataframe,
                                                  list_entity_dataframe):
    """Check online retrieval for several rows of a scalar-typed entity.

    Case 1 applies and ingests the ``customer2`` feature set, then requests
    features for two entity rows twice -- once with plain integer ids and
    once with ``Value`` protos -- and expects both responses to convert to
    the same dictionary.

    Case 2 expects ``get_online_features`` to raise ``TypeError`` when the
    ``customer_id`` values across rows are an int and a string.

    ``list_entity_dataframe`` is accepted as a parameter but is not used in
    this body.
    """
    # Case 1: Feature retrieval with multiple entities retrieval check
    customer_features = [
        Feature(name="customer2_rating", dtype=ValueType.INT64),
        Feature(name="customer2_cost", dtype=ValueType.FLOAT),
        Feature(name="customer2_past_transactions_int",
                dtype=ValueType.INT64_LIST),
        Feature(name="customer2_past_transactions_double",
                dtype=ValueType.DOUBLE_LIST),
        Feature(name="customer2_past_transactions_float",
                dtype=ValueType.FLOAT_LIST),
        Feature(name="customer2_past_transactions_string",
                dtype=ValueType.STRING_LIST),
        Feature(name="customer2_past_transactions_bool",
                dtype=ValueType.BOOL_LIST),
    ]
    local_fs = FeatureSet(
        name="customer2",
        features=customer_features,
        entities=[Entity("customer_id2", ValueType.INT64)],
        max_age=Duration(seconds=3600),
    )

    client.set_project(PROJECT_NAME)
    client.apply(local_fs)

    # Ingest against the feature set as returned by the client after apply
    registered_fs = client.get_feature_set(name="customer2")
    client.ingest(registered_fs, nonlist_entity_dataframe, timeout=600)
    time.sleep(15)

    feature_refs = [
        "customer2_rating",
        "customer2_cost",
        "customer2_past_transactions_int",
        "customer2_past_transactions_double",
        "customer2_past_transactions_float",
        "customer2_past_transactions_string",
        "customer2_past_transactions_bool",
    ]
    # The same two entity rows expressed as plain ints and as Value protos
    native_entity_rows = [{"customer_id2": 0}, {"customer_id2": 1}]
    proto_entity_rows = [
        {"customer_id2": Value(int64_val=0)},
        {"customer_id2": Value(int64_val=1)},
    ]

    def fetch_with_native_rows():
        # NOTE(review): unlike the proto variant below, this fetch is not
        # validated with check_online_response and always reports success.
        response = client.get_online_features(entity_rows=native_entity_rows,
                                              feature_refs=feature_refs)
        return response, True

    def fetch_with_proto_rows():
        response = client.get_online_features(entity_rows=proto_entity_rows,
                                              feature_refs=feature_refs)
        matches_ingested = check_online_response(feature_refs,
                                                 nonlist_entity_dataframe,
                                                 response)
        return response, matches_ingested

    timeout_message = "Timed out trying to get online feature values"

    native_response = wait_retry_backoff(
        retry_fn=fetch_with_native_rows,
        timeout_secs=90,
        timeout_msg=timeout_message,
    )

    proto_response = wait_retry_backoff(
        retry_fn=fetch_with_proto_rows,
        timeout_secs=90,
        timeout_msg=timeout_message,
    )

    expected_features = {
        "customer_id2": [0, 1],
        "customer2_rating": [0, 1],
        "customer2_cost": [0.5, 1.5],
        "customer2_past_transactions_int": [[0, 2], [1, 3]],
        "customer2_past_transactions_double": [[0.5, 2.0], [1.5, 3.0]],
        "customer2_past_transactions_float": [[0.5, 2.0], [1.5, 3.0]],
        "customer2_past_transactions_string": [
            ["first_0", "second_0"],
            ["first_1", "second_1"],
        ],
        "customer2_past_transactions_bool": [[True, False], [True, False]],
    }

    assert native_response.to_dict() == expected_features
    assert proto_response.to_dict() == expected_features

    # Case 2: Feature retrieval with multiple entities retrieval check with mixed types
    with pytest.raises(TypeError) as excinfo:
        mixed_entity_rows = [{"customer_id": 0}, {"customer_id": "error_pls"}]
        client.get_online_features(entity_rows=mixed_entity_rows,
                                   feature_refs=feature_refs)

    expected_error = (
        "Input entity customer_id has mixed types, ValueType.STRING and ValueType.INT64. That is not allowed."
    )
    assert expected_error in str(excinfo.value)
Пример #6
0
    def test_feature_set_ingest_success(self, dataframe, client, mocker):
        """Apply a Kafka-sourced driver feature set and ingest ``dataframe``.

        ``GetFeatureSet`` on the core service stub is patched to return the
        locally built feature set, and ``feast.loaders.ingest.KafkaProducer``
        is patched for the duration of the ingest call.
        """
        driver_fs = FeatureSet("driver-feature-set",
                               source=KafkaSource(brokers="kafka:9092",
                                                  topic="test"))
        driver_fs.add(Feature(name="feature_1", dtype=ValueType.FLOAT))
        driver_fs.add(Feature(name="feature_2", dtype=ValueType.STRING))
        driver_fs.add(Feature(name="feature_3", dtype=ValueType.INT64))
        driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64))

        # Register with Feast core
        client.apply(driver_fs)

        mocker.patch.object(
            client._core_service_stub,
            "GetFeatureSet",
            return_value=GetFeatureSetResponse(
                feature_set=driver_fs.to_proto()),
        )

        # Need to create a mock producer; the mock itself is never inspected,
        # so it is not bound to a name.
        with patch("feast.loaders.ingest.KafkaProducer"):
            # Ingest data into Feast
            client.ingest("driver-feature-set", dataframe)
Пример #7
0
    def test_feature_set_ingest_fail_if_pending(
        self, dataframe, exception, test_client, mocker
    ):
        """Ingesting into a feature set reported as pending raises ``exception``.

        The feature set is applied, then ``GetFeatureSet`` on the core
        service stub is patched to return its proto with
        ``STATUS_PENDING``. Only the ``ingest`` call (with ``timeout=1``) is
        expected to raise; setup failures are not allowed to satisfy the
        expectation.
        """
        test_client.set_project("project1")
        driver_fs = FeatureSet(
            "driver-feature-set",
            source=KafkaSource(brokers="kafka:9092", topic="test"),
        )
        driver_fs.add(Feature(name="feature_1", dtype=ValueType.FLOAT))
        driver_fs.add(Feature(name="feature_2", dtype=ValueType.STRING))
        driver_fs.add(Feature(name="feature_3", dtype=ValueType.INT64))
        driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64))

        # Register with Feast core
        test_client.apply(driver_fs)
        driver_fs = driver_fs.to_proto()
        driver_fs.meta.status = FeatureSetStatusProto.STATUS_PENDING

        mocker.patch.object(
            test_client._core_service_stub,
            "GetFeatureSet",
            return_value=GetFeatureSetResponse(feature_set=driver_fs),
        )

        # Need to create a mock producer
        with patch("feast.client.get_producer"):
            # Keep the raises block around the ingest call alone so that an
            # error during setup cannot make the test pass.
            with pytest.raises(exception):
                # Ingest data into Feast
                test_client.ingest("driver-feature-set", dataframe, timeout=1)
Пример #8
0
    def test_from_feature_set(self):
        """A ref built from a FeatureSet carries its name and project."""
        source_feature_set = FeatureSet("test", "test")
        reference = FeatureSetRef.from_feature_set(source_feature_set)

        # Both constructor arguments were "test", so both fields must match it
        assert reference.name == "test"
        assert reference.project == "test"
Пример #9
0
 def feature_sets(self) -> List[FeatureSet]:
     """
     Return the IngestJob's feature sets as native FeatureSet objects.
     """
     native_feature_sets = []
     # Each entry of the job proto is converted through FeatureSet.from_proto
     for feature_set_proto in self.proto.feature_sets:
         native_feature_sets.append(FeatureSet.from_proto(feature_set_proto))
     return native_feature_sets
Пример #10
0
def test_remove_labels_invalid_key_failure():
    """Removing a label key that was never set raises KeyError."""
    feature_set = FeatureSet("my-feature-set")
    feature_set.set_label("k1", "v1")

    # "key1" differs from the stored key "k1"
    with pytest.raises(KeyError):
        feature_set.remove_label("key1")
Пример #11
0
def test_unequal_feature_set_other_has_no_labels():
    """Two equal feature sets stop comparing equal once one gains a label."""
    labelled = FeatureSet("my-feature-set")
    unlabelled = FeatureSet("my-feature-set")

    # Same name and no labels yet: the two compare equal
    assert labelled == unlabelled

    labelled.set_label("k1", "v1")
    assert not labelled == unlabelled
Пример #12
0
def test_remove_labels_empty_failure():
    """Removing a label from a feature set with no labels raises KeyError."""
    feature_set = FeatureSet("my-feature-set")

    with pytest.raises(KeyError):
        feature_set.remove_label("key1")
Пример #13
0
def test_set_labels_overwrites_existing():
    """Setting the same label key twice keeps the most recent value."""
    feature_set = FeatureSet("my-feature-set")
    for label_value in ("v1", "v2"):
        feature_set.set_label("k1", label_value)

    assert feature_set.labels["k1"] == "v2"
Пример #14
0
def test_set_label_feature_set():
    """A label set on a feature set can be read back from ``labels``."""
    feature_set = FeatureSet("my-feature-set")
    feature_set.set_label("k1", "v1")

    assert feature_set.labels["k1"] == "v1"
Пример #15
0
 def test_remove_feature_failure(self):
     """Dropping a feature that was never added raises ValueError."""
     fs = FeatureSet("my-feature-set")

     # Construction stays outside the raises block so that only the drop
     # call can satisfy the expected ValueError.
     with pytest.raises(ValueError):
         fs.drop(name="my-feature-1")
Пример #16
0
    def test_feature_set_ingest_success(self, dataframe, client, mocker):
        """Apply a driver feature set and ingest ``dataframe`` by its name.

        The feature set is given a Kafka source, the client's
        ``_message_producer`` (and its ``produce`` attribute) is replaced
        with ``MagicMock`` objects, and ``GetFeatureSet`` on the core
        service stub is patched to return the locally built feature set.
        """
        driver_fs = FeatureSet("driver-feature-set")
        fields = (
            Feature(name="feature_1", dtype=ValueType.FLOAT),
            Feature(name="feature_2", dtype=ValueType.STRING),
            Feature(name="feature_3", dtype=ValueType.INT64),
            Entity(name="entity_id", dtype=ValueType.INT64),
        )
        for field in fields:
            driver_fs.add(field)

        kafka_source = KafkaSource(topic="feature-topic", brokers="127.0.0.1")
        driver_fs.source = kafka_source

        # Swap the client's producer for mocks before anything is ingested
        client._message_producer = MagicMock()
        client._message_producer.produce = MagicMock()

        # Register with Feast core
        client.apply(driver_fs)

        stubbed_response = GetFeatureSetResponse(
            feature_set=driver_fs.to_proto())
        mocker.patch.object(
            client._core_service_stub,
            "GetFeatureSet",
            return_value=stubbed_response,
        )

        # Ingest data into Feast
        client.ingest("driver-feature-set", dataframe=dataframe)
Пример #17
0
 def test_update_from_source_failure(self):
     """Inferring fields from an empty DataFrame raises an exception."""
     df = pd.DataFrame()
     fs = FeatureSet("driver-feature-set")

     # Setup stays outside the raises block so that only the inference call
     # can satisfy the (broad) expected Exception.
     with pytest.raises(Exception):
         fs.infer_fields_from_df(df)
Пример #18
0
def test_batch_apply_all_featuresets(client):
    """Apply eight single-feature, single-entity feature sets to the client.

    The project is set to ``PROJECT_NAME`` first. Each feature set has one
    feature and one INT64 entity; all use a max age of 100 seconds except
    ``no_max_age``, which uses 0.
    """
    client.set_project(PROJECT_NAME)

    # (feature set name, feature name, feature dtype, entity name, max age)
    # listed in the order they are applied
    feature_set_specs = (
        ("file_feature_set", "feature_value1", ValueType.STRING,
         "entity_id", 100),
        ("gcs_feature_set", "feature_value2", ValueType.STRING,
         "entity_id", 100),
        ("processing_time", "feature_value3", ValueType.STRING,
         "entity_id", 100),
        ("additional_columns", "feature_value4", ValueType.STRING,
         "entity_id", 100),
        ("historical", "feature_value5", ValueType.STRING,
         "entity_id", 100),
        ("feature_set_1", "feature_value6", ValueType.STRING,
         "entity_id", 100),
        ("feature_set_2", "other_feature_value7", ValueType.INT64,
         "other_entity_id", 100),
        ("no_max_age", "feature_value8", ValueType.INT64,
         "entity_id", 0),
    )

    for (fs_name, feature_name, feature_dtype, entity_name,
         max_age_seconds) in feature_set_specs:
        feature_set = FeatureSet(
            fs_name,
            features=[Feature(feature_name, feature_dtype)],
            entities=[Entity(entity_name, ValueType.INT64)],
            max_age=Duration(seconds=max_age_seconds),
        )
        client.apply(feature_set)
Пример #19
0
    def test_feature_set_ingest_throws_exception_if_kafka_down(
        self, dataframe, test_client, exception, mocker
    ):
        """Ingesting with a Kafka source at localhost:4412 raises ``exception``.

        The feature set is applied, then ``GetFeatureSet`` on the core
        service stub is patched to return its proto with ``STATUS_READY``.
        No producer is mocked here; only the ``ingest`` call (with
        ``timeout=1``) sits inside the raises block.
        """
        test_client.set_project("project1")

        kafka_source = KafkaSource(brokers="localhost:4412", topic="test")
        driver_fs = FeatureSet("driver-feature-set", source=kafka_source)
        fields = (
            Feature(name="feature_1", dtype=ValueType.FLOAT),
            Feature(name="feature_2", dtype=ValueType.STRING),
            Feature(name="feature_3", dtype=ValueType.INT64),
            Entity(name="entity_id", dtype=ValueType.INT64),
        )
        for field in fields:
            driver_fs.add(field)

        # Register with Feast core
        test_client.apply(driver_fs)

        # Report the feature set as ready through the patched core stub
        ready_proto = driver_fs.to_proto()
        ready_proto.meta.status = FeatureSetStatusProto.STATUS_READY
        mocker.patch.object(
            test_client._core_service_stub,
            "GetFeatureSet",
            return_value=GetFeatureSetResponse(feature_set=ready_proto),
        )

        with pytest.raises(exception):
            test_client.ingest("driver-feature-set", dataframe, timeout=1)
def test_list_entities_and_features(client):
    """Check ``list_features_by_ref`` filtering by entities and labels.

    Two feature sets, ``customer`` and ``driver``, are applied under
    ``PROJECT_NAME``. Each has a ``rating`` and a ``cost`` feature; the
    label ``key1=val1`` is attached to ``customer:rating`` and
    ``driver:cost`` only. Three filter combinations are queried and, for
    each, ``set()`` of the result is compared with ``set()`` of the
    expected dict (i.e. the expected feature reference strings).
    """
    shared_label = {"key1": "val1"}

    customer_entity = Entity("customer_id", ValueType.INT64)
    driver_entity = Entity("driver_id", ValueType.INT64)

    customer_feature_rating = Feature(name="rating",
                                      dtype=ValueType.FLOAT,
                                      labels={"key1": "val1"})
    customer_feature_cost = Feature(name="cost", dtype=ValueType.FLOAT)
    driver_feature_rating = Feature(name="rating", dtype=ValueType.FLOAT)
    driver_feature_cost = Feature(name="cost",
                                  dtype=ValueType.FLOAT,
                                  labels={"key1": "val1"})

    # Expected results keyed by feature reference, one dict per filter case
    expected_by_entity_and_labels = {
        "customer:rating": customer_feature_rating,
    }
    expected_by_entity = {
        "driver:cost": driver_feature_cost,
        "driver:rating": driver_feature_rating,
    }
    expected_by_labels = {
        "customer:rating": customer_feature_rating,
        "driver:cost": driver_feature_cost,
    }

    customer_fs = FeatureSet(
        "customer",
        features=[customer_feature_rating, customer_feature_cost],
        entities=[customer_entity],
        max_age=Duration(seconds=100),
    )
    driver_fs = FeatureSet(
        "driver",
        features=[driver_feature_rating, driver_feature_cost],
        entities=[driver_entity],
        max_age=Duration(seconds=100),
    )

    client.set_project(PROJECT_NAME)
    for feature_set in (customer_fs, driver_fs):
        client.apply(feature_set)

    # Test for listing of features
    # Case 1: Filter by: project, entities and labels
    actual_by_entity_and_labels = client.list_features_by_ref(
        project=PROJECT_NAME,
        entities=["customer_id"],
        labels=shared_label)

    # Case 2: Filter by: project, entities
    actual_by_entity = client.list_features_by_ref(project=PROJECT_NAME,
                                                   entities=["driver_id"])

    # Case 3: Filter by: project, labels
    actual_by_labels = client.list_features_by_ref(project=PROJECT_NAME,
                                                   labels={"key1": "val1"})

    assert set(expected_by_entity_and_labels) == set(
        actual_by_entity_and_labels)
    assert set(expected_by_entity) == set(actual_by_entity)
    assert set(expected_by_labels) == set(actual_by_labels)
Пример #21
0
    def test_apply_feature_set_success(self, test_client):
        """Apply two feature sets and check what ``list_feature_sets`` returns.

        Expects exactly two feature sets back, with the first one named
        "my-feature-set-1" and starting with the INT64 feature
        "fs1-my-feature-1", and the second one having a BYTES_LIST feature
        in second position.
        """
        test_client.set_project("project1")

        # Create Feature Sets
        first_fs = FeatureSet("my-feature-set-1")
        first_fs.add(Feature(name="fs1-my-feature-1", dtype=ValueType.INT64))
        first_fs.add(Feature(name="fs1-my-feature-2", dtype=ValueType.STRING))
        first_fs.add(Entity(name="fs1-my-entity-1", dtype=ValueType.INT64))

        second_fs = FeatureSet("my-feature-set-2")
        second_fs.add(
            Feature(name="fs2-my-feature-1", dtype=ValueType.STRING_LIST))
        second_fs.add(
            Feature(name="fs2-my-feature-2", dtype=ValueType.BYTES_LIST))
        second_fs.add(Entity(name="fs2-my-entity-1", dtype=ValueType.INT64))

        # Register Feature Set with Core
        for feature_set in (first_fs, second_fs):
            test_client.apply(feature_set)

        listed = test_client.list_feature_sets()

        # List Feature Sets: one assertion per expectation, checked in the
        # same order as the original compound condition
        assert len(listed) == 2
        assert listed[0].name == "my-feature-set-1"
        assert listed[0].features[0].name == "fs1-my-feature-1"
        assert listed[0].features[0].dtype == ValueType.INT64
        assert listed[1].features[1].dtype == ValueType.BYTES_LIST
Пример #22
0
def test_feature_set_class_contains_labels():
    """Labels passed to the FeatureSet constructor are exposed via ``labels``."""
    labelled = FeatureSet("my-feature-set",
                          labels={"key1": "val1", "key2": "val2"})

    # Check presence and value of each label, in the original order
    for label_key, label_value in (("key1", "val1"), ("key2", "val2")):
        assert label_key in labelled.labels.keys()
        assert labelled.labels[label_key] == label_value