示例#1
0
    def test_apply_feature_set_success(self, client):
        """Apply two feature sets and check what listing feature sets returns."""
        # First feature set: two scalar features plus one entity.
        first_set = FeatureSet("my-feature-set-1")
        for field in (
            Feature(name="fs1-my-feature-1", dtype=ValueType.INT64),
            Feature(name="fs1-my-feature-2", dtype=ValueType.STRING),
            Entity(name="fs1-my-entity-1", dtype=ValueType.INT64),
        ):
            first_set.add(field)

        # Second feature set: two list-typed features plus one entity.
        second_set = FeatureSet("my-feature-set-2")
        for field in (
            Feature(name="fs2-my-feature-1", dtype=ValueType.STRING_LIST),
            Feature(name="fs2-my-feature-2", dtype=ValueType.BYTES_LIST),
            Entity(name="fs2-my-entity-1", dtype=ValueType.INT64),
        ):
            second_set.add(field)

        # Register both feature sets with Core.
        client.apply(first_set)
        client.apply(second_set)

        listed = client.list_feature_sets()

        # One check per assert so a failure points at the exact mismatch.
        assert len(listed) == 2
        assert listed[0].name == "my-feature-set-1"
        assert listed[0].features[0].name == "fs1-my-feature-1"
        assert listed[0].features[0].dtype == ValueType.INT64
        assert listed[1].features[1].dtype == ValueType.BYTES_LIST
示例#2
0
def test_list_entities_and_features(client):
    """Check ``list_features_by_ref`` filtering by project, entities and labels.

    Two feature sets ("customer" and "driver") are applied, each with a
    "rating" and a "cost" feature; one feature per set carries the label
    ``key1=val1``.
    """
    customer_entity = Entity("customer_id", ValueType.INT64)
    driver_entity = Entity("driver_id", ValueType.INT64)

    customer_feature_rating = Feature(
        name="rating", dtype=ValueType.FLOAT, labels={"key1": "val1"}
    )
    customer_feature_cost = Feature(name="cost", dtype=ValueType.FLOAT)
    driver_feature_rating = Feature(name="rating", dtype=ValueType.FLOAT)
    driver_feature_cost = Feature(
        name="cost", dtype=ValueType.FLOAT, labels={"key1": "val1"}
    )

    # Expected results, keyed by "<feature set>:<feature>" reference.
    expected_by_entity_and_labels = {
        "customer:rating": customer_feature_rating,
    }
    expected_by_entity = {
        "driver:cost": driver_feature_cost,
        "driver:rating": driver_feature_rating,
    }
    expected_by_labels = {
        "customer:rating": customer_feature_rating,
        "driver:cost": driver_feature_cost,
    }

    customer_fs = FeatureSet(
        "customer",
        features=[customer_feature_rating, customer_feature_cost],
        entities=[customer_entity],
        max_age=Duration(seconds=100),
    )
    driver_fs = FeatureSet(
        "driver",
        features=[driver_feature_rating, driver_feature_cost],
        entities=[driver_entity],
        max_age=Duration(seconds=100),
    )

    client.set_project(PROJECT_NAME)
    client.apply(customer_fs)
    client.apply(driver_fs)

    # Case 1: filter by project, entities and labels.
    actual_by_entity_and_labels = client.list_features_by_ref(
        project=PROJECT_NAME, entities=["customer_id"], labels={"key1": "val1"}
    )
    # Case 2: filter by project and entities.
    actual_by_entity = client.list_features_by_ref(
        project=PROJECT_NAME, entities=["driver_id"]
    )
    # Case 3: filter by project and labels.
    actual_by_labels = client.list_features_by_ref(
        project=PROJECT_NAME, labels={"key1": "val1"}
    )

    # set() over the expected dicts yields their keys, so only the feature
    # references are compared here, not the Feature objects themselves.
    for expected, actual in (
        (expected_by_entity_and_labels, actual_by_entity_and_labels),
        (expected_by_entity, actual_by_entity),
        (expected_by_labels, actual_by_labels),
    ):
        assert set(expected) == set(actual)
示例#3
0
def test_multiple_featureset_joins(client):
    """Retrieve batch features joined from two feature sets with different entities."""
    first_set = FeatureSet(
        "feature_set_1",
        features=[Feature("feature_value", ValueType.STRING)],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )
    second_set = FeatureSet(
        "feature_set_2",
        features=[Feature("other_feature_value", ValueType.INT64)],
        entities=[Entity("other_entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )

    # Apply each set, pause, then re-fetch version 1 of it from the client.
    client.apply(first_set)
    time.sleep(10)
    first_set = client.get_feature_set(name="feature_set_1", version=1)

    client.apply(second_set)
    time.sleep(10)
    second_set = client.get_feature_set(name="feature_set_2", version=1)

    row_count = 10
    time_offset = datetime.utcnow().replace(tzinfo=pytz.utc)
    timestamps = [time_offset] * row_count

    # feature_set_1 rows: feature value is the entity id rendered as a string.
    features_1_df = pd.DataFrame({
        "datetime": timestamps,
        "entity_id": list(range(row_count)),
        "feature_value": [str(i) for i in range(row_count)],
    })
    client.ingest(first_set, features_1_df)

    # feature_set_2 rows: feature value equals the entity id.
    features_2_df = pd.DataFrame({
        "datetime": timestamps,
        "other_entity_id": list(range(row_count)),
        "other_feature_value": list(range(row_count)),
    })
    client.ingest(second_set, features_2_df)

    # Entity rows pair entity_id i with other_entity_id (row_count - 1 - i).
    entity_df = pd.DataFrame({
        "datetime": timestamps,
        "entity_id": list(range(row_count)),
        "other_entity_id": [row_count - 1 - i for i in range(row_count)],
    })
    retrieval_job = client.get_batch_features(
        entity_rows=entity_df,
        feature_ids=[
            "feature_set_1:1:feature_value",
            "feature_set_2:1:other_feature_value",
        ],
    )
    output = retrieval_job.to_dataframe()
    print(output.head())

    entity_ids = output["entity_id"].to_list()
    first_values = output["feature_set_1_v1_feature_value"].to_list()
    assert entity_ids == [int(value) for value in first_values]

    other_entity_ids = output["other_entity_id"].to_list()
    second_values = output["feature_set_2_v1_other_feature_value"].to_list()
    assert other_entity_ids == second_values
示例#4
0
def test_unequal_feature_set_based_on_labels():
    """Same-named feature sets compare equal until their labels differ."""
    left = FeatureSet("my-feature-set")
    right = FeatureSet("my-feature-set")
    assert left == right

    # The same label on both sides keeps them equal.
    left.set_label("k1", "v1")
    right.set_label("k1", "v1")
    assert left == right

    # A different value for the same key on one side must break equality.
    right.set_label("k1", "unequal")
    assert not left == right
示例#5
0
def test_basic_register_feature_set_success(client):
    """Register feature sets without a project, with a project, and with labels.

    Each scenario applies feature sets through ``client`` and compares what
    the client returns with what was applied. The client's project is reset
    at the end even if an assertion fails, so that a failure here does not
    leave the project set for other tests sharing the client.
    """
    # Register feature set without project
    cust_trans_fs_expected = FeatureSet.from_yaml(
        f"{DIR_PATH}/basic/cust_trans_fs.yaml")
    driver_fs_expected = FeatureSet.from_yaml(
        f"{DIR_PATH}/basic/driver_fs.yaml")
    client.apply(cust_trans_fs_expected)
    client.apply(driver_fs_expected)
    cust_trans_fs_actual = client.get_feature_set("customer_transactions")
    assert cust_trans_fs_actual == cust_trans_fs_expected
    driver_fs_actual = client.get_feature_set("driver")
    assert driver_fs_actual == driver_fs_expected

    # Register feature set with project
    cust_trans_fs_expected = FeatureSet.from_yaml(
        f"{DIR_PATH}/basic/cust_trans_fs.yaml")
    client.set_project(PROJECT_NAME)
    try:
        client.apply(cust_trans_fs_expected)
        cust_trans_fs_actual = client.get_feature_set("customer_transactions",
                                                      project=PROJECT_NAME)
        assert cust_trans_fs_actual == cust_trans_fs_expected

        # Register feature set with labels
        driver_unlabelled_fs = FeatureSet(
            "driver_unlabelled",
            features=[
                Feature("rating", ValueType.FLOAT),
                Feature("cost", ValueType.FLOAT)
            ],
            entities=[Entity("entity_id", ValueType.INT64)],
            max_age=Duration(seconds=100),
        )
        driver_labeled_fs_expected = FeatureSet(
            "driver_labeled",
            features=[
                Feature("rating", ValueType.FLOAT),
                Feature("cost", ValueType.FLOAT)
            ],
            entities=[Entity("entity_id", ValueType.INT64)],
            max_age=Duration(seconds=100),
            labels={"key1": "val1"},
        )
        client.set_project(PROJECT_NAME)
        client.apply(driver_unlabelled_fs)
        client.apply(driver_labeled_fs_expected)
        # Filtering by label: the first listed result is expected to be the
        # labelled feature set.
        driver_fs_actual = client.list_feature_sets(project=PROJECT_NAME,
                                                    labels={"key1": "val1"})[0]
        assert driver_fs_actual == driver_labeled_fs_expected
    finally:
        # reset client's project for other tests, even when a check above fails
        client.set_project()
示例#6
0
    def test_feature_set_ingest_success(self, dataframe, client, mocker):
        """Ingest a dataframe into a feature set whose status is mocked as READY.

        The core stub's ``GetFeatureSet`` is patched to return the feature set
        proto with ``STATUS_READY``, and ``feast.client.get_producer`` is
        patched for the duration of the ingest call.
        """
        client.set_project("project1")
        driver_fs = FeatureSet("driver-feature-set",
                               source=KafkaSource(brokers="kafka:9092",
                                                  topic="test"))
        driver_fs.add(Feature(name="feature_1", dtype=ValueType.FLOAT))
        driver_fs.add(Feature(name="feature_2", dtype=ValueType.STRING))
        driver_fs.add(Feature(name="feature_3", dtype=ValueType.INT64))
        driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64))

        # Register with Feast core
        client.apply(driver_fs)
        driver_fs = driver_fs.to_proto()
        driver_fs.meta.status = FeatureSetStatusProto.STATUS_READY

        mocker.patch.object(
            client._core_service_stub,
            "GetFeatureSet",
            return_value=GetFeatureSetResponse(feature_set=driver_fs),
        )

        # Need to create a mock producer; the mock itself is never inspected,
        # so it is not bound to a name.
        with patch("feast.client.get_producer"):
            # Ingest data into Feast
            client.ingest("driver-feature-set", dataframe)
示例#7
0
def test_update_featureset_update_featureset_and_ingest_second_subset(
        client, update_featureset_dataframe):
    """Update "update_fs" from the second data subset and verify batch retrieval.

    Rows from position 5 onwards are used, restricted to the datetime, entity
    and three feature columns. Ingestion is repeated until the ingested row
    count equals the subset size, then retrieval is checked via ``wait_for``.
    """
    feature_columns = ["update_feature1", "update_feature3", "update_feature4"]
    subset_df = update_featureset_dataframe.iloc[5:][
        ["datetime", "entity_id"] + feature_columns
    ]

    update_fs = FeatureSet(
        "update_fs",
        entities=[Entity(name="entity_id", dtype=ValueType.INT64)],
        max_age=Duration(seconds=432000),
    )
    update_fs.infer_fields_from_df(subset_df)
    client.apply(update_fs)

    # We keep retrying this ingestion until all values make it into the buffer.
    # This is a necessary step because bigquery streaming caches table schemas
    # and as a result, rows may be lost.
    # NOTE(review): the loop has no upper bound on the number of attempts.
    all_rows_ingested = False
    while not all_rows_ingested:
        ingestion_id = client.ingest(feature_set=update_fs, source=subset_df)
        time.sleep(15)  # wait for rows to get written to bq
        rows_ingested = get_rows_ingested(client, update_fs, ingestion_id)
        all_rows_ingested = rows_ingested == len(subset_df)
        if all_rows_ingested:
            print(
                f"Number of rows successfully ingested: {rows_ingested}. Continuing."
            )
        else:
            print(
                f"Number of rows successfully ingested: {rows_ingested}. Retrying ingestion."
            )
            time.sleep(30)

    def check():
        # Retrieve the three features for the same rows that were ingested.
        entity_rows = update_featureset_dataframe[["datetime",
                                                   "entity_id"]].iloc[5:]
        retrieval_job = client.get_batch_features(
            entity_rows=entity_rows,
            feature_refs=list(feature_columns),
            project=PROJECT_NAME,
        )

        output = retrieval_job.to_dataframe(timeout_sec=180)
        output = output.sort_values(by=["entity_id"])
        print(output.head())

        for column in feature_columns:
            assert output[column].to_list() == subset_df[column].to_list()
        clean_up_remote_files(retrieval_job.get_avro_files())

    wait_for(check, timedelta(minutes=5))
示例#8
0
    def test_feature_set_ingest_success(self, dataframe, client, mocker):
        """Ingest a dataframe by feature set name using a mocked message producer."""
        feature_set = FeatureSet("driver-feature-set")
        for field in (
            Feature(name="feature_1", dtype=ValueType.FLOAT),
            Feature(name="feature_2", dtype=ValueType.STRING),
            Feature(name="feature_3", dtype=ValueType.INT64),
            Entity(name="entity_id", dtype=ValueType.INT64),
        ):
            feature_set.add(field)

        feature_set.source = KafkaSource(topic="feature-topic",
                                         brokers="127.0.0.1")

        # Replace the client's message producer (and its ``produce``) with mocks.
        client._message_producer = MagicMock()
        client._message_producer.produce = MagicMock()

        # Register with Feast core
        client.apply(feature_set)

        # Have the core stub return this feature set's proto on lookup.
        lookup_response = GetFeatureSetResponse(
            feature_set=feature_set.to_proto())
        mocker.patch.object(
            client._core_service_stub,
            "GetFeatureSet",
            return_value=lookup_response,
        )

        # Ingest data into Feast
        client.ingest("driver-feature-set", dataframe=dataframe)
示例#9
0
def test_order_by_creation_time(client):
    """Ingest "WRONG" then "CORRECT" values and expect retrieval to return "CORRECT".

    Both dataframes share the same timestamps and entity ids; they differ
    only in the feature value and in the order they are ingested.
    """
    proc_time_fs = FeatureSet(
        "processing_time",
        features=[Feature("feature_value", ValueType.STRING)],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )
    client.apply(proc_time_fs)
    time.sleep(10)
    proc_time_fs = client.get_feature_set(name="processing_time", version=1)

    time_offset = datetime.utcnow().replace(tzinfo=pytz.utc)
    row_count = 10

    def frame_with_value(value):
        # Same timestamp and entity ids every time; only the value varies.
        return pd.DataFrame({
            "datetime": [time_offset] * row_count,
            "entity_id": list(range(row_count)),
            "feature_value": [value] * row_count,
        })

    incorrect_df = frame_with_value("WRONG")
    correct_df = frame_with_value("CORRECT")

    client.ingest(proc_time_fs, incorrect_df)
    time.sleep(10)
    client.ingest(proc_time_fs, correct_df)

    retrieval_job = client.get_batch_features(
        entity_rows=incorrect_df[["datetime", "entity_id"]],
        feature_ids=["processing_time:1:feature_value"],
    )
    output = retrieval_job.to_dataframe()
    print(output.head())

    retrieved_values = output["processing_time_v1_feature_value"].to_list()
    assert retrieved_values == ["CORRECT"] * row_count
示例#10
0
    def test_feature_set_ingest_fail_if_pending(self, dataframe, exception,
                                                test_client, mocker):
        """Ingesting into a feature set whose status is PENDING must raise.

        The core stub's ``GetFeatureSet`` is patched to return the feature set
        proto with ``STATUS_PENDING``. Only the ``ingest`` call sits inside
        ``pytest.raises``, so an error of the same type raised during setup
        fails the test instead of satisfying it.
        """
        test_client.set_project("project1")
        driver_fs = FeatureSet(
            "driver-feature-set",
            source=KafkaSource(brokers="kafka:9092", topic="test"),
        )
        driver_fs.add(Feature(name="feature_1", dtype=ValueType.FLOAT))
        driver_fs.add(Feature(name="feature_2", dtype=ValueType.STRING))
        driver_fs.add(Feature(name="feature_3", dtype=ValueType.INT64))
        driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64))

        # Register with Feast core
        test_client.apply(driver_fs)
        driver_fs = driver_fs.to_proto()
        driver_fs.meta.status = FeatureSetStatusProto.STATUS_PENDING

        mocker.patch.object(
            test_client._core_service_stub,
            "GetFeatureSet",
            return_value=GetFeatureSetResponse(feature_set=driver_fs),
        )

        # Need to create a mock producer
        with patch("feast.client.get_producer"):
            # Ingest data into Feast; this is the only step expected to raise
            with pytest.raises(exception):
                test_client.ingest("driver-feature-set", dataframe, timeout=1)
示例#11
0
    def test_feature_set_types_success(self, client, dataframe, mocker):
        """Apply and ingest a feature set with one feature per listed value type."""
        # (feature name, value type) pairs, scalar types first, then list types.
        typed_features = [
            ("float_feature", ValueType.FLOAT),
            ("int64_feature", ValueType.INT64),
            ("int32_feature", ValueType.INT32),
            ("string_feature", ValueType.STRING),
            ("bytes_feature", ValueType.BYTES),
            ("bool_feature", ValueType.BOOL),
            ("double_feature", ValueType.DOUBLE),
            ("float_list_feature", ValueType.FLOAT_LIST),
            ("int64_list_feature", ValueType.INT64_LIST),
            ("int32_list_feature", ValueType.INT32_LIST),
            ("string_list_feature", ValueType.STRING_LIST),
            ("bytes_list_feature", ValueType.BYTES_LIST),
            ("bool_list_feature", ValueType.BOOL_LIST),
            ("double_list_feature", ValueType.DOUBLE_LIST),
        ]

        all_types_fs = FeatureSet(
            name="all_types",
            entities=[Entity(name="user_id", dtype=ValueType.INT64)],
            features=[
                Feature(name=feature_name, dtype=feature_dtype)
                for feature_name, feature_dtype in typed_features
            ],
            max_age=Duration(seconds=3600),
        )

        # Register with Feast core
        client.apply(all_types_fs)

        # Have the core stub return this feature set's proto on lookup.
        lookup_response = GetFeatureSetResponse(
            feature_set=all_types_fs.to_proto())
        mocker.patch.object(
            client._core_service_stub,
            "GetFeatureSet",
            return_value=lookup_response,
        )

        # Ingest data into Feast
        client.ingest(all_types_fs, dataframe=dataframe)
示例#12
0
    def test_register_feature_set(self, sqlite_store):
        """Register a feature set spec with the SQLite store."""
        feature_set = FeatureSet("my-feature-set")
        for field in (
            Feature(name="my-feature-1", dtype=ValueType.INT64),
            Feature(name="my-feature-2", dtype=ValueType.INT64),
            Entity(name="my-entity-1", dtype=ValueType.INT64),
        ):
            feature_set.add(field)
        feature_set._version = 1
        spec_proto = feature_set.to_proto().spec

        sqlite_store.register_feature_set(spec_proto)

        # Feature row prepared for the upsert call below, which is disabled.
        feature_row = FeatureRowProto.FeatureRow(
            feature_set="feature_set_1",
            event_timestamp=Timestamp(),
            fields=[
                FieldProto.Field(
                    name=field_name, value=ValueProto.Value(float_val=1.2)
                )
                for field_name in ("feature_1", "feature_2", "feature_3")
            ],
        )
        # sqlite_store.upsert_feature_row(feature_set_proto, feature_row)
        # NOTE(review): nothing about the store is asserted; the test passes
        # whenever the calls above do not raise.
        assert True
示例#13
0
    def test_feature_set_ingest_throws_exception_if_kafka_down(
            self, dataframe, test_client, exception, mocker):
        """Ingest must raise when no producer is mocked for the Kafka source.

        The feature set points at broker ``localhost:4412`` and its status is
        mocked as READY; unlike the sibling ingest tests, ``get_producer`` is
        not patched here.
        """
        test_client.set_project("project1")

        kafka_source = KafkaSource(brokers="localhost:4412", topic="test")
        feature_set = FeatureSet("driver-feature-set", source=kafka_source)
        for field in (
            Feature(name="feature_1", dtype=ValueType.FLOAT),
            Feature(name="feature_2", dtype=ValueType.STRING),
            Feature(name="feature_3", dtype=ValueType.INT64),
            Entity(name="entity_id", dtype=ValueType.INT64),
        ):
            feature_set.add(field)

        # Register with Feast core
        test_client.apply(feature_set)

        # Report the feature set as READY when the client looks it up.
        ready_proto = feature_set.to_proto()
        ready_proto.meta.status = FeatureSetStatusProto.STATUS_READY
        mocker.patch.object(
            test_client._core_service_stub,
            "GetFeatureSet",
            return_value=GetFeatureSetResponse(feature_set=ready_proto),
        )

        with pytest.raises(exception):
            test_client.ingest("driver-feature-set", dataframe)
示例#14
0
def test_update_featureset_apply_featureset_and_ingest_first_subset(
        client, update_featureset_dataframe):
    """Apply "update_fs" from the first data subset and verify batch retrieval.

    The first five rows are used, restricted to the datetime, entity and two
    feature columns; fields are inferred from that subset.
    """
    feature_columns = ["update_feature1", "update_feature2"]
    subset_df = update_featureset_dataframe.iloc[:5][
        ["datetime", "entity_id"] + feature_columns
    ]

    update_fs = FeatureSet(
        "update_fs",
        entities=[Entity(name="entity_id", dtype=ValueType.INT64)],
        max_age=Duration(seconds=432000),
    )
    update_fs.infer_fields_from_df(subset_df)
    client.apply(update_fs)

    client.ingest(feature_set=update_fs, source=subset_df)

    # Pause before querying the rows that were just ingested.
    time.sleep(15)
    entity_rows = update_featureset_dataframe[["datetime",
                                               "entity_id"]].iloc[:5]
    retrieval_job = client.get_batch_features(
        entity_rows=entity_rows,
        # Feature references are qualified with the project name.
        feature_refs=[f"{PROJECT_NAME}/{column}" for column in feature_columns],
    )

    output = retrieval_job.to_dataframe()
    output = output.sort_values(by=["entity_id"])
    print(output.head())

    for column in feature_columns:
        assert output[column].to_list() == subset_df[column].to_list()
示例#15
0
def create(name):
    """
    Create a feature set
    """
    # Read the Core URL from the Feast config and build a client for it.
    core_url = feast_config.get_config_property_or_fail("core_url")
    feast_client = Client(core_url=core_url)  # type: Client

    # Apply a feature set that carries only the given name.
    new_feature_set = FeatureSet(name=name)
    feast_client.apply(new_feature_set)
示例#16
0
    def test_from_feature_set(self):
        """A ref built from a feature set carries its name, project and version."""
        source_set = FeatureSet("test", "test")
        source_set.version = 2

        reference = FeatureSetRef.from_feature_set(source_set)

        assert reference.name == "test"
        assert reference.project == "test"
        assert reference.version == 2
示例#17
0
 def test_update_from_source_success(self, dataframe):
     """Updating from a dataset yields three features, the second named "feature_2"."""
     feature_set = FeatureSet("driver-feature-set")
     # Map the "entity_id" column to an entity rather than a feature.
     entity_mapping = {
         "entity_id": Entity(name="entity", dtype=ValueType.INT64)
     }
     feature_set.update_from_dataset(dataframe, column_mapping=entity_mapping)

     assert len(feature_set.features) == 3
     assert feature_set.features[1].name == "feature_2"
示例#18
0
def test_feature_class_contains_labels():
    """A feature added with labels exposes them through the feature set."""
    feature_set = FeatureSet(
        "my-feature-set", labels={"key1": "val1", "key2": "val2"}
    )
    labelled_feature = Feature(
        name="my-feature-1",
        dtype=ValueType.INT64,
        labels={"feature_key1": "feature_val1"},
    )
    feature_set.add(labelled_feature)

    # Read the labels back through the feature set, not the local object.
    assert "feature_key1" in feature_set.features[0].labels.keys()
    assert feature_set.features[0].labels["feature_key1"] == "feature_val1"
    def test_register_feature_set_with_labels(self, core_service_stub):
        """A label set on the feature set spec is present on the retrieved spec."""
        name = "test_feature_set_labels"
        outgoing_proto = FeatureSet(name, PROJECT_NAME).to_proto()
        outgoing_proto.spec.labels[self.LABEL_KEY] = self.LABEL_VALUE
        self.apply_feature_set(core_service_stub, outgoing_proto)

        retrieved = self.get_feature_set(core_service_stub, name, PROJECT_NAME)
        retrieved_labels = retrieved.spec.labels

        assert self.LABEL_KEY in retrieved_labels
        assert retrieved_labels[self.LABEL_KEY] == self.LABEL_VALUE
示例#20
0
def test_apply_all_featuresets(client):
    """Apply seven single-feature feature sets under PROJECT_NAME."""
    client.set_project(PROJECT_NAME)

    # (feature set name, feature name, feature dtype, entity name)
    specs = [
        ("file_feature_set", "feature_value1", ValueType.STRING, "entity_id"),
        ("gcs_feature_set", "feature_value2", ValueType.STRING, "entity_id"),
        ("processing_time", "feature_value3", ValueType.STRING, "entity_id"),
        ("additional_columns", "feature_value4", ValueType.STRING, "entity_id"),
        ("historical", "feature_value5", ValueType.STRING, "entity_id"),
        ("feature_set_1", "feature_value6", ValueType.STRING, "entity_id"),
        ("feature_set_2", "other_feature_value7", ValueType.INT64,
         "other_entity_id"),
    ]

    # Every set has one feature, one INT64 entity and a 100-second max age.
    for set_name, feature_name, feature_dtype, entity_name in specs:
        feature_set = FeatureSet(
            set_name,
            features=[Feature(feature_name, feature_dtype)],
            entities=[Entity(entity_name, ValueType.INT64)],
            max_age=Duration(seconds=100),
        )
        client.apply(feature_set)
    def test_register_feature_with_labels(self, core_service_stub):
        """A label set on a feature spec is present on the retrieved feature."""
        name = "test_feature_labels"
        rating_set = FeatureSet(
            name, PROJECT_NAME, features=[Feature("rating", ValueType.INT64)]
        )
        outgoing_proto = rating_set.to_proto()
        outgoing_proto.spec.features[0].labels[self.LABEL_KEY] = self.LABEL_VALUE
        self.apply_feature_set(core_service_stub, outgoing_proto)

        retrieved = self.get_feature_set(core_service_stub, name, PROJECT_NAME)
        first_feature = retrieved.spec.features[0]

        assert self.LABEL_KEY in first_feature.labels
        assert first_feature.labels[self.LABEL_KEY] == self.LABEL_VALUE
示例#22
0
def feature_stats_feature_set(client):
    """Build the "feature_stats" feature set, apply it via ``client`` and return it."""
    stat_features = [
        Feature(feature_name, feature_dtype)
        for feature_name, feature_dtype in (
            ("strings", ValueType.STRING),
            ("ints", ValueType.INT64),
            ("floats", ValueType.FLOAT),
        )
    ]
    stats_fs = FeatureSet(
        "feature_stats",
        features=stat_features,
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )
    client.apply(stats_fs)
    return stats_fs
示例#23
0
    def test_feature_set_ingest_failure(self, client, dataframe, exception):
        """Building, applying and ingesting a feature set must raise ``exception``."""
        # NOTE(review): every step is inside pytest.raises, so the test passes
        # whichever of them raises.
        with pytest.raises(exception):
            feature_set = FeatureSet("driver-feature-set")

            # Derive the fields from the dataframe.
            feature_set.infer_fields_from_df(dataframe)

            # Register with Feast core, then ingest the same dataframe.
            client.apply(feature_set)
            client.ingest(feature_set, dataframe=dataframe)
示例#24
0
def test_get_batch_features_with_gs_path(client, gcs_path):
    """Retrieve batch features using entity rows read from a GCS path.

    The ingested rows are also written to a local Avro file and uploaded
    under ``gcs_path`` with a timestamp appended. ``get_batch_features`` is
    then pointed at that location through a ``gs://`` glob.
    """
    gcs_fs1 = FeatureSet(
        "gcs_feature_set",
        features=[Feature("feature_value", ValueType.STRING)],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )

    client.apply(gcs_fs1)
    gcs_fs1 = client.get_feature_set(name="gcs_feature_set", version=1)

    N_ROWS = 10
    time_offset = datetime.utcnow().replace(tzinfo=pytz.utc)
    features_1_df = pd.DataFrame({
        "datetime": [time_offset] * N_ROWS,
        "entity_id": list(range(N_ROWS)),
        "feature_value": [f"{i}" for i in range(N_ROWS)],
    })
    client.ingest(gcs_fs1, features_1_df)

    # Rename column (datetime -> event_timestamp)
    features_1_df = features_1_df.rename(
        columns={"datetime": "event_timestamp"})

    # Output file to local
    file_name = "gcs_feature_set.avro"
    to_avro(df=features_1_df, file_path_or_buffer=file_name)

    uri = urlparse(gcs_path)
    bucket_name = uri.hostname
    ts = int(time.time())
    # Strip only the leading slash. The entity_rows glob below is built as
    # f"{gcs_path}{ts}/*", so a trailing slash in gcs_path has to survive here
    # too or the upload lands under a different prefix than the one queried.
    remote_path = str(uri.path).lstrip("/") + f"{ts}/{file_name}"

    # Upload file to gcs
    storage_client = storage.Client(project=None)
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob(remote_path)
    blob.upload_from_filename(file_name)

    feature_retrieval_job = client.get_batch_features(
        entity_rows=f"{gcs_path}{ts}/*",
        feature_ids=["gcs_feature_set:1:feature_value"])

    output = feature_retrieval_job.to_dataframe()
    print(output.head())

    # Feature values were ingested as the string form of the entity id.
    assert output["entity_id"].to_list() == [
        int(i) for i in output["gcs_feature_set_v1_feature_value"].to_list()
    ]
def test_basic_ingest_retrieval_str(client):
    """Ingest by feature set name and read the values back as online features."""
    # Set to another project to test ingestion based on current project context
    client.set_project(PROJECT_NAME + "_NS1")

    customer_fs = FeatureSet(
        name="cust_fs",
        features=[
            Feature(name="cust_rating", dtype=ValueType.INT64),
            Feature(name="cust_cost", dtype=ValueType.FLOAT),
        ],
        entities=[Entity("cust_id", ValueType.INT64)],
        max_age=Duration(seconds=3600),
    )
    client.apply(customer_fs)

    row_count = 2
    time_offset = datetime.utcnow().replace(tzinfo=pytz.utc)
    cust_df = pd.DataFrame({
        "datetime": [time_offset] * row_count,
        "cust_id": list(range(row_count)),
        "cust_rating": list(range(row_count)),
        "cust_cost": [i + 0.5 for i in range(row_count)],
    })
    # The feature set is referenced by its name, not by the object.
    client.ingest("cust_fs", cust_df, timeout=600)
    time.sleep(15)

    entity_rows = [{"cust_id": 0}, {"cust_id": 1}]
    feature_refs = ["cust_rating", "cust_cost"]

    def try_get_features():
        # Returns (response, True), the pair shape handed to the retry helper.
        response = client.get_online_features(
            entity_rows=entity_rows, feature_refs=feature_refs
        )
        return response, True

    online_features_actual = wait_retry_backoff(
        retry_fn=try_get_features,
        timeout_secs=90,
        timeout_msg="Timed out trying to get online feature values",
    )

    expected = {
        "cust_id": [0, 1],
        "cust_rating": [0, 1],
        "cust_cost": [0.5, 1.5],
    }
    assert online_features_actual.to_dict() == expected
示例#26
0
    def test_feature_set_ingest_failure(self, client, dataframe, exception):
        """Applying and ingesting with a mocked producer must raise ``exception``."""
        # NOTE(review): every step is inside pytest.raises, so the test passes
        # whichever of them raises.
        with pytest.raises(exception):
            feature_set = FeatureSet("driver-feature-set")
            feature_set.source = KafkaSource(topic="feature-topic",
                                             brokers="fake.broker.com")

            # Replace the client's message producer (and its ``produce``) with mocks.
            client._message_producer = MagicMock()
            client._message_producer.produce = MagicMock()

            # Derive the fields from the dataframe.
            feature_set.infer_fields_from_df(dataframe)

            # Register with Feast core, then ingest the same dataframe.
            client.apply(feature_set)
            client.ingest(feature_set, dataframe=dataframe)
示例#27
0
    def test_import_tfx_schema(self):
        """Importing a TFX schema populates presence, shape and domain fields.

        The schema is parsed from ``data/tensorflow_metadata/bikeshare_schema.json``
        next to this test file. Before the import every entity and feature has
        these fields set to ``None``; afterwards they must be populated.
        """
        tests_folder = pathlib.Path(__file__).parent
        schema_path = (tests_folder / "data" / "tensorflow_metadata" /
                       "bikeshare_schema.json")
        # read_text() opens and closes the file itself, so no handle is leaked.
        test_input_schema_json = schema_path.read_text()
        test_input_schema = schema_pb2.Schema()
        json_format.Parse(test_input_schema_json, test_input_schema)

        feature_set = FeatureSet(
            name="bikeshare",
            entities=[Entity(name="station_id", dtype=ValueType.INT64)],
            features=[
                Feature(name="name", dtype=ValueType.STRING),
                Feature(name="status", dtype=ValueType.STRING),
                Feature(name="latitude", dtype=ValueType.FLOAT),
                Feature(name="longitude", dtype=ValueType.FLOAT),
                Feature(name="location", dtype=ValueType.STRING),
            ],
        )

        # Before update
        for entity in feature_set.entities:
            assert entity.presence is None
            assert entity.shape is None
        for feature in feature_set.features:
            assert feature.presence is None
            assert feature.shape is None
            assert feature.string_domain is None
            assert feature.float_domain is None
            assert feature.int_domain is None

        feature_set.import_tfx_schema(test_input_schema)

        # After update
        for entity in feature_set.entities:
            assert entity.presence is not None
            assert entity.shape is not None
        for feature in feature_set.features:
            assert feature.presence is not None
            assert feature.shape is not None
            if feature.name in ["location", "name", "status"]:
                assert feature.string_domain is not None
            elif feature.name in ["latitude", "longitude"]:
                assert feature.float_domain is not None
            # NOTE(review): "station_id" is declared as an entity above, not a
            # feature, so this branch looks unreachable in this loop - confirm.
            elif feature.name in ["station_id"]:
                assert feature.int_domain is not None
示例#28
0
    def test_feature_set_types_success(self, test_client, dataframe, mocker):
        """Apply and ingest a feature set with one feature per listed value type."""
        test_client.set_project("project1")

        # (feature name, value type) pairs, scalar types first, then list types.
        typed_features = [
            ("float_feature", ValueType.FLOAT),
            ("int64_feature", ValueType.INT64),
            ("int32_feature", ValueType.INT32),
            ("string_feature", ValueType.STRING),
            ("bytes_feature", ValueType.BYTES),
            ("bool_feature", ValueType.BOOL),
            ("double_feature", ValueType.DOUBLE),
            ("float_list_feature", ValueType.FLOAT_LIST),
            ("int64_list_feature", ValueType.INT64_LIST),
            ("int32_list_feature", ValueType.INT32_LIST),
            ("string_list_feature", ValueType.STRING_LIST),
            ("bytes_list_feature", ValueType.BYTES_LIST),
            # TODO: Add support for bool_list_feature (ValueType.BOOL_LIST)
            # again, see https://github.com/feast-dev/feast/issues/341
            ("double_list_feature", ValueType.DOUBLE_LIST),
        ]

        all_types_fs = FeatureSet(
            name="all_types",
            entities=[Entity(name="user_id", dtype=ValueType.INT64)],
            features=[
                Feature(name=feature_name, dtype=feature_dtype)
                for feature_name, feature_dtype in typed_features
            ],
            max_age=Duration(seconds=3600),
        )

        # Register with Feast core
        test_client.apply(all_types_fs)

        # Have the core stub return this feature set's proto on lookup.
        lookup_response = GetFeatureSetResponse(
            feature_set=all_types_fs.to_proto())
        mocker.patch.object(
            test_client._core_service_stub,
            "GetFeatureSet",
            return_value=lookup_response,
        )

        # Need to create a mock producer
        with patch("feast.client.get_producer"):
            # Ingest data into Feast
            test_client.ingest(all_types_fs, dataframe)
示例#29
0
    def test_feature_set_ingest_success(self, dataframe, client, mocker):
        """Ingest a dataframe by feature set name after applying the feature set."""
        feature_set = FeatureSet("driver-feature-set")
        for field in (
            Feature(name="feature_1", dtype=ValueType.FLOAT),
            Feature(name="feature_2", dtype=ValueType.STRING),
            Feature(name="feature_3", dtype=ValueType.INT64),
            Entity(name="entity_id", dtype=ValueType.INT64),
        ):
            feature_set.add(field)

        # Register with Feast core
        client.apply(feature_set)

        # Have the core stub return this feature set's proto on lookup.
        lookup_response = GetFeatureSetResponse(
            feature_set=feature_set.to_proto())
        mocker.patch.object(
            client._core_service_stub,
            "GetFeatureSet",
            return_value=lookup_response,
        )

        # Ingest data into Feast
        client.ingest("driver-feature-set", dataframe=dataframe)
示例#30
0
    def test_feature_set_import_export_yaml(self):
        """A feature set exported with to_yaml and reloaded with from_yaml compares equal."""
        bikeshare_features = [
            Feature(name="name", dtype=ValueType.STRING),
            Feature(name="longitude", dtype=ValueType.FLOAT),
            Feature(name="location", dtype=ValueType.STRING),
        ]
        original = FeatureSet(
            name="bikeshare",
            entities=[Entity(name="station_id", dtype=ValueType.INT64)],
            features=bikeshare_features,
        )

        # Export to YAML, then rebuild a feature set from that output.
        exported_yaml = original.to_yaml()
        reloaded = FeatureSet.from_yaml(exported_yaml)

        # The reloaded feature set must equal the one that was exported.
        assert original == reloaded