Пример #1
0
def test_basic_register_feature_set_success(client):
    """Check that feature sets can be registered and read back unchanged.

    Three scenarios are exercised against ``client``:

    1. applying feature sets loaded from YAML without selecting a project,
    2. applying a feature set after selecting ``PROJECT_NAME``,
    3. applying labelled/unlabelled feature sets and listing by label.

    The client's project is always reset at the end, even when an assertion
    fails, so later tests sharing the same client are not affected.
    """
    # Register feature set without project
    cust_trans_fs_expected = FeatureSet.from_yaml(
        f"{DIR_PATH}/basic/cust_trans_fs.yaml")
    driver_fs_expected = FeatureSet.from_yaml(
        f"{DIR_PATH}/basic/driver_fs.yaml")
    client.apply(cust_trans_fs_expected)
    client.apply(driver_fs_expected)
    cust_trans_fs_actual = client.get_feature_set("customer_transactions")
    assert cust_trans_fs_actual == cust_trans_fs_expected
    driver_fs_actual = client.get_feature_set("driver")
    assert driver_fs_actual == driver_fs_expected

    # Register feature set with project
    cust_trans_fs_expected = FeatureSet.from_yaml(
        f"{DIR_PATH}/basic/cust_trans_fs.yaml")
    client.set_project(PROJECT_NAME)
    try:
        client.apply(cust_trans_fs_expected)
        cust_trans_fs_actual = client.get_feature_set(
            "customer_transactions", project=PROJECT_NAME)
        assert cust_trans_fs_actual == cust_trans_fs_expected

        # Register feature set with labels
        driver_unlabelled_fs = FeatureSet(
            "driver_unlabelled",
            features=[
                Feature("rating", ValueType.FLOAT),
                Feature("cost", ValueType.FLOAT)
            ],
            entities=[Entity("entity_id", ValueType.INT64)],
            max_age=Duration(seconds=100),
        )
        driver_labeled_fs_expected = FeatureSet(
            "driver_labeled",
            features=[
                Feature("rating", ValueType.FLOAT),
                Feature("cost", ValueType.FLOAT)
            ],
            entities=[Entity("entity_id", ValueType.INT64)],
            max_age=Duration(seconds=100),
            labels={"key1": "val1"},
        )
        # The project selected above is still active for these applies.
        client.apply(driver_unlabelled_fs)
        client.apply(driver_labeled_fs_expected)
        labeled_feature_sets = client.list_feature_sets(
            project=PROJECT_NAME, labels={"key1": "val1"})
        # Fail with a readable message instead of an IndexError on [0].
        assert labeled_feature_sets, (
            "no feature sets returned for labels {'key1': 'val1'}")
        driver_fs_actual = labeled_feature_sets[0]
        assert driver_fs_actual == driver_labeled_fs_expected
    finally:
        # reset client's project for other tests
        client.set_project()
Пример #2
0
def test_sources_deduplicate_ingest_jobs(client):
    """Check that feature sets sharing a source share a single ingest job.

    All existing ingest jobs are stopped first. Then the number of RUNNING
    jobs is polled after each change of sources: one job when both feature
    sets use the same source, two when they differ, and one again once they
    are pointed back at the same source.
    """
    primary_source = KafkaSource("localhost:9092", "feast-features")
    secondary_source = KafkaSource("localhost:9092", "feast-data")

    def running_job_count():
        # Number of ingest jobs currently reported with RUNNING status.
        running = []
        for job in client.list_ingest_jobs():
            if job.status == IngestionJobStatus.RUNNING:
                running.append(job)
        return len(running)

    def wait_for_running_jobs(target, lower, upper):
        # Poll once a second until exactly `target` jobs are running; while
        # waiting, the running count must stay within [lower, upper].
        while running_job_count() != target:
            assert lower <= running_job_count() <= upper
            time.sleep(1)

    # stop all ingest jobs, then wait for each of them to be aborted
    existing_jobs = client.list_ingest_jobs()
    for job in existing_jobs:
        client.stop_ingest_job(job)
    for job in existing_jobs:
        job.wait(IngestionJobStatus.ABORTED)

    # register multiple feature sets with the same source;
    # only one ingest job should be spawned due to ingest job deduplication
    cust_trans_fs = FeatureSet.from_yaml(
        f"{DIR_PATH}/basic/cust_trans_fs.yaml")
    driver_fs = FeatureSet.from_yaml(f"{DIR_PATH}/basic/driver_fs.yaml")
    cust_trans_fs.source = primary_source
    driver_fs.source = primary_source
    client.apply(cust_trans_fs)
    client.apply(driver_fs)
    wait_for_running_jobs(1, 0, 1)

    # update feature sets with different sources, should spawn 2 ingest jobs
    driver_fs.source = secondary_source
    client.apply(driver_fs)
    wait_for_running_jobs(2, 1, 2)

    # update feature sets with same source again, should leave 1 ingest job
    driver_fs.source = primary_source
    client.apply(driver_fs)
    wait_for_running_jobs(1, 1, 2)
def test_basic_register_feature_set_success(client):
    """Check that feature sets can be registered with and without a project.

    Feature sets loaded from YAML are applied through ``client`` and read
    back; the retrieved feature sets must compare equal to the applied ones.
    The client's project is reset at the end even if an assertion fails, so
    later tests sharing the same client are not affected.
    """
    # Register feature set without project
    cust_trans_fs_expected = FeatureSet.from_yaml("basic/cust_trans_fs.yaml")
    driver_fs_expected = FeatureSet.from_yaml("basic/driver_fs.yaml")
    client.apply(cust_trans_fs_expected)
    client.apply(driver_fs_expected)
    cust_trans_fs_actual = client.get_feature_set("customer_transactions")
    assert cust_trans_fs_actual == cust_trans_fs_expected
    driver_fs_actual = client.get_feature_set("driver")
    assert driver_fs_actual == driver_fs_expected

    # Register feature set with project
    cust_trans_fs_expected = FeatureSet.from_yaml("basic/cust_trans_fs.yaml")
    client.set_project(PROJECT_NAME)
    try:
        client.apply(cust_trans_fs_expected)
        cust_trans_fs_actual = client.get_feature_set(
            "customer_transactions", project=PROJECT_NAME)
        assert cust_trans_fs_actual == cust_trans_fs_expected
    finally:
        # reset client's project for other tests
        client.set_project()
Пример #4
0
    def test_export_tfx_schema(self):
        """Check that an exported TFX schema matches the reference schema.

        A feature set is loaded from the bikeshare YAML fixture and exported
        with ``export_tfx_schema()``. The result is compared feature by
        feature, on serialized bytes, against the schema parsed from the
        bikeshare JSON fixture.
        """
        fixtures_dir = (pathlib.Path(__file__).parent / "data" /
                        "tensorflow_metadata")
        test_input_feature_set = FeatureSet.from_yaml(
            str(fixtures_dir / "bikeshare_feature_set.yaml"))

        # read_text() opens and closes the file, so no handle is leaked.
        expected_schema_json = (fixtures_dir /
                                "bikeshare_schema.json").read_text()
        expected_schema = schema_pb2.Schema()
        json_format.Parse(expected_schema_json, expected_schema)
        _make_tfx_schema_domain_info_inline(expected_schema)

        actual_schema = test_input_feature_set.export_tfx_schema()

        # Compare lengths first: zip() would silently truncate otherwise.
        assert len(actual_schema.feature) == len(expected_schema.feature)
        for actual, expected in zip(actual_schema.feature,
                                    expected_schema.feature):
            assert actual.SerializeToString() == expected.SerializeToString()
Пример #5
0
def test_large_volume_register_feature_set_success(client):
    """Check that the large-volume feature set can be registered and read back.

    The feature set is loaded from YAML, applied through ``client`` and then
    fetched by name after a fixed wait.

    Raises:
        Exception: if the feature set cannot be retrieved after registration.
    """
    cust_trans_fs = FeatureSet.from_yaml(
        "large_volume/cust_trans_large_fs.yaml")

    # Register feature set
    client.apply(cust_trans_fs)

    # Feast Core needs some time to fully commit the FeatureSet applied
    # when there is no existing job yet for the Featureset
    time.sleep(10)
    cust_trans_fs_applied = client.get_feature_set(
        name="customer_transactions_large")

    # Check the object returned by the server, not the locally loaded one
    # (which is never None), otherwise this test can never fail.
    if cust_trans_fs_applied is None:
        raise Exception(
            "Client cannot retrieve 'customer_transactions_large' FeatureSet "
            "after registration. Either Feast Core does not save the "
            "FeatureSet correctly or the client needs to wait longer for FeatureSet "
            "to be committed.")
Пример #6
0
def test_basic_register_feature_set_success(client):
    """Check that a feature set registered under a project reads back equal.

    The feature set is loaded from YAML, applied through ``client`` with
    ``PROJECT_NAME`` selected, and fetched again by name.

    Raises:
        Exception: if the feature set cannot be retrieved after registration.
    """
    # Load feature set from file
    cust_trans_fs_expected = FeatureSet.from_yaml("basic/cust_trans_fs.yaml")

    client.set_project(PROJECT_NAME)

    # Register feature set
    client.apply(cust_trans_fs_expected)

    cust_trans_fs_actual = client.get_feature_set(name="customer_transactions")

    # Check for a missing feature set first: the equality assertion below
    # would otherwise fail before this more descriptive error is reached.
    if cust_trans_fs_actual is None:
        raise Exception(
            "Client cannot retrieve 'customer_transactions' FeatureSet "
            "after registration. Either Feast Core does not save the "
            "FeatureSet correctly or the client needs to wait longer for FeatureSet "
            "to be committed.")

    assert cust_trans_fs_actual == cust_trans_fs_expected
Пример #7
0
    def test_feature_set_import_export_yaml(self):
        """Round-trip a feature set through YAML and verify it is unchanged."""
        station_entity = Entity(name="station_id", dtype=ValueType.INT64)
        bikeshare_features = [
            Feature(name=feature_name, dtype=feature_dtype)
            for feature_name, feature_dtype in (
                ("name", ValueType.STRING),
                ("longitude", ValueType.FLOAT),
                ("location", ValueType.STRING),
            )
        ]
        original = FeatureSet(
            name="bikeshare",
            entities=[station_entity],
            features=bikeshare_features,
        )

        # Serialize to a YAML string, then rebuild a feature set from it.
        round_tripped = FeatureSet.from_yaml(original.to_yaml())

        # The rebuilt feature set must compare equal to the original.
        assert original == round_tripped
Пример #8
0
def test_basic_register_feature_set_success(client):
    """Check that a registered feature set reads back equal to what was applied.

    The feature set is loaded from YAML, applied through ``client`` and
    fetched again by name after a fixed wait.

    Raises:
        Exception: if the feature set cannot be retrieved after registration.
    """
    # Load feature set from file
    cust_trans_fs_expected = FeatureSet.from_yaml("basic/cust_trans_fs.yaml")

    # Register feature set
    client.apply(cust_trans_fs_expected)

    # Feast Core needs some time to fully commit the FeatureSet applied
    # when there is no existing job yet for the Featureset
    time.sleep(15)

    cust_trans_fs_actual = client.get_feature_set(name="customer_transactions")

    # Check for a missing feature set first: the equality assertion below
    # would otherwise fail before this more descriptive error is reached.
    if cust_trans_fs_actual is None:
        raise Exception(
            "Client cannot retrieve 'customer_transactions' FeatureSet "
            "after registration. Either Feast Core does not save the "
            "FeatureSet correctly or the client needs to wait longer for FeatureSet "
            "to be committed.")

    assert cust_trans_fs_actual == cust_trans_fs_expected
Пример #9
0
def test_all_types_parquet_register_feature_set_success(client):
    """Check that the all-types parquet feature set registers and reads back.

    The feature set is loaded from YAML, applied through ``client`` and
    fetched again by name after a fixed wait.

    Raises:
        Exception: if the feature set cannot be retrieved after registration.
    """
    # Load feature set from file
    all_types_parquet_expected = FeatureSet.from_yaml(
        f"{DIR_PATH}/all_types_parquet/all_types_parquet.yaml")

    # Register feature set
    client.apply(all_types_parquet_expected)

    # Feast Core needs some time to fully commit the FeatureSet applied
    # when there is no existing job yet for the Featureset
    time.sleep(30)

    all_types_parquet_actual = client.get_feature_set(name="all_types_parquet")

    # Check for a missing feature set first: the equality assertion below
    # would otherwise fail before this more descriptive error is reached.
    if all_types_parquet_actual is None:
        raise Exception(
            "Client cannot retrieve 'all_types_parquet' FeatureSet "
            "after registration. Either Feast Core does not save the "
            "FeatureSet correctly or the client needs to wait longer for FeatureSet "
            "to be committed.")

    assert all_types_parquet_actual == all_types_parquet_expected
Пример #10
0
def test_basic(client):
    """Ingest customer transaction rows and poll online serving for them.

    Fetches version 1 of the "customer_transactions" feature set (registering
    it from YAML first when it is not found), ingests five generated rows,
    then repeatedly requests online features for the first row until the
    returned ``daily_transactions`` value is within ``FLOAT_TOLERANCE`` of the
    value that was sent.

    Raises:
        Exception: if the feature set still cannot be retrieved after it has
            been registered.
    """

    cust_trans_fs = client.get_feature_set(name="customer_transactions",
                                           version=1)

    # TODO: Fix source handling in Feast Core to support true idempotent
    #  applies. In this case, applying a feature set without a source will
    #  create a new feature set every time.

    if cust_trans_fs is None:
        # Load feature set from file
        cust_trans_fs = FeatureSet.from_yaml("basic/cust_trans_fs.yaml")

        # Register feature set
        client.apply(cust_trans_fs)

        # Feast Core needs some time to fully commit the FeatureSet applied
        # when there is no existing job yet for the Featureset
        time.sleep(15)
        cust_trans_fs = client.get_feature_set(name="customer_transactions",
                                               version=1)

        if cust_trans_fs is None:
            raise Exception(
                "Client cannot retrieve 'customer_transactions' FeatureSet "
                "after registration. Either Feast Core does not save the "
                "FeatureSet correctly or the client needs to wait longer for FeatureSet "
                "to be committed.")

    offset = random.randint(1000, 100000)  # ensure a unique key space is used
    # Five rows: consecutive customer ids starting at `offset`, a random
    # daily_transactions value per row and a constant total_transactions.
    customer_data = pd.DataFrame({
        "datetime":
        [datetime.utcnow().replace(tzinfo=pytz.utc) for _ in range(5)],
        "customer_id": [offset + inc for inc in range(5)],
        "daily_transactions": [np.random.rand() for _ in range(5)],
        "total_transactions": [512 for _ in range(5)],
    })

    # Ingest customer transaction data
    cust_trans_fs.ingest(dataframe=customer_data)

    # Poll serving for feature values until the correct values are returned
    # NOTE(review): this loop has no timeout; it only exits once the returned
    # value matches, so it spins forever if serving never returns it.
    while True:
        time.sleep(1)

        # Only the first ingested row is looked up.
        response = client.get_online_features(
            entity_rows=[
                GetOnlineFeaturesRequest.EntityRow(
                    fields={
                        "customer_id":
                        Value(int64_val=customer_data.iloc[0]["customer_id"])
                    })
            ],
            feature_ids=[
                "customer_transactions:1:daily_transactions",
                "customer_transactions:1:total_transactions",
            ],
        )  # type: GetOnlineFeaturesResponse

        # No response yet: try again on the next iteration.
        if response is None:
            continue

        returned_daily_transactions = float(
            response.field_values[0].
            fields["customer_transactions:1:daily_transactions"].float_val)
        sent_daily_transactions = float(
            customer_data.iloc[0]["daily_transactions"])

        # Compare with an absolute tolerance rather than exact float equality.
        if math.isclose(
                sent_daily_transactions,
                returned_daily_transactions,
                abs_tol=FLOAT_TOLERANCE,
        ):
            break