def test_usage_off():
    """Verify that no usage event reaches BigQuery when FEAST_USAGE is disabled."""
    old_environ = dict(os.environ)
    test_usage_id = str(uuid.uuid4())
    os.environ["FEAST_IS_USAGE_TEST"] = "True"
    os.environ["FEAST_USAGE"] = "False"
    os.environ["FEAST_FORCE_USAGE_UUID"] = test_usage_id
    try:
        with tempfile.TemporaryDirectory() as temp_dir:
            test_feature_store = FeatureStore(
                config=RepoConfig(
                    registry=os.path.join(temp_dir, "registry.db"),
                    project="fake_project",
                    provider="local",
                    online_store=SqliteOnlineStoreConfig(
                        path=os.path.join(temp_dir, "online.db")
                    ),
                )
            )
            entity = Entity(
                name="driver_car_id",
                description="Car driver id",
                value_type=ValueType.STRING,
                labels={"team": "matchmaking"},
            )
            # apply() is the instrumented call that would emit a usage event.
            test_feature_store.apply([entity])
    finally:
        # Fix: restore the caller's environment even if store setup or apply()
        # raises, so the FEAST_* overrides do not leak into subsequent tests.
        os.environ.clear()
        os.environ.update(old_environ)
    # Give the usage pipeline time to flush before querying BigQuery.
    sleep(30)
    rows = read_bigquery_usage_id(test_usage_id)
    assert rows.total_rows == 0
def main():
    """Demo driver: deploy the repo's definitions to AWS and read features back."""
    # Make wide dataframes print fully.
    pd.set_option("display.max_columns", None)
    pd.set_option("display.width", 1000)

    # Load the feature store from the current path
    store = FeatureStore(repo_path=".")

    print("Deploying feature store to AWS...")
    store.apply([driver, driver_hourly_stats_view])

    feature_refs = [
        "driver_hourly_stats:conv_rate",
        "driver_hourly_stats:acc_rate",
    ]

    print("Loading features into the online store...")
    store.materialize_incremental(end_date=datetime.now())

    print("Retrieving online features...")
    # Read the freshly materialized rows back from the online store (DynamoDB).
    online_features = store.get_online_features(
        feature_refs=feature_refs,
        entity_rows=[{"driver_id": 1001}, {"driver_id": 1002}],
    ).to_dict()
    print(pd.DataFrame.from_dict(online_features))
def store_offline(feature_store: FeatureStore, dataframe: FlyteSchema) -> FeatureStore:
    """Register the horse-colic entity and feature view against *dataframe*.

    The dataframe's remote path becomes the file-backed batch source; the
    configured store (with both definitions applied) is returned.
    """
    horse_colic_entity = Entity(name="Hospital Number", value_type=ValueType.STRING)

    # One Feature per column served from the parquet source.
    stat_features = [
        Feature(name="rectal temperature", dtype=ValueType.FLOAT),
        Feature(name="total protein", dtype=ValueType.FLOAT),
        Feature(name="peripheral pulse", dtype=ValueType.FLOAT),
        Feature(name="surgical lesion", dtype=ValueType.STRING),
        Feature(name="abdominal distension", dtype=ValueType.FLOAT),
        Feature(name="nasogastric tube", dtype=ValueType.STRING),
        Feature(name="outcome", dtype=ValueType.STRING),
        Feature(name="packed cell volume", dtype=ValueType.FLOAT),
        Feature(name="nasogastric reflux PH", dtype=ValueType.FLOAT),
    ]

    colic_source = FileSource(
        path=str(dataframe.remote_path),
        event_timestamp_column="timestamp",
    )

    horse_colic_feature_view = FeatureView(
        name="horse_colic_stats",
        entities=["Hospital Number"],
        features=stat_features,
        batch_source=colic_source,
        ttl=timedelta(days=1),
    )

    # Ingest the definitions into feast.
    feature_store.apply([horse_colic_entity, horse_colic_feature_view])
    return feature_store
def test_telemetry_on():
    """Verify that a telemetry event reaches BigQuery when FEAST_TELEMETRY is on."""
    old_environ = dict(os.environ)
    test_telemetry_id = str(uuid.uuid4())
    os.environ["FEAST_FORCE_TELEMETRY_UUID"] = test_telemetry_id
    os.environ["FEAST_IS_TELEMETRY_TEST"] = "True"
    os.environ["FEAST_TELEMETRY"] = "True"
    try:
        with tempfile.TemporaryDirectory() as temp_dir:
            test_feature_store = FeatureStore(
                config=RepoConfig(
                    registry=os.path.join(temp_dir, "registry.db"),
                    project="fake_project",
                    provider="local",
                    online_store=SqliteOnlineStoreConfig(
                        path=os.path.join(temp_dir, "online.db")
                    ),
                )
            )
            entity = Entity(
                name="driver_car_id",
                description="Car driver id",
                value_type=ValueType.STRING,
                labels={"team": "matchmaking"},
            )
            # apply() is the instrumented call that should emit the event.
            test_feature_store.apply([entity])
    finally:
        # Fix: restore the caller's environment even if store setup or apply()
        # raises, so the FEAST_* overrides do not leak into subsequent tests.
        os.environ.clear()
        os.environ.update(old_environ)
    ensure_bigquery_telemetry_id_with_retry(test_telemetry_id)
def setup_feature_store():
    """Prepares the local environment for a FeatureStore docstring test."""
    from datetime import datetime, timedelta

    from feast import Entity, Feature, FeatureStore, FeatureView, FileSource, ValueType
    from feast.repo_operations import init_repo

    # Scaffold a fresh local repo and open a store on it.
    init_repo("feature_repo", "local")
    store = FeatureStore(repo_path="feature_repo")

    driver_entity = Entity(
        name="driver_id",
        value_type=ValueType.INT64,
        description="driver id",
    )
    stats_source = FileSource(
        path="feature_repo/data/driver_stats.parquet",
        event_timestamp_column="event_timestamp",
        created_timestamp_column="created",
    )
    stats_view = FeatureView(
        name="driver_hourly_stats",
        entities=["driver_id"],
        ttl=timedelta(seconds=86400 * 1),
        features=[
            Feature(name="conv_rate", dtype=ValueType.FLOAT),
            Feature(name="acc_rate", dtype=ValueType.FLOAT),
            Feature(name="avg_daily_trips", dtype=ValueType.INT64),
        ],
        batch_source=stats_source,
    )
    store.apply([stats_view, driver_entity])

    # Load the recent window of feature values into the online store.
    store.materialize(
        start_date=datetime.utcnow() - timedelta(hours=3),
        end_date=datetime.utcnow() - timedelta(minutes=10),
    )
def main():
    """Demo driver: deploy to Snowflake, build training data, then read online."""
    # Make wide dataframes print fully.
    pd.set_option("display.max_columns", None)
    pd.set_option("display.width", 1000)

    # Load the feature store from the current path
    store = FeatureStore(repo_path=".")

    print("Deploying feature store to Snowflake...")
    store.apply([driver, driver_stats_fv])

    features = [
        "driver_hourly_stats:conv_rate",
        "driver_hourly_stats:acc_rate"
    ]

    # Entity dataframe: the rows that get enriched with historical features.
    # Three evenly spaced timestamps over the last three days.
    event_timestamps = [
        pd.Timestamp(dt, unit="ms", tz="UTC").round("ms")
        for dt in pd.date_range(
            start=datetime.now() - timedelta(days=3),
            end=datetime.now(),
            periods=3,
        )
    ]
    entity_df = pd.DataFrame({
        "event_timestamp": event_timestamps,
        "driver_id": [1001, 1002, 1003],
    })

    print("Retrieving training data...")
    # Point-in-time join of the entity dataframe against the Snowflake table.
    training_df = store.get_historical_features(
        features=features,
        entity_df=entity_df,
    ).to_df()
    print()
    print(training_df)
    print()

    print("Loading features into the online store...")
    store.materialize_incremental(end_date=datetime.now())
    print()

    print("Retrieving online features...")
    online_features = store.get_online_features(
        features=features,
        entity_rows=[{"driver_id": 1001}, {"driver_id": 1002}],
    ).to_dict()
    print()
    print(pd.DataFrame.from_dict(online_features))
def test_telemetry_on():
    """Verify a telemetry event is exported (default-config store variant)."""
    old_environ = dict(os.environ)
    test_telemetry_id = str(uuid.uuid4())
    os.environ["FEAST_FORCE_TELEMETRY_UUID"] = test_telemetry_id
    os.environ["FEAST_IS_TELEMETRY_TEST"] = "True"
    os.environ["FEAST_TELEMETRY"] = "True"
    try:
        # Uses the default repo config from the working directory.
        test_feature_store = FeatureStore()
        entity = Entity(
            name="driver_car_id",
            description="Car driver id",
            value_type=ValueType.STRING,
            labels={"team": "matchmaking"},
        )
        # apply() is the instrumented call that should emit the event.
        test_feature_store.apply([entity])
    finally:
        # Fix: restore the caller's environment even if apply() raises, so the
        # FEAST_* overrides do not leak into subsequent tests.
        os.environ.clear()
        os.environ.update(old_environ)
    ensure_bigquery_telemetry_id_with_retry(test_telemetry_id)
def test_telemetry_off():
    """Verify that no telemetry event is exported when FEAST_TELEMETRY is off."""
    old_environ = dict(os.environ)
    test_telemetry_id = str(uuid.uuid4())
    os.environ["FEAST_IS_TELEMETRY_TEST"] = "True"
    os.environ["FEAST_TELEMETRY"] = "False"
    os.environ["FEAST_FORCE_TELEMETRY_UUID"] = test_telemetry_id
    try:
        # Uses the default repo config from the working directory.
        test_feature_store = FeatureStore()
        entity = Entity(
            name="driver_car_id",
            description="Car driver id",
            value_type=ValueType.STRING,
            labels={"team": "matchmaking"},
        )
        # apply() is the instrumented call that would emit a telemetry event.
        test_feature_store.apply([entity])
    finally:
        # Fix: restore the caller's environment even if apply() raises, so the
        # FEAST_* overrides do not leak into subsequent tests.
        os.environ.clear()
        os.environ.update(old_environ)
    # Give the telemetry pipeline time to flush before querying BigQuery.
    sleep(30)
    rows = read_bigquery_telemetry_id(test_telemetry_id)
    assert rows.total_rows == 0
# An entity is effectively the primary key used to fetch feature values
# for a driver.
driver = Entity(
    name="driver_id",
    value_type=ValueType.INT64,
    description="driver id",
)

# The parquet source carries driver_id, timestamps, and three feature
# columns; this view serves them to the model online.
driver_hourly_stats_view = FeatureView(
    name="driver_hourly_stats",
    entities=["driver_id"],
    ttl=Duration(seconds=86400 * 365),
    features=[
        Feature(name="conv_rate", dtype=ValueType.DOUBLE),
        Feature(name="acc_rate", dtype=ValueType.FLOAT),
        Feature(name="avg_daily_trips", dtype=ValueType.INT64),
    ],
    online=True,
    batch_source=driver_hourly_stats,
    tags={},
)

# Register both definitions, then push the latest values online.
fs = FeatureStore("")
fs.apply([driver_hourly_stats_view, driver])

now = datetime.now()
fs.materialize_incremental(now)
)  # NOTE(review): closes a definition that starts outside this view — do not remove.

# 25 feature views of 10 int64 features each (250 features total), all
# reading from the shared generated parquet source.
benchmark_feature_views = [
    FeatureView(
        name=f"feature_view_{i}",
        entities=["entity"],
        ttl=Duration(seconds=86400),
        features=[
            Feature(name=f"feature_{10 * i + j}", dtype=ValueType.INT64)
            for j in range(10)
        ],
        online=True,
        batch_source=generated_data_source,
    )
    for i in range(25)
]

# One service bundling every benchmark view so all features can be
# requested together.
benchmark_feature_service = FeatureService(
    name=f"benchmark_feature_service",
    features=benchmark_feature_views,
)

# Register everything and materialize from `start` (defined elsewhere) to now.
fs = FeatureStore(".")
fs.apply([
    driver_hourly_stats_view,
    driver,
    entity,
    benchmark_feature_service,
    *benchmark_feature_views
])

now = datetime.now()
fs.materialize(start, now)
print("Materialization finished")
def construct_test_environment(
    test_repo_config: TestRepoConfig,
    create_and_apply: bool = False,
    materialize: bool = False,
) -> Environment:
    """
    This method should take in the parameters from the test repo config and created a
    feature repo, apply it, and return the constructed feature store object to callers.

    This feature store object can be interacted for the purposes of tests.
    The user is *not* expected to perform any clean up actions.

    :param test_repo_config: configuration
    :return: A feature store built using the supplied configuration.
    """
    # NOTE(review): despite the -> Environment annotation this is a generator
    # (it yields) — presumably consumed as a context manager / fixture; confirm.
    df = create_dataset()
    # Unique project name per run so parallel/repeated tests don't collide.
    project = f"test_correctness_{str(uuid.uuid4()).replace('-', '')[:8]}"

    # Resolve the DataSourceCreator class from its dotted path and instantiate it.
    module_name, config_class_name = test_repo_config.offline_store_creator.rsplit(
        ".", 1)

    offline_creator: DataSourceCreator = importer.get_class_from_type(
        module_name, config_class_name, "DataSourceCreator")(project)
    # Upload the dataset, remapping its columns to the names the tests expect.
    ds = offline_creator.create_data_source(project, df, field_mapping={
        "ts_1": "ts", "id": "driver_id"
    })
    offline_store = offline_creator.create_offline_store_config()
    online_store = test_repo_config.online_store

    with tempfile.TemporaryDirectory() as repo_dir_name:
        config = RepoConfig(
            registry=str(Path(repo_dir_name) / "registry.db"),
            project=project,
            provider=test_repo_config.provider,
            offline_store=offline_store,
            online_store=online_store,
            repo_path=repo_dir_name,
        )
        fs = FeatureStore(config=config)
        environment = Environment(
            name=project,
            test_repo_config=test_repo_config,
            feature_store=fs,
            data_source=ds,
            data_source_creator=offline_creator,
        )

        fvs = []
        entities = []
        try:
            if create_and_apply:
                entities.extend([driver(), customer()])
                fvs.extend([
                    environment.driver_stats_feature_view(),
                    environment.customer_feature_view(),
                ])
                fs.apply(fvs + entities)

            if materialize:
                fs.materialize(environment.start_date, environment.end_date)

            yield environment
        finally:
            # Tear down external resources even if the test body raised;
            # data source first, then the store itself.
            offline_creator.teardown()
            fs.teardown()
def generate_data(num_rows: int, num_features: int, destination: str) -> pd.DataFrame:
    """Write a synthetic benchmark dataset to *destination* as parquet.

    Columns: "entity" (string keys "key-1".."key-<num_rows>"),
    "event_timestamp" (a single current UTC timestamp broadcast to all rows),
    and ``num_features`` random int64 feature columns named "feature_<i>".

    :param num_rows: number of rows to generate
    :param num_features: number of feature columns to generate
    :param destination: parquet output path
    :return: the generated dataframe
    """
    features = [f"feature_{i}" for i in range(num_features)]
    columns = ["entity", "event_timestamp"] + features
    df = pd.DataFrame(0, index=np.arange(num_rows), columns=columns)
    df["event_timestamp"] = datetime.utcnow()
    for column in features:
        # Random ints in [1, num_rows) per feature column.
        df[column] = np.random.randint(1, num_rows, num_rows)
    df["entity"] = "key-" + pd.Series(np.arange(1, num_rows + 1)).astype(
        pd.StringDtype())
    df.to_parquet(destination)
    # Fix: the original fell through and returned None despite the
    # -> pd.DataFrame annotation; return the frame (callers ignoring the
    # result are unaffected).
    return df


generate_data(10**3, 250, "benchmark_data.parquet")

# Register all benchmark definitions and materialize from `start`
# (defined elsewhere) to now.
fs = FeatureStore(".")
fs.apply([
    driver_hourly_stats_view,
    transformed_conv_rate,
    driver,
    entity,
    benchmark_feature_service,
    *benchmark_feature_views,
])

now = datetime.now()
fs.materialize(start, now)
print("Materialization finished")