def prep_local_fs_and_fv() -> Iterator[Tuple[FeatureStore, FeatureView]]:
    with tempfile.NamedTemporaryFile(suffix=".parquet") as f:
        df = create_dataset()
        f.close()
        df.to_parquet(f.name)
        file_source = FileSource(
            file_format=ParquetFormat(),
            file_url=f"file://{f.name}",
            event_timestamp_column="ts",
            created_timestamp_column="created_ts",
            date_partition_column="",
            field_mapping={"ts_1": "ts", "id": "driver_id"},
        )
        fv = get_feature_view(file_source)
        with tempfile.TemporaryDirectory() as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:
            config = RepoConfig(
                registry=str(Path(repo_dir_name) / "registry.db"),
                project=f"test_bq_correctness_{str(uuid.uuid4()).replace('-', '')}",
                provider="local",
                online_store=OnlineStoreConfig(
                    local=LocalOnlineStoreConfig(
                        path=str(Path(data_dir_name) / "online_store.db")
                    )
                ),
            )
            fs = FeatureStore(config=config)
            fs.apply([fv])

            yield fs, fv

def prep_dynamodb_fs_and_fv() -> Iterator[Tuple[FeatureStore, FeatureView]]:
    with tempfile.NamedTemporaryFile(suffix=".parquet") as f:
        df = create_dataset()
        f.close()
        df.to_parquet(f.name)
        file_source = FileSource(
            file_format=ParquetFormat(),
            file_url=f"file://{f.name}",
            event_timestamp_column="ts",
            created_timestamp_column="created_ts",
            date_partition_column="",
            field_mapping={"ts_1": "ts", "id": "driver_id"},
        )
        fv = get_feature_view(file_source)
        e = Entity(
            name="driver",
            description="id for driver",
            join_key="driver_id",
            value_type=ValueType.INT32,
        )
        with tempfile.TemporaryDirectory() as repo_dir_name:
            config = RepoConfig(
                registry=str(Path(repo_dir_name) / "registry.db"),
                project=f"test_bq_correctness_{str(uuid.uuid4()).replace('-', '')}",
                provider="aws",
                online_store=DynamoDBOnlineStoreConfig(region="us-west-2"),
                offline_store=FileOfflineStoreConfig(),
            )
            fs = FeatureStore(config=config)
            fs.apply([fv, e])

            yield fs, fv

def test_usage_on(dummy_exporter, enabling_toggle):
    _reload_feast()
    from feast.feature_store import FeatureStore

    with tempfile.TemporaryDirectory() as temp_dir:
        test_feature_store = FeatureStore(
            config=RepoConfig(
                registry=os.path.join(temp_dir, "registry.db"),
                project="fake_project",
                provider="local",
                online_store=SqliteOnlineStoreConfig(
                    path=os.path.join(temp_dir, "online.db")
                ),
            )
        )
        entity = Entity(
            name="driver_car_id",
            description="Car driver id",
            value_type=ValueType.STRING,
            tags={"team": "matchmaking"},
        )
        test_feature_store.apply([entity])

        assert len(dummy_exporter) == 3
        assert {
            "entrypoint": "feast.infra.local.LocalRegistryStore.get_registry_proto"
        }.items() <= dummy_exporter[0].items()
        assert {
            "entrypoint": "feast.infra.local.LocalRegistryStore.update_registry_proto"
        }.items() <= dummy_exporter[1].items()
        assert {
            "entrypoint": "feast.feature_store.FeatureStore.apply"
        }.items() <= dummy_exporter[2].items()

def feature_view_list(ctx: click.Context):
    """
    List all feature views
    """
    repo = ctx.obj["CHDIR"]
    cli_check_repo(repo)
    store = FeatureStore(repo_path=str(repo))
    table = []
    for feature_view in [
        *store.list_feature_views(),
        *store.list_request_feature_views(),
        *store.list_on_demand_feature_views(),
    ]:
        entities = set()
        if isinstance(feature_view, FeatureView):
            entities.update(feature_view.entities)
        elif isinstance(feature_view, OnDemandFeatureView):
            for backing_fv in feature_view.inputs.values():
                if isinstance(backing_fv, FeatureView):
                    entities.update(backing_fv.entities)
        table.append(
            [
                feature_view.name,
                entities if len(entities) > 0 else "n/a",
                type(feature_view).__name__,
            ]
        )

    from tabulate import tabulate

    print(tabulate(table, headers=["NAME", "ENTITIES", "TYPE"], tablefmt="plain"))

def serve_transformations_command(ctx: click.Context, port: int):
    """[Experimental] Start the feature consumption server locally on a given port."""
    repo = ctx.obj["CHDIR"]
    cli_check_repo(repo)
    store = FeatureStore(repo_path=str(repo))

    store.serve_transformations(port)

def test_non_local_feature_repo() -> None:
    """
    Test running apply on a sample repo, and make sure the infra gets created.
    """
    runner = CliRunner()
    with tempfile.TemporaryDirectory() as repo_dir_name:
        # Construct an example repo in a temporary dir
        repo_path = Path(repo_dir_name)

        repo_config = repo_path / "feature_store.yaml"
        repo_config.write_text(
            dedent(
                """
                project: foo
                registry: data/registry.db
                provider: local
                online_store:
                    path: data/online_store.db
                """
            )
        )

        repo_example = repo_path / "example.py"
        repo_example.write_text(
            (Path(__file__).parent / "example_feature_repo_1.py").read_text()
        )

        result = runner.run(["apply"], cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(0)

        fs = FeatureStore(repo_path=str(repo_path))
        assertpy.assert_that(fs.list_feature_views()).is_length(3)

        result = runner.run(["teardown"], cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(0)

def serve_command(ctx: click.Context, host: str, port: int, no_access_log: bool):
    """Start a feature server locally on a given port."""
    repo = ctx.obj["CHDIR"]
    cli_check_repo(repo)
    store = FeatureStore(repo_path=str(repo))

    store.serve(host, port, no_access_log)

def test_bigquery_query_to_datastore_correctness(self):
    # create dataset
    ts = pd.Timestamp.now(tz="UTC").round("ms")
    data = {
        "id": [1, 2, 1],
        "value": [0.1, 0.2, 0.3],
        "ts_1": [ts - timedelta(minutes=2), ts, ts],
        "created_ts": [ts, ts, ts],
    }
    df = pd.DataFrame.from_dict(data)

    # load dataset into BigQuery
    job_config = bigquery.LoadJobConfig()
    table_id = f"{self.gcp_project}.{self.bigquery_dataset}.query_correctness_{int(time.time())}"
    query = f"SELECT * FROM `{table_id}`"
    job = self.client.load_table_from_dataframe(df, table_id, job_config=job_config)
    job.result()

    # create FeatureView
    fv = FeatureView(
        name="test_bq_query_correctness",
        entities=["driver_id"],
        features=[Feature("value", ValueType.FLOAT)],
        ttl=timedelta(minutes=5),
        input=BigQuerySource(
            event_timestamp_column="ts",
            created_timestamp_column="created_ts",
            field_mapping={"ts_1": "ts", "id": "driver_id"},
            date_partition_column="",
            query=query,
        ),
    )
    config = RepoConfig(
        metadata_store="./metadata.db",
        project=f"test_bq_query_correctness_{int(time.time())}",
        provider="gcp",
    )
    fs = FeatureStore(config=config)
    fs.apply([fv])

    # run materialize()
    fs.materialize(
        [fv.name],
        datetime.utcnow() - timedelta(minutes=5),
        datetime.utcnow() - timedelta(minutes=0),
    )

    # check result of materialize(): driver 1 has two rows, and the one with
    # the latest event timestamp (value == 0.3) should win
    response_dict = fs.get_online_features(
        [f"{fv.name}:value"], [{"driver_id": 1}]
    ).to_dict()
    assert abs(response_dict[f"{fv.name}:value"][0] - 0.3) < 1e-6

def feature_store_with_s3_registry():
    return FeatureStore(
        config=RepoConfig(
            registry=f"s3://feast-integration-tests/registries/{int(time.time() * 1000)}/registry.db",
            project="default",
            provider="aws",
            online_store=DynamoDBOnlineStoreConfig(region="us-west-2"),
            offline_store=FileOfflineStoreConfig(),
        )
    )

def test_apply_remote_repo():
    fd, registry_path = mkstemp()
    fd, online_store_path = mkstemp()
    return FeatureStore(
        config=RepoConfig(
            registry=registry_path,
            project="default",
            provider="local",
            online_store=SqliteOnlineStoreConfig(path=online_store_path),
        )
    )

def feature_store_with_local_registry():
    fd, registry_path = mkstemp()
    fd, online_store_path = mkstemp()
    return FeatureStore(
        config=RepoConfig(
            registry=registry_path,
            project="default",
            provider="local",
            online_store=SqliteOnlineStoreConfig(path=online_store_path),
        )
    )

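# A minimal usage sketch (not part of the original suite), assuming
# feature_store_with_local_registry is registered as a pytest fixture; the
# entity name "driver_car_id" is illustrative.
def test_apply_entity_sketch(feature_store_with_local_registry):
    entity = Entity(
        name="driver_car_id",
        description="Car driver id",
        value_type=ValueType.STRING,
    )
    feature_store_with_local_registry.apply([entity])
    assert any(
        e.name == "driver_car_id"
        for e in feature_store_with_local_registry.list_entities()
    )
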
def feature_store_with_local_registry(self):
    fd, registry_path = mkstemp()
    fd, online_store_path = mkstemp()
    return FeatureStore(
        config=RepoConfig(
            metadata_store=registry_path,
            project="default",
            provider="local",
            online_store=OnlineStoreConfig(
                local=LocalOnlineStoreConfig(path=online_store_path)
            ),
        )
    )

def test_exception_usage_off(dummy_exporter, enabling_toggle):
    enabling_toggle.__bool__.return_value = False

    _reload_feast()
    from feast.feature_store import FeatureStore

    with pytest.raises(OSError):
        FeatureStore("/tmp/non_existent_directory")

    assert not dummy_exporter

def prep_bq_fs_and_fv(
    bq_source_type: str,
) -> Iterator[Tuple[FeatureStore, FeatureView]]:
    client = bigquery.Client()
    gcp_project = client.project
    bigquery_dataset = "test_ingestion"
    dataset = bigquery.Dataset(f"{gcp_project}.{bigquery_dataset}")
    client.create_dataset(dataset, exists_ok=True)
    dataset.default_table_expiration_ms = (
        1000 * 60 * 60 * 24 * 14
    )  # 2 weeks in milliseconds
    client.update_dataset(dataset, ["default_table_expiration_ms"])

    df = create_dataset()

    job_config = bigquery.LoadJobConfig()
    table_ref = f"{gcp_project}.{bigquery_dataset}.{bq_source_type}_correctness_{int(time.time_ns())}"
    query = f"SELECT * FROM `{table_ref}`"
    job = client.load_table_from_dataframe(df, table_ref, job_config=job_config)
    job.result()

    bigquery_source = BigQuerySource(
        table_ref=table_ref if bq_source_type == "table" else None,
        query=query if bq_source_type == "query" else None,
        event_timestamp_column="ts",
        created_timestamp_column="created_ts",
        date_partition_column="",
        field_mapping={"ts_1": "ts", "id": "driver_id"},
    )

    fv = driver_feature_view(bigquery_source)
    e = Entity(
        name="driver",
        description="id for driver",
        join_key="driver_id",
        value_type=ValueType.INT32,
    )
    with tempfile.TemporaryDirectory() as repo_dir_name:
        config = RepoConfig(
            registry=str(Path(repo_dir_name) / "registry.db"),
            project=f"test_bq_correctness_{str(uuid.uuid4()).replace('-', '')}",
            provider="gcp",
            online_store=DatastoreOnlineStoreConfig(namespace="integration_test"),
        )
        fs = FeatureStore(config=config)
        fs.apply([fv, e])

        yield fs, fv

        fs.teardown()

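# The prep_* helpers above are generators that yield a (FeatureStore,
# FeatureView) pair and then run cleanup (here, fs.teardown()). A hedged
# sketch of consuming one so the cleanup still executes; wrapping with
# contextlib.contextmanager is an assumption about how callers wire it up.
def run_with_bq_fixture() -> None:
    from contextlib import contextmanager

    with contextmanager(prep_bq_fs_and_fv)("table") as (fs, fv):
        fs.materialize_incremental(
            feature_views=[fv.name], end_date=datetime.utcnow()
        )
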
def disable_alpha_features(ctx: click.Context):
    """
    Disables all alpha features
    """
    repo = ctx.obj["CHDIR"]
    cli_check_repo(repo)
    repo_path = str(repo)
    store = FeatureStore(repo_path=repo_path)

    store.config.flags = None
    store.config.write_to_path(Path(repo_path))

def test_exception_usage_on(dummy_exporter, enabling_toggle):
    _reload_feast()
    from feast.feature_store import FeatureStore

    with pytest.raises(OSError):
        FeatureStore("/tmp/non_existent_directory")

    assert len(dummy_exporter) == 1
    assert {
        "entrypoint": "feast.feature_store.FeatureStore.__init__",
        "exception": repr(FileNotFoundError(2, "No such file or directory")),
    }.items() <= dummy_exporter[0].items()

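# The telemetry assertions above lean on a plain-Python idiom: dict.items()
# returns a set-like view, so `a.items() <= b.items()` is True exactly when
# every key/value pair of `a` also appears in `b`. A standalone illustration
# with made-up record contents:
def _items_subset_demo() -> None:
    record = {"entrypoint": "feast.feature_store.FeatureStore.__init__", "timestamp": 123}
    assert {"entrypoint": "feast.feature_store.FeatureStore.__init__"}.items() <= record.items()
    assert not ({"entrypoint": "something_else"}.items() <= record.items())
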
def _prepare_registry_and_repo(repo_config, repo_path):
    store = FeatureStore(config=repo_config)
    project = store.project
    if not is_valid_name(project):
        print(
            f"{project} is not valid. Project name should only contain "
            f"alphanumerical characters and underscores, and must not start with an underscore."
        )
        sys.exit(1)
    registry = store.registry
    sys.dont_write_bytecode = True
    repo = parse_repo(repo_path)
    return project, registry, repo, store

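# A hedged sketch of what a check like is_valid_name could look like, based
# only on the error message above (the real implementation may differ):
# alphanumeric characters and underscores, not starting with an underscore.
import re

def _is_valid_name_sketch(name: str) -> bool:
    # First character must be alphanumeric; the rest may include underscores.
    return re.fullmatch(r"[a-zA-Z0-9][a-zA-Z0-9_]*", name) is not None
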
def feature_view_list():
    """
    List all feature views
    """
    cli_check_repo(Path.cwd())
    store = FeatureStore(repo_path=str(Path.cwd()))
    table = []
    for feature_view in store.list_feature_views():
        table.append([feature_view.name, feature_view.entities])

    from tabulate import tabulate

    print(tabulate(table, headers=["NAME", "ENTITIES"], tablefmt="plain"))

def benchmark_writes():
    project_id = "test" + "".join(
        random.choice(string.ascii_lowercase + string.digits) for _ in range(10)
    )

    with tempfile.TemporaryDirectory() as temp_dir:
        store = FeatureStore(
            config=RepoConfig(
                registry=os.path.join(temp_dir, "registry.db"),
                project=project_id,
                provider="gcp",
            )
        )

        # This is just to set the data source to something; we're not actually
        # reading from the parquet source here.
        parquet_path = os.path.join(temp_dir, "data.parquet")

        driver = Entity(name="driver_id", value_type=ValueType.INT64)
        table = create_driver_hourly_stats_feature_view(
            create_driver_hourly_stats_source(parquet_path=parquet_path)
        )
        store.apply([table, driver])

        provider = store._get_provider()

        end_date = datetime.utcnow()
        start_date = end_date - timedelta(days=14)
        customers = list(range(100))
        data = create_driver_hourly_stats_df(customers, start_date, end_date)

        # Show the data for reference
        print(data)

        proto_data = _convert_arrow_to_proto(
            pa.Table.from_pandas(data), table, ["driver_id"]
        )

        # Write it
        with tqdm(total=len(proto_data)) as progress:
            provider.online_write_batch(
                project=store.project,
                table=table,
                data=proto_data,
                progress=progress.update,
            )

        registry_tables = store.list_feature_views()
        registry_entities = store.list_entities()
        provider.teardown_infra(
            store.project, tables=registry_tables, entities=registry_entities
        )

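# benchmark_writes drives the online write path but never times it. A hedged
# sketch of a timing wrapper around the same online_write_batch call, reusing
# the argument names from the function above (progress=None assumes the
# progress callback is optional):
def timed_write(provider, project, table, proto_data) -> float:
    import time

    t0 = time.perf_counter()
    provider.online_write_batch(
        project=project, table=table, data=proto_data, progress=None
    )
    elapsed = time.perf_counter() - t0
    print(
        f"wrote {len(proto_data)} rows in {elapsed:.2f}s "
        f"({len(proto_data) / elapsed:.0f} rows/s)"
    )
    return elapsed
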
def feature_view_list(ctx: click.Context):
    """
    List all feature views
    """
    repo = ctx.obj["CHDIR"]
    cli_check_repo(repo)
    store = FeatureStore(repo_path=str(repo))
    table = []
    for feature_view in store.list_feature_views():
        table.append([feature_view.name, feature_view.entities])

    from tabulate import tabulate

    print(tabulate(table, headers=["NAME", "ENTITIES"], tablefmt="plain"))

def enable_alpha_features(ctx: click.Context):
    """
    Enables all alpha features
    """
    repo = ctx.obj["CHDIR"]
    cli_check_repo(repo)
    repo_path = str(repo)
    store = FeatureStore(repo_path=repo_path)

    if store.config.flags is None:
        store.config.flags = {}
    for flag_name in flags.FLAG_NAMES:
        store.config.flags[flag_name] = True
    store.config.write_to_path(Path(repo_path))

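# A hedged round-trip sketch (not part of the original CLI): after
# enable_alpha_features writes the config, reloading the store should show
# every flag in flags.FLAG_NAMES as enabled, assuming
# flags_helper.feature_flag_enabled (used by list_alpha_features below)
# treats a flag as enabled once it and the master alpha flag are both True.
def check_all_flags_enabled(repo_path: str) -> None:
    store = FeatureStore(repo_path=repo_path)
    for flag_name in flags.FLAG_NAMES:
        assert flags_helper.feature_flag_enabled(store.config, flag_name)
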
def on_demand_feature_view_list(ctx: click.Context):
    """
    [Experimental] List all on demand feature views
    """
    repo = ctx.obj["CHDIR"]
    cli_check_repo(repo)
    store = FeatureStore(repo_path=str(repo))
    table = []
    for on_demand_feature_view in store.list_on_demand_feature_views():
        table.append([on_demand_feature_view.name])

    from tabulate import tabulate

    print(tabulate(table, headers=["NAME"], tablefmt="plain"))

def endpoint(ctx: click.Context):
    """
    Display feature server endpoints.
    """
    repo = ctx.obj["CHDIR"]
    cli_check_repo(repo)
    store = FeatureStore(repo_path=str(repo))
    endpoint = store.get_feature_server_endpoint()
    if endpoint is not None:
        _logger.info(
            f"Feature server endpoint: {Style.BRIGHT + Fore.GREEN}{endpoint}{Style.RESET_ALL}"
        )
    else:
        _logger.info("There is no active feature server.")

def materialize_incremental_command(end_ts: str, views: List[str]):
    """
    Run an incremental materialization job to ingest new data into the online
    store. Feast will read all data from the previously ingested point to END_TS
    from the offline store and write it to the online store. If you don't
    specify feature view names using --views, all registered Feature Views will
    be incrementally materialized.

    END_TS should be in ISO 8601 format, e.g. '2021-07-16T19:20:01'
    """
    cli_check_repo(Path.cwd())
    store = FeatureStore(repo_path=str(Path.cwd()))
    store.materialize_incremental(
        feature_views=None if not views else views,
        end_date=datetime.fromisoformat(end_ts),
    )

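# The CLI command above is a thin wrapper over the Python API. A hedged
# sketch of the equivalent direct call (repo path and timestamp are
# illustrative):
def materialize_incremental_example() -> None:
    store = FeatureStore(repo_path=".")
    store.materialize_incremental(
        feature_views=None,  # None materializes all registered feature views
        end_date=datetime.fromisoformat("2021-07-16T19:20:01"),
    )
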
def materialize_command(repo_path: str, start_ts: str, end_ts: str, views: List[str]):
    """
    Run a (non-incremental) materialization job to ingest data into the online
    store. Feast will read all data between START_TS and END_TS from the offline
    store and write it to the online store. If you don't specify feature view
    names using --views, all registered Feature Views will be materialized.

    START_TS and END_TS should be in ISO 8601 format, e.g. '2021-07-16T19:20:01'
    """
    store = FeatureStore(repo_path=repo_path)
    store.materialize(
        feature_views=None if not views else views,
        start_date=datetime.fromisoformat(start_ts).replace(tzinfo=utc),
        end_date=datetime.fromisoformat(end_ts).replace(tzinfo=utc),
    )

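# Unlike the incremental variant, materialize_command pins both parsed
# timestamps to UTC via .replace(tzinfo=utc) (pytz-style utc object). A hedged
# sketch of the equivalent direct call with an explicit window;
# "driver_hourly_stats" is an illustrative view name:
def materialize_window_example() -> None:
    store = FeatureStore(repo_path=".")
    store.materialize(
        feature_views=["driver_hourly_stats"],
        start_date=datetime.fromisoformat("2021-07-16T00:00:00").replace(tzinfo=utc),
        end_date=datetime.fromisoformat("2021-07-16T19:20:01").replace(tzinfo=utc),
    )
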
def test_basic() -> None:
    project_id = "".join(
        random.choice(string.ascii_lowercase + string.digits) for _ in range(10)
    )
    runner = CliRunner()
    with tempfile.TemporaryDirectory() as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:
        repo_path = Path(repo_dir_name)
        data_path = Path(data_dir_name)

        repo_config = repo_path / "feature_store.yaml"
        repo_config.write_text(
            dedent(
                f"""
                project: {project_id}
                registry: {data_path / "registry.db"}
                provider: aws
                online_store:
                  type: dynamodb
                  region: us-west-2
                offline_store:
                  type: redshift
                  cluster_id: feast-integration-tests
                  region: us-west-2
                  user: admin
                  database: feast
                  s3_staging_location: s3://feast-integration-tests/redshift
                  iam_role: arn:aws:iam::402087665549:role/redshift_s3_access_role
                """
            )
        )

        repo_example = repo_path / "example.py"
        repo_example.write_text(get_example_repo("example_feature_repo_1.py"))
        result = runner.run(["apply"], cwd=repo_path)
        assert result.returncode == 0

        # Doing another apply should be a no op, and should not cause errors
        result = runner.run(["apply"], cwd=repo_path)
        assert result.returncode == 0

        basic_rw_test(
            FeatureStore(repo_path=str(repo_path), config=None),
            view_name="driver_locations",
        )

        result = runner.run(["teardown"], cwd=repo_path)
        assert result.returncode == 0

def disable_alpha_feature(ctx: click.Context, name: str):
    """
    Disables an alpha feature
    """
    if name not in flags.FLAG_NAMES:
        raise ValueError(f"Flag name, {name}, not valid.")

    repo = ctx.obj["CHDIR"]
    cli_check_repo(repo)
    repo_path = str(repo)
    store = FeatureStore(repo_path=repo_path)

    if store.config.flags is None or name not in store.config.flags:
        return
    store.config.flags[name] = False
    store.config.write_to_path(Path(repo_path))

def list_alpha_features(ctx: click.Context):
    """
    Lists all alpha features
    """
    repo = ctx.obj["CHDIR"]
    cli_check_repo(repo)
    repo_path = str(repo)
    store = FeatureStore(repo_path=repo_path)

    flags_to_show = flags.FLAG_NAMES.copy()
    flags_to_show.remove(flags.FLAG_ALPHA_FEATURES_NAME)
    print("Alpha features:")
    for flag in flags_to_show:
        enabled_string = (
            "enabled"
            if flags_helper.feature_flag_enabled(store.config, flag)
            else "disabled"
        )
        print(f"{flag}: {enabled_string}")

def entity_list():
    """
    List all entities
    """
    cli_check_repo(Path.cwd())
    store = FeatureStore(repo_path=str(Path.cwd()))
    table = []
    for entity in store.list_entities():
        table.append([entity.name, entity.description, entity.value_type])

    from tabulate import tabulate

    print(tabulate(table, headers=["NAME", "DESCRIPTION", "TYPE"], tablefmt="plain"))

def entity_list(ctx: click.Context):
    """
    List all entities
    """
    repo = ctx.obj["CHDIR"]
    cli_check_repo(repo)
    store = FeatureStore(repo_path=str(repo))
    table = []
    for entity in store.list_entities():
        table.append([entity.name, entity.description, entity.value_type])

    from tabulate import tabulate

    print(tabulate(table, headers=["NAME", "DESCRIPTION", "TYPE"], tablefmt="plain"))