def test_dump(self):
    entity = Entity("entity", "description", ["tag1", "tag2"])
    entity.dump("myentity.yaml")
    actual = Entity.from_yaml("myentity.yaml")

    assert actual.name == entity.name
    assert actual.description == entity.description
    for t1, t2 in zip(actual.tags, entity.tags):
        assert t1 == t2

    # cleanup
    os.remove("myentity.yaml")
def test_read_from_yaml(self):
    entity_no_tag = Entity.from_yaml(
        "tests/sample/valid_entity_no_tag.yaml")
    assert entity_no_tag.name == "myentity"
    assert entity_no_tag.description == "test entity without tag"
    assert len(entity_no_tag.tags) == 0

    entity = Entity.from_yaml("tests/sample/valid_entity.yaml")
    assert entity.name == "myentity"
    assert entity.description == "test entity with tag"
    assert entity.tags[0] == "tag1"
    assert entity.tags[1] == "tag2"
def from_df(cls, df, entity, granularity, owner, staging_location,
            id_column=None, feature_columns=None, timestamp_column=None,
            timestamp_value=None, serving_store=None, warehouse_store=None,
            job_options={}):
    """Creates an importer from a given pandas dataframe.
    To import from a dataframe, the data must first be staged.

    Args:
        df (pandas.DataFrame): dataframe to import
        entity (str): entity id
        granularity (Granularity): granularity of data
        owner (str): owner
        staging_location (str): staging location where the dataframe
            will be written as a temporary csv before ingestion.
        id_column (str, optional): Defaults to None. Id column in the
            dataframe. If not set, will default to the `entity` argument.
        feature_columns ([str], optional): Defaults to None. Feature
            columns to ingest. If not set, the importer will by default
            ingest all available columns.
        timestamp_column (str, optional): Defaults to None. Timestamp
            column in the dataframe. If not set, defaults to timestamp
            value.
        timestamp_value (datetime, optional): Defaults to current
            datetime. Timestamp value to assign to all features in the
            dataset.
        serving_store (feast.sdk.resources.feature.DataStore): Defaults
            to None. Serving store to write the features in this
            instance to.
        warehouse_store (feast.sdk.resources.feature.DataStore):
            Defaults to None. Warehouse store to write the features in
            this instance to.
        job_options (dict): Defaults to empty dict. Additional job
            options.

    Returns:
        Importer: the importer for the dataset provided.
    """
    tmp_file_name = ("tmp_{}_{}.csv".format(
        entity, int(round(time.time() * 1000))))
    source_options = {"format": "csv"}
    source_options["path"], require_staging = (_get_remote_location(
        tmp_file_name, staging_location))
    schema, features = \
        _detect_schema_and_feature(entity, granularity, owner, id_column,
                                   feature_columns, timestamp_column,
                                   timestamp_value, serving_store,
                                   warehouse_store, df)
    iport_spec = _create_import("file", source_options, job_options,
                                entity, schema)

    props = _properties("dataframe", len(df.index), require_staging,
                        source_options["path"])
    specs = _specs(iport_spec, Entity(name=entity), features)

    return cls(specs, df, props)
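A minimal usage sketch for from_df, assuming the Importer class named in the docstring is importable from feast.sdk.importer and Granularity from feast.sdk.resources.feature_set; the module paths, column names, and enum member are illustrative assumptions, not taken from the source.

from datetime import datetime

import pandas as pd

from feast.sdk.importer import Importer                   # assumed module path
from feast.sdk.resources.feature_set import Granularity   # assumed module path

# Toy dataframe; the id column name matches the `entity` argument so
# id_column can be left unset.
df = pd.DataFrame({
    "customer": [1, 2, 3],
    "total_purchases": [10, 2, 7],
})

importer = Importer.from_df(
    df,
    entity="customer",
    granularity=Granularity.DAY,                # assumed enum member
    owner="bob@example.com",
    staging_location="gs://my-bucket/staging",  # dataframe is staged as a temporary csv
    timestamp_value=datetime.utcnow())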
def test_apply_multiple(self, client, mocker):
    my_storage = Storage(id="TEST", type="redis")
    my_feature_group = FeatureGroup(id="test")
    my_entity = Entity(name="test")

    grpc_stub = core.CoreServiceStub(grpc.insecure_channel(""))
    mocker.patch.object(
        grpc_stub, 'ApplyStorage',
        return_value=CoreServiceTypes.ApplyStorageResponse(
            storageId="TEST"))
    mocker.patch.object(
        grpc_stub, 'ApplyFeatureGroup',
        return_value=CoreServiceTypes.ApplyFeatureGroupResponse(
            featureGroupId="test"))
    mocker.patch.object(
        grpc_stub, 'ApplyEntity',
        return_value=CoreServiceTypes.ApplyEntityResponse(
            entityName="test"))

    client._core_service_stub = grpc_stub
    ids = client.apply([my_storage, my_entity, my_feature_group])
    assert ids == ["TEST", "test", "test"]
def _register_resources(client, entities_fldr, features_fldr):
    resources = []
    for ent_file in os.listdir(entities_fldr):
        resources.append(
            Entity.from_yaml(os.path.join(entities_fldr, ent_file)))
    for feat_file in os.listdir(features_fldr):
        resources.append(
            Feature.from_yaml(os.path.join(features_fldr, feat_file)))
    client.apply(resources)
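A hedged sketch of how _register_resources might be driven, assuming a Client importable from feast.sdk.client that accepts a core_url keyword, and a directory layout of one YAML spec per entity or feature; all of these details are assumptions for illustration.

from feast.sdk.client import Client   # assumed module path and constructor

client = Client(core_url="localhost:6565")   # assumed keyword argument

# Each folder is expected to contain one YAML spec file per resource.
_register_resources(client,
                    entities_fldr="registration/entities",
                    features_fldr="registration/features")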
def from_csv(cls, path, entity, granularity, owner, staging_location=None,
             id_column=None, feature_columns=None, timestamp_column=None,
             timestamp_value=None, serving_store=None,
             warehouse_store=None):
    """Creates an importer from a given csv dataset.
    The file can be either local or remote (in gcs). If the file is
    local, staging_location must be provided.

    Args:
        path (str): path to csv file
        entity (str): entity id
        granularity (Granularity): granularity of data
        owner (str): owner
        staging_location (str, optional): Defaults to None. Staging
            location for ingesting a local csv file.
        id_column (str, optional): Defaults to None. Id column in the
            csv. If not set, will default to the `entity` argument.
        feature_columns ([str], optional): Defaults to None. Feature
            columns to ingest. If not set, the importer will by default
            ingest all available columns.
        timestamp_column (str, optional): Defaults to None. Timestamp
            column in the csv. If not set, defaults to timestamp value.
        timestamp_value (datetime, optional): Defaults to current
            datetime. Timestamp value to assign to all features in the
            dataset.
        serving_store (feast.sdk.resources.feature.DataStore): Defaults
            to None. Serving store to write the features in this
            instance to.
        warehouse_store (feast.sdk.resources.feature.DataStore):
            Defaults to None. Warehouse store to write the features in
            this instance to.

    Returns:
        Importer: the importer for the dataset provided.
    """
    import_spec_options = {"format": "csv"}
    import_spec_options["path"], require_staging = \
        _get_remote_location(path, staging_location)
    if is_gs_path(path):
        df = gcs_to_df(path)
    else:
        df = pd.read_csv(path)
    schema, features = \
        _detect_schema_and_feature(entity, granularity, owner, id_column,
                                   feature_columns, timestamp_column,
                                   timestamp_value, serving_store,
                                   warehouse_store, df)
    iport_spec = _create_import("file", import_spec_options, entity,
                                schema)

    props = (_properties("csv", len(df.index), require_staging,
                         import_spec_options["path"]))
    specs = _specs(iport_spec, Entity(name=entity), features)

    return cls(specs, df, props)
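A usage sketch for from_csv under the same assumptions as the from_df example above (the Importer and Granularity import paths, bucket names, and sample values are illustrative, not from the source).

# Remote csv in GCS: no staging needed, the file is read via gcs_to_df.
importer = Importer.from_csv(
    "gs://my-bucket/data/customer_features.csv",
    entity="customer",
    granularity=Granularity.DAY,        # assumed enum member
    owner="bob@example.com")

# Local csv: staging_location is required so the file can be uploaded.
importer = Importer.from_csv(
    "customer_features.csv",
    entity="customer",
    granularity=Granularity.DAY,
    owner="bob@example.com",
    staging_location="gs://my-bucket/staging")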
def test_apply_single_entity(self, client, mocker):
    my_entity = Entity(name="test")
    grpc_stub = core.CoreServiceStub(grpc.insecure_channel(""))
    with mocker.patch.object(
            grpc_stub, 'ApplyEntity',
            return_value=CoreServiceTypes.ApplyEntityResponse(
                entityName="test")):
        client._core_service_stub = grpc_stub
        name = client.apply(my_entity)
        assert name == "test"
def from_bq(cls, bq_path, entity, granularity, owner, limit=10,
            id_column=None, feature_columns=None, timestamp_column=None,
            timestamp_value=None, serving_store=None, warehouse_store=None,
            job_options={}):
    """Creates an importer from a given bigquery table.

    Args:
        bq_path (str): path to bigquery table, in the format
            project.dataset.table
        entity (str): entity id
        granularity (Granularity): granularity of data
        owner (str): owner
        limit (int, optional): Defaults to 10. The maximum number of
            rows to read into the importer df.
        id_column (str, optional): Defaults to None. Id column in the
            table. If not set, will default to the `entity` argument.
        feature_columns ([str], optional): Defaults to None. Feature
            columns to ingest. If not set, the importer will by default
            ingest all available columns.
        timestamp_column (str, optional): Defaults to None. Timestamp
            column in the table. If not set, defaults to timestamp
            value.
        timestamp_value (datetime, optional): Defaults to current
            datetime. Timestamp value to assign to all features in the
            dataset.
        serving_store (feast.sdk.resources.feature.DataStore): Defaults
            to None. Serving store to write the features in this
            instance to.
        warehouse_store (feast.sdk.resources.feature.DataStore):
            Defaults to None. Warehouse store to write the features in
            this instance to.
        job_options (dict): Defaults to empty dict. Additional job
            options.

    Returns:
        Importer: the importer for the dataset provided.
    """
    cli = bigquery.Client()
    project, dataset_id, table_id = bq_path.split(".")
    dataset_ref = cli.dataset(dataset_id, project=project)
    table_ref = dataset_ref.table(table_id)
    table = cli.get_table(table_ref)

    source_options = {
        "project": project,
        "dataset": dataset_id,
        "table": table_id
    }
    df = head(cli, table, limit)
    schema, features = \
        _detect_schema_and_feature(entity, granularity, owner, id_column,
                                   feature_columns, timestamp_column,
                                   timestamp_value, serving_store,
                                   warehouse_store, df)
    iport_spec = _create_import("bigquery", source_options, job_options,
                                entity, schema)

    props = _properties("bigquery", table.num_rows, False, None)
    specs = _specs(iport_spec, Entity(name=entity), features)

    return cls(specs, df, props)
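A usage sketch for from_bq, again under the assumed Importer and Granularity imports; the table path and limit are illustrative. Only `limit` rows are read into the importer df, while the import spec still references the full table.

importer = Importer.from_bq(
    "my-project.my_dataset.customer_features",   # project.dataset.table
    entity="customer",
    granularity=Granularity.DAY,                 # assumed enum member
    owner="bob@example.com",
    limit=100)                                   # rows pulled into the importer df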