Example #1
    def test_dump(self):
        entity = Entity("entity", "description", ["tag1", "tag2"])
        entity.dump("myentity.yaml")
        actual = Entity.from_yaml("myentity.yaml")
        assert actual.name == entity.name
        assert actual.description == entity.description
        for t1, t2 in zip(actual.tags, entity.tags):
            assert t1 == t2

        # cleanup
        os.remove("myentity.yaml")
Example #2
    def test_read_from_yaml(self):
        entity_no_tag = Entity.from_yaml(
            "tests/sample/valid_entity_no_tag.yaml")
        assert entity_no_tag.name == "myentity"
        assert entity_no_tag.description == "test entity without tag"
        assert len(entity_no_tag.tags) == 0

        entity = Entity.from_yaml("tests/sample/valid_entity.yaml")
        assert entity.name == "myentity"
        assert entity.description == "test entity with tag"
        assert entity.tags[0] == "tag1"
        assert entity.tags[1] == "tag2"
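
Outside of tests, an entity loaded this way is typically registered with the core service via client.apply (see Examples #4 and #7). A minimal sketch, where the import paths, the Client constructor, and the endpoint are assumptions for illustration:

from feast.sdk.client import Client              # import path and constructor are assumptions
from feast.sdk.resources.entity import Entity    # import path is an assumption

client = Client(core_url="localhost:6565")       # endpoint is an assumption
entity = Entity.from_yaml("tests/sample/valid_entity.yaml")
entity_name = client.apply(entity)               # returns the applied entity's name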
Example #3
    def from_df(cls,
                df,
                entity,
                granularity,
                owner,
                staging_location,
                id_column=None,
                feature_columns=None,
                timestamp_column=None,
                timestamp_value=None,
                serving_store=None,
                warehouse_store=None,
                job_options={}):
        """Creates an importer from a given pandas dataframe. 
        To import a file from a dataframe, the data will have to be staged.
        
        Args:
            df (pandas.DataFrame): dataframe to be imported
            entity (str): entity id
            granularity (Granularity): granularity of data
            owner (str): owner
            staging_location (str): staging location for the temporary CSV
                file created from the dataframe.
            id_column (str, optional): Defaults to None. Id column in the
                dataframe. If not set, will default to the `entity` argument.
            feature_columns ([str], optional): Defaults to None. Feature columns
                to ingest. If not set, the importer will by default ingest all 
                available columns.
            timestamp_column (str, optional): Defaults to None. Timestamp
                column in the dataframe. If not set, defaults to timestamp value.
            timestamp_value (datetime, optional): Defaults to current datetime. 
                Timestamp value to assign to all features in the dataset.
            serving_store (feast.sdk.resources.feature.DataStore): Defaults to None.
                Serving store to write the features in this instance to.
            warehouse_store (feast.sdk.resources.feature.DataStore): Defaults to None.
                Warehouse store to write the features in this instance to.
            job_options (dict): Defaults to empty dict. Additional job options.
        
        Returns:
            Importer: the importer for the dataset provided.
        """
        tmp_file_name = ("tmp_{}_{}.csv".format(entity,
                                                int(round(time.time() *
                                                          1000))))
        source_options = {"format": "csv"}
        source_options["path"], require_staging = (_get_remote_location(
            tmp_file_name, staging_location))
        schema, features = \
            _detect_schema_and_feature(entity, granularity, owner, id_column,
                                       feature_columns, timestamp_column,
                                       timestamp_value, serving_store,
                                       warehouse_store, df)
        iport_spec = _create_import("file", source_options, job_options,
                                    entity, schema)

        props = _properties("dataframe", len(df.index), require_staging,
                            source_options["path"])
        specs = _specs(iport_spec, Entity(name=entity), features)

        return cls(specs, df, props)
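
A minimal usage sketch for from_df. The import paths, the Granularity value, and the staging bucket are assumptions for illustration; the keyword arguments follow the signature above:

from datetime import datetime
import pandas as pd
from feast.sdk.importer import Importer               # import path is an assumption
from feast.sdk.resources.feature import Granularity   # import path and enum value are assumptions

df = pd.DataFrame({"driver_id": [1, 2, 3], "completed_trips": [10, 25, 7]})
importer = Importer.from_df(
    df,
    entity="driver",
    granularity=Granularity.DAY,
    owner="user@example.com",
    staging_location="gs://my-bucket/staging",   # the dataframe is staged as a temporary CSV
    id_column="driver_id",
    timestamp_value=datetime.utcnow(),
)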
Example #4
    def test_apply_multiple(self, client, mocker):
        my_storage = Storage(id="TEST", type="redis")
        my_feature_group = FeatureGroup(id="test")
        my_entity = Entity(name="test")

        grpc_stub = core.CoreServiceStub(grpc.insecure_channel(""))

        mocker.patch.object(
            grpc_stub,
            'ApplyStorage',
            return_value=CoreServiceTypes.ApplyStorageResponse(
                storageId="TEST"))
        mocker.patch.object(
            grpc_stub,
            'ApplyFeatureGroup',
            return_value=CoreServiceTypes.ApplyFeatureGroupResponse(
                featureGroupId="test"))
        mocker.patch.object(
            grpc_stub,
            'ApplyEntity',
            return_value=CoreServiceTypes.ApplyEntityResponse(
                entityName="test"))

        client._core_service_stub = grpc_stub
        ids = client.apply([my_storage, my_entity, my_feature_group])
        assert ids == ["TEST", "test", "test"]
Example #5
def _register_resources(client, entities_fldr, features_fldr):
    resources = []
    for ent_file in os.listdir(entities_fldr):
        resources.append(Entity.from_yaml(os.path.join(entities_fldr, ent_file)))
    for feat_file in os.listdir(features_fldr):
        resources.append(Feature.from_yaml(os.path.join(features_fldr, feat_file)))
    client.apply(resources)
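
A rough sketch of how _register_resources might be called, assuming one YAML file per resource in each folder; the folder layout is illustrative and the Client comes from the assumed constructor shown in the sketch after Example #2:

# Assumed layout:
#   resources/entities/driver.yaml
#   resources/features/completed_trips.yaml
client = Client(core_url="localhost:6565")    # constructor and endpoint are assumptions
_register_resources(client,
                    entities_fldr="resources/entities",
                    features_fldr="resources/features")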
Example #6
    def from_csv(cls,
                 path,
                 entity,
                 granularity,
                 owner,
                 staging_location=None,
                 id_column=None,
                 feature_columns=None,
                 timestamp_column=None,
                 timestamp_value=None,
                 serving_store=None,
                 warehouse_store=None):
        """Creates an importer from a given csv dataset. 
        The file can be either local or remote (in GCS). If it is a local file,
        staging_location must be provided.
        
        Args:
            path (str): path to csv file
            entity (str): entity id
            granularity (Granularity): granularity of data
            owner (str): owner
            staging_location (str, optional): Defaults to None. Staging location 
                for ingesting a local csv file.
            id_column (str, optional): Defaults to None. Id column in the csv. 
                If not set, will default to the `entity` argument.
            feature_columns ([str], optional): Defaults to None. Feature columns
                to ingest. If not set, the importer will by default ingest all 
                available columns.
            timestamp_column (str, optional): Defaults to None. Timestamp 
                column in the csv. If not set, defaults to timestamp value.
            timestamp_value (datetime, optional): Defaults to current datetime. 
                Timestamp value to assign to all features in the dataset.
            serving_store (feast.sdk.resources.feature.DataStore): Defaults to None.
                Serving store to write the features in this instance to.
            warehouse_store (feast.sdk.resources.feature.DataStore): Defaults to None.
                Warehouse store to write the features in this instance to.
        
        Returns:
            Importer: the importer for the dataset provided.
        """
        import_spec_options = {"format": "csv"}
        import_spec_options["path"], require_staging = \
            _get_remote_location(path, staging_location)
        if is_gs_path(path):
            df = gcs_to_df(path)
        else:
            df = pd.read_csv(path)
        schema, features = \
            _detect_schema_and_feature(entity, granularity, owner, id_column,
                                       feature_columns, timestamp_column,
                                       timestamp_value, serving_store,
                                       warehouse_store, df)
        iport_spec = _create_import("file", import_spec_options, entity,
                                    schema)

        props = (_properties("csv", len(df.index), require_staging,
                             import_spec_options["path"]))
        specs = _specs(iport_spec, Entity(name=entity), features)

        return cls(specs, df, props)
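
A hedged sketch of the two paths the docstring describes, a local file that needs staging and a GCS file that does not. The Importer and Granularity imports are the same assumptions as in the sketch after Example #3, and column names and bucket paths are illustrative:

# Local CSV: staging_location is required so the file can be uploaded.
importer = Importer.from_csv(
    "data/drivers.csv",
    entity="driver",
    granularity=Granularity.DAY,                 # value is an assumption
    owner="user@example.com",
    staging_location="gs://my-bucket/staging",
    timestamp_column="event_timestamp",          # column name is an assumption
)

# CSV already in GCS: read directly, no staging needed.
importer = Importer.from_csv(
    "gs://my-bucket/data/drivers.csv",
    entity="driver",
    granularity=Granularity.DAY,
    owner="user@example.com",
)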
Example #7
    def test_apply_single_entity(self, client, mocker):
        my_entity = Entity(name="test")
        grpc_stub = core.CoreServiceStub(grpc.insecure_channel(""))

        with mocker.patch.object(grpc_stub, 'ApplyEntity',
                                 return_value=CoreServiceTypes.ApplyEntityResponse(
                                     entityName="test")):
            client._core_service_stub = grpc_stub
            name = client.apply(my_entity)
            assert name == "test"
Example #8
    def from_bq(cls,
                bq_path,
                entity,
                granularity,
                owner,
                limit=10,
                id_column=None,
                feature_columns=None,
                timestamp_column=None,
                timestamp_value=None,
                serving_store=None,
                warehouse_store=None,
                job_options={}):
        """Creates an importer from a given bigquery table. 
        
        Args:
            bq_path (str): path to bigquery table, in the format 
                            project.dataset.table
            entity (str): entity id
            granularity (Granularity): granularity of data
            owner (str): owner
            limit (int, optional): Defaults to 10. The maximum number of rows to 
                read into the importer df.
            id_column (str, optional): Defaults to None. Id column in the table.
                If not set, will default to the `entity` argument.
            feature_columns ([str], optional): Defaults to None. Feature columns
                to ingest. If not set, the importer will by default ingest all 
                available columns.
            timestamp_column (str, optional): Defaults to None. Timestamp 
                column in the table. If not set, defaults to timestamp value.
            timestamp_value (datetime, optional): Defaults to current datetime. 
                Timestamp value to assign to all features in the dataset.
            serving_store (feast.sdk.resources.feature.DataStore): Defaults to None.
                Serving store to write the features in this instance to.
            warehouse_store (feast.sdk.resources.feature.DataStore): Defaults to None.
                Warehouse store to write the features in this instance to.
            job_options (dict): Defaults to empty dict. Additional job options.
        
        Returns:
            Importer: the importer for the dataset provided.
        """

        cli = bigquery.Client()
        project, dataset_id, table_id = bq_path.split(".")
        dataset_ref = cli.dataset(dataset_id, project=project)
        table_ref = dataset_ref.table(table_id)
        table = cli.get_table(table_ref)

        source_options = {
            "project": project,
            "dataset": dataset_id,
            "table": table_id
        }
        df = head(cli, table, limit)
        schema, features = \
            _detect_schema_and_feature(entity, granularity, owner, id_column,
                                       feature_columns, timestamp_column,
                                       timestamp_value, serving_store,
                                       warehouse_store, df)
        iport_spec = _create_import("bigquery", source_options, job_options,
                                    entity, schema)

        props = _properties("bigquery", table.num_rows, False, None)
        specs = _specs(iport_spec, Entity(name=entity), features)
        return cls(specs, df, props)
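
A hedged usage sketch for from_bq, reusing the assumed Importer and Granularity imports from the sketch after Example #3. Because the method reads table metadata and a row preview through the BigQuery client, Google Cloud credentials must be available in the environment; the table path and column name below are illustrative:

importer = Importer.from_bq(
    "my-project.my_dataset.driver_trips",   # project.dataset.table
    entity="driver",
    granularity=Granularity.DAY,            # value is an assumption
    owner="user@example.com",
    limit=100,                              # only the first 100 rows go into the preview dataframe
    timestamp_column="event_timestamp",     # column name is an assumption
)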