def _update_features_metadata(self, feature_group, features):
    """Submit a new feature list for an on-demand feature group.

    The caller's `feature_group` is not modified here. The new `features`
    are placed on a separate `OnDemandFeatureGroup` that reuses the
    original's storage connector and id, so a failed update does not leave
    the user's object in a corrupted state.

    # Arguments
        feature_group: The feature group whose metadata should be updated.
        features: The feature list to send with the update.
    """
    staged_fields = {
        "storage_connector": feature_group.storage_connector,
        "id": feature_group.id,
        "features": features,
    }
    staged_group = fg.OnDemandFeatureGroup(**staged_fields)

    api = self._feature_group_api
    api.update_metadata(feature_group, staged_group, "updateMetadata")
 def update_description(self, feature_group, description):
     """Submit a new description for an on-demand feature group.

     The description is placed on a separate `OnDemandFeatureGroup` that
     reuses the original's storage connector, id and features; the caller's
     `feature_group` is not modified in this method.

     # Arguments
         feature_group: The feature group whose description should change.
         description: The new description text.
     """
     staged_fields = {
         "storage_connector": feature_group.storage_connector,
         "id": feature_group.id,
         "description": description,
         "features": feature_group.features,
     }
     staged_group = fg.OnDemandFeatureGroup(**staged_fields)

     api = self._feature_group_api
     api.update_metadata(feature_group, staged_group, "updateMetadata")
Пример #3
0
    def create_on_demand_feature_group(
        self,
        name: str,
        storage_connector: storage_connector.StorageConnector,
        query: Optional[str] = None,
        data_format: Optional[str] = None,
        path: Optional[str] = "",
        options: Optional[Dict[str, str]] = None,
        version: Optional[int] = None,
        description: Optional[str] = "",
        primary_key: Optional[List[str]] = None,
        features: Optional[List[feature.Feature]] = None,
        statistics_config: Optional[Union[StatisticsConfig, bool, dict]] = None,
        event_time: Optional[str] = None,
        validation_type: Optional[str] = "NONE",
        expectations: Optional[List[expectation.Expectation]] = None,
    ):
        """Create an on-demand feature group metadata object.

        !!! note "Lazy"
            This method is lazy and does not persist any metadata in the
            feature store on its own. To persist the feature group metadata in the feature store,
            call the `save()` method.

        # Arguments
            name: Name of the on-demand feature group to create.
            storage_connector: The storage connector to use to establish connectivity
                with the data source.
            query: A string containing a SQL query valid for the target data source.
                The query will be used to pull data from the data sources when the
                feature group is used.
            data_format: If the on-demand feature group refers to a directory with data,
                the data format to use when reading it.
            path: The location within the scope of the storage connector, from where to read
                the data for the on-demand feature group.
            options: Dictionary of string key/value options forwarded as-is to the
                `OnDemandFeatureGroup`. Defaults to `None`, which is treated as an
                empty dictionary `{}`.
            version: Version of the on-demand feature group to create, defaults to `None` and
                will create the feature group with incremented version from the last
                version in the feature store.
            description: A string describing the contents of the on-demand feature group to
                improve discoverability for Data Scientists, defaults to empty string
                `""`.
            primary_key: A list of feature names to be used as primary key for the
                feature group. This primary key can be a composite key of multiple
                features and will be used as joining key, if not specified otherwise.
                Defaults to `None`, which is treated as an empty list `[]`, and the
                feature group won't have any primary key.
            features: Optionally, define the schema of the on-demand feature group manually as a
                list of `Feature` objects. Defaults to `None`, which is treated as an
                empty list `[]`, and will use the schema information of the DataFrame
                resulting by executing the provided query against the data source.
            statistics_config: A configuration object, or a dictionary with keys
                `"enabled"` to generally enable descriptive statistics computation for
                this on-demand feature group, `"correlations"` to turn on feature correlation
                computation, `"histograms"` to compute feature value frequencies and
                `"exact_uniqueness"` to compute uniqueness, distinctness and entropy.
                The values should be booleans indicating the setting. To fully turn off
                statistics computation pass `statistics_config=False`. Defaults to
                `None` and will compute only descriptive statistics.
            event_time: Optionally, provide the name of the feature containing the event
                time for the features in this feature group. If event_time is set
                the feature group can be used for point-in-time joins. Defaults to `None`.
            validation_type: Optionally, set the validation type to one of "NONE", "STRICT",
                "WARNING", "ALL". Determines the mode in which data validation is applied on
                ingested or already existing feature group data.
            expectations: Optionally, a list of expectations to be attached to the feature group.
                The expectations list contains Expectation metadata objects which can be retrieved with
                the `get_expectation()` and `get_expectations()` functions. Defaults to
                `None`, which is treated as an empty list `[]`.

        # Returns
            `OnDemandFeatureGroup`. The on-demand feature group metadata object.
        """
        # Containers are created per call on purpose: a mutable default in the
        # signature is a single object that would be handed to every feature
        # group created without that argument, so a change made through one
        # group would show up in all later ones.
        return feature_group.OnDemandFeatureGroup(
            name=name,
            query=query,
            data_format=data_format,
            path=path,
            options={} if options is None else options,
            storage_connector=storage_connector,
            version=version,
            description=description,
            primary_key=[] if primary_key is None else primary_key,
            featurestore_id=self._id,
            featurestore_name=self._name,
            features=[] if features is None else features,
            statistics_config=statistics_config,
            event_time=event_time,
            validation_type=validation_type,
            expectations=[] if expectations is None else expectations,
        )
Пример #4
0
    def create_on_demand_feature_group(
        self,
        name: str,
        storage_connector: storage_connector.StorageConnector,
        query: Optional[str] = None,
        data_format: Optional[str] = None,
        path: Optional[str] = "",
        options: Optional[Dict[str, str]] = None,
        version: Optional[int] = None,
        description: Optional[str] = "",
        features: Optional[List[feature.Feature]] = None,
        statistics_config: Optional[Union[StatisticsConfig, bool, dict]] = None,
    ):
        """Create an on-demand feature group metadata object.

        !!! note "Lazy"
            This method is lazy and does not persist any metadata or feature data in the
            feature store on its own. To persist the feature group and save feature data
            along the metadata in the feature store, call the `save()` method.

        # Arguments
            name: Name of the on-demand feature group to create.
            storage_connector: The storage connector to use to establish connectivity
                with the data source.
            query: A string containing a SQL query valid for the target data source.
                The query will be used to pull data from the data sources when the
                feature group is used.
            data_format: If the on-demand feature group refers to a directory with data,
                the data format to use when reading it.
            path: The location within the scope of the storage connector, from where to read
                the data for the on-demand feature group.
            options: Dictionary of string key/value options forwarded as-is to the
                `OnDemandFeatureGroup`. Defaults to `None`, which is treated as an
                empty dictionary `{}`.
            version: Version of the on-demand feature group to create, defaults to `None` and
                will create the feature group with incremented version from the last
                version in the feature store.
            description: A string describing the contents of the on-demand feature group to
                improve discoverability for Data Scientists, defaults to empty string
                `""`.
            features: Optionally, define the schema of the on-demand feature group manually as a
                list of `Feature` objects. Defaults to `None`, which is treated as an
                empty list `[]`, and will use the schema information of the DataFrame
                resulting by executing the provided query against the data source.
            statistics_config: A configuration object, or a dictionary with keys
                `"enabled"` to generally enable descriptive statistics computation for
                this on-demand feature group, `"correlations"` to turn on feature correlation
                computation and `"histograms"` to compute feature value frequencies. The
                values should be booleans indicating the setting. To fully turn off
                statistics computation pass `statistics_config=False`. Defaults to
                `None` and will compute only descriptive statistics.

        # Returns
            `OnDemandFeatureGroup`. The on-demand feature group metadata object.
        """
        # Containers are created per call on purpose: a mutable default in the
        # signature is a single object that would be handed to every feature
        # group created without that argument, so a change made through one
        # group would show up in all later ones.
        return feature_group.OnDemandFeatureGroup(
            name=name,
            query=query,
            data_format=data_format,
            path=path,
            options={} if options is None else options,
            storage_connector=storage_connector,
            version=version,
            description=description,
            featurestore_id=self._id,
            featurestore_name=self._name,
            features=[] if features is None else features,
            statistics_config=statistics_config,
        )