Пример #1
0
 def create_feature_group(
     self,
     name,
     version=None,
     description="",
     default_storage="offline",
     online_enabled=False,
     partition_key=None,
     primary_key=None,
     features=None,
     statistics_config=None,
 ):
     """Build a feature group metadata object bound to this feature store.

     The list-valued parameters default to `None` instead of a literal `[]`.
     A `[]` default is evaluated once at definition time, so every feature
     group created without these arguments would receive the very same list
     object and a mutation through one would show up in all the others.

     # Arguments
         name: Name of the feature group.
         version: Version of the feature group, defaults to `None`.
         description: Description of the feature group, defaults to `""`.
         default_storage: Forwarded unchanged to `FeatureGroup`, defaults to
             `"offline"`.
         online_enabled: Forwarded unchanged to `FeatureGroup`, defaults to
             `False`.
         partition_key: List of feature names used as partition key. Defaults
             to `None`, which is treated as an empty list `[]`.
         primary_key: List of feature names used as primary key. Defaults to
             `None`, which is treated as an empty list `[]`.
         features: List of feature objects describing the schema. Defaults to
             `None`, which is treated as an empty list `[]`.
         statistics_config: Forwarded unchanged to `FeatureGroup`, defaults
             to `None`.

     # Returns
         `FeatureGroup`. The object constructed from the arguments together
         with this feature store's `_id` and `_name`.
     """
     return feature_group.FeatureGroup(
         name=name,
         version=version,
         description=description,
         online_enabled=online_enabled,
         default_storage=default_storage,
         # Hand out a fresh list per call when the caller supplied none.
         partition_key=[] if partition_key is None else partition_key,
         primary_key=[] if primary_key is None else primary_key,
         featurestore_id=self._id,
         featurestore_name=self._name,
         features=[] if features is None else features,
         statistics_config=statistics_config,
     )
 def update_description(self, feature_group, description):
     """Push a new description for `feature_group` through the metadata API.

     A second FeatureGroup object is built that carries the id and features
     of `feature_group` plus the new description. Both objects are then
     handed to the feature group API with the "updateMetadata" action.
     """
     # Positional order is kept exactly as the constructor call expects it:
     # only the third slot (the description) is populated.
     updated = fg.FeatureGroup(
         None, None, description, None,
         id=feature_group.id, features=feature_group.features,
     )
     self._feature_group_api.update_metadata(
         feature_group, updated, "updateMetadata"
     )
 def update_description(self, feature_group, description):
     """Push a new description for `feature_group` through the metadata API.

     A second FeatureGroup object is built that carries the id, stream flag
     and features of `feature_group` plus the new description. Both objects
     are then handed to the feature group API with the "updateMetadata"
     action.
     """
     updated = fg.FeatureGroup(
         name=None, version=None, featurestore_id=None,
         description=description,
         id=feature_group.id,
         stream=feature_group.stream,
         features=feature_group.features,
     )
     self._feature_group_api.update_metadata(
         feature_group, updated, "updateMetadata"
     )
 def append_features(self, feature_group, new_features):
     """Submit `feature_group`'s features extended by `new_features`.

     The extended feature list is attached to a separate FeatureGroup object
     rather than to `feature_group`, so a failing update does not leave the
     caller's object in a corrupted state.
     """
     extended = fg.FeatureGroup(
         None, None, None, None,
         id=feature_group.id,
         features=feature_group.features + new_features,
     )
     self._feature_group_api.update_metadata(
         feature_group, extended, "updateMetadata"
     )
 def _update_features_metadata(self, feature_group, features):
     """Submit `features` as the feature list of `feature_group`.

     The change is applied to a separate FeatureGroup object carrying the
     same id and stream flag, so a failing update does not leave the
     caller's object in a corrupted state.
     """
     replacement = fg.FeatureGroup(
         name=None, version=None, featurestore_id=None, description=None,
         id=feature_group.id,
         stream=feature_group.stream,
         features=features,
     )
     self._feature_group_api.update_metadata(
         feature_group, replacement, "updateMetadata"
     )
Пример #6
0
    def get_or_create_feature_group(
        self,
        name: str,
        version: int,
        description: Optional[str] = "",
        online_enabled: Optional[bool] = False,
        time_travel_format: Optional[str] = "HUDI",
        partition_key: Optional[List[str]] = None,
        primary_key: Optional[List[str]] = None,
        hudi_precombine_key: Optional[str] = None,
        features: Optional[List[feature.Feature]] = None,
        statistics_config: Optional[Union[StatisticsConfig, bool,
                                          dict]] = None,
        validation_type: Optional[str] = "NONE",
        expectations: Optional[List[expectation.Expectation]] = None,
        expectation_suite: Optional[Union[expectation_suite.ExpectationSuite,
                                          ge.core.ExpectationSuite]] = None,
        event_time: Optional[str] = None,
        stream: Optional[bool] = False,
    ):
        """Get feature group metadata object or create a new one if it doesn't exist. This method doesn't update existing feature group metadata object.

        !!! note "Lazy"
            This method is lazy and does not persist any metadata or feature data in the
            feature store on its own. To persist the feature group and save feature data
            along the metadata in the feature store, call the `insert()` method with a
            DataFrame.

        # Arguments
            name: Name of the feature group to create.
            version: Version of the feature group to retrieve or create.
            description: A string describing the contents of the feature group to
                improve discoverability for Data Scientists, defaults to empty string
                `""`.
            online_enabled: Define whether the feature group should be made available
                also in the online feature store for low latency access, defaults to
                `False`.
            time_travel_format: Format used for time travel, defaults to `"HUDI"`.
            partition_key: A list of feature names to be used as partition key when
                writing the feature data to the offline storage. Defaults to `None`,
                which is treated as an empty list `[]`.
            primary_key: A list of feature names to be used as primary key for the
                feature group. This primary key can be a composite key of multiple
                features and will be used as joining key, if not specified otherwise.
                Defaults to `None`, which is treated as an empty list `[]`, and the
                feature group won't have any primary key.
            hudi_precombine_key: A feature name to be used as a precombine key for the `"HUDI"`
                feature group. Defaults to `None`. If feature group has time travel format
                `"HUDI"` and hudi precombine key was not specified then the first primary key of
                the feature group will be used as hudi precombine key.
            features: Optionally, define the schema of the feature group manually as a
                list of `Feature` objects. Defaults to `None`, which is treated as an
                empty list `[]`, and will use the schema information of the DataFrame
                provided in the `save` method.
            statistics_config: A configuration object, or a dictionary with keys
                `"enabled"` to generally enable descriptive statistics computation for
                this feature group, `"correlations"` to turn on feature correlation
                computation, `"histograms"` to compute feature value frequencies and
                `"exact_uniqueness"` to compute uniqueness, distinctness and entropy.
                The values should be booleans indicating the setting. To fully turn off
                statistics computation pass `statistics_config=False`. Defaults to
                `None` and will compute only descriptive statistics.
            validation_type: Optionally, set the validation type to one of "NONE", "STRICT",
                "WARNING", "ALL". Determines the mode in which data validation is applied on
                ingested or already existing feature group data.
            expectations: Optionally, a list of expectations to be attached to the feature group.
                The expectations list contains Expectation metadata objects which can be retrieved with
                the `get_expectation()` and `get_expectations()` functions. Defaults to
                `None`, which is treated as an empty list `[]`.
            expectation_suite: Optionally, attach an expectation suite to the feature
                group which dataframes should be validated against upon insertion.
                Defaults to `None`.
            event_time: Optionally, provide the name of the feature containing the event
                time for the features in this feature group. If event_time is set
                the feature group can be used for point-in-time joins. Defaults to `None`.
            stream: Optionally, define whether the feature group should support real time stream writing capabilities.
                Stream enabled Feature Groups have unified single API for writing streaming features transparently
                to both online and offline store.

        # Returns
            `FeatureGroup`. The feature group metadata object.

        # Raises
            `RestAPIError`. Re-raised unchanged when the lookup fails for any reason
            other than the "not found" response (HTTP 404 with error code 270009).
        """
        try:
            return self._feature_group_api.get(
                name, version, feature_group_api.FeatureGroupApi.CACHED)
        except exceptions.RestAPIError as e:
            missing = (e.response.json().get("errorCode", "") == 270009
                       and e.response.status_code == 404)
            if not missing:
                # Bare raise keeps the original traceback intact.
                raise

        # The lookup reported "not found": build a new, not yet persisted object.
        # List arguments left at None get a fresh list each call; a literal `[]`
        # default would be one shared object across every created feature group.
        return feature_group.FeatureGroup(
            name=name,
            version=version,
            description=description,
            online_enabled=online_enabled,
            time_travel_format=time_travel_format,
            partition_key=[] if partition_key is None else partition_key,
            primary_key=[] if primary_key is None else primary_key,
            hudi_precombine_key=hudi_precombine_key,
            featurestore_id=self._id,
            featurestore_name=self._name,
            features=[] if features is None else features,
            statistics_config=statistics_config,
            validation_type=validation_type,
            expectations=[] if expectations is None else expectations,
            event_time=event_time,
            stream=stream,
            expectation_suite=expectation_suite,
        )
Пример #7
0
    def create_feature_group(
        self,
        name: str,
        version: Optional[int] = None,
        description: Optional[str] = "",
        online_enabled: Optional[bool] = False,
        time_travel_format: Optional[str] = "HUDI",
        partition_key: Optional[List[str]] = None,
        primary_key: Optional[List[str]] = None,
        features: Optional[List[feature.Feature]] = None,
        statistics_config: Optional[Union[StatisticsConfig, bool,
                                          dict]] = None,
    ):
        """Create a feature group metadata object.

        !!! note "Lazy"
            This method is lazy and does not persist any metadata or feature data in the
            feature store on its own. To persist the feature group and save feature data
            along the metadata in the feature store, call the `save()` method with a
            DataFrame.

        # Arguments
            name: Name of the feature group to create.
            version: Version of the feature group to retrieve, defaults to `None` and
                will create the feature group with incremented version from the last
                version in the feature store.
            description: A string describing the contents of the feature group to
                improve discoverability for Data Scientists, defaults to empty string
                `""`.
            online_enabled: Define whether the feature group should be made available
                also in the online feature store for low latency access, defaults to
                `False`.
            time_travel_format: Format used for time travel, defaults to `"HUDI"`.
            partition_key: A list of feature names to be used as partition key when
                writing the feature data to the offline storage. Defaults to `None`,
                which is treated as an empty list `[]`.
            primary_key: A list of feature names to be used as primary key for the
                feature group. This primary key can be a composite key of multiple
                features and will be used as joining key, if not specified otherwise.
                Defaults to `None`, which is treated as an empty list `[]`, and the
                first column of the DataFrame will be used as primary key.
            features: Optionally, define the schema of the feature group manually as a
                list of `Feature` objects. Defaults to `None`, which is treated as an
                empty list `[]`, and will use the schema information of the DataFrame
                provided in the `save` method.
            statistics_config: A configuration object, or a dictionary with keys
                `"enabled"` to generally enable descriptive statistics computation for
                this feature group, `"correlations"` to turn on feature correlation
                computation and `"histograms"` to compute feature value frequencies. The
                values should be booleans indicating the setting. To fully turn off
                statistics computation pass `statistics_config=False`. Defaults to
                `None` and will compute only descriptive statistics.

        # Returns
            `FeatureGroup`. The feature group metadata object.
        """
        # List arguments left at None get a fresh list each call; a literal `[]`
        # default would be one shared object across every created feature group.
        return feature_group.FeatureGroup(
            name=name,
            version=version,
            description=description,
            online_enabled=online_enabled,
            time_travel_format=time_travel_format,
            partition_key=[] if partition_key is None else partition_key,
            primary_key=[] if primary_key is None else primary_key,
            featurestore_id=self._id,
            featurestore_name=self._name,
            features=[] if features is None else features,
            statistics_config=statistics_config,
        )