Пример #1
0
    def __init__(
        self,
        featurestore_id,
        featurestore_name,
        created,
        hdfs_store_path,
        project_name,
        project_id,
        featurestore_description,
        inode_id,
        offline_featurestore_name,
        hive_endpoint,
        online_enabled,
        num_feature_groups=None,
        num_training_datasets=None,
        num_storage_connectors=None,
        online_featurestore_name=None,
        mysql_server_endpoint=None,
        online_featurestore_size=None,
    ):
        """Store the feature store metadata and build its API/engine helpers.

        Every argument is kept unchanged on a private attribute; nothing is
        validated or converted here. The API clients and engines are each
        constructed with the feature store id.
        """
        # Identity, description and owning project.
        self._id = featurestore_id
        self._name = featurestore_name
        self._description = featurestore_description
        self._created = created
        self._project_id = project_id
        self._project_name = project_name
        self._inode_id = inode_id
        self._hdfs_store_path = hdfs_store_path

        # Offline store name and Hive endpoint.
        self._offline_feature_store_name = offline_featurestore_name
        self._hive_endpoint = hive_endpoint

        # Online store flag, name, size and MySQL endpoint.
        self._online_enabled = online_enabled
        self._online_feature_store_name = online_featurestore_name
        self._online_feature_store_size = online_featurestore_size
        self._mysql_server_endpoint = mysql_server_endpoint

        # Entity counters; None when the caller does not supply them.
        self._num_feature_groups = num_feature_groups
        self._num_training_datasets = num_training_datasets
        self._num_storage_connectors = num_storage_connectors

        store_id = self._id

        # API clients bound to this feature store id.
        self._feature_group_api = feature_group_api.FeatureGroupApi(store_id)
        self._storage_connector_api = (
            storage_connector_api.StorageConnectorApi(store_id)
        )
        self._training_dataset_api = (
            training_dataset_api.TrainingDatasetApi(store_id)
        )
        self._expectations_api = expectations_api.ExpectationsApi(store_id)

        # Engines bound to this feature store id.
        self._feature_group_engine = (
            feature_group_engine.FeatureGroupEngine(store_id)
        )
        self._transformation_function_engine = (
            transformation_function_engine.TransformationFunctionEngine(
                store_id
            )
        )
        self._feature_view_engine = (
            feature_view_engine.FeatureViewEngine(store_id)
        )
Пример #2
0
    def __init__(
        self,
        featurestore_id,
        featurestore_name,
        created,
        hdfs_store_path,
        project_name,
        project_id,
        featurestore_description,
        inode_id,
        offline_featurestore_name,
        hive_endpoint,
        online_enabled,
        online_featurestore_name=None,
        mysql_server_endpoint=None,
        online_featurestore_size=None,
    ):
        """Record the feature store metadata and create its helper objects.

        The arguments are copied as-is onto private attributes. Afterwards
        the feature group, storage connector and training dataset API
        clients and the feature group engine are built from the feature
        store id.
        """
        # Core identity of the feature store.
        self._id = featurestore_id
        self._name = featurestore_name
        self._description = featurestore_description
        self._created = created
        self._inode_id = inode_id
        self._hdfs_store_path = hdfs_store_path

        # Project the feature store belongs to.
        self._project_id = project_id
        self._project_name = project_name

        # Offline side.
        self._offline_feature_store_name = offline_featurestore_name
        self._hive_endpoint = hive_endpoint

        # Online side; the optional values stay None when not supplied.
        self._online_enabled = online_enabled
        self._online_feature_store_name = online_featurestore_name
        self._online_feature_store_size = online_featurestore_size
        self._mysql_server_endpoint = mysql_server_endpoint

        fs_id = self._id

        self._feature_group_api = feature_group_api.FeatureGroupApi(fs_id)
        self._storage_connector_api = (
            storage_connector_api.StorageConnectorApi(fs_id)
        )
        self._training_dataset_api = (
            training_dataset_api.TrainingDatasetApi(fs_id)
        )

        self._feature_group_engine = (
            feature_group_engine.FeatureGroupEngine(fs_id)
        )
Пример #3
0
    def __init__(
        self,
        name,
        version,
        featurestore_id,
        description="",
        partition_key=None,
        primary_key=None,
        hudi_precombine_key=None,
        featurestore_name=None,
        created=None,
        creator=None,
        id=None,
        features=None,
        location=None,
        online_enabled=False,
        time_travel_format=None,
        statistics_config=None,
        validation_type="NONE",
        expectations=None,
    ):
        """Initialise a feature group from user input or backend metadata.

        The presence of `id` selects the initialisation mode:

        * `id is not None` (backend): primary, partition and Hudi precombine
          keys are derived from the flags on the feature objects, and the
          `primary_key`, `partition_key` and `hudi_precombine_key` arguments
          are ignored.
        * `id is None` (user): the key arguments are assigned through the
          public `primary_key` / `partition_key` attributes, and the
          precombine key is lower-cased.

        In both modes the precombine key is only kept when the time travel
        format is "HUDI" (compared case-insensitively); otherwise it is None.

        `features` may contain dicts, which are converted with
        `feature.Feature.from_response_json`, or other objects, which are
        stored unchanged. None is treated as an empty list.
        """
        super().__init__(featurestore_id)

        self._feature_store_id = featurestore_id
        self._feature_store_name = featurestore_name
        self._description = description
        self._created = created
        self._creator = creator
        self._version = version
        self._name = name
        self._id = id

        # `features` defaults to None; iterate an empty list in that case
        # instead of raising TypeError.
        raw_features = [] if features is None else features
        self._features = [
            feature.Feature.from_response_json(feat)
            if isinstance(feat, dict) else feat
            for feat in raw_features
        ]

        self._location = location
        self._online_enabled = online_enabled
        self._time_travel_format = (
            time_travel_format.upper()
            if time_travel_format is not None else None
        )
        is_hudi = self._time_travel_format == "HUDI"

        if id is not None:
            # initialized by backend
            self._primary_key = [
                feat.name for feat in self._features if feat.primary is True
            ]
            self._partition_key = [
                feat.name for feat in self._features if feat.partition is True
            ]
            if is_hudi:
                # hudi precombine key is always a single feature; fall back
                # to None instead of raising IndexError when none is flagged
                self._hudi_precombine_key = next(
                    (
                        feat.name for feat in self._features
                        if feat.hudi_precombine_key is True
                    ),
                    None,
                )
            else:
                self._hudi_precombine_key = None
        else:
            # initialized by user
            self.primary_key = primary_key
            self.partition_key = partition_key
            self._hudi_precombine_key = (
                hudi_precombine_key.lower()
                if hudi_precombine_key is not None and is_hudi else None
            )

        # Both modes assign the statistics config last, after the keys.
        self.statistics_config = statistics_config

        self._data_validation_engine = (
            data_validation_engine.DataValidationEngine(
                featurestore_id, self.ENTITY_TYPE
            )
        )
        self._validation_type = validation_type.upper()
        # Only the expectation names are kept.
        self._expectations_names = (
            [expectation.name for expectation in expectations]
            if expectations is not None else []
        )

        self._feature_group_engine = feature_group_engine.FeatureGroupEngine(
            featurestore_id)
Пример #4
0
    def __init__(
        self,
        name,
        version,
        description,
        featurestore_id,
        partition_key=None,
        primary_key=None,
        featurestore_name=None,
        created=None,
        creator=None,
        descriptive_statistics=None,
        feature_correlation_matrix=None,
        features_histogram=None,
        cluster_analysis=None,
        id=None,
        features=None,
        location=None,
        jobs=None,
        desc_stats_enabled=None,
        feat_corr_enabled=None,
        feat_hist_enabled=None,
        cluster_analysis_enabled=None,
        statistic_columns=None,
        num_bins=None,
        num_clusters=None,
        corr_method=None,
        online_enabled=False,
        hudi_enabled=False,
        default_storage="offline",
    ):
        """Create a feature group object from its metadata.

        All arguments are stored unchanged on private attributes, except
        `features`: each item is converted with
        `feature.Feature.from_response_json`. A `features` value of None
        (the default) yields an empty feature list.
        """
        self._feature_store_id = featurestore_id
        self._feature_store_name = featurestore_name
        self._description = description
        self._created = created
        self._creator = creator
        self._version = version
        self._descriptive_statistics = descriptive_statistics
        self._feature_correlation_matrix = feature_correlation_matrix
        self._features_histogram = features_histogram
        self._cluster_analysis = cluster_analysis
        self._name = name
        self._id = id

        # `features` defaults to None; iterate an empty list in that case
        # instead of raising TypeError.
        raw_features = [] if features is None else features
        self._features = [
            feature.Feature.from_response_json(feat) for feat in raw_features
        ]

        self._location = location
        self._jobs = jobs

        # Statistics switches and settings, stored as given.
        self._desc_stats_enabled = desc_stats_enabled
        self._feat_corr_enabled = feat_corr_enabled
        self._feat_hist_enabled = feat_hist_enabled
        self._cluster_analysis_enabled = cluster_analysis_enabled
        self._statistic_columns = statistic_columns
        self._num_bins = num_bins
        self._num_clusters = num_clusters
        self._corr_method = corr_method

        self._online_enabled = online_enabled
        self._default_storage = default_storage
        self._hudi_enabled = hudi_enabled

        self._primary_key = primary_key
        self._partition_key = partition_key

        self._feature_group_engine = feature_group_engine.FeatureGroupEngine(
            featurestore_id)
Пример #5
0
    def __init__(
        self,
        name,
        version,
        featurestore_id,
        description="",
        partition_key=None,
        primary_key=None,
        hudi_precombine_key=None,
        featurestore_name=None,
        created=None,
        creator=None,
        id=None,
        features=None,
        location=None,
        jobs=None,
        desc_stats_enabled=None,
        feat_corr_enabled=None,
        feat_hist_enabled=None,
        statistic_columns=None,
        online_enabled=False,
        time_travel_format=None,
        statistics_config=None,
    ):
        """Initialise a feature group from user input or backend metadata.

        The presence of `id` selects the initialisation mode:

        * `id is not None` (backend): the statistics config is built from
          the individual `*_enabled` / `statistic_columns` arguments, and the
          primary, partition and Hudi precombine keys are derived from the
          flags on the feature objects.
        * `id is None` (user): `statistics_config`, `primary_key`,
          `partition_key` and `hudi_precombine_key` are taken from the
          arguments.

        In both modes the precombine key is only kept when the time travel
        format is "HUDI" (compared case-insensitively); otherwise it is None.

        `features` may contain dicts, which are converted with
        `feature.Feature.from_response_json`, or other objects, which are
        stored unchanged. None is treated as an empty list.
        """
        super().__init__(featurestore_id)

        self._feature_store_id = featurestore_id
        self._feature_store_name = featurestore_name
        self._description = description
        self._created = created
        self._creator = creator
        self._version = version
        self._name = name
        self._id = id

        # `features` defaults to None; iterate an empty list in that case
        # instead of raising TypeError.
        raw_features = [] if features is None else features
        self._features = [
            feature.Feature.from_response_json(feat)
            if isinstance(feat, dict) else feat
            for feat in raw_features
        ]

        self._location = location
        self._jobs = jobs
        self._online_enabled = online_enabled
        self._time_travel_format = (
            time_travel_format.upper()
            if time_travel_format is not None else None
        )
        is_hudi = self._time_travel_format == "HUDI"

        if id is not None:
            # initialized by backend
            self.statistics_config = StatisticsConfig(
                desc_stats_enabled,
                feat_corr_enabled,
                feat_hist_enabled,
                statistic_columns,
            )
            self._primary_key = [
                feat.name for feat in self._features if feat.primary is True
            ]
            self._partition_key = [
                feat.name for feat in self._features if feat.partition is True
            ]
            if is_hudi:
                # hudi precombine key is always a single feature; fall back
                # to None instead of raising IndexError when none is flagged
                self._hudi_precombine_key = next(
                    (
                        feat.name for feat in self._features
                        if feat.hudi_precombine_key is True
                    ),
                    None,
                )
            else:
                self._hudi_precombine_key = None
        else:
            # initialized by user
            self.statistics_config = statistics_config
            self._primary_key = primary_key
            self._partition_key = partition_key
            self._hudi_precombine_key = (
                hudi_precombine_key if is_hudi else None
            )

        self._feature_group_engine = feature_group_engine.FeatureGroupEngine(
            featurestore_id)
Пример #6
0
    def __init__(
        self,
        name,
        version,
        description,
        featurestore_id,
        partition_key=None,
        primary_key=None,
        featurestore_name=None,
        created=None,
        creator=None,
        id=None,
        features=None,
        location=None,
        jobs=None,
        desc_stats_enabled=None,
        feat_corr_enabled=None,
        feat_hist_enabled=None,
        statistic_columns=None,
        online_enabled=False,
        hudi_enabled=False,
        default_storage="offline",
        statistics_config=None,
    ):
        """Initialise a feature group from user input or backend metadata.

        The presence of `id` selects the initialisation mode:

        * `id is not None` (backend): the statistics config is built from
          the individual `*_enabled` / `statistic_columns` arguments, and the
          primary and partition keys are the names of the features whose
          `primary` / `partition` flag is exactly True.
        * `id is None` (user): `statistics_config`, `primary_key` and
          `partition_key` are taken from the arguments.

        Each item of `features` is converted with
        `feature.Feature.from_response_json`. None is treated as an empty
        list.
        """
        self._feature_store_id = featurestore_id
        self._feature_store_name = featurestore_name
        self._description = description
        self._created = created
        self._creator = creator
        self._version = version
        self._name = name
        self._id = id

        # `features` defaults to None; iterate an empty list in that case
        # instead of raising TypeError.
        raw_features = [] if features is None else features
        self._features = [
            feature.Feature.from_response_json(feat) for feat in raw_features
        ]

        self._location = location
        self._jobs = jobs
        self._online_enabled = online_enabled
        self._default_storage = default_storage
        self._hudi_enabled = hudi_enabled

        # An earlier duplicate of the key assignment used to precede this
        # branch; its results were always overwritten here, so only this
        # single assignment is kept.
        if id is not None:
            # initialized by backend
            self.statistics_config = StatisticsConfig(
                desc_stats_enabled,
                feat_corr_enabled,
                feat_hist_enabled,
                statistic_columns,
            )
            self._primary_key = [
                feat.name for feat in self._features if feat.primary is True
            ]
            self._partition_key = [
                feat.name for feat in self._features if feat.partition is True
            ]
        else:
            # initialized by user
            self.statistics_config = statistics_config
            self._primary_key = primary_key
            self._partition_key = partition_key

        self._feature_group_engine = feature_group_engine.FeatureGroupEngine(
            featurestore_id)

        self._statistics_engine = statistics_engine.StatisticsEngine(
            featurestore_id, self.ENTITY_TYPE)