Exemplo n.º 1
0
    def __init__(
        self,
        name,
        version,
        description,
        data_format,
        location,
        featurestore_id,
        storage_connector=None,
        splits=None,
        seed=None,
        cluster_analysis=None,
        created=None,
        creator=None,
        descriptive_statistics=None,
        feature_correlation_matrix=None,
        features=None,
        features_histogram=None,
        featurestore_name=None,
        id=None,
        jobs=None,
        inode_id=None,
        storage_connector_name=None,
        storage_connector_id=None,
        storage_connector_type=None,
        training_dataset_type=None,
    ):
        """Build the object from user arguments or from a backend response.

        The mode is selected by ``training_dataset_type``:

        * ``None``: user initialisation. ``features`` is stored as given and
          ``storage_connector`` / ``splits`` are assigned through the public
          attributes of the same name (presumably property setters defined
          elsewhere in the class -- confirm).
        * otherwise: initialisation from a backend response. The storage
          connector is fetched with ``StorageConnectorApi.get_by_id`` and each
          entry of ``features`` is converted with
          ``feature.Feature.from_response_json``.

        Args:
            name, version, description, data_format, location, seed, id:
                Stored unchanged on the matching underscore-prefixed attribute.
            featurestore_id: Passed to the API/engine helpers created here.
            storage_connector: Only used in the user-initialisation mode.
            splits: Assigned via ``self.splits`` (user mode) or stored directly
                on ``_splits`` (backend mode).
            features: Stored as given (user mode) or converted entry by entry
                (backend mode). ``None`` is treated as an empty list in backend
                mode.
            storage_connector_id, storage_connector_type: Only used in backend
                mode, as the arguments of ``get_by_id``.
            training_dataset_type: Mode selector; stored on
                ``_training_dataset_type`` in backend mode.
            cluster_analysis, created, creator, descriptive_statistics,
            feature_correlation_matrix, features_histogram, featurestore_name,
            jobs, inode_id, storage_connector_name:
                Accepted but not read by this constructor.
        """
        self._id = id
        self._name = name
        self._version = version
        self._description = description
        self._data_format = data_format
        self._seed = seed
        self._location = location

        self._training_dataset_api = training_dataset_api.TrainingDatasetApi(
            featurestore_id)

        self._training_dataset_engine = training_dataset_engine.TrainingDatasetEngine(
            featurestore_id)

        self._storage_connector_api = storage_connector_api.StorageConnectorApi(
            featurestore_id)

        # set up depending on user initialized or coming from backend response
        if training_dataset_type is None:
            # no type -> user init
            self._features = features
            self.storage_connector = storage_connector
            self.splits = splits
        else:
            # type available -> init from backend response
            # make rest call to get all connector information, description etc.
            self._storage_connector = self._storage_connector_api.get_by_id(
                storage_connector_id, storage_connector_type)
            # A response without a feature list must not crash the
            # comprehension below; treat it as "no features".
            if features is None:
                features = []
            self._features = [
                feature.Feature.from_response_json(feat) for feat in features
            ]
            self._splits = splits
            self._training_dataset_type = training_dataset_type
Exemplo n.º 2
0
    def __init__(self, feature_store_id):
        """Create the API and engine helpers bound to one feature store.

        Args:
            feature_store_id: Identifier stored on ``_feature_store_id`` and
                passed to every helper constructed here except the query
                constructor API, which takes no arguments.
        """
        fs_id = feature_store_id
        self._feature_store_id = fs_id

        # Helpers that only need the feature store id (plus the entity type
        # for tags).
        self._feature_view_api = feature_view_api.FeatureViewApi(fs_id)
        self._tags_api = tags_api.TagsApi(fs_id, self.ENTITY_TYPE)
        self._storage_connector_api = storage_connector_api.StorageConnectorApi(
            fs_id
        )
        self._transformation_function_engine = (
            transformation_function_engine.TransformationFunctionEngine(fs_id)
        )

        # Code and statistics engines share the training-data API path.
        td_path = self._TRAINING_DATA_API_PATH
        self._td_code_engine = code_engine.CodeEngine(fs_id, td_path)
        self._statistics_engine = statistics_engine.StatisticsEngine(
            fs_id, td_path
        )

        self._training_dataset_engine = (
            training_dataset_engine.TrainingDatasetEngine(fs_id)
        )
        self._query_constructor_api = query_constructor_api.QueryConstructorApi()
    def __init__(
        self,
        name,
        version,
        data_format,
        featurestore_id,
        location="",
        event_start_time=None,
        event_end_time=None,
        coalesce=False,
        description=None,
        storage_connector=None,
        splits=None,
        validation_size=None,
        test_size=None,
        train_start=None,
        train_end=None,
        validation_start=None,
        validation_end=None,
        test_start=None,
        test_end=None,
        seed=None,
        created=None,
        creator=None,
        features=None,
        statistics_config=None,
        featurestore_name=None,
        id=None,
        inode_id=None,
        training_dataset_type=None,
        from_query=None,
        querydto=None,
        label=None,
        transformation_functions=None,
        train_split=None,
    ):
        """Build the object from user arguments or from a backend response.

        The mode is selected by ``created``:

        * ``None``: user initialisation. ``features`` and ``label`` are stored
          as given; ``storage_connector``, ``splits`` and
          ``statistics_config`` are assigned through the public attributes of
          the same name (presumably property setters defined elsewhere in the
          class -- confirm). If ``validation_size`` or ``test_size`` is truthy,
          ``splits`` is replaced by a train/validation/test mapping derived
          from those sizes. ``_set_time_splits`` is then called with the six
          time boundaries.
        * otherwise: initialisation from a backend response. The connector,
          features, splits and statistics config are converted with the
          respective ``from_response_json`` helpers, and ``_label`` is derived
          from the features whose ``label`` attribute is truthy.

        Args:
            name, version, data_format, description, coalesce, seed, location,
            from_query, querydto, id, transformation_functions:
                Stored unchanged on the matching underscore-prefixed attribute.
            featurestore_id: Stored on ``_feature_store_id`` and passed to the
                API/engine helpers created here.
            event_start_time, event_end_time: Passed through
                ``_convert_event_time_to_timestamp`` and stored on
                ``_start_time`` / ``_end_time``.
            validation_size, test_size: Stored, and in user mode used to build
                the split mapping (train share is ``1`` minus both sizes, with
                a missing size counted as ``0``).
            train_start, train_end, validation_start, validation_end,
            test_start, test_end: Stored, and in user mode forwarded to
                ``_set_time_splits``.
            train_split: Stored on ``_train_split``; overwritten with
                ``TrainingDatasetSplit.TRAIN`` in user mode when a size-based
                split is requested.
            created: Mode selector only.
            features, splits, statistics_config, storage_connector, label:
                See the mode description above. In backend mode ``None`` for
                ``features`` or ``splits`` is treated as an empty list.
            training_dataset_type: Assigned via ``self.training_dataset_type``
                when truthy, otherwise ``_training_dataset_type`` is set to
                ``None``.
            creator, featurestore_name, inode_id: Accepted but not read by this
                constructor.
        """
        self._id = id
        self._name = name
        self._version = version
        self._description = description
        self._data_format = data_format
        self._start_time = self._convert_event_time_to_timestamp(
            event_start_time)
        self._end_time = self._convert_event_time_to_timestamp(event_end_time)
        self._validation_size = validation_size
        self._test_size = test_size
        self._train_start = train_start
        self._train_end = train_end
        self._validation_start = validation_start
        self._validation_end = validation_end
        self._test_start = test_start
        self._test_end = test_end
        self._coalesce = coalesce
        self._seed = seed
        self._location = location
        self._from_query = from_query
        self._querydto = querydto
        self._feature_store_id = featurestore_id
        self._transformation_functions = transformation_functions
        self._train_split = train_split

        self._training_dataset_api = training_dataset_api.TrainingDatasetApi(
            featurestore_id)

        self._training_dataset_engine = training_dataset_engine.TrainingDatasetEngine(
            featurestore_id)

        self._statistics_engine = statistics_engine.StatisticsEngine(
            featurestore_id, self.ENTITY_TYPE)

        self._code_engine = code_engine.CodeEngine(featurestore_id,
                                                   self.ENTITY_TYPE)

        self._transformation_function_engine = (
            transformation_function_engine.TransformationFunctionEngine(
                featurestore_id))
        if training_dataset_type:
            self.training_dataset_type = training_dataset_type
        else:
            self._training_dataset_type = None
        # set up depending on user initialized or coming from backend response
        if created is None:
            # no creation info -> user init
            self._features = features
            self.storage_connector = storage_connector
            self.splits = splits
            self.statistics_config = statistics_config
            self._label = label
            if validation_size or test_size:
                # Size-based split requested: it replaces any ``splits``
                # argument assigned just above.
                self._train_split = TrainingDatasetSplit.TRAIN
                self.splits = {
                    TrainingDatasetSplit.TRAIN:
                    1 - (validation_size or 0) - (test_size or 0),
                    TrainingDatasetSplit.VALIDATION:
                    validation_size,
                    TrainingDatasetSplit.TEST:
                    test_size,
                }
            self._set_time_splits(
                train_start,
                train_end,
                validation_start,
                validation_end,
                test_start,
                test_end,
            )
        else:
            # creation info available -> init from backend response
            self._storage_connector = StorageConnector.from_response_json(
                storage_connector)

            if features is None:
                features = []
            self._features = [
                training_dataset_feature.TrainingDatasetFeature.
                from_response_json(feat) for feat in features
            ]
            # Same guard as for ``features``: a response without splits must
            # not crash the comprehension below.
            if splits is None:
                splits = []
            self._splits = [
                TrainingDatasetSplit.from_response_json(split)
                for split in splits
            ]
            self._statistics_config = StatisticsConfig.from_response_json(
                statistics_config)
            self._label = [
                feat.name.lower() for feat in self._features if feat.label
            ]

        self._vector_server = vector_server.VectorServer(
            featurestore_id, features=self._features)
Exemplo n.º 4
0
    def __init__(
        self,
        name,
        version,
        data_format,
        location,
        featurestore_id,
        coalesce=False,
        description=None,
        storage_connector=None,
        splits=None,
        seed=None,
        created=None,
        creator=None,
        features=None,
        statistics_config=None,
        featurestore_name=None,
        id=None,
        inode_id=None,
        training_dataset_type=None,
        from_query=None,
        querydto=None,
        label=None,
    ):
        """Build the object from user arguments or from a backend response.

        The mode is selected by ``training_dataset_type``:

        * ``None``: user initialisation. ``features`` and ``label`` are stored
          as given; ``storage_connector``, ``splits`` and
          ``statistics_config`` are assigned through the public attributes of
          the same name (presumably property setters defined elsewhere in the
          class -- confirm).
        * otherwise: initialisation from a backend response. The connector,
          features and statistics config are converted with the respective
          ``from_response_json`` helpers, ``splits`` is stored as received,
          and ``_label`` is derived from the features whose ``label``
          attribute is truthy.

        Args:
            name, version, data_format, location, coalesce, description, seed,
            from_query, querydto, id:
                Stored unchanged on the matching underscore-prefixed attribute.
            featurestore_id: Stored on ``_feature_store_id`` and passed to the
                API/engine helpers created here.
            storage_connector, splits, features, statistics_config, label:
                See the mode description above. In backend mode ``None`` for
                ``features`` is treated as an empty list.
            training_dataset_type: Mode selector; stored on
                ``_training_dataset_type`` in backend mode.
            created, creator, featurestore_name, inode_id: Accepted but not
                read by this constructor.
        """
        self._id = id
        self._name = name
        self._version = version
        self._description = description
        self._data_format = data_format
        self._coalesce = coalesce
        self._seed = seed
        self._location = location
        self._from_query = from_query
        self._querydto = querydto
        self._feature_store_id = featurestore_id
        # Start unset; nothing in this constructor populates them.
        self._prepared_statement_connection = None
        self._prepared_statements = None
        self._serving_keys = None

        self._training_dataset_api = training_dataset_api.TrainingDatasetApi(
            featurestore_id)

        self._training_dataset_engine = training_dataset_engine.TrainingDatasetEngine(
            featurestore_id)

        self._statistics_engine = statistics_engine.StatisticsEngine(
            featurestore_id, self.ENTITY_TYPE)

        # set up depending on user initialized or coming from backend response
        if training_dataset_type is None:
            # no type -> user init
            self._features = features
            self.storage_connector = storage_connector
            self.splits = splits
            self.statistics_config = statistics_config
            self._label = label
        else:
            # type available -> init from backend response
            self._storage_connector = StorageConnector.from_response_json(
                storage_connector)

            # A response without a feature list must not crash the
            # comprehension below; treat it as "no features".
            if features is None:
                features = []
            self._features = [
                training_dataset_feature.TrainingDatasetFeature.
                from_response_json(feat) for feat in features
            ]
            self._splits = splits
            self._training_dataset_type = training_dataset_type
            self._statistics_config = StatisticsConfig.from_response_json(
                statistics_config)
            self._label = [
                feat.name.lower() for feat in self._features if feat.label
            ]
    def __init__(
        self,
        name,
        version,
        data_format,
        location,
        featurestore_id,
        description=None,
        storage_connector=None,
        splits=None,
        seed=None,
        created=None,
        creator=None,
        features=None,
        statistics_config=None,
        featurestore_name=None,
        id=None,
        jobs=None,
        inode_id=None,
        storage_connector_name=None,
        storage_connector_id=None,
        storage_connector_type=None,
        training_dataset_type=None,
        from_query=None,
        querydto=None,
    ):
        """Build the object from user arguments or from a backend response.

        The mode is selected by ``training_dataset_type``:

        * ``None``: user initialisation. ``features`` is stored as given;
          ``storage_connector``, ``splits`` and ``statistics_config`` are
          assigned through the public attributes of the same name (presumably
          property setters defined elsewhere in the class -- confirm).
        * otherwise: initialisation from a backend response. The storage
          connector is fetched with ``StorageConnectorApi.get_by_id``, each
          entry of ``features`` is converted with
          ``TrainingDatasetFeature.from_response_json``, ``splits`` is stored
          as received, and ``self.statistics_config`` is set to ``None``.

        Args:
            name, version, data_format, location, description, seed,
            from_query, querydto, id:
                Stored unchanged on the matching underscore-prefixed attribute.
            featurestore_id: Passed to the API/engine helpers created here.
            storage_connector: Only used in the user-initialisation mode.
            splits, features: See the mode description above. In backend mode
                ``None`` for ``features`` is treated as an empty list.
            statistics_config: Only used in the user-initialisation mode.
            storage_connector_id, storage_connector_type: Only used in backend
                mode, as the arguments of ``get_by_id``.
            training_dataset_type: Mode selector; stored on
                ``_training_dataset_type`` in backend mode.
            created, creator, featurestore_name, jobs, inode_id,
            storage_connector_name:
                Accepted but not read by this constructor.
        """
        self._id = id
        self._name = name
        self._version = version
        self._description = description
        self._data_format = data_format
        self._seed = seed
        self._location = location
        self._from_query = from_query
        self._querydto = querydto

        self._training_dataset_api = training_dataset_api.TrainingDatasetApi(
            featurestore_id)

        self._training_dataset_engine = training_dataset_engine.TrainingDatasetEngine(
            featurestore_id)

        self._storage_connector_api = storage_connector_api.StorageConnectorApi(
            featurestore_id)

        self._statistics_engine = statistics_engine.StatisticsEngine(
            featurestore_id, self.ENTITY_TYPE)

        # set up depending on user initialized or coming from backend response
        if training_dataset_type is None:
            # no type -> user init
            self._features = features
            self.storage_connector = storage_connector
            self.splits = splits
            self.statistics_config = statistics_config
        else:
            # type available -> init from backend response
            # make rest call to get all connector information, description etc.
            self._storage_connector = self._storage_connector_api.get_by_id(
                storage_connector_id, storage_connector_type)
            # A response without a feature list must not crash the
            # comprehension below; treat it as "no features".
            if features is None:
                features = []
            self._features = [
                training_dataset_feature.TrainingDatasetFeature.
                from_response_json(feat) for feat in features
            ]
            self._splits = splits
            self._training_dataset_type = training_dataset_type
            # NOTE(review): the ``statistics_config`` argument is discarded in
            # this branch -- confirm that this is intentional.
            self.statistics_config = None