Exemplo n.º 1
0
def _do_get_featuregroup(featuregroup_name, featurestore_metadata, featurestore=None, featuregroup_version=1, online=False):
    """
    Gets a featuregroup from a featurestore as a pandas dataframe

    Only feature groups whose type equals the cached feature group type in the featurestore settings are
    served; every other type is rejected with a ValueError.

    Args:
        :featuregroup_name: name of the featuregroup to get
        :featurestore_metadata: featurestore metadata
        :featurestore: the featurestore where the featuregroup resides, defaults to the project's featurestore
        :featuregroup_version: (Optional) the version of the featuregroup
        :online: a boolean flag whether to fetch the online feature or the offline one (assuming that the
                 feature group that the feature is stored in has online serving enabled)
                 (for cached feature groups only)

    Returns:
        a pandas dataframe with the contents of the feature group

    Raises:
        :ValueError: if the type of the feature group is not the cached feature group type

    """
    if featurestore is None:
        featurestore = fs_utils._do_get_project_featurestore()
    fg = query_planner._find_featuregroup(
        featurestore_metadata.featuregroups, featuregroup_name, featuregroup_version)

    settings = featurestore_metadata.settings
    if fg.featuregroup_type == settings.cached_featuregroup_type:
        return _do_get_cached_featuregroup(featuregroup_name, featurestore, featuregroup_version, online)

    # Build the whole message with a single format() call: concatenating the type with "+" would raise
    # a TypeError (hiding this ValueError) whenever the unrecognized type is not a string, e.g. None.
    raise ValueError(
        "The feature group type: {} was not recognized. Recognized types include: {} and {}".format(
            fg.featuregroup_type,
            settings.on_demand_featuregroup_type,
            settings.cached_featuregroup_type))
Exemplo n.º 2
0
def _do_get_cached_featuregroup(featuregroup_name, featurestore=None, featuregroup_version=1, online=False):
    """
    Reads a cached featuregroup from a featurestore and returns it as a pandas dataframe

    A logical query plan is built for the featuregroup, turned into SQL, and the SQL is executed
    through _run_and_log_sql.

    Args:
        :featuregroup_name: name of the featuregroup to get
        :featurestore: the featurestore where the featuregroup resides, defaults to the project's featurestore
        :featuregroup_version: (Optional) the version of the featuregroup
        :online: a boolean flag whether to fetch the online feature or the offline one (assuming that the
                 feature group that the feature is stored in has online serving enabled)
                 (for cached feature groups only)

    Returns:
        a pandas dataframe with the contents of the feature group

    """
    if featurestore is None:
        featurestore = fs_utils._do_get_project_featurestore()

    plan = LogicalQueryPlan(
        FeaturegroupQuery(featuregroup_name, featurestore, featuregroup_version))
    plan.create_logical_plan()
    plan.construct_sql()
    return _run_and_log_sql(plan.sql_str, featurestore=featurestore, online=online)
Exemplo n.º 3
0
def _do_get_feature(feature, featurestore_metadata, featurestore=None, featuregroup=None, featuregroup_version=1,
                    online=False):
    """
    Fetches a single feature (column) from a featurestore. When no featuregroup is given, the
    hopsworks metastore is queried to find out whether the feature exists in any of the featuregroups
    of the featurestore. Specifying the featuregroup, when known, improves the performance of the query.

    Args:
        :feature: the feature name to get
        :featurestore_metadata: the metadata of the featurestore to query
        :featurestore: the featurestore where the featuregroup resides, defaults to the project's featurestore
        :featuregroup: (Optional) the featuregroup where the feature resides
        :featuregroup_version: (Optional) the version of the featuregroup
        :online: a boolean flag whether to fetch the online feature or the offline one (assuming that the
                 feature group that the feature is stored in has online serving enabled)
                 (for cached feature groups only)

    Returns:
        A pandas dataframe with the feature

    """
    if featurestore is None:
        featurestore = fs_utils._do_get_project_featurestore()

    plan = LogicalQueryPlan(
        FeatureQuery(feature, featurestore_metadata, featurestore, featuregroup, featuregroup_version))
    plan.create_logical_plan()
    plan.construct_sql()

    return _run_and_log_sql(plan.sql_str, featurestore, online)
Exemplo n.º 4
0
def _do_get_features(features, featurestore_metadata, featurestore=None, featuregroups_version_dict=None, join_key=None,
                     online=False):
    """
    Gets a list of features (columns) from the featurestore. If no featuregroup is specified it will query hopsworks
    metastore to find where the features are stored.

    Args:
        :features: a list of features to get from the featurestore
        :featurestore_metadata: the metadata of the featurestore
        :featurestore: the featurestore where the featuregroup resides, defaults to the project's featurestore
        :featuregroups_version_dict: (Optional) a dict with (fg --> version) for all the featuregroups where the
                                     features resides, defaults to an empty dict
        :join_key: (Optional) column name to join on
        :online: a boolean flag whether to fetch the online feature or the offline one (assuming that the
                 feature group that the feature is stored in has online serving enabled)
                 (for cached feature groups only)

    Returns:
        A pandas dataframe with all the features

    """
    if featurestore is None:
        featurestore = fs_utils._do_get_project_featurestore()
    # A literal {} default would be one dict object shared by every call; create a fresh one per call
    # so nothing a callee stores in it can leak into later invocations.
    if featuregroups_version_dict is None:
        featuregroups_version_dict = {}

    features_query = FeaturesQuery(
        features, featurestore_metadata, featurestore, featuregroups_version_dict, join_key)
    logical_query_plan = LogicalQueryPlan(features_query)
    logical_query_plan.create_logical_plan()
    logical_query_plan.construct_sql()

    return _run_and_log_sql(logical_query_plan.sql_str, featurestore, online)
Exemplo n.º 5
0
def _do_get_featuregroup(featuregroup_name,
                         featurestore_metadata,
                         featurestore=None,
                         featuregroup_version=1):
    """
    Gets a featuregroup from a featurestore as a pandas dataframe

    Only feature groups whose type equals the cached feature group type in the featurestore settings are
    served; every other type is rejected with a ValueError.

    Args:
        :featuregroup_name: name of the featuregroup to get
        :featurestore_metadata: featurestore metadata
        :featurestore: the featurestore where the featuregroup resides, defaults to the project's featurestore
        :featuregroup_version: (Optional) the version of the featuregroup

    Returns:
        a pandas dataframe with the contents of the feature group

    Raises:
        :ValueError: if the type of the feature group is not the cached feature group type

    """
    if featurestore is None:
        featurestore = fs_utils._do_get_project_featurestore()
    fg = query_planner._find_featuregroup(featurestore_metadata.featuregroups,
                                          featuregroup_name,
                                          featuregroup_version)

    settings = featurestore_metadata.settings
    if fg.featuregroup_type == settings.cached_featuregroup_type:
        return _do_get_cached_featuregroup(featuregroup_name, featurestore,
                                           featuregroup_version)

    # Build the whole message with a single format() call: concatenating the type with "+" would raise
    # a TypeError (hiding this ValueError) whenever the unrecognized type is not a string, e.g. None.
    raise ValueError(
        "The feature group type: {} was not recognized. Recognized types include: {} and {}".format(
            fg.featuregroup_type,
            settings.on_demand_featuregroup_type,
            settings.cached_featuregroup_type))
Exemplo n.º 6
0
def _do_get_features(features, featurestore_metadata, featurestore=None, featuregroups_version_dict=None, join_key=None):
    """
    Gets a list of features (columns) from the featurestore. If no featuregroup is specified it will query hopsworks
    metastore to find where the features are stored.

    Args:
        :features: a list of features to get from the featurestore
        :featurestore_metadata: the metadata of the featurestore
        :featurestore: the featurestore where the featuregroup resides, defaults to the project's featurestore
        :featuregroups_version_dict: (Optional) a dict with (fg --> version) for all the featuregroups where the
                                     features resides, defaults to an empty dict
        :join_key: (Optional) column name to join on

    Returns:
        A pandas dataframe with all the features

    """
    if featurestore is None:
        featurestore = fs_utils._do_get_project_featurestore()
    # A literal {} default would be one dict object shared by every call; create a fresh one per call
    # so nothing a callee stores in it can leak into later invocations.
    if featuregroups_version_dict is None:
        featuregroups_version_dict = {}

    # The hive connection is opened before the query is planned, as in the original ordering.
    hive_conn = util._create_hive_connection(featurestore)

    features_query = FeaturesQuery(features, featurestore_metadata, featurestore, featuregroups_version_dict, join_key)
    logical_query_plan = LogicalQueryPlan(features_query)
    logical_query_plan.create_logical_plan()
    logical_query_plan.construct_sql()

    return _run_and_log_sql(hive_conn, logical_query_plan.sql_str)
Exemplo n.º 7
0
def _do_get_feature(feature, featurestore_metadata, featurestore=None, featuregroup=None, featuregroup_version=1):
    """
    Fetches a single feature (column) from a featurestore. When no featuregroup is given, the
    hopsworks metastore is queried to find out whether the feature exists in any of the featuregroups
    of the featurestore. Specifying the featuregroup, when known, improves the performance of the query.

    Args:
        :feature: the feature name to get
        :featurestore_metadata: the metadata of the featurestore to query
        :featurestore: the featurestore where the featuregroup resides, defaults to the project's featurestore
        :featuregroup: (Optional) the featuregroup where the feature resides
        :featuregroup_version: (Optional) the version of the featuregroup

    Returns:
        A pandas dataframe with the feature

    """
    if featurestore is None:
        featurestore = fs_utils._do_get_project_featurestore()

    # Open the hive connection first, then plan the query that will run over it.
    hive_connection = util._create_hive_connection(featurestore)

    plan = LogicalQueryPlan(
        FeatureQuery(feature, featurestore_metadata, featurestore, featuregroup, featuregroup_version))
    plan.create_logical_plan()
    plan.construct_sql()

    return _run_and_log_sql(hive_connection, plan.sql_str)
Exemplo n.º 8
0
    def _default_featurestore(self, featurestore):
        """
        Returns the default featurestore. If the user did not specify the featurestore, it defaults to the project's
        featurestore

        Args:
            :featurestore: the featurestore argument provided by the user

        Returns:
            The user provided featurestore if not None, otherwise the project's featurestore
        """
        if featurestore is None:
            featurestore = fs_utils._do_get_project_featurestore()
        return featurestore
Exemplo n.º 9
0
def _get_featurestore_metadata(featurestore=None, update_cache=False):
    """
    Returns the metadata of a featurestore (featuregroups and training datasets), fetching it with a
    REST call to the appservice in hopsworks when nothing is cached yet or a refresh is requested.

    The result is kept in the module-level `metadata_cache`.
    NOTE(review): the cache holds a single entry and is not keyed by featurestore, so a cached entry is
    returned as-is even when `featurestore` differs from the one it was fetched for -- confirm that
    callers pass update_cache=True when switching featurestores.

    Args:
        :featurestore: the name of the database, defaults to the project's featurestore
        :update_cache: if true the cache is updated
    Returns:
        feature store metadata object
    """
    global metadata_cache
    if featurestore is None:
        featurestore = fs_utils._do_get_project_featurestore()
    if metadata_cache is not None and not update_cache:
        return metadata_cache
    metadata_cache = FeaturestoreMetadata(rest_rpc._get_featurestore_metadata(featurestore))
    return metadata_cache
Exemplo n.º 10
0
def _do_get_cached_featuregroup(featuregroup_name, featurestore=None, featuregroup_version=1):
    """
    Reads a cached featuregroup from a featurestore and returns it as a pandas dataframe

    A hive connection is opened for the featurestore, a logical query plan is built for the
    featuregroup and turned into SQL, and the SQL is executed through _run_and_log_sql.

    Args:
        :featuregroup_name: name of the featuregroup to get
        :featurestore: the featurestore where the featuregroup resides, defaults to the project's featurestore
        :featuregroup_version: (Optional) the version of the featuregroup

    Returns:
        a pandas dataframe with the contents of the feature group

    """
    if featurestore is None:
        featurestore = fs_utils._do_get_project_featurestore()

    # Open the hive connection first, then plan the query that will run over it.
    hive_connection = util._create_hive_connection(featurestore)

    plan = LogicalQueryPlan(
        FeaturegroupQuery(featuregroup_name, featurestore, featuregroup_version))
    plan.create_logical_plan()
    plan.construct_sql()
    return _run_and_log_sql(hive_connection, plan.sql_str)
Exemplo n.º 11
0
        return JDBCStorageConnector(response_object)


def _do_import_featuregroup(job_conf):
    """
    Forwards `job_conf` to rest_rpc._put_featuregroup_import_job to submit a featuregroup import job.

    Args:
        :job_conf: featuregroup import job configuration

    Returns:
        The value returned by rest_rpc._put_featuregroup_import_job (presumably the REST response,
        as for _do_trainingdataset_create -- TODO confirm)
    """
    import_response = rest_rpc._put_featuregroup_import_job(job_conf)
    return import_response


def _do_trainingdataset_create(job_conf):
    """
    Submits a training dataset creation job, configured by `job_conf`,
    through a REST call.

    Args:
        :job_conf: training dataset creation job configuration

    Returns:
        The REST response

    Raises:
        :RestAPIError: if there was an error in the REST call to Hopsworks
    """
    create_response = rest_rpc._put_trainingdataset_create_job(job_conf)
    return create_response


# Fetch on-load and cache it on the client.
# This is best-effort: a failure here (e.g. the featurestore service is unreachable while the module
# is being imported) must not prevent the import, the metadata is then fetched lazily on first use.
try:
    metadata_cache = _get_featurestore_metadata(
        featurestore=fs_utils._do_get_project_featurestore())
except Exception:
    # Catch Exception rather than using a bare except, so KeyboardInterrupt/SystemExit still propagate.
    # Make sure the cache name exists (without overwriting an existing value), because
    # _get_featurestore_metadata reads the global `metadata_cache` before assigning it.
    globals().setdefault("metadata_cache", None)
Exemplo n.º 12
0

def _do_get_training_dataset_statistics(training_dataset_name, featurestore=None, training_dataset_version=1):
    """
    Gets the computed statistics (if any) of a training dataset

    Args:
        :training_dataset_name: the name of the training dataset
        :featurestore: the featurestore where the training dataset resides, defaults to the project's featurestore
        :training_dataset_version: the version of the training dataset

    Returns:
          A Statistics Object
    """
    # Resolve the default up front, as the sibling getters do, so the id lookups below never receive None.
    if featurestore is None:
        featurestore = fs_utils._do_get_project_featurestore()
    training_dataset_id = _get_training_dataset_id(featurestore, training_dataset_name, training_dataset_version)
    featurestore_id = _get_featurestore_id(featurestore)
    response_object = rest_rpc._get_training_dataset_rest(training_dataset_id, featurestore_id)
    # .get() returns None when a key does not exist instead of raising an exception, so statistics
    # that are absent from the response are passed on as None.
    descriptive_stats_json = response_object.get(constants.REST_CONFIG.JSON_FEATUREGROUP_DESC_STATS)
    correlation_matrix_json = response_object.get(constants.REST_CONFIG.JSON_FEATUREGROUP_FEATURE_CORRELATION)
    features_histogram_json = response_object.get(constants.REST_CONFIG.JSON_FEATUREGROUP_FEATURES_HISTOGRAM)
    feature_clusters = response_object.get(constants.REST_CONFIG.JSON_FEATUREGROUP_FEATURES_CLUSTERS)
    return Statistics(descriptive_stats_json, correlation_matrix_json, features_histogram_json, feature_clusters)


# Fetch on-load and cache it on the client.
# This is best-effort: a failure here (e.g. the featurestore service is unreachable while the module
# is being imported) must not prevent the import, the metadata is then fetched lazily on first use.
try:
    metadata_cache = _get_featurestore_metadata(featurestore=fs_utils._do_get_project_featurestore())
except Exception:
    # Catch Exception rather than using a bare except, so KeyboardInterrupt/SystemExit still propagate.
    # Make sure the cache name exists (without overwriting an existing value), because
    # _get_featurestore_metadata reads the global `metadata_cache` before assigning it.
    globals().setdefault("metadata_cache", None)