示例#1
0
 def metadata(self):
     """Return the metadata columns of this object as a DataFrame.

     The column names are obtained from ``utils.get_metadata``; the
     selected columns are wrapped in a new ``DataFrame`` that receives
     this object's ``metadata_string`` and ``prefix``.
     """
     meta_names = utils.get_metadata(
         self, self.metadata_string, self.prefix
     )
     meta_subset = self[meta_names]
     return DataFrame(
         meta_subset,
         metadata_string=self.metadata_string,
         prefix=self.prefix,
     )
示例#2
0
def test_get_metadata_simple():
    """Expect only "Metadata_A" when get_metadata is called with defaults."""
    frame = pd.DataFrame(
        {
            "colA": [1, 2, 3, 4],
            "colB": [4, 3, 2, 1],
            "Metadata_A": [1, 2, 3, 4],
        }
    )
    found = utils.get_metadata(frame)
    assert found == ["Metadata_A"]
示例#3
0
def test_get_metadata_different_case():
    """Expect a lower-case metadata_string to select only "metadata_A".

    The frame also holds "Metadata_A", which the assertion expects to be
    left out when ``metadata_string="metadata"`` is given.
    """
    frame = pd.DataFrame(
        {
            "colA": [1, 2, 3, 4],
            "colB": [4, 3, 2, 1],
            "metadata_A": [1, 2, 3, 4],
            "Metadata_A": [4, 3, 5, 1],
        }
    )
    found = utils.get_metadata(frame, metadata_string="metadata")
    assert found == ["metadata_A"]
示例#4
0
def test_get_metadata_middle_prefix():
    """Expect "something_Metadata" to be excluded with default arguments.

    Only "Metadata_A" is expected back, even though another column name
    contains the word "Metadata" later in the string.
    """
    frame = pd.DataFrame(
        {
            "colA": [1, 2, 3, 4],
            "colB": [4, 3, 2, 1],
            "something_Metadata": [1, 2, 3, 4],
            "Metadata_A": [4, 3, 5, 1],
        }
    )
    found = utils.get_metadata(frame)
    assert found == ["Metadata_A"]
示例#5
0
def aggregate(data, on, method="median", metadata_string="Metadata_", prefix=True):
    """
    Aggregate dataset

    Groups `data` by `on`, averages the numeric columns of each group with
    `method`, and re-attaches the metadata columns taken from the first row
    of each group. Columns are returned in the same order as in `data`.

    Parameters
    -----------
    data : pandas DataFrame
        DataFrame
    on : string or list of strings
        column(s) with which to group by and aggregate the dataset.
    method : string (default="median")
        method to average each group. options = "median" or "mean"
    metadata_string : string (default="Metadata_")
        passed on to utils.get_metadata to identify the metadata columns
    prefix : boolean (default=True)
        passed on to utils.get_metadata together with metadata_string

    Returns
    -------
    agg_df : pandas DataFrame
        aggregated dataframe, with a row per value of 'on'

    Raises
    ------
    ValueError
        if `method` is neither "mean" nor "median" (and was not already
        rejected by the input checks)
    """
    _check_inputs(data, on, method)
    _check_featuredata(data, on, metadata_string, prefix)
    # keep track of original column order
    df_cols = data.columns.tolist()
    grouped = data.groupby(on, as_index=False)
    # numeric_only=True is spelled out because non-numeric (e.g. string
    # metadata) columns are no longer silently dropped by pandas >= 2.0;
    # they are re-attached from df_metadata below instead.
    if method == "mean":
        agg = grouped.mean(numeric_only=True)
    elif method == "median":
        agg = grouped.median(numeric_only=True)
    else:
        raise ValueError(
            "method must be 'mean' or 'median', got {!r}".format(method)
        )
    df_metadata = data[utils.get_metadata(data, metadata_string, prefix)].copy()
    # add indexing column to metadata if not already present
    df_metadata[on] = data[on]
    # reduce metadata to one row per group (first occurrence is kept) so it
    # is at the same level as the aggregated data
    df_metadata = df_metadata.drop_duplicates(subset=on)
    # merge aggregated and metadata; a numeric metadata column appears on
    # both sides, so the aggregated copy gets the "remove_me" suffix and the
    # un-aggregated metadata value keeps the original column name
    merged_df = pd.merge(agg, df_metadata, on=on, how="outer",
                         suffixes=("remove_me", ""))
    # restore the original column order; this also discards any
    # "remove_me"-suffixed duplicates created by the merge
    merged_df = merged_df[df_cols]
    # internal sanity check: no columns gained or lost
    assert len(merged_df.columns) == len(data.columns)
    return merged_df
示例#6
0
文件: stats.py 项目: Swarchal/morar
def scale_features(data, metadata_string="Metadata_", prefix=True):
    """
    Apply `z_score` to every feature column, leaving metadata untouched

    Parameters
    ----------
    data : pandas DataFrame
        DataFrame
    metadata_string : string (default="Metadata_")
        passed on to utils.get_featuredata and utils.get_metadata
    prefix : boolean (default=True)
        passed on to utils.get_featuredata and utils.get_metadata

    Returns
    -------
    scaled : pandas DataFrame
        dataframe with the columns of `data` in their original order,
        with scaled feature values
    """
    original_order = data.columns.tolist()
    feature_names = utils.get_featuredata(data, metadata_string, prefix)
    features = data[feature_names]
    meta_names = utils.get_metadata(data, metadata_string, prefix)
    meta = data[meta_names]
    # scale column-wise, then put features and metadata side by side
    combined = pd.concat([features.apply(z_score), meta], axis=1)
    # hand the columns back in the order they arrived in
    return combined[original_order]
示例#7
0
文件: stats.py 项目: Swarchal/morar
def scale_features(data, metadata_string="Metadata_", prefix=True):
    """
    Apply `z_score` to every feature column, leaving metadata untouched

    Parameters
    ----------
    data : pandas DataFrame
        DataFrame
    metadata_string : string (default="Metadata_")
        passed on to utils.get_featuredata and utils.get_metadata
    prefix : boolean (default=True)
        passed on to utils.get_featuredata and utils.get_metadata

    Returns
    -------
    scaled : pandas DataFrame
        dataframe with the columns of `data` in their original order,
        with scaled feature values
    """
    original_order = data.columns.tolist()
    feature_names = utils.get_featuredata(data, metadata_string, prefix)
    features = data[feature_names]
    meta_names = utils.get_metadata(data, metadata_string, prefix)
    meta = data[meta_names]
    # scale column-wise, then put features and metadata side by side
    combined = pd.concat([features.apply(z_score), meta], axis=1)
    # hand the columns back in the order they arrived in
    return combined[original_order]
示例#8
0
 def metacols(self):
     """Return the list of metadata column names.

     Delegates to ``utils.get_metadata`` using this object's
     ``metadata_string`` and ``prefix``.
     """
     meta_string = self.metadata_string
     use_prefix = self.prefix
     return utils.get_metadata(self, meta_string, use_prefix)
示例#9
0
 def metadata(self):
     """Return the metadata columns of this object as a DataFrame.

     The column names are obtained from ``utils.get_metadata``; the
     selected columns are wrapped in a new ``DataFrame`` that receives
     this object's ``metadata_string`` and ``prefix``.
     """
     meta_names = utils.get_metadata(
         self, self.metadata_string, self.prefix
     )
     meta_subset = self[meta_names]
     return DataFrame(
         meta_subset,
         metadata_string=self.metadata_string,
         prefix=self.prefix,
     )
示例#10
0
 def metacols(self):
     """Return the list of metadata column names.

     Delegates to ``utils.get_metadata`` using this object's
     ``metadata_string`` and ``prefix``.
     """
     meta_string = self.metadata_string
     use_prefix = self.prefix
     return utils.get_metadata(self, meta_string, use_prefix)