def build(method_name):
    """Return the replicate-aggregation functor selected by ``method_name``.

    Recognized names are ``'sum'``, ``'mean'`` and ``'pick'``. Each yields a
    ``NamedFunctor`` whose callable accepts ``(state, mode)`` and forwards
    only ``state`` to the matching aggregation helper. Any other name matches
    nothing and the function returns ``None``.
    """
    # (method name, functor label, callable handed to NamedFunctor)
    recipes = (
        ('sum', "Sum Replicates",
         lambda state, mode: aggregate_sum(state)),
        ('mean', "Average Replicates",
         lambda state, mode: aggregate_mean(state)),
        ('pick', "Pick High Count Replicate",
         lambda state, mode: aggregate_pick_high_count(state)),
    )
    for key, label, action in recipes:
        if method_name == key:
            return NamedFunctor(label, action)
    return None
def build(meta_col_name: str, one_set: set, pair_strategy="paired_concat"):
    """Return the target-building functor selected by ``pair_strategy``.

    Recognized strategies are ``"paired_concat"``, ``"paired_subtract"``,
    ``"unpaired"`` and ``"paired_subtract_sex_balanced"``. Each yields a
    ``NamedFunctor`` whose callable accepts ``(state, mode)`` and calls the
    matching helper with ``(state, meta_col_name, one_set)``. The label is
    ``"Target: " + meta_col_name``, with a ``(Household...)`` suffix for the
    concat and subtract strategies. Any other strategy returns ``None``.
    """
    # (strategy name, label suffix or None, callable handed to NamedFunctor)
    strategies = (
        ("paired_concat", "(HouseholdConcat)",
         lambda state, mode: matched_pair_concat(
             state, meta_col_name, one_set)),
        ("paired_subtract", "(HouseholdSubtract)",
         lambda state, mode: matched_pair_subtract(
             state, meta_col_name, one_set)),
        ("unpaired", None,
         lambda state, mode: _target(state, meta_col_name, one_set)),
        ("paired_subtract_sex_balanced", None,
         lambda state, mode: matched_pair_subtract_sex_balanced(
             state, meta_col_name, one_set)),
    )
    for strategy, suffix, runner in strategies:
        if pair_strategy == strategy:
            # The label is only assembled once a strategy has matched.
            label = "Target: " + meta_col_name
            if suffix is not None:
                label = label + suffix
            return NamedFunctor(label, runner)
    return None
Пример #3
0
def build_zebra(cov_thresh, cov_file):
    """Build a functor that keeps only columns for genomes passing coverage.

    ``cov_file`` is read with ``pd.read_csv`` once, when this factory is
    called. Rows whose ``coverage_ratio`` is strictly greater than
    ``cov_thresh`` contribute their ``genome_id`` to an allow-list. The
    returned ``NamedFunctor`` is labelled ``"Zebra Filter:" + str(cov_thresh)``.
    """
    coverage = pd.read_csv(cov_file)
    passing_mask = coverage.coverage_ratio > cov_thresh
    passing_ids = coverage.loc[passing_mask, 'genome_id'].to_list()

    def _keep_passing_columns(state, mode):
        # Restrict state.df to the columns whose labels are in the allow-list,
        # store the result back on the state, then hand it to update_df.
        kept_columns = state.df.columns.intersection(passing_ids)
        state.df = state.df[kept_columns]
        return state.update_df(state.df)

    label = "Zebra Filter:" + str(cov_thresh)
    return NamedFunctor(label, _keep_passing_columns)
Пример #4
0
def build(method_name, target_count=10000):
    """Return the normalization functor selected by ``method_name``.

    Args:
        method_name: ``'none'``, ``'rarefy'``, ``'divide_total'`` or ``'CLR'``
            select an implemented normalization. ``'ILR'``, ``'ALR'`` and
            ``'AST'`` are recognized but not implemented.
        target_count: Forwarded to the rarefy and divide_total helpers; the
            other methods do not use it.

    Returns:
        A ``NamedFunctor`` wrapping a ``(state, mode)`` callable, or ``None``
        when ``method_name`` matches none of the names above.

    Raises:
        NotImplementedError: If ``method_name`` is ``'ILR'``, ``'ALR'`` or
            ``'AST'``.
    """
    if method_name == 'none':
        # Identity step: the state is returned untouched.
        return NamedFunctor(
            "No Normalization "
            "(WARN: IGNORES COMPOSITIONALITY)", lambda state, mode: state)

    if method_name == 'rarefy':
        return NamedFunctor(
            "Rarefy", lambda state, mode: rarefy_wrapper(state, target_count))
    if method_name == 'divide_total':
        return NamedFunctor(
            "Truncate Ray To Simplex",
            lambda state, mode: divide_total(state, target_count))
    if method_name == 'ILR':
        # Existing functions for this in skbio
        # NotImplemented is a non-callable constant; calling it raised
        # TypeError. NotImplementedError is the exception meant here.
        raise NotImplementedError("ILR normalization is not implemented")
    if method_name == 'CLR':
        return NamedFunctor("CLR Transform",
                            lambda state, mode: clr_wrapper(state))
    if method_name == 'ALR':
        # Existing functions for this in skbio
        raise NotImplementedError("ALR normalization is not implemented")
    if method_name == 'AST':
        raise NotImplementedError("AST normalization is not implemented")
    # Unrecognized names still fall through to None, as before.
    return None
Пример #5
0
def sum_columns():
    """Return a ``NamedFunctor`` labelled "Sum All Columns".

    Its callable accepts ``(state, mode)`` and passes only ``state`` to
    ``_sum_columns``, returning that helper's result.
    """
    def _run(state, mode):
        return _sum_columns(state)

    return NamedFunctor("Sum All Columns", _run)
Пример #6
0
def build_meta_encoder(col_name, encoder):
    """Build a functor that copies an encoded metadata column into ``state.df``.

    When run, the functor applies ``encoder`` to ``state.meta_df[col_name]``
    via ``.apply``, stores the result as ``state.df[col_name]`` and returns
    ``state.update_df(state.df)``. The functor is labelled
    ``"Include " + col_name``.
    """
    step_label = "Include " + col_name

    def _encode_into_df(state, mode):
        encoded = state.meta_df[col_name].apply(encoder)
        state.df[col_name] = encoded
        return state.update_df(state.df)

    return NamedFunctor(step_label, _encode_into_df)
Пример #7
0
def build_feature_set_transform(transformer):
    """Wrap ``transformer`` in a ``NamedFunctor`` labelled ``transformer.name``.

    The functor's callable accepts ``(state, mode)`` and returns
    ``_apply_feature_transform(state, transformer)``.
    """
    return NamedFunctor(
        transformer.name,
        lambda state, mode: _apply_feature_transform(state, transformer))
def build_prefix_filter(bad_prefixes):
    """Return a ``NamedFunctor`` labelled "Filter Samples By ID Prefix".

    Its callable accepts ``(state, mode)`` and returns
    ``_filter_out_sample_id_prefix(state, bad_prefixes)``.
    """
    def _drop_prefixed(state, mode):
        return _filter_out_sample_id_prefix(state, bad_prefixes)

    return NamedFunctor("Filter Samples By ID Prefix", _drop_prefixed)
def build_whitelist_metadata_value(metadata_column, metadata_values):
    """Build a functor that filters by the values of one metadata column.

    The label names both the column and ``str(metadata_values)``. The
    callable accepts ``(x, mode)`` and returns
    ``_filter_by_metadata(x, metadata_column, metadata_values)``.
    """
    label = ("Subset Samples by " + metadata_column + " for " +
             str(metadata_values))

    def _subset(x, mode):
        return _filter_by_metadata(x, metadata_column, metadata_values)

    return NamedFunctor(label, _subset)
def build_filter_out_empty_samples():
    """Return a ``NamedFunctor`` labelled "Filter Empty Samples".

    Its callable accepts ``(state, mode)`` and returns
    ``_filter_zero_sum(state)``.
    """
    def _drop_empty(state, mode):
        return _filter_zero_sum(state)

    return NamedFunctor("Filter Empty Samples", _drop_empty)
Пример #11
0
def build(num_training_samples):
    """Return a functor that downsamples to ``num_training_samples``.

    The label is ``"Downsample Train Set: "`` followed by
    ``str(num_training_samples)``. The callable accepts ``(state, mode)`` and
    returns ``_downsample(state, mode, num_training_samples)``; unlike most
    sibling factories it forwards ``mode`` to the helper.
    """
    label = "Downsample Train Set: " + str(num_training_samples)

    def _run(state, mode):
        return _downsample(state, mode, num_training_samples)

    return NamedFunctor(label, _run)
def build_exact_filter(bad_sample_ids):
    """Return a ``NamedFunctor`` labelled "Remove Bad Samples".

    Its callable accepts ``(state, mode)`` and returns
    ``_filter_out_sample_ids(state, bad_sample_ids)``.
    """
    def _drop_listed(state, mode):
        return _filter_out_sample_ids(state, bad_sample_ids)

    return NamedFunctor("Remove Bad Samples", _drop_listed)
Пример #13
0
def plot_correlation_matrix():
    """Return a ``NamedFunctor`` labelled "Plot Correlation Heatmap".

    Its callable accepts ``(state, mode)`` and returns
    ``_plot_correlation_matrix_deluxe(state)``.
    """
    def _plot(state, mode):
        return _plot_correlation_matrix_deluxe(state)

    return NamedFunctor("Plot Correlation Heatmap", _plot)
Пример #14
0
def plot_scatter():
    """Return a ``NamedFunctor`` labelled "Plot Scatter".

    ``_plot_scatter`` is handed to the functor directly, without a wrapper,
    so it receives whatever arguments ``NamedFunctor`` passes to its callable.
    """
    label = "Plot Scatter"
    action = _plot_scatter
    return NamedFunctor(label, action)
Пример #15
0
def build_column_filter(chosen_columns):
    """Return a functor that restricts the state to ``chosen_columns``.

    The label is ``"Restrict to columns (compositional): "`` followed by
    ``str(chosen_columns)``. The callable accepts ``(state, mode)`` and
    returns ``_restrict_columns_compositional(state, chosen_columns)``.
    """
    label = "Restrict to columns (compositional): " + str(chosen_columns)

    def _restrict(state, mode):
        return _restrict_columns_compositional(state, chosen_columns)

    return NamedFunctor(label, _restrict)
def build_shared_filter():
    """Return a ``NamedFunctor`` labelled "Filter Samples To Shared IDs".

    Its callable accepts ``(state, mode)`` and returns
    ``_filter_by_shared_ids(state)``.
    """
    def _keep_shared(state, mode):
        return _filter_by_shared_ids(state)

    return NamedFunctor("Filter Samples To Shared IDs", _keep_shared)
Пример #17
0
def build():
    """Return a ``NamedFunctor`` labelled "Fix Sample IDs".

    Its callable accepts ``(state, mode)`` and returns
    ``fix_sample_ids(state)``.
    """
    def _run(state, mode):
        return fix_sample_ids(state)

    return NamedFunctor("Fix Sample IDs", _run)