示例#1
0
def fit_dea(data, default_contrast, **kwargs):
    """Fit one named default contrast and score its clustering.

    :param data: expression data handed unchanged to ``DEAnalysis``
    :param default_contrast: key into the analysis object's
        ``default_contrasts``; also used as the fit name and the results key
    :param kwargs: extra keyword arguments forwarded to ``DEAnalysis``
    :return: tuple of the fitted ``DEAnalysis`` object and the value returned
        by ``score_clustering()`` on the matching result
    """
    analysis = DEAnalysis(data, **kwargs)

    # Only the 'contrasts' entry of the selected default is fit
    contrast_spec = analysis.default_contrasts[default_contrast]
    analysis.fit_contrasts(contrast_spec['contrasts'],
                           fit_names=default_contrast)

    result = analysis.results[default_contrast]  # type: DEResults
    return analysis, result.score_clustering()
示例#2
0
    def fit_dea(dea, data=None, override=False, **kwargs):
        """
        Return an existing analysis object, or build and fit a new one.

        :param dea: previously created analysis object, or None
        :param data: data passed to ``DEAnalysis`` when a new object is made
        :param override: when truthy, a new object is made even if ``dea``
            was supplied
        :param kwargs: keyword arguments forwarded to ``DEAnalysis``;
            ``reference_labels`` and ``index_names`` get defaults if absent
        :return: ``dea`` itself when it is reused, otherwise the newly
            fitted ``DEAnalysis`` object
        """
        # Nothing to do when an object was supplied and no refit is requested
        if dea is not None and not override:
            return dea

        # Fill in defaults without clobbering caller-supplied values
        kwargs.setdefault('reference_labels', ['condition', 'time'])
        kwargs.setdefault('index_names',
                          ['condition', 'replicate', 'time'])

        # Build the analysis object and fit all of its default contrasts
        fitted = DEAnalysis(data, **kwargs)
        fitted.fit_contrasts(fitted.default_contrasts, status=True)
        return fitted
示例#3
0
def _read_permute(p, permutes_path, grouped):
    """Fit one pickled permutation and return its single-column score table.

    :param p: file name of the pickled permuted data; the text before the
        first underscore is used as the permutation id in the column name
    :param permutes_path: directory that contains ``p``
    :param grouped: passed unchanged as the first argument of ``get_scores``
    :return: scores sorted in descending order, with the 'Cluster' column
        dropped and the remaining column named ``p<id>_score``
    """
    permute_id = p.split("_")[0]

    # Load the permuted data and fit it to obtain permuted p-values
    permuted = pd.read_pickle("{}/{}".format(permutes_path, p))
    p_dea = DEAnalysis(permuted,
                       time='Time',
                       replicate='rep',
                       reference_labels=['condition', 'Time'],
                       log2=False)
    p_dea.fit_contrasts()

    # Score ranking against the original clusters
    p_der = p_dea.results['ko-wt']
    p_values = p_der.p_value
    continuous = p_der.continuous
    weighted_lfc = (1 - p_values) * continuous.loc[p_values.index,
                                                   p_values.columns]
    scores = get_scores(grouped,
                        continuous.loc[:, p_values.columns],
                        weighted_lfc).sort_index()

    # Both sides are index-sorted first; the weights are then applied by
    # position because ``.values`` discards the index
    adj_weight = (1 - continuous['adj_pval']).sort_index().values
    scores['score'] = scores['score'] * adj_weight
    scores.sort_values('score', ascending=False, inplace=True)

    # Drop the cluster column and rename the score column for this permutation
    scores.drop('Cluster', inplace=True, axis=1)
    scores.columns = ['p{}_score'.format(permute_id)]

    return scores
示例#4
0
def load_sim_data(compiled_path):
    """
    Load compiled simulation data and wrap it in a DEAnalysis object.

    The pickled frame is reduced to the 'y' rows and to the columns whose
    third level equals 1. Column levels 1 and 3 are then named 'replicate'
    and 'time', and the values of level 0 are replaced by 'ki', 'pten', 'wt'.

    :param compiled_path: path of the pickled simulation data
    :return: ``DEAnalysis`` object built from the cleaned data
    """
    # todo: needs to be fully functionalized
    idx = pd.IndexSlice

    # Load sim data and cleanup appropriately
    sim_data = pd.read_pickle(compiled_path)
    sim_data = sim_data.loc['y', idx[:, :, 1, :]]

    # Build the cleaned column index by reassignment rather than with
    # ``inplace=True``: MultiIndex.set_levels no longer accepts ``inplace``
    # in pandas >= 2.0, while reassignment works on old and new versions.
    columns = sim_data.columns.remove_unused_levels()
    columns = columns.set_names(['replicate', 'time'], level=[1, 3])
    columns = columns.set_levels(['ki', 'pten', 'wt'], level=0)
    sim_data.columns = columns

    # NOTE(review): contrast_labels and sample_features are not defined in
    # this function; presumably module-level settings -- confirm
    sim_dea = DEAnalysis(sim_data,
                         reference_labels=contrast_labels,
                         index_names=sample_features)
    return sim_dea
示例#5
0
def fit_dea(path, data=None, override=False, **dea_kwargs):
    """
    Load a pickled analysis object from ``path``, or fit and pickle a new one.

    :param path: pickle location, used both for reading and for saving
    :param data: data passed to ``DEAnalysis`` when a new fit is needed
    :param override: when truthy, skip reading ``path`` and always refit
    :param dea_kwargs: keyword arguments forwarded to ``DEAnalysis``;
        ``counts`` defaults to True and ``log2`` to False if absent
    :return: the unpickled object, or the newly fitted ``DEAnalysis`` object
    """
    dea_kwargs.setdefault('counts', True)
    dea_kwargs.setdefault('log2', False)

    if not override:
        try:
            return pd.read_pickle(path)
        except (FileNotFoundError, ValueError):
            # Nothing usable could be read from ``path``; refit below
            pass

    # Fit every default contrast and save the result for later calls
    fitted = DEAnalysis(data, **dea_kwargs)
    fitted.fit_contrasts(fitted.default_contrasts)
    fitted.to_pickle(path)
    return fitted
示例#6
0
                get_data(ts_file,
                         input_type,
                         c,
                         n_timeseries,
                         reps,
                         p_labels,
                         t=t)
            ])

    idx = pd.IndexSlice
    data.sort_index(inplace=True)

    # Censored object
    dea = DEAnalysis(data.loc[idx['activating', ['wt', 'ko'], :,
                                  perturb, :]].T,
                     time='Time',
                     replicate='rep',
                     reference_labels=['condition', 'Time'],
                     log2=False)

    # Fit contrasts and save to pickle
    if fit_data:
        run_fit(dea, 'intermediate_data/{}_dea.pkl'.format(base_name))

    # Save permuted data
    if save_p:
        grouped = dea.data.groupby(level='condition', axis=1)
        save_permutes("intermediate_data/{}_permutes/".format(base_name),
                      grouped,
                      n=n_p)

    if analyze_p:
示例#7
0
import pandas as pd
from pydiffexp import DEAnalysis

# Read the pickled example data set
data_path = (
    "/Volumes/Hephaestus/jfinkle/Documents/Northwestern/MoDyLS/Code/Python/pydiffexp/data/GSE13100/"
    "bgcorrected_GSE13100_TR_data.pkl"
)
raw_data = pd.read_pickle(data_path)

# The example data has been background corrected, so every value that is
# zero or negative is replaced with a trivial positive value of 1
raw_data[raw_data <= 0] = 1

# Build the Differential Expression Analysis object.
# The reference labels specify how samples will be organized into unique values
dea = DEAnalysis(
    raw_data,
    replicate='rep',
    reference_labels=['condition', 'time'],
)

# Data can be standardized if desired
# norm_data = dea.standardize()

# Fit the contrasts, then pickle the fitted object
dea.fit_contrasts()
dea.to_pickle('intermediate_data/yeast_osmoTR_dea.pkl')
示例#8
0
# Load the tab-separated data; the first column is used as the row index
test_path = "/Users/jfinkle/Documents/Northwestern/MoDyLS/Code/Python/sprouty/data/raw_data/GSE63497_Oncogene_Formatted.tsv"
# Alternative input file, currently disabled:
# test_path = "/Users/jfinkle/Documents/Northwestern/MoDyLS/Code/Python/sprouty/data/raw_data/GSE63497_VEC_CRE_Formatted.tsv"
raw_data = pd.read_csv(test_path, sep='\t', index_col=0)
# Passed to DEAnalysis below as index_names
hierarchy = ['condition', 'replicate']

# Replace every value that is zero or negative with a trivial positive value
# of 1 (original note: the example data has been background corrected)
raw_data[raw_data <= 0] = 1

# Remove all genes with low counts so voom isn't confused: a row is dropped
# only when every one of its values is below 5
raw_data = raw_data[~(raw_data < 5).all(axis=1)]
# Make the Differential Expression Analysis Object
# The reference labels specify how samples will be organized into unique values
dea = DEAnalysis(raw_data,
                 index_names=hierarchy,
                 reference_labels=['condition'],
                 time=None,
                 counts=True)
# Data can be standardized if desired
# norm_data = dea.standardize()

# Fit the contrasts
# cont = dea.possible_contrasts()
# cont[0] = 'CRE-BRaf'
dea.fit_contrasts()
dep = DEPlot(dea)
# NOTE(review): the script stops here, so nothing below this call ever runs;
# presumably a debugging leftover -- confirm before relying on the code below
sys.exit()
# Volcano Plot
x = dea.results[0].top_table(p=0.05)

# sns.clustermap(x.iloc[:, :10])
示例#9
0
import pandas as pd
from pydiffexp import DEAnalysis

# Names for the sample hierarchy, passed to DEAnalysis as index_names
hierarchy = ['condition', 'well', 'time', 'replicate']

# Read the formatted CSV; the first column is used as the row index
test_path = "/Users/jfinkle/Documents/Northwestern/MoDyLS/Code/Python/sprouty/data/raw_data/all_data_formatted.csv"
raw_data = pd.read_csv(test_path, index_col=0)

# The example data has been background corrected, so every value that is
# zero or negative is replaced with a trivial positive value of 1
raw_data[raw_data <= 0] = 1

# Build the Differential Expression Analysis object.
# The reference labels specify how samples will be organized into unique values
dea = DEAnalysis(
    raw_data,
    index_names=hierarchy,
    reference_labels=['condition', 'time'],
)

# Data can be standardized if desired
# norm_data = dea.standardize()

# Fit the contrasts
dea.fit_contrasts()

# Widen pandas' display so printed frames are not wrapped
pd.set_option('display.width', 2000)
# Debug prints, disabled:
# print(dea.data)
# print(dea.results['KO-WT'].continuous.loc['NEDD4'])
# print(dea.results['KO-WT'].continuous)
# print(dea.print_experiment_summary())

for key, value in dea.results.items():