def fit_dea(data, default_contrast, **kwargs):
    """
    Build a DEAnalysis object, fit one of its default contrasts, and score it.

    :param data: data passed straight through to ``DEAnalysis``
    :param default_contrast: key into ``dea.default_contrasts``; the same value
        is used as the fit name and as the key read back from ``dea.results``
    :param kwargs: extra keyword arguments forwarded to ``DEAnalysis``
    :return: tuple of (fitted DEAnalysis object, result of ``score_clustering()``)
    """
    analysis = DEAnalysis(data, **kwargs)

    # Look up the contrast definition registered under the requested name
    contrast_spec = analysis.default_contrasts[default_contrast]
    analysis.fit_contrasts(contrast_spec['contrasts'], fit_names=default_contrast)

    # The fit is stored under the name it was given above
    result = analysis.results[default_contrast]  # type: DEResults
    return analysis, result.score_clustering()
def fit_dea(dea, data=None, override=False, **kwargs):
    """
    Return an existing DEAnalysis object, or build and fit a new one.

    :param dea: previously built analysis object; returned untouched unless it
        is None or ``override`` is truthy
    :param data: data passed to ``DEAnalysis`` when a new object is built
    :param override: if truthy, always build and fit a new object
    :param kwargs: extra keyword arguments forwarded to ``DEAnalysis``;
        ``reference_labels`` and ``index_names`` get defaults when not supplied
    :return: the supplied ``dea`` or the newly fitted DEAnalysis object
    """
    # Reuse what we were given unless a rebuild was requested
    if dea is not None and not override:
        return dea

    # Fill in defaults without clobbering caller-supplied values
    kwargs.setdefault('reference_labels', ['condition', 'time'])
    kwargs.setdefault('index_names', ['condition', 'replicate', 'time'])

    # Build the analysis object and fit all of its default contrasts
    fitted = DEAnalysis(data, **kwargs)
    fitted.fit_contrasts(fitted.default_contrasts, status=True)
    return fitted
def _read_permute(p, permutes_path, grouped):
    """
    Fit one pickled permutation and score it against the original clusters.

    :param p: file name of the permuted data pickle; the text before the first
        underscore is used to label the returned score column
    :param permutes_path: directory containing the permutation pickles
    :param grouped: passed unchanged as the first argument to ``get_scores``
    :return: single-column frame named ``p<idx>_score``, sorted by descending score
    """
    permute_id = p.split("_")[0]

    # Load the permuted data and fit it to obtain permuted p-values
    permuted = pd.read_pickle("{}/{}".format(permutes_path, p))
    permuted_dea = DEAnalysis(permuted, time='Time', replicate='rep',
                              reference_labels=['condition', 'Time'], log2=False)
    permuted_dea.fit_contrasts()

    # Weight the 'ko-wt' log fold changes by (1 - p-value)
    result = permuted_dea.results['ko-wt']
    pvals = result.p_value
    weighted_lfc = (1 - pvals) * result.continuous.loc[pvals.index, pvals.columns]

    # Score ranking against the original clusters
    scores = get_scores(grouped, result.continuous.loc[:, pvals.columns],
                        weighted_lfc).sort_index()

    # Scale each score by (1 - adjusted p-value); both sides are index-sorted
    # so the positional multiply via .values lines up row for row
    adj_weight = (1 - result.continuous['adj_pval']).sort_index().values
    scores['score'] = scores['score'] * adj_weight
    scores = scores.sort_values('score', ascending=False)

    # Drop the cluster column and rename the score so it can join the real scores
    scores = scores.drop('Cluster', axis=1)
    scores.columns = ['p{}_score'.format(permute_id)]
    return scores
def load_sim_data(compiled_path):  # todo: needs to be fully functionalized
    """
    Load compiled simulation data and wrap it in a DEAnalysis object.

    :param compiled_path: path to the pickled, compiled simulation data
    :return: DEAnalysis object built from the cleaned simulation data

    Relies on the module-level names ``contrast_labels`` and ``sample_features``
    (defined outside this function) for the DEAnalysis configuration.
    """
    idx = pd.IndexSlice

    # Load sim data and keep the 'y' rows with value 1 in the third column level
    sim_data = pd.read_pickle(compiled_path)
    sim_data = sim_data.loc['y', idx[:, :, 1, :]]

    # Rebuild the column index step by step. The index methods return new
    # objects; reassigning them (rather than passing inplace=True) is required
    # because MultiIndex.set_levels no longer accepts ``inplace`` in pandas >= 2.0.
    columns = sim_data.columns.remove_unused_levels()
    columns = columns.set_names(['replicate', 'time'], level=[1, 3])
    columns = columns.set_levels(['ki', 'pten', 'wt'], level=0)
    sim_data.columns = columns

    return DEAnalysis(sim_data, reference_labels=contrast_labels,
                      index_names=sample_features)
def fit_dea(path, data=None, override=False, **dea_kwargs):
    """
    Load a pickled DEAnalysis object from ``path``, or fit and save a new one.

    :param path: pickle location; read when not overriding, written after a refit
    :param data: data passed to ``DEAnalysis`` when a new object has to be built
    :param override: if truthy, skip the cached pickle and refit
    :param dea_kwargs: extra keyword arguments forwarded to ``DEAnalysis``;
        ``counts`` defaults to True and ``log2`` to False when not supplied
    :return: the loaded or newly fitted object
    """
    dea_kwargs.setdefault('counts', True)
    dea_kwargs.setdefault('log2', False)

    if not override:
        try:
            # NOTE: unpickling executes code from the file; only use trusted paths
            return pd.read_pickle(path)
        except (FileNotFoundError, ValueError):
            # No usable cached object; fall through and fit from scratch
            pass

    dea = DEAnalysis(data, **dea_kwargs)
    dea.fit_contrasts(dea.default_contrasts)
    dea.to_pickle(path)
    return dea
get_data(ts_file, input_type, c, n_timeseries, reps, p_labels, t=t) ]) idx = pd.IndexSlice data.sort_index(inplace=True) # Censored object dea = DEAnalysis(data.loc[idx['activating', ['wt', 'ko'], :, perturb, :]].T, time='Time', replicate='rep', reference_labels=['condition', 'Time'], log2=False) # Fit contrasts and save to pickle if fit_data: run_fit(dea, 'intermediate_data/{}_dea.pkl'.format(base_name)) # Save permuted data if save_p: grouped = dea.data.groupby(level='condition', axis=1) save_permutes("intermediate_data/{}_permutes/".format(base_name), grouped, n=n_p) if analyze_p:
import pandas as pd
from pydiffexp import DEAnalysis

# Location of the background-corrected GSE13100 example data
data_path = ("/Volumes/Hephaestus/jfinkle/Documents/Northwestern/MoDyLS/Code/Python/pydiffexp/data/GSE13100/"
             "bgcorrected_GSE13100_TR_data.pkl")
raw_data = pd.read_pickle(data_path)

# Background correction can leave non-positive values; replace each of them
# with a trivial positive value of 1
raw_data[raw_data <= 0] = 1

# Build the differential expression analysis object.
# The reference labels specify how samples are organized into unique values.
dea = DEAnalysis(raw_data, replicate='rep', reference_labels=['condition', 'time'])

# Data can be standardized if desired
# norm_data = dea.standardize()

# Fit the contrasts, then save the fitted object
dea.fit_contrasts()
output_path = 'intermediate_data/yeast_osmoTR_dea.pkl'
dea.to_pickle(output_path)
# Read the tab-separated GSE63497 table, using the first column as the index
test_path = "/Users/jfinkle/Documents/Northwestern/MoDyLS/Code/Python/sprouty/data/raw_data/GSE63497_Oncogene_Formatted.tsv"
# Alternative input file:
# test_path = "/Users/jfinkle/Documents/Northwestern/MoDyLS/Code/Python/sprouty/data/raw_data/GSE63497_VEC_CRE_Formatted.tsv"
raw_data = pd.read_csv(test_path, sep='\t', index_col=0)
hierarchy = ['condition', 'replicate']

# Replace every non-positive value with a trivial positive value of 1
raw_data[raw_data <= 0] = 1

# Remove genes whose counts are below 5 in every sample so voom isn't confused
low_count = (raw_data < 5).all(axis=1)
raw_data = raw_data.loc[~low_count]

# Build the differential expression analysis object.
# The reference labels specify how samples are organized into unique values.
dea = DEAnalysis(raw_data, index_names=hierarchy, reference_labels=['condition'],
                 time=None, counts=True)

# Data can be standardized if desired
# norm_data = dea.standardize()

# Fit the contrasts
# cont = dea.possible_contrasts()
# cont[0] = 'CRE-BRaf'
dea.fit_contrasts()

dep = DEPlot(dea)

# NOTE: the script stops here; everything below is currently unreachable
sys.exit()

# Volcano Plot
x = dea.results[0].top_table(p=0.05)
# sns.clustermap(x.iloc[:, :10])
import pandas as pd from pydiffexp import DEAnalysis # Load the data test_path = "/Users/jfinkle/Documents/Northwestern/MoDyLS/Code/Python/sprouty/data/raw_data/all_data_formatted.csv" raw_data = pd.read_csv(test_path, index_col=0) hierarchy = ['condition', 'well', 'time', 'replicate'] # The example data has been background corrected, so set everything below 0 to a trivial positive value of 1 raw_data[raw_data <= 0] = 1 # Make the Differential Expression Analysis Object # The reference labels specify how samples will be organized into unique values dea = DEAnalysis(raw_data, index_names=hierarchy, reference_labels=['condition', 'time']) # Data can be standarized if desired # norm_data = dea.standardize() # Fit the contrasts and save the object dea.fit_contrasts() pd.set_option('display.width', 2000) # print(dea.data) # print(dea.results['KO-WT'].continuous.loc['NEDD4']) # print(dea.results['KO-WT'].continuous) # print(dea.print_experiment_summary()) for key, value in dea.results.items():