Exemplo n.º 1
0
    plot_metric = 'auroc'
    images_dir = Path(cfg.images_dirs['multimodal'], 'auroc')
else:
    plot_metric = 'aupr'
    images_dir = Path(cfg.images_dirs['multimodal'])

# ## Results with compressed features (figures in main paper)
#
# We'll also look at results with raw features later; those figures go in the supplement.
#
# ### Compare raw results

# In[3]:

# read the stratified per-gene prediction results, leaving out TET2 for now
results_df = au.load_stratified_prediction_results(results_dir, 'gene')
results_df = results_df[results_df.identifier != 'TET2'].copy()

# sanity check: eyeball the frame size, then the seeds, genes and data types
# present (we expect all data types and two replicates / random seeds)
print(results_df.shape)
for check_col in ('seed', 'identifier', 'training_data'):
    print(results_df[check_col].unique())
results_df.head()

# In[4]:

# each subplot will show results for one gene
# NOTE(review): the dict is passed positionally, i.e. as the first argument
# of sns.set rather than via `rc=` -- confirm this is the intended way to
# set the default figure size
sns.set({'figure.figsize': (24, 12)})
Exemplo n.º 2
0
import mpmp.utilities.plot_utilities as plu

# In[2]:

# resolve the two per-gene results directories being compared: the
# 'methylation_results' run and the 'methylation_results_shuffle_cancer_type'
# run, both under the configured mutation results location
old_results_dir = (
    Path(cfg.results_dirs['mutation']) / 'methylation_results' / 'gene'
).resolve()

new_results_dir = (
    Path(cfg.results_dirs['mutation'])
    / 'methylation_results_shuffle_cancer_type'
    / 'gene'
).resolve()

# In[3]:

# load the raw (uncompressed) results for both runs, keeping only the rows
# for models trained on expression data
old_results_df = (
    au.load_stratified_prediction_results(old_results_dir, 'gene')
      .loc[lambda df: df.training_data.isin(['expression'])]
      .copy()
)
new_results_df = (
    au.load_stratified_prediction_results(new_results_dir, 'gene')
      .loc[lambda df: df.training_data.isin(['expression'])]
      .copy()
)

# for the methylation datasets (27k and 450k) we want to use compressed data:
# the results in 02_classify_compressed/compressed_vs_raw_results.ipynb show
# that performance is equal or slightly better for PCA compressed methylation
# data, and it's much easier/faster to fit models on
old_compressed_results_df = au.load_compressed_prediction_results(
    old_results_dir,
    'gene',
    old_filenames=True,
)
# load compressed data for me_27k and me_450k
old_compressed_results_df = old_compressed_results_df[
Exemplo n.º 3
0
# toggle: when True, figures are saved to the ./images directory
SAVE_FIGS = True

# toggle: when True, plot AUROC rather than AUPR; AUROC figures go in an
# 'auroc' subdirectory of the configured mutation images directory
PLOT_AUROC = False
plot_metric = 'auroc' if PLOT_AUROC else 'aupr'
images_dir = (Path(cfg.images_dirs['mutation'], 'auroc') if PLOT_AUROC
              else Path(cfg.images_dirs['mutation']))

# In[3]:

# load raw data, keeping only the rows for models trained on expression data
vogelstein_df = au.load_stratified_prediction_results(vogelstein_results_dir,
                                                      'gene')
# take an explicit copy of the filtered rows: the next line adds a column,
# and assigning into a boolean-mask selection can raise pandas'
# SettingWithCopyWarning
vogelstein_df = vogelstein_df[
    vogelstein_df.training_data.isin(['expression'])].copy()
vogelstein_df['gene_set'] = 'vogelstein'

# make sure that we're correctly pointing to raw data for non-methylation data types
# and that we have data for two replicates (two random seeds)
print(vogelstein_df.shape)
print(vogelstein_df.seed.unique())
print(vogelstein_df.training_data.unique())
vogelstein_df.head()

# In[4]:

# load raw data, keeping only the rows for models trained on expression data
top_50_df = au.load_stratified_prediction_results(top_50_results_dir, 'gene')
# copy the filtered rows so that any later in-place column assignment works
# on an independent frame rather than on a selection of the loaded one
top_50_df = top_50_df[top_50_df.training_data.isin(['expression'])].copy()
Exemplo n.º 4
0
# resolve the three per-gene results directories
# TODO: this is a mess, move the results into the same location
results_dir1 = (
    Path(cfg.results_dirs['mutation']) / 'bmiq_results' / 'gene'
).resolve()
results_dir2 = (
    Path(cfg.results_dirs['mutation']) / 'bmiq_results_2' / 'gene'
).resolve()
results_dir3 = (
    Path(cfg.results_dirs['mutation']) / 'bmiq_results_me_control' / 'gene'
).resolve()

# significance cutoff, applied after FDR correction
SIG_ALPHA = 0.001


# In[3]:


# load the raw results from each of the three directories
results_df1 = au.load_stratified_prediction_results(results_dir1, 'gene')
results_df2 = au.load_stratified_prediction_results(results_dir2, 'gene')
results_df3 = au.load_stratified_prediction_results(results_dir3, 'gene')

# in the first two result sets, relabel 'me_27k' rows as 'me_27k_corrected';
# the third set keeps its labels as loaded
for relabel_df in (results_df1, results_df2):
    is_me_27k = relabel_df.training_data == 'me_27k'
    relabel_df.loc[is_me_27k, 'training_data'] = 'me_27k_corrected'

# stack everything into a single frame
results_df = pd.concat([results_df1, results_df2, results_df3])

# quick look at the size, seeds and data types present
print(results_df.shape)
print(results_df.seed.unique())
print(results_df.training_data.unique())
results_df.head()


# In[4]:
Exemplo n.º 5
0
from adjustText import adjust_text

import mpmp.config as cfg
import mpmp.utilities.analysis_utilities as au

# In[2]:

# resolve the directory holding the cancer type prediction results
results_dir = (
    Path(cfg.results_dirs['cancer_type']) / 'cancer_type'
).resolve()

# significance cutoff, applied after FDR correction
SIG_ALPHA = 0.001

# In[3]:

# read the stratified cancer type prediction results, then show the frame
# size and its first few rows
results_df = au.load_stratified_prediction_results(
    results_dir,
    'cancer_type',
)
print(results_df.shape)
results_df.head()

# In[4]:

# optional list of cancer types to restrict the plot to;
# leave as None to plot all 33 of them, e.g.
# FILTER_CANCER_TYPES = ['LUAD', 'LUSC', 'THCA']
FILTER_CANCER_TYPES = None

# keep only rows where signal == 'signal' and data_type == 'test'
filtered_df = results_df.loc[
    lambda df: (df.signal == 'signal') & (df.data_type == 'test')
].copy()
if FILTER_CANCER_TYPES is not None:
    filtered_df = filtered_df[(
        filtered_df.cancer_type.isin(FILTER_CANCER_TYPES))]
else: