# plot top several clusters
plot_top_clust_ephys_curves(cluster_super_means,
                            y_cnts=y_cnts,
                            overall_means=super_data_means,
                            overall_stds=super_data_stds,
                            clust_labels=clust_labels,
                            n_to_show=n_top_clust,
                            inches=inches)
save_fig(join(ephys_viz_dir, 'ephys_curves_top_clust.png'))

# plot each (non-trivial) cluster
# non_trivial_clusters = y_cnts[y_cnts >= 5].index.values
non_trivial_clusters = y_cnts[y_cnts >= 0].index.values
save_dir = make_and_get_dir(ephys_viz_dir, 'cluster_curves')

for cl_idx in non_trivial_clusters:
    label = clust_labels[cl_idx]

    values = {}
    for name in cluster_super_means.keys():
        values[name] = cluster_super_means[name][cl_idx]

    plt.figure(figsize=(2 * n_datasets * inches, inches))
    plot_cluster_ephys_curve(values,
                             overall_means=super_data_means,
                             overall_stds=super_data_stds,
                             y_label=label)
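# NOTE: save_fig() is a project helper that is not defined in these excerpts.
# A minimal illustrative sketch, assuming it simply wraps matplotlib's savefig
# and closes the current figure (the real helper may differ):
import matplotlib.pyplot as plt


def save_fig(fpath, dpi=100):
    """Save the current matplotlib figure to fpath, then close it."""
    plt.savefig(fpath, dpi=dpi, bbox_inches='tight')
    plt.close()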
parser.add_argument('--user_bd_mvmm_best_idx', default=None,
                    help='Optional user provided index for '
                         'best bd MVMM model.')

parser.add_argument('--user_log_pen_mvmm_best_idx', default=None,
                    help='Optional user provided index for '
                         'best log pen MVMM model.')

parser.add_argument('--select_metric', type=str, default='bic',
                    help='Model selection criterion.')

args = parser.parse_args()

results_dir = args.results_dir

log_dir = make_and_get_dir(results_dir, 'log')
fitting_dir = make_and_get_dir(results_dir, 'model_fitting')
mv_fitting_dir = make_and_get_dir(fitting_dir, 'multi_view')
model_sel_dir = make_and_get_dir(results_dir, 'model_selection')
# bd_sel_dir = make_and_get_dir(model_sel_dir, 'bd_mvmm')
# log_sel_dir = make_and_get_dir(model_sel_dir, 'log_pen_mvmm')
bd_sel_dir = make_and_get_dir(model_sel_dir)
log_sel_dir = make_and_get_dir(model_sel_dir)
opt_diag_dir = make_and_get_dir(results_dir, 'opt_diagnostics')

res_writer = ResultsWriter(os.path.join(log_dir, 'mvmm_model_selection.txt'),
                           delete_if_exists=True)
res_writer.write('user_bd_mvmm_best_idx: {}'.
                 format(args.user_bd_mvmm_best_idx))
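# NOTE: make_and_get_dir() is used throughout these scripts but is not defined
# in the excerpts. A minimal sketch, assuming it joins its arguments into a
# path, creates the directory if needed, and returns it (an assumption, not
# necessarily the project's actual implementation):
import os


def make_and_get_dir(*path_pieces):
    """Join path pieces, create the directory if it does not exist, return it."""
    fpath = os.path.join(*path_pieces)
    os.makedirs(fpath, exist_ok=True)
    return fpath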
parser.add_argument('--event_col',
                    help='Column name of the survival event data.')

args = parser.parse_args()

results_dir = args.results_dir
fpaths = args.fpaths
vars2compare_fpath = args.vars2compare_fpath
super_fpaths = args.super_fpaths
survival_fpath = args.survival_fpath
duration_col = args.duration_col
event_col = args.event_col

# setup directories
log_dir = make_and_get_dir(results_dir, 'log')
fitting_dir = make_and_get_dir(results_dir, 'model_fitting')
model_sel_dir = make_and_get_dir(results_dir, 'model_selection')
opt_diag_dir = make_and_get_dir(results_dir, 'opt_diagnostics')
clust_interpret_dir = make_and_get_dir(results_dir, 'interpret')

# load models and data
models = load(join(fitting_dir, 'selected_models'))
view_data, dataset_names, sample_names, view_feat_names = \
    load_data(*fpaths)
n_views = len(fpaths)
view_data = [pd.DataFrame(view_data[v],
                          index=sample_names,
                          columns=view_feat_names[v])
             for v in range(n_views)]
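# NOTE: load_data() is not shown in these excerpts. A plausible sketch,
# assuming each fpath points to a CSV whose first column holds sample names
# and whose rows are aligned across views (the file-format details here are
# assumptions):
import os

import pandas as pd


def load_data(*fpaths):
    """Read one CSV per view; return arrays plus dataset/sample/feature names."""
    dfs = [pd.read_csv(fpath, index_col=0) for fpath in fpaths]
    view_data = [df.values for df in dfs]
    dataset_names = [os.path.basename(fpath).split('.')[0] for fpath in fpaths]
    sample_names = dfs[0].index.values
    view_feat_names = [df.columns.values for df in dfs]
    return view_data, dataset_names, sample_names, view_feat_names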
                    help='Maximum number of components for concatenated data.')

parser.add_argument('--exclude_cat_gmm', action='store_true', default=False,
                    help="Don't run the GMM on concatenated data.")

parser = add_parsers(parser,
                     to_add=[general_opt_parser, base_gmm_parser])

parser = bayes_parser(parser)
args = parser.parse_args()
bayes_submit(args)
args = format_mini_experiment(args)

results_dir = args.results_dir
log_dir = make_and_get_dir(results_dir, 'log')
fitting_dir = make_and_get_dir(results_dir, 'model_fitting', 'single_view')

res_writer = ResultsWriter(join(log_dir, 'single_view_fitting.txt'),
                           delete_if_exists=True)
res_writer.write(args)

run_start_time = time()

n_views = len(args.fpaths)

#############
# load data #
#############
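# NOTE: ResultsWriter is a small logging utility used by these scripts but not
# defined in the excerpts. A minimal sketch, assuming it echoes text to stdout
# and appends it to a log file, optionally deleting an existing file first
# (an assumption about its behavior):
import os


class ResultsWriter(object):
    def __init__(self, fpath, delete_if_exists=False):
        self.fpath = fpath
        if delete_if_exists and os.path.exists(fpath):
            os.remove(fpath)

    def write(self, text):
        """Print text and append it to the log file."""
        print(text)
        with open(self.fpath, 'a') as f:
            f.write('{}\n'.format(text))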
parser.add_argument('--feat_list', default='icluster',
                    help='Which feature list to use, e.g. icluster, '
                         'all, or top_2000 by variance.')

parser = bayes_parser(parser)
args = parser.parse_args()
bayes_submit(args)

cancer_type = args.cancer_type
feat_list = args.feat_list
print(cancer_type)

raw_data_dir = TCGAPaths().raw_data_dir
pca_dir = make_and_get_dir(TCGAPaths().top_dir, 'pca', cancer_type, feat_list)
diagnostics_dir = make_and_get_dir(pca_dir, 'diagnostics')
feat_save_dir = join(TCGAPaths().pro_data_dir, cancer_type, feat_list)

datasets = ['rna', 'mi_rna', 'dna_meth', 'cp']

# load the previously saved processed data
data = {}
for k in datasets:
    fpath = join(feat_save_dir, '{}.csv'.format(k))
    data[k] = pd.read_csv(fpath, index_col=0)

########################
# extract PCA features #
########################
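# NOTE: the PCA extraction step is truncated above. A minimal sketch of the
# kind of code that typically follows, using sklearn's PCA; the number of
# components (n_pcs) and the output file names are hypothetical, not taken
# from the original script:
from os.path import join

import pandas as pd
from sklearn.decomposition import PCA

n_pcs = 100  # hypothetical choice
for k in datasets:
    n_comp = min(n_pcs, min(data[k].shape) - 1)
    pca = PCA(n_components=n_comp)
    scores = pca.fit_transform(data[k].values)
    scores = pd.DataFrame(scores, index=data[k].index,
                          columns=['pc_{}'.format(i + 1) for i in range(n_comp)])
    scores.to_csv(join(pca_dir, '{}_pca_scores.csv'.format(k)))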
                                 log_pen_mvmm_parser,
                                 bd_mvmm_parser,
                                 spect_pen_parser])

parser = bayes_parser(parser)
args = parser.parse_args()
args = format_mini_experiment(args)
args.job_name = args.sim_name
bayes_submit(args)

if args.sim_name is None:
    args.sim_name = 'meow'

save_dir = make_and_get_dir(Paths().results_dir, 'single', args.sim_name)

res_writer = ResultsWriter(os.path.join(save_dir, 'results.txt'),
                           delete_if_exists=True)
res_writer.write('\n\n\n Input args')
res_writer.write(args)

rng = check_random_state(args.metaseed)

to_exclude = []
# if args.exclude_sp_mvmm:
to_exclude.append('sp_mvmm')
if args.exclude_bd_mvmm:
    to_exclude.append('bd_mvmm')
if args.exclude_log_pen_mvmm:
    to_exclude.append('log_pen_mvmm')
cancer_type = args.cancer_type
feat_list = args.feat_list
handle_nans = args.handle_nans

assert feat_list in ['all', 'icluster'] or 'top' in feat_list

filter_kws = {'tumor_sample_only': True,
              'primary_tumor_only': True,
              'keep_first_of_participant_multiples': True,
              'ensure_participant_idx': True,
              'verbose': True}

raw_data_dir = TCGAPaths().raw_data_dir
pro_data_dir = make_and_get_dir(TCGAPaths().pro_data_dir, cancer_type)
feat_save_dir = make_and_get_dir(pro_data_dir, feat_list)

res_writer = ResultsWriter(join(feat_save_dir, 'log.txt'),
                           delete_if_exists=True)
res_writer.write(args)

fnames = {'mi_rna': 'pancanMiRs_EBadjOnProtocolPlatformWithoutRepsWithUnCorrectMiRs_08_04_16.csv',
          'rna': 'EBPlusPlusAdjustPANCAN_IlluminaHiSeq_RNASeqV2-v2.geneExp.tsv',
          'dna_meth': 'jhu-usc.edu_PANCAN_merged_HumanMethylation27_HumanMethylation450.betaValue_whitelisted.tsv',
          'cp': 'all_data_by_genes_whitelisted.tsv'}
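# NOTE: the code that reads these pan-cancer files is not shown. A plausible
# sketch, assuming each file is a delimited (features x samples) table that is
# read and transposed so rows index TCGA samples; the separators and the
# orientation are assumptions about the file formats:
raw_data = {}
for k, fname in fnames.items():
    sep = ',' if fname.endswith('.csv') else '\t'
    df = pd.read_csv(join(raw_data_dir, fname), sep=sep, index_col=0)
    raw_data[k] = df.T  # rows = samples, columns = features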
results, model_dfs, model_dfs_at_truth, \
    fit_models, pi_true_summary, \
    n_samples_tr_seq, zero_thresh, data = load_results(sim_name)

timing_data = get_timing_data(results, fit_models)

n_comp_tot_true = pi_true_summary['n_comp_tot_true']
n_blocks_true = pi_true_summary['n_blocks_true']

models2exclude = []
for model_name in ['log_pen_mvmm', 'bd_mvmm', 'sp_mvmm']:
    if model_dfs[model_name].shape[0] == 0:
        models2exclude.append(model_name)

# where to save simulation results
results_save_dir = make_and_get_dir(Paths().results_dir, sim_name)
# results_save_dir = join(Paths().results_dir, sim_name)
# os.makedirs(results_save_dir, exist_ok=True)

##################
# Set parameters #
##################

# model names
model_names = {'log_pen_mvmm': 'log penalized MVMM',
               'bd_mvmm': 'block diagonal MVMM',
               'cat_gmm': 'Mixture model on concatenated data',
               'full_mvmm': 'MVMM',
               'sp_mvmm': 'spectral penalized MVMM',
               'marginal_view_0': 'Mixture model on view 1 marginal data',
                                 log_pen_mvmm_parser])

parser = bayes_parser(parser)
args = parser.parse_args()
bayes_submit(args)
args = format_mini_experiment(args)

# stub = 'mvmm_fitting_{}_{}'.format(args.n_comp_v0, args.n_comp_v1)
stub = 'mvmm_fitting'
for nc in args.n_view_comps:
    stub += '_{}'.format(nc)

results_dir = args.results_dir
# results_dir = make_and_get_dir(args.top_dir, args.sim_name)
log_dir = make_and_get_dir(results_dir, 'log')
fitting_dir = make_and_get_dir(results_dir, 'model_fitting')
model_sel_dir = make_and_get_dir(results_dir, 'model_selection')
opt_diag_dir = make_and_get_dir(results_dir, 'opt_diagnostics')

res_writer = ResultsWriter(os.path.join(log_dir, '{}.txt'.format(stub)),
                           delete_if_exists=True)
res_writer.write(args)

run_start_time = time()

to_exclude = []
# if args.exclude_sp_mvmm:
#     to_exclude.append('sp_mvmm')
if args.exclude_bd_mvmm:
    to_exclude.append('bd_mvmm')
parser.add_argument('--event_col',
                    help='Column name of the survival event data.')

args = parser.parse_args()

results_dir = args.results_dir
fpaths = args.fpaths
vars2compare_fpath = args.vars2compare_fpath
super_fpaths = args.super_fpaths
survival_fpath = args.survival_fpath
print(args)

# setup directories
log_dir = make_and_get_dir(results_dir, 'log')
fitting_dir = make_and_get_dir(results_dir, 'model_fitting')
model_sel_dir = make_and_get_dir(results_dir, 'model_selection')
opt_diag_dir = make_and_get_dir(results_dir, 'opt_diagnostics')
clust_interpret_dir = make_and_get_dir(results_dir, 'interpret')

# load models and data
n_views = len(fpaths)
models = load(join(fitting_dir, 'selected_models'))
view_data, dataset_names, sample_names, view_feat_names = \
    load_data(*fpaths)
view_data = [pd.DataFrame(view_data[v],
                          index=sample_names,
                          columns=view_feat_names[v])
             for v in range(n_views)]