def create_plots(dpp_info_path, old_info_path, out_dir):
    """Plot histograms of the number of divergence events and save them as PDFs.

    Two sets of results are read: the one at ``dpp_info_path`` (labelled
    'dpp-msbayes' in the figures) and the one at ``old_info_path`` (labelled
    'msBayes').  For each, the '99-psi-results.txt' file under the result
    path prefix ``(1, 1, 1)`` is turned into a histogram of the number of
    divergence events.  Prior histograms are added for the comparison
    figures.

    Files written to ``out_dir`` (created if missing):

    * philippines-dpp-psi-posterior.pdf
    * philippines-dpp-psi-posterior-old-vs-dpp.pdf (+ '-labels' variant)
    * philippines-dpp-psi-posterior-prior.pdf (+ '-lablels' variant)
    * philippines-dpp-psi-posterior-old-vs-dpp-with-prior.pdf

    Args:
        dpp_info_path: path handed to ``DMCSimulationResults`` for the
            dpp-msbayes analysis.
        old_info_path: path handed to ``DMCSimulationResults`` for the
            msBayes analysis.
        out_dir: directory that receives the PDF files.
    """
    x_axis_label = 'Number of divergence events'

    def load_psi_samples(psi_path):
        # Expand the (value, probability) table into a flat sample list:
        # each row contributes round(10000 * estimated_prob) copies of its
        # number of divergence events.
        samples = []
        for row in spreadsheet_iter([psi_path]):
            copies = int(round(10000 * float(row['estimated_prob'])))
            samples.extend([int(row['num_of_div_events'])] * copies)
        return samples

    def make_hist_data(values, bins):
        # All histograms in this function share the same rendering options.
        return HistData(x=values,
                        normed=True,
                        bins=bins,
                        histtype='bar',
                        align='mid',
                        orientation='vertical',
                        zorder=0)

    def make_hist(hist_data, y_label, xticks_obj):
        return ScatterPlot(hist_data_list=[hist_data],
                           x_label=x_axis_label,
                           y_label=y_label,
                           xticks_obj=xticks_obj)

    def save_side_by_side(subplots, column_labels, plain_name, labelled_name):
        # Two-panel figure, saved once without panel labels and once with
        # the 'uppercase' label schema.
        grid = PlotGrid(subplots=subplots,
                        num_columns=2,
                        height=3.5,
                        width=8.0,
                        share_x=True,
                        share_y=True,
                        label_schema=None,
                        auto_height=False,
                        column_labels=column_labels,
                        column_label_size=18.0)
        grid.auto_adjust_margins = False
        grid.margin_top = 0.92
        grid.padding_between_horizontal = 1.0
        grid.reset_figure()
        grid.savefig(os.path.join(out_dir, plain_name))
        grid.label_schema = 'uppercase'
        grid.reset_figure()
        grid.savefig(os.path.join(out_dir, labelled_name))

    if not os.path.exists(out_dir):
        # makedirs (rather than mkdir) so a nested output path also works.
        os.makedirs(out_dir)

    dmc_sim = DMCSimulationResults(dpp_info_path)
    dmc_sim_old = DMCSimulationResults(old_info_path)
    psi_path = dmc_sim.get_result_path_prefix(1, 1, 1) + '99-psi-results.txt'
    psi_path_old = (dmc_sim_old.get_result_path_prefix(1, 1, 1) +
                    '99-psi-results.txt')
    psis = load_psi_samples(psi_path)
    psis_old = load_psi_samples(psi_path_old)

    # One bin per possible number of divergence events (1..num_taxon_pairs).
    bins = range(1, dmc_sim.num_taxon_pairs + 2)
    hd = make_hist_data(psis, bins)
    hd_old = make_hist_data(psis_old, bins)

    # Label only the odd-valued ticks.
    tick_labels = [str(x) if x % 2 else '' for x in bins[0:-1]]
    xticks_obj = Ticks(ticks=bins,
                       labels=tick_labels,
                       horizontalalignment='left')

    hist = make_hist(hd, 'Posterior probability', xticks_obj)
    hist_old = make_hist(hd_old, 'Posterior probability', xticks_obj)
    hist.set_xlim(left=bins[0], right=bins[-1])
    hist_old.set_xlim(left=bins[0], right=bins[-1])

    # Figure 1: dpp-msbayes posterior on its own.
    hist.set_ylim(bottom=0.0, top=0.1)
    pg = PlotGrid(subplots=[hist],
                  num_columns=1,
                  height=4.0,
                  width=6.5,
                  label_schema=None,
                  auto_height=False)
    pg.auto_adjust_margins = False
    pg.margin_top = 1
    pg.reset_figure()
    pg.savefig(os.path.join(out_dir, 'philippines-dpp-psi-posterior.pdf'))

    # Figure 2: msBayes vs dpp-msbayes posteriors on a common y range.
    hist.set_ylim(bottom=0.0, top=0.5)
    hist.set_ylabel('')
    hist_old.set_ylim(bottom=0.0, top=0.5)
    save_side_by_side(
        [hist_old, hist],
        [r'msBayes', r'dpp-msbayes'],
        'philippines-dpp-psi-posterior-old-vs-dpp.pdf',
        'philippines-dpp-psi-posterior-old-vs-dpp-labels.pdf')

    # Figure 3: dpp-msbayes prior vs posterior.
    # NOTE(review): 1.5 and 18.099702 are passed positionally to
    # get_dpp_psi_values; presumably hyper-parameters of the DPP
    # concentration prior -- confirm against that function.
    prior_psis = get_dpp_psi_values(dmc_sim.num_taxon_pairs, 1.5, 18.099702,
                                    num_sims=100000)
    prior_hd = make_hist_data(prior_psis, bins)
    prior_hist = make_hist(prior_hd, 'Probability', xticks_obj)
    prior_hist.set_xlim(left=bins[0], right=bins[-1])
    prior_hist.set_ylim(bottom=0.0, top=0.12)
    hist.set_ylim(bottom=0.0, top=0.12)
    # NOTE(review): 'lablels' in the second file name is misspelled; it is
    # kept as-is because the output name may be referenced elsewhere.
    save_side_by_side(
        [prior_hist, hist],
        [r'Prior', r'Posterior'],
        'philippines-dpp-psi-posterior-prior.pdf',
        'philippines-dpp-psi-posterior-prior-lablels.pdf')

    # Figure 4: 2x2 grid, posterior (top row) and prior (bottom row) for
    # msBayes (left column) and dpp-msbayes (right column).
    # The msBayes prior is drawn as equal counts for the values 1..22.
    # NOTE(review): 22 is hard-coded; presumably equal to
    # dmc_sim.num_taxon_pairs for this data set -- confirm.
    prior_psis_old = []
    for i in range(22):
        prior_psis_old.extend([i + 1] * 100)
    prior_hd_old = make_hist_data(prior_psis_old, bins)
    prior_hist_old = make_hist(prior_hd_old, 'Prior probability', xticks_obj)
    # The limits are applied to prior_hist_old here; the previous code
    # re-applied them to prior_hist (already configured above) and left the
    # new plot without explicit limits.
    prior_hist_old.set_xlim(left=bins[0], right=bins[-1])
    prior_hist_old.set_ylim(bottom=0.0, top=0.5)
    hist.set_ylim(bottom=0.0, top=0.5)
    prior_hist.set_ylim(bottom=0.0, top=0.5)

    grid_plots = [hist_old, hist, prior_hist_old, prior_hist]
    # Per-panel labels are blanked; the grid supplies shared titles instead.
    for h in grid_plots:
        h.set_ylabel(ylabel='')
        h.set_xlabel(xlabel='')
        h.set_title_text('')
        h.set_extra_y_label('')
    pg = PlotGrid(subplots=grid_plots,
                  num_columns=2,
                  height=6.0,
                  width=8.0,
                  share_x=True,
                  share_y=False,
                  label_schema=None,
                  auto_height=False,
                  title=r'Number of divergence events',
                  title_top=False,
                  title_size=16.0,
                  y_title='Probability',
                  y_title_size=16.0,
                  column_labels=[r'msBayes', r'dpp-msbayes'],
                  row_labels=['Posterior', 'Prior'],
                  column_label_offset=0.07,
                  column_label_size=22.0,
                  row_label_offset=0.04,
                  row_label_size=20.0)
    pg.auto_adjust_margins = False
    pg.margin_top = 0.94
    pg.margin_bottom = 0.045
    pg.margin_right = 0.95
    pg.margin_left = 0.045
    pg.padding_between_vertical = 0.5
    pg.padding_between_horizontal = 1.0
    pg.reset_figure()
    pg.set_shared_x_limits()
    pg.set_shared_y_limits(by_row=True)
    pg.reset_figure()
    pg.savefig(os.path.join(
        out_dir,
        'philippines-dpp-psi-posterior-old-vs-dpp-with-prior.pdf'))
def get_histograms(config_path, info_path,
                   num_samples=10000,
                   num_div_values=None,
                   div_model_values=None,
                   ordered_div_model_values=None,
                   iteration_index=99,
                   y_limits=(0.45, 0.45, 0.05, 0.05),
                   xtick_label_size=8.0):
    """Build the posterior histogram and three prior histograms for a run.

    Args:
        config_path: path passed to ``MsBayesConfig``; also forwarded to
            ``get_dpp_prior_values`` when the configured prior is 'dpp'.
        info_path: path passed to ``DMCSimulationResults``.
        num_samples: number of prior samples requested from the prior
            helper functions.
        num_div_values: precomputed prior values for the second histogram.
        div_model_values: precomputed prior values for the third histogram.
        ordered_div_model_values: precomputed prior values for the fourth
            histogram.
        iteration_index: prefix of the '<index>-psi-results.txt' and
            '<index>-posterior-summary.txt' files that are read.
        y_limits: upper y-axis limit for each of the four histograms, in
            the order they are returned.
        xtick_label_size: size given to the x tick labels.

    If any one of the three ``*_values`` arguments is missing or empty, all
    three are regenerated from the prior named by ``cfg.div_model_prior``
    ('dpp', 'uniform' or 'psi'); for any other prior name the supplied
    values are used unchanged.

    Returns:
        A list of four ``ScatterPlot`` objects: the posterior histogram
        (annotated with the omega value and its interval read from the
        posterior summary) followed by the three prior histograms.
    """
    cfg = MsBayesConfig(config_path)
    dmc = DMCSimulationResults(info_path)
    npairs = dmc.num_taxon_pairs

    result_prefix = dmc.get_result_path_prefix(1, 1, 1)
    psi_path = result_prefix + '{0}-psi-results.txt'.format(iteration_index)
    sum_path = result_prefix + '{0}-posterior-summary.txt'.format(
        iteration_index)
    psis = get_values_psi_path(psi_path)
    omega, omega_hpd = get_omega_from_summary_path(sum_path)

    num_div_prior_psis = num_div_values
    div_model_prior_psis = div_model_values
    ordered_div_model_prior_psis = ordered_div_model_values
    have_all_priors = (num_div_values and div_model_values and
                       ordered_div_model_values)
    if not have_all_priors:
        if cfg.div_model_prior == 'dpp':
            (num_div_prior_psis,
             div_model_prior_psis,
             ordered_div_model_prior_psis) = get_dpp_prior_values(
                 config_path=config_path,
                 num_samples=num_samples)
        elif cfg.div_model_prior == 'uniform':
            (num_div_prior_psis,
             div_model_prior_psis,
             ordered_div_model_prior_psis) = get_uniform_prior_values(
                 npairs=npairs,
                 num_samples=num_samples)
        elif cfg.div_model_prior == 'psi':
            (num_div_prior_psis,
             div_model_prior_psis,
             ordered_div_model_prior_psis) = get_psi_uniform_prior_values(
                 npairs=npairs,
                 num_samples=num_samples)

    # Extra bin for zero values
    bins = range(0, npairs + 2)
    hds = [
        HistData(x=values,
                 normed=True,
                 bins=bins,
                 histtype='bar',
                 align='mid',
                 orientation='vertical',
                 zorder=0)
        for values in (psis,
                       num_div_prior_psis,
                       div_model_prior_psis,
                       ordered_div_model_prior_psis)
    ]

    # Label only the odd-valued ticks.
    tick_labels = [str(x) if x % 2 else '' for x in bins[0:-1]]
    xticks_obj = Ticks(ticks=bins,
                       labels=tick_labels,
                       horizontalalignment='left',
                       size=xtick_label_size)

    hists = []
    for index, hd in enumerate(hds):
        # Only the first (posterior) histogram carries the omega annotation.
        right_text = ''
        if index == 0:
            right_text = r'$D_T = {0:.2f} ({1:.2f}-{2:.2f})$'.format(
                omega,
                omega_hpd[0],
                omega_hpd[1])
        hist = ScatterPlot(hist_data_list=[hd],
                           right_text=right_text,
                           xticks_obj=xticks_obj)
        # cut off extra zero-valued bin
        hist.set_xlim(left=bins[1], right=bins[-1])
        hist.set_ylim(bottom=0.0, top=y_limits[index])
        hist.right_text_size = 10.0
        hist.plot_label_size = 12.0

        yticks = list(hist.ax.get_yticks())
        ytick_labels = list(yticks)
        if len(ytick_labels) > 5:
            # Too many ticks to label legibly: blank every second label.
            for position in range(1, len(ytick_labels), 2):
                ytick_labels[position] = ''
        hist.yticks_obj = Ticks(ticks=yticks,
                                labels=ytick_labels,
                                size=10.0)
        hists.append(hist)
    return hists