def run_sampler_on_event(gene, ni, ne, nb, read_len, overhang_len, num_iters, output_dir, confidence_level=.95): """ Run sampler on a two-isoform gene event. """ print "Running sampler on a two-isoform event..." print " - Gene label: ", gene.label, gene print " - NI, NE, NB: %d, %d, %d" %(ni, ne, nb) print "Using default sampler parameters." if gene.chrom != None: # Index output by chromosome print "Indexing by chromosome..." output_dir = os.path.join(output_dir, gene.chrom) if not os.path.exists(output_dir): os.makedirs(output_dir) output_filename = os.path.join(output_dir, gene.label) samples = [] cred_interval = [] num_isoforms = len(gene.isoforms) burn_in = 500 lag = 10 hyperparameters = ones(num_isoforms) proposal_diag = 0.05 sigma = set_diag(zeros([num_isoforms-1, num_isoforms-1]), proposal_diag) sampler_params = {'read_len': read_len, 'overhang_len': overhang_len, 'uniform_proposal': False, 'sigma_proposal': sigma} sampler = MISOSampler(sampler_params, log_dir=output_dir) reads = read_counts_to_read_list(ni, ne, nb) t1 = time.time() sampler_results = sampler.run_sampler(num_iters, reads, gene, hyperparameters, sampler_params, output_filename, burn_in=burn_in, lag=lag) if not sampler_results: return (samples, cred_interval) samples = sampler_results[1] # Compute credible intervals cred_interval = ht.compute_credible_intervals(samples, confidence_level=confidence_level) t2 = time.time() print " - Sampler run took %s seconds." %(str(t2-t1)) # return samples and credible intervals return (samples, cred_interval)
def plot_two_iso_samples(self, fig=None, isoform_index=0, num_rows=1, num_cols=1, subplot_start=1, plots_dir=None, map_estimate=None, simulation_num=1, plot_intervals=False, value_to_label=None, label=None, plot_filename=None, bins=None, bbox_coords=None, with_legend=True, title=None, vanilla=False, plot_mean=False, normed=False, fig_dims=(5, 5)): """ Plot a set of samples for Psi of a two isoform gene. """ if not fig: sampled_psi_fig = plt.figure(figsize=fig_dims, dpi=300) else: sampled_psi_fig = fig ax = sampled_psi_fig.add_subplot(num_rows, num_cols, subplot_start) num_iters = int(self.params['iters']) burn_in = int(self.params['burn_in']) lag = int(self.params['lag']) percent_acceptance = float(self.params['percent_accept']) proposal_type = self.params['proposal_type'] plt.rcParams['font.size'] = 10 show_spines(ax, ['left', 'bottom']) bins = bins assert((value_to_label == None and label == None) or \ (value_to_label != None and label != None)) # retrieve samples samples_to_plot = self.samples[:, isoform_index] # picasso blue #0276FD if not vanilla: if bins != None: plt.hist(samples_to_plot, align='mid', lw=0.5, facecolor='#0276FD', edgecolor='#ffffff') else: plt.hist(samples_to_plot, align='mid', lw=0.5, facecolor='#0276FD', edgecolor='#ffffff') else: plt.hist(samples_to_plot, align='mid', facecolor='#0276FD', edgecolor='#0276FD') plt.xlabel(r'${\hat{\Psi}}_{\mathregular{MISO}}$') plt.ylabel('Frequency') plt.xlim([0, 1]) # Normalize samples if normed: yticks = list(plt.gca().get_yticks()) print "yticks: ", yticks ytick_labels = [ "%.2f" % (float(ytick) / float(normed)) for ytick in yticks ] ax.set_yticklabels(ytick_labels) # samples_to_plot = samples_to_plot / float(len(samples_to_plot)) # curr_tick_labels = [label.get_label() for label in ax.get_yticklabels()] # print "Current tick labels: ", curr_tick_labels # new_tick_labels = [] # for label in curr_tick_labels: # if len(label) > 0: # new_label = "%.1f" %(float(label) / normed) # else: # new_label = "" # new_tick_labels.append(new_label) # #ax.set_yticklabels(new_tick_labels) curr_axes = plt.gca() # Plot MAP estimate for same data if map_estimate: l = plt.axvline(x=map_estimate, color='b', linewidth=1.2, ls='-', label=r'${\hat{\Psi}}_{MAP}\ =\ %.2f$' % (map_estimate)) # Plot true Psi if self.true_psi: plot_id = "%dsimul_%diters_%dburnin_%dlag_%s_truepsi_%.2f.pdf" \ %(simulation_num, num_iters, burn_in, lag, proposal_type, self.true_psi) l = plt.axvline(x=self.true_psi, color='r', linewidth=1.2, ls='-', label=r'True $\Psi$') else: # Unknown true Psi plot_id = "%dsimul_%diters_%dburnin_%dlag_%s_%s_truepsi.pdf" \ %(simulation_num, num_iters, burn_in, lag, proposal_type, 'unknown') if value_to_label: l = plt.axvline(x=value_to_label, color='r', linewidth=1.2, ls='-', label=label) # plot credible intervals if given if plot_intervals: # print "Plotting %.2f confidence intervals" %(plot_intervals * 100) interval_c1, interval_c2 = ht.compute_credible_intervals( samples_to_plot, plot_intervals) plt.axvline(x=interval_c1, color='#999999', linewidth=0.7, ls='--', label=r'%d' % (plot_intervals * 100) + '% CI') plt.axvline(x=interval_c2, color='#999999', linewidth=0.7, ls='--') if plot_mean: sample_mean = mean(samples_to_plot) plt.axvline(x=sample_mean, color='r', linewidth=0.8, label='Mean') if with_legend and (plot_intervals or self.true_psi): if not bbox_coords: lg = plt.legend(handletextpad=0.172, borderpad=0.01, labelspacing=.008, handlelength=1.4, loc='best', numpoints=1) else: lg = plt.legend(handletextpad=0.172, borderpad=0.01, labelspacing=.008, handlelength=1.4, loc='best', numpoints=1, bbox_to_anchor=bbox_coords) lg.get_frame().set_linewidth(0) for t in lg.get_texts(): t.set_fontsize(8) if title: plt.title(title) if plots_dir: if not plot_filename: plt.savefig(plots_dir + "sampled_psi_hist_%s" % (plot_id)) else: plt.savefig(plots_dir + plot_filename + '.pdf') return curr_axes
def run_sampler_on_event(gene, ni, ne, nb, read_len, overhang_len, num_iters, output_dir, confidence_level=.95): """ Run sampler on a two-isoform gene event. """ print "Running sampler on a two-isoform event..." print " - Gene label: ", gene.label, gene print " - NI, NE, NB: %d, %d, %d" % (ni, ne, nb) print "Using default sampler parameters." if gene.chrom != None: # Index output by chromosome print "Indexing by chromosome..." output_dir = os.path.join(output_dir, gene.chrom) if not os.path.exists(output_dir): os.makedirs(output_dir) output_filename = os.path.join(output_dir, gene.label) samples = [] cred_interval = [] num_isoforms = len(gene.isoforms) burn_in = 500 lag = 10 hyperparameters = ones(num_isoforms) proposal_diag = 0.05 sigma = set_diag(zeros([num_isoforms - 1, num_isoforms - 1]), proposal_diag) sampler_params = { 'read_len': read_len, 'overhang_len': overhang_len, 'uniform_proposal': False, 'sigma_proposal': sigma } sampler = MISOSampler(sampler_params, log_dir=output_dir) reads = read_counts_to_read_list(ni, ne, nb) t1 = time.time() sampler_results = sampler.run_sampler(num_iters, reads, gene, hyperparameters, sampler_params, output_filename, burn_in=burn_in, lag=lag) if not sampler_results: return (samples, cred_interval) samples = sampler_results[1] # Compute credible intervals cred_interval = ht.compute_credible_intervals( samples, confidence_level=confidence_level) t2 = time.time() print " - Sampler run took %s seconds." % (str(t2 - t1)) # return samples and credible intervals return (samples, cred_interval)
def plot_two_iso_samples(self, fig=None, isoform_index=0, num_rows=1, num_cols=1, subplot_start=1, plots_dir=None, map_estimate=None, simulation_num=1, plot_intervals=False, value_to_label=None, label=None, plot_filename=None, bins=None, bbox_coords=None, with_legend=True, title=None, vanilla=False, plot_mean=False, normed=False, fig_dims=(5, 5)): """ Plot a set of samples for Psi of a two isoform gene. """ if not fig: sampled_psi_fig = plt.figure(figsize=fig_dims, dpi=300) else: sampled_psi_fig = fig ax = sampled_psi_fig.add_subplot(num_rows, num_cols, subplot_start) num_iters = int(self.params['iters']) burn_in = int(self.params['burn_in']) lag = int(self.params['lag']) percent_acceptance = float(self.params['percent_accept']) proposal_type = self.params['proposal_type'] plt.rcParams['font.size'] = 10 show_spines(ax, ['left', 'bottom']) bins = bins assert((value_to_label == None and label == None) or \ (value_to_label != None and label != None)) # retrieve samples samples_to_plot = self.samples[:, isoform_index] # picasso blue #0276FD if not vanilla: if bins != None: plt.hist(samples_to_plot, align='mid', lw=0.5, facecolor='#0276FD', edgecolor='#ffffff') else: plt.hist(samples_to_plot, align='mid', lw=0.5, facecolor='#0276FD', edgecolor='#ffffff') else: plt.hist(samples_to_plot, align='mid', facecolor='#0276FD', edgecolor='#0276FD') plt.xlabel(r'${\hat{\Psi}}_{\mathregular{MISO}}$') plt.ylabel('Frequency') plt.xlim([0, 1]) # Normalize samples if normed: yticks = list(plt.gca().get_yticks()) print "yticks: ", yticks ytick_labels = ["%.2f" %(float(ytick) / float(normed)) for ytick in yticks] ax.set_yticklabels(ytick_labels) # samples_to_plot = samples_to_plot / float(len(samples_to_plot)) # curr_tick_labels = [label.get_label() for label in ax.get_yticklabels()] # print "Current tick labels: ", curr_tick_labels # new_tick_labels = [] # for label in curr_tick_labels: # if len(label) > 0: # new_label = "%.1f" %(float(label) / normed) # else: # new_label = "" # new_tick_labels.append(new_label) # #ax.set_yticklabels(new_tick_labels) curr_axes = plt.gca() # Plot MAP estimate for same data if map_estimate: l = plt.axvline(x=map_estimate, color='b', linewidth=1.2, ls='-', label=r'${\hat{\Psi}}_{MAP}\ =\ %.2f$' %(map_estimate)) # Plot true Psi if self.true_psi: plot_id = "%dsimul_%diters_%dburnin_%dlag_%s_truepsi_%.2f.pdf" \ %(simulation_num, num_iters, burn_in, lag, proposal_type, self.true_psi) l = plt.axvline(x=self.true_psi, color='r', linewidth=1.2, ls='-', label=r'True $\Psi$') else: # Unknown true Psi plot_id = "%dsimul_%diters_%dburnin_%dlag_%s_%s_truepsi.pdf" \ %(simulation_num, num_iters, burn_in, lag, proposal_type, 'unknown') if value_to_label: l = plt.axvline(x=value_to_label, color='r', linewidth=1.2, ls='-', label=label) # plot credible intervals if given if plot_intervals: # print "Plotting %.2f confidence intervals" %(plot_intervals * 100) interval_c1, interval_c2 = ht.compute_credible_intervals(samples_to_plot, plot_intervals) plt.axvline(x=interval_c1, color='#999999', linewidth=0.7, ls='--', label=r'%d' %(plot_intervals*100) + '% CI') plt.axvline(x=interval_c2, color='#999999', linewidth=0.7, ls='--') if plot_mean: sample_mean = mean(samples_to_plot) plt.axvline(x=sample_mean, color='r', linewidth=0.8, label='Mean') if with_legend and (plot_intervals or self.true_psi): if not bbox_coords: lg = plt.legend(handletextpad=0.172, borderpad=0.01, labelspacing=.008, handlelength=1.4, loc='best', numpoints=1) else: lg = plt.legend(handletextpad=0.172, borderpad=0.01, labelspacing=.008, handlelength=1.4, loc='best', numpoints=1, bbox_to_anchor=bbox_coords) lg.get_frame().set_linewidth(0) for t in lg.get_texts(): t.set_fontsize(8) if title: plt.title(title) if plots_dir: if not plot_filename: plt.savefig(plots_dir + "sampled_psi_hist_%s" %(plot_id)) else: plt.savefig(plots_dir + plot_filename + '.pdf') return curr_axes