def _format_pval_for_mathtext(pval):
    """Format a p-value with one significant digit for use inside mathtext.

    Values that '%.1g' renders in exponent form (e.g. '1e-05') are rewritten
    as '1*10^{-5}'; values rendered without an exponent are returned as-is.
    """
    text = '%.1g' % pval
    if 'e' in text:
        # Close the brace opened by the 'e' -> '*10^{' substitution below and
        # drop the zero padding of the exponent ('-05' -> '-5').
        text += '}'
        text = text.replace('e', '*10^{').replace('-0', '-')
    return text


def create_ols_legend_plot(matplot_dict, motif_freq, sample_scores, control_scores, flank_length, legend_title):
    """Add a frameless, tick-less text panel summarising the sample statistics.

    The panel shows two lines of mathtext: the correlation between
    ``motif_freq`` and the non-flanking sample scores (with its p-value), and
    the delta reported by ``perform_t_test`` between the non-flanking and the
    flanking sample scores (with its one-sided p-value).

    Parameters
    ----------
    matplot_dict : dict
        Must provide ``'figure'`` (the figure the panel is added to) and
        ``'gridspec'`` (the position argument handed to ``plt.Subplot``).
    motif_freq : sequence
        Values correlated against the non-flanking sample scores.
    sample_scores : sequence
        Scores including flanks; split with ``remove_flanking_scores`` and
        ``get_flanking_scores``.
    control_scores : sequence
        Unused here; kept so the signature stays compatible with callers.
    flank_length : int
        Flank size forwarded to the flank helpers.
    legend_title : str
        Subscript placed on the delta symbol in the second line.
    """
    figure = matplot_dict['figure']
    subplot_spec = matplot_dict['gridspec']
    legend_axes = plt.Subplot(figure, subplot_spec, autoscale_on=True)

    # Both statistics use the same flank-stripped scores; compute them once.
    core_scores = remove_flanking_scores(sample_scores, flank_length)
    flank_scores = get_flanking_scores(sample_scores, flank_length)

    corr_result = get_pearson_corr(motif_freq, core_scores)
    # NOTE(review): the label below reads r^2 but the value printed is
    # whatever get_pearson_corr returns at index 0 - confirm it is squared.
    corr_value = corr_result[0]
    corr_pval = corr_result[1]

    ttest_result = perform_t_test(core_scores, flank_scores)
    delta_pval = ttest_result['one_sided_pval']
    delta_value = ttest_result['delta']

    textstr = (
        r'$r^2_{pearson}=%.2f(p=%s)$' '\n' r'$\Delta_{%s}=%.2f(p=%s)$'
        % (
            corr_value,
            _format_pval_for_mathtext(corr_pval),
            legend_title,
            delta_value,
            _format_pval_for_mathtext(delta_pval),
        )
    )

    # The x position is derived from the text length, so longer text starts
    # further left.
    txtx = 1 - LEGEND_XMULTIPLIER * len(textstr) / 100.0

    legend_axes.set_frame_on(False)
    legend_axes.set_xticks([])
    legend_axes.set_yticks([])
    legend_axes.text(txtx, TXT_YPOS, textstr, fontsize=LEGEND_FONTSIZE)
    figure.add_subplot(legend_axes)
def create_scatter_plot(matplot_dict, motif_freq, sample_scores, control_scores, flank_length, num_occurrences, y_label):
    """Add a scatter plot of sample and control scores against ``motif_freq``.

    Flanking positions are stripped from both score vectors first. Sample
    points are drawn as green triangles and control points as circles in
    ``GREYNESS``; each series gets the regression line returned by
    ``perform_OLS``, and the sample series additionally gets a degree-1
    ``np.polyfit`` line.

    Parameters
    ----------
    matplot_dict : dict
        Must provide ``'figure'`` (the figure the axes is added to) and
        ``'gridspec'`` (the position argument handed to ``plt.Subplot``).
    motif_freq : sequence
        X values of the scatter plot.
    sample_scores, control_scores : sequence
        Scores including flanks; the flanks are removed with
        ``remove_flanking_scores`` before plotting.
    flank_length : int
        Flank size forwarded to ``remove_flanking_scores``.
    num_occurrences : number
        Divisor used to turn the x tick positions into the printed labels.
    y_label : str
        Name inserted into the y-axis label in front of "Score".
    """
    figure = matplot_dict['figure']
    subplot_spec = matplot_dict['gridspec']
    scatter_plot = plt.Subplot(figure, subplot_spec, autoscale_on=True)

    control_scores = remove_flanking_scores(control_scores, flank_length)
    sample_scores = remove_flanking_scores(sample_scores, flank_length)

    fit_fn = np.poly1d(np.polyfit(motif_freq, sample_scores, 1))
    control_ols = perform_OLS(control_scores, motif_freq)
    sample_ols = perform_OLS(sample_scores, motif_freq)
    sample_regression_line = sample_ols['regression_line']
    control_regression_line = control_ols['regression_line']

    # A scalar marker size applies to every point, so no per-point list is
    # needed.
    scatter_plot.scatter(motif_freq, sample_scores, color='g', s=POINTSIZE,
                         marker='^', label=r'$\mathrm{Sample}$')
    # Two sample lines are drawn: the OLS regression line and the polyfit
    # line. The colour is given only through the keyword; repeating it as a
    # format string makes matplotlib warn about a redundant definition.
    scatter_plot.plot(motif_freq, sample_regression_line,
                      motif_freq, fit_fn(motif_freq),
                      color='g', linewidth=LINEWIDTH)

    scatter_plot.scatter(motif_freq, control_scores, color=GREYNESS, s=POINTSIZE,
                         marker='o', label=r'$\mathrm{Control}$')
    scatter_plot.plot(motif_freq, control_regression_line,
                      color=GREYNESS, linewidth=LINEWIDTH)

    leg = scatter_plot.legend(fontsize=14)
    leg.draw_frame(True)
    leg.get_frame().set_linewidth(2.0)

    # Three x ticks spanning 1.02*min .. 1.02*max; the labels divide the same
    # 1.02 factor back out together with num_occurrences.
    tick_positions = np.linspace(1.02 * min(motif_freq), 1.02 * max(motif_freq),
                                 num=3, endpoint=True)
    scatter_plot.set_xticks(tick_positions)
    tick_labels = ["$%.2f$" % (x / (1.02 * num_occurrences)) for x in tick_positions]
    scatter_plot.set_xticklabels(tick_labels)

    yloc = plt.MaxNLocator(MAX_YTICKS)
    scatter_plot.yaxis.set_major_locator(yloc)

    scatter_plot.set_xlabel(r'$\mathrm{Most}\ \mathrm{frequent} \ \mathrm{base}\ \mathrm{frequency}$',
                            fontsize=FONTSIZE, fontweight='bold')
    scatter_plot.get_xaxis().tick_bottom()
    scatter_plot.get_yaxis().tick_left()
    # Raw string: '\m' and '\ ' are not valid Python escapes, so the non-raw
    # form warns on modern interpreters while producing the same text.
    scatter_plot.set_ylabel(r'$\mathrm{%s}\ \mathrm{Score}$' % (y_label),
                            fontsize=FONTSIZE, fontweight='bold')

    scatter_plot.tick_params(axis='y', which='major', pad=TICKPAD)
    scatter_plot.tick_params(axis='x', which='major', pad=TICKPAD)
    scatter_plot.get_yaxis().set_tick_params(direction='out')
    scatter_plot.get_xaxis().set_tick_params(direction='out')
    scatter_plot.tick_params('both', length=TICKLENGTH, width=2, which='major')
    figure.add_subplot(scatter_plot)
'motif_enrichment': motif_enrichment, 'motif_logo': motif_logo_encoded, 'motif_logorc': motif_logorc_encoded, 'moca_plot': moca_plot_encoded, 'moca_plotrc': moca_plotrc_encoded, 'no_fimo_hit_sample': True }) continue gerp_mean_sample = np.loadtxt( os.path.join(fimo_sample, 'gerp.mean.txt')).tolist() phylop_mean_sample = np.loadtxt( os.path.join(fimo_sample, 'phylop.mean.txt')).tolist() delta_phylop_ttest = perform_t_test( remove_flanking_scores(phylop_mean_sample, flank_length), get_flanking_scores(phylop_mean_sample, flank_length)) p_delta_phylop = delta_phylop_ttest['one_sided_pval'] delta_phylop = delta_phylop_ttest['delta'] delta_gerp_ttest = perform_t_test( remove_flanking_scores(gerp_mean_sample, flank_length), get_flanking_scores(gerp_mean_sample, flank_length)) p_delta_gerp = delta_gerp_ttest['one_sided_pval'] delta_gerp = delta_gerp_ttest['delta'] phylop_sample_ols = perform_OLS( remove_flanking_scores(phylop_mean_sample, flank_length), motif_freq) gerp_sample_ols = perform_OLS( remove_flanking_scores(gerp_mean_sample, flank_length), motif_freq)
motif_evalue = get_motif_evalue(record) if os.stat(os.path.join(fimo_sample, 'gerp.mean.txt')).st_size == 0: db.encode_tf_stats.insert_one({ 'encode_id': d, 'motif_number': i+1, 'center_enrichment': center_enrichment, 'center_enrichment_pval': center_enrichment_pval, 'motif_evalue': motif_evalue, 'motif_enrichment': motif_enrichment, 'no_fimo_hit_sample': True}) continue gerp_mean_sample = np.loadtxt(os.path.join(fimo_sample, 'gerp.mean.txt')).tolist() phylop_mean_sample = np.loadtxt(os.path.join(fimo_sample, 'phylop.mean.txt')).tolist() delta_phylop_ttest = perform_t_test(remove_flanking_scores(phylop_mean_sample, flank_length), get_flanking_scores(phylop_mean_sample, flank_length)) p_delta_phylop = delta_phylop_ttest['one_sided_pval'] delta_phylop = delta_phylop_ttest['delta'] delta_gerp_ttest = perform_t_test(remove_flanking_scores(gerp_mean_sample, flank_length), get_flanking_scores(gerp_mean_sample, flank_length)) p_delta_gerp = delta_gerp_ttest['one_sided_pval'] delta_gerp = delta_gerp_ttest['delta'] phylop_sample_ols = perform_OLS(remove_flanking_scores(phylop_mean_sample, flank_length), motif_freq) gerp_sample_ols = perform_OLS(remove_flanking_scores(gerp_mean_sample, flank_length), motif_freq) phylop_sample_fit = phylop_sample_ols['regression_fit'] gerp_sample_fit = gerp_sample_ols['regression_fit'] corr_phylop_sample = get_pearson_corr(motif_freq, remove_flanking_scores(phylop_mean_sample, flank_length)) corr_gerp_sample = get_pearson_corr(motif_freq, remove_flanking_scores(gerp_mean_sample, flank_length))