def main():
    """Compare predicted-binding fractions between two anchor result files.

    Command line: two positional paths (the anchor results of interest,
    then the null anchor results) plus optional ``-1``/``-2`` bar labels and
    a ``-t`` plot title.

    For each path ``count_anchor_results`` is unpacked into a
    (binding, total) pair.  The 2x2 table
    ``[[binding, binding_null], [non_binding, non_binding_null]]`` is passed
    to ``fisher_exact``; the odds ratio and p-value are printed, and the two
    binding fractions are drawn with ``plot_barplot`` before ``plt.show()``.

    Exits via ``parser.error`` (usage on stderr, status 2) unless exactly
    two positional arguments are given, and via ``sys.exit`` with a message
    when a file reports a total of zero, since its fraction is undefined.
    """
    usage = 'usage: %prog anchor_results.txt anchor_results_null.txt\n'\
        'Requires two input arguments:\n'\
        '1) Interesting anchor results, output from run_anchor_batch.py\n'\
        '2) Null anchor results, output from run_anchor_batch.py\n'
    parser = OptionParser(usage=usage)
    parser.add_option('-1', '--exon_label1', dest='exon_label1',
                      default='Exon label 1',
                      help='Exon label of anchor_results.txt.')
    parser.add_option('-2', '--exon_label2', dest='exon_label2',
                      default='Exon label 2',
                      help='Exon label of anchor_results_null.txt')
    parser.add_option('-t', '--title', dest='title',
                      default='Fraction of exons with predicted binding regions',
                      help='Title of plot.')
    (options, args) = parser.parse_args()

    if len(args) != 2:
        # parser.error expands %prog in the usage text and exits with a
        # non-zero status, so a bad invocation is not reported as success.
        parser.error('Two arguments need to be specified in command line.')

    # Index 0 holds the results of interest, index 1 the null results.
    binding_counts = []
    non_binding_counts = []
    total_counts = []
    for results_path in args:
        binding_count, total_count = count_anchor_results(results_path)
        if total_count == 0:
            # The fraction below would otherwise raise ZeroDivisionError.
            sys.exit('No anchor results counted in %s; '
                     'cannot compute a binding fraction.' % results_path)
        binding_counts.append(binding_count)
        non_binding_counts.append(total_count - binding_count)
        total_counts.append(total_count)

    # Rows: binding / non-binding.  Columns: interest / null.
    oddsratio, pvalue = fisher_exact([binding_counts, non_binding_counts])
    # Single-argument print(...) behaves the same under Python 2 and 3.
    print('oddsratio: %s\npvalue: %s' % (oddsratio, pvalue))

    # float() forces true division under Python 2 integer semantics.
    frac_binding = float(binding_counts[0]) / total_counts[0]
    frac_binding_null = float(binding_counts[1]) / total_counts[1]

    plot_barplot([frac_binding, frac_binding_null],
                 options.title,
                 [options.exon_label1, options.exon_label2],
                 ylabel='Fraction predicted binding regions',
                 mytext1="%i/%i" % (binding_counts[0], total_counts[0]),
                 mytext2='%i/%i' % (binding_counts[1], total_counts[1]),
                 mytext3="*Fisher's Exact Test\nP-value=%.2e" % pvalue,
                 ymin=0, ymax=1, width=0.5)
    plt.show()
def main():
    """Plot and test the fraction of exons with predicted binding regions.

    Expects exactly two positional arguments: the path of the anchor
    results of interest followed by the path of the null anchor results.
    Options ``-1``/``-2`` set the two bar labels and ``-t`` the plot title.

    ``count_anchor_results`` is called once per path and unpacked into
    (binding, total).  Binding and non-binding counts for both files form
    the 2x2 table given to ``fisher_exact``; its odds ratio and p-value are
    printed, then the per-file binding fractions are passed to
    ``plot_barplot`` and the figure is shown with ``plt.show()``.

    A wrong number of positional arguments ends the program through
    ``parser.error`` (status 2).  A file whose total is zero ends it through
    ``sys.exit`` with an explanatory message.
    """
    usage = 'usage: %prog anchor_results.txt anchor_results_null.txt\n'\
        'Requires two input arguments:\n'\
        '1) Interesting anchor results, output from run_anchor_batch.py\n'\
        '2) Null anchor results, output from run_anchor_batch.py\n'
    parser = OptionParser(usage=usage)
    parser.add_option('-1', '--exon_label1', dest='exon_label1',
                      default='Exon label 1',
                      help='Exon label of anchor_results.txt.')
    parser.add_option('-2', '--exon_label2', dest='exon_label2',
                      default='Exon label 2',
                      help='Exon label of anchor_results_null.txt')
    parser.add_option('-t', '--title', dest='title',
                      default='Fraction of exons with predicted binding regions',
                      help='Title of plot.')
    (options, args) = parser.parse_args()

    if len(args) != 2:
        # Report the misuse on stderr with a non-zero exit status; the
        # usage text is printed by optparse with %prog expanded.
        parser.error('Two arguments need to be specified in command line.')

    anchor_results_path, anchor_results_null_path = args
    mylabels = [options.exon_label1, options.exon_label2]

    # Per-category counts; position 0 is the file of interest, 1 the null.
    anchor_dic = {'binding': [], 'non_binding': [], 'total': []}
    for results in (anchor_results_path, anchor_results_null_path):
        binding_count, total_count = count_anchor_results(results)
        if total_count == 0:
            # Without this guard the fraction computation divides by zero.
            sys.exit('No anchor results counted in %s; '
                     'cannot compute a binding fraction.' % results)
        anchor_dic['binding'].append(binding_count)
        anchor_dic['non_binding'].append(total_count - binding_count)
        anchor_dic['total'].append(total_count)

    oddsratio, pvalue = fisher_exact([anchor_dic['binding'],
                                      anchor_dic['non_binding']])
    # Parenthesised single-argument print is valid in Python 2 and 3 alike.
    print('oddsratio: %s\npvalue: %s' % (oddsratio, pvalue))

    # float() keeps the ratio fractional under Python 2 integer division.
    myvals = [float(bound) / total
              for bound, total in zip(anchor_dic['binding'],
                                      anchor_dic['total'])]

    plot_barplot(myvals, options.title, mylabels,
                 ylabel='Fraction predicted binding regions',
                 mytext1="%i/%i" % (anchor_dic['binding'][0],
                                    anchor_dic['total'][0]),
                 mytext2='%i/%i' % (anchor_dic['binding'][1],
                                    anchor_dic['total'][1]),
                 mytext3="*Fisher's Exact Test\nP-value=%.2e" % pvalue,
                 ymin=0, ymax=1, width=0.5)
    plt.show()
def main():
    """Compare GERP conservation scores of MEME motifs against controls.

    Command line: two positional pkl paths (non-null summary, then
    null-mode summary); extra positional arguments are ignored.  ``-t`` sets
    the conservation threshold (float, default 2.0) and ``-y`` the y-axis
    maximum of the density plot (float, default 0.03).

    Steps, in order:
      1. Load both pickles with ``get_dic_from_pklpath`` and extract scores
         with ``get_gerp_scores(..., gerpkey='avg_rs_score')``.
      2. Draw both score lists with ``plot_functions.plot_density``, marking
         the threshold with a vertical line.
      3. Count conserved elements in each list with
         ``gerp_utilities.conserved_regions(..., fraction=False)``.
      4. Run ``fisher_exact`` on conserved / not-conserved counts and print
         the result.
      5. Draw the conserved fractions with ``plot_functions.plot_barplot``
         and call ``plt.show()``.

    Exits via ``parser.error`` (status 2) when fewer than two paths are
    given or the threshold is not a float, and via ``sys.exit`` with a
    message when either score list is empty.
    """
    # Both positional arguments are read as input pickles; the usage line
    # names them accordingly.
    usage = 'usage: %prog non_null_pklpath null_pklpath\n'\
        'Requires two input arguments:\n'\
        '1) pkl file from summarize_meme_results: non-null\n'\
        '2) pkl file from summarize_meme_results: null-mode\n'
    parser = OptionParser(usage=usage)
    # type='float' lets optparse reject a malformed threshold with a usage
    # error, matching how --ymax is declared.
    parser.add_option('-t', '--threshold', dest='score_threshold',
                      type='float', default=2.0,
                      help='Float, threshold for what one considers conserved.')
    parser.add_option('-y', '--ymax', dest='ymax', type='float',
                      default=0.03,
                      help='Y max for density plot')
    (options, args) = parser.parse_args()

    if len(args) < 2:
        # Non-zero exit status and expanded usage text on stderr.
        parser.error('Two arguments need to be specified in command line.')

    non_null_pklpath = args[0]
    null_pklpath = args[1]
    score_threshold = options.score_threshold

    # get dics from pkl
    non_null_dic = get_dic_from_pklpath(non_null_pklpath)
    null_dic = get_dic_from_pklpath(null_pklpath)

    non_null_gerp_scores = get_gerp_scores(non_null_dic,
                                           gerpkey='avg_rs_score')
    null_gerp_scores = get_gerp_scores(null_dic, gerpkey='avg_rs_score')

    plot_functions.plot_density([non_null_gerp_scores, null_gerp_scores],
                                mytitle='Density plot of conservation scores',
                                labels_lists=['MEME motifs', 'Controls'],
                                xlabel='GERP conservation score',
                                ylabel='Density',
                                xmin=-4, xmax=4, ymax=options.ymax,
                                smoothness=0.15,
                                drawvline=score_threshold)

    # find how many conserved regions are in each.
    n_conserved_in_meme = gerp_utilities.conserved_regions(
        non_null_gerp_scores, fraction=False, threshold=score_threshold)
    n_conserved_in_null = gerp_utilities.conserved_regions(
        null_gerp_scores, fraction=False, threshold=score_threshold)

    n_total_in_meme = len(non_null_gerp_scores)
    n_total_in_null = len(null_gerp_scores)
    if n_total_in_meme == 0 or n_total_in_null == 0:
        # The conserved fractions below would divide by zero.
        sys.exit('No GERP scores found in one of the input pkl files; '
                 'cannot compute a conserved fraction.')

    n_not_conserved_in_meme = n_total_in_meme - n_conserved_in_meme
    n_not_conserved_in_null = n_total_in_null - n_conserved_in_null

    # Single-argument print(...) behaves the same under Python 2 and 3.
    print('Threshold: %s' % score_threshold)
    print('Number of conserved elements: %s' % n_conserved_in_meme)
    print('Number of conserved elements found in control: %s'
          % n_conserved_in_null)

    # Perform fisher's exact test
    oddsratio, pvalue = fisher_exact(
        [[n_conserved_in_meme, n_conserved_in_null],
         [n_not_conserved_in_meme, n_not_conserved_in_null]])
    print('Fishers Exact Test, Oddsratio: %s. Pvalue: %s'
          % (oddsratio, pvalue))

    # plot distributions
    mylabels = ['Meme motifs', 'Control region']
    mytitle = 'Fraction of elements conserved compared to control region'

    # float() forces true division under Python 2 integer semantics.
    frac_conserved_meme = float(n_conserved_in_meme) / n_total_in_meme
    frac_conserved_null = float(n_conserved_in_null) / n_total_in_null
    myvals = [frac_conserved_meme, frac_conserved_null]

    plot_functions.plot_barplot(
        myvals, mytitle, mylabels,
        ylabel='Fraction of elements conserved',
        mytext1="%i/%i" % (n_conserved_in_meme, n_total_in_meme),
        mytext2='%i/%i' % (n_conserved_in_null, n_total_in_null),
        mytext3="*Fisher's Exact Test\nP-value=%.2e" % pvalue,
        ymin=0, ymax=1, width=0.5)
    plt.show()
def main():
    """Test whether MEME motifs are more conserved than control regions.

    Takes two positional pkl paths on the command line: the non-null
    summary first, the null-mode summary second (any further positional
    arguments are ignored).  ``-t``/``--threshold`` is the float score
    threshold (default 2.0); ``-y``/``--ymax`` is the float y-axis maximum
    for the density plot (default 0.03).

    Both pickles are loaded with ``get_dic_from_pklpath`` and reduced to
    score lists with ``get_gerp_scores`` using the ``'avg_rs_score'`` key.
    The lists are drawn with ``plot_functions.plot_density``, conserved
    elements are counted with ``gerp_utilities.conserved_regions``, the
    conserved / not-conserved counts go through ``fisher_exact``, and the
    conserved fractions are drawn with ``plot_functions.plot_barplot``
    before ``plt.show()``.

    Terminates through ``parser.error`` (status 2) for fewer than two paths
    or a non-float threshold, and through ``sys.exit`` with a message when
    either score list is empty.
    """
    # The usage line names both arguments as the input pickles they are.
    usage = 'usage: %prog non_null_pklpath null_pklpath\n'\
        'Requires two input arguments:\n'\
        '1) pkl file from summarize_meme_results: non-null\n'\
        '2) pkl file from summarize_meme_results: null-mode\n'
    parser = OptionParser(usage=usage)
    # Declaring the type makes optparse validate the value itself, the
    # same way --ymax is handled, instead of failing later in float().
    parser.add_option('-t', '--threshold', dest='score_threshold',
                      type='float', default=2.0,
                      help='Float, threshold for what one considers conserved.')
    parser.add_option('-y', '--ymax', dest='ymax', type='float',
                      default=0.03,
                      help='Y max for density plot')
    (options, args) = parser.parse_args()

    if len(args) < 2:
        # Exit non-zero with the expanded usage text rather than status 0.
        parser.error('Two arguments need to be specified in command line.')

    non_null_pklpath, null_pklpath = args[0], args[1]
    score_threshold = options.score_threshold

    # Load each pickle and pull out its average RS scores.
    non_null_gerp_scores = get_gerp_scores(
        get_dic_from_pklpath(non_null_pklpath), gerpkey='avg_rs_score')
    null_gerp_scores = get_gerp_scores(
        get_dic_from_pklpath(null_pklpath), gerpkey='avg_rs_score')

    plot_functions.plot_density([non_null_gerp_scores, null_gerp_scores],
                                mytitle='Density plot of conservation scores',
                                labels_lists=['MEME motifs', 'Controls'],
                                xlabel='GERP conservation score',
                                ylabel='Density',
                                xmin=-4, xmax=4, ymax=options.ymax,
                                smoothness=0.15,
                                drawvline=score_threshold)

    # Conserved counts (not fractions) for motifs and controls.
    n_conserved_in_meme = gerp_utilities.conserved_regions(
        non_null_gerp_scores, fraction=False, threshold=score_threshold)
    n_conserved_in_null = gerp_utilities.conserved_regions(
        null_gerp_scores, fraction=False, threshold=score_threshold)

    n_total_in_meme = len(non_null_gerp_scores)
    n_total_in_null = len(null_gerp_scores)
    if n_total_in_meme == 0 or n_total_in_null == 0:
        # Guard the divisions below against empty score lists.
        sys.exit('No GERP scores found in one of the input pkl files; '
                 'cannot compute a conserved fraction.')

    n_not_conserved_in_meme = n_total_in_meme - n_conserved_in_meme
    n_not_conserved_in_null = n_total_in_null - n_conserved_in_null

    # Parenthesised single-argument print is valid in Python 2 and 3 alike.
    print('Threshold: %s' % score_threshold)
    print('Number of conserved elements: %s' % n_conserved_in_meme)
    print('Number of conserved elements found in control: %s'
          % n_conserved_in_null)

    # Rows: conserved / not conserved.  Columns: motifs / controls.
    contingency = [[n_conserved_in_meme, n_conserved_in_null],
                   [n_not_conserved_in_meme, n_not_conserved_in_null]]
    oddsratio, pvalue = fisher_exact(contingency)
    print('Fishers Exact Test, Oddsratio: %s. Pvalue: %s'
          % (oddsratio, pvalue))

    # float() keeps the ratios fractional under Python 2 integer division.
    myvals = [float(n_conserved_in_meme) / n_total_in_meme,
              float(n_conserved_in_null) / n_total_in_null]

    plot_functions.plot_barplot(
        myvals,
        'Fraction of elements conserved compared to control region',
        ['Meme motifs', 'Control region'],
        ylabel='Fraction of elements conserved',
        mytext1="%i/%i" % (n_conserved_in_meme, n_total_in_meme),
        mytext2='%i/%i' % (n_conserved_in_null, n_total_in_null),
        mytext3="*Fisher's Exact Test\nP-value=%.2e" % pvalue,
        ymin=0, ymax=1, width=0.5)
    plt.show()