def _best_scores_by_motif(seq_file, motif_list):
    """Scan *seq_file* with *motif_list* and collect one value list per motif.

    Returns a dict mapping the motif id (the part before the first tab) to a
    list holding, for every scanned sequence, element ``[0][1]`` of its match
    list -- presumably the score of the best match; confirm against ``scan``.
    """
    totals = {}
    # NOTE(review): the trailing ``0.0, 1`` arguments are presumably the score
    # cutoff and the number of matches to report per sequence -- confirm
    # against the signature of ``scan``.
    for motif, matches_by_seq in scan(seq_file, motif_list, 0.0, 1).items():
        motif_id = motif.id.split("\t")[0]
        totals[motif_id] = [
            matches[0][1] for matches in matches_by_seq.values()
        ]
    return totals


def roc(args):
    """Calculate ROC_AUC and other metrics and optionally plot ROC curve.

    Reads from *args*:
        pwmfile    -- file with the motifs to evaluate
        sample     -- foreground sequence file
        background -- background sequence file
        outfile    -- optional image file name; ".png" is appended if missing
        ids        -- optional comma-separated motif ids; all motifs if empty

    Prints one tab-separated line per motif (ROC AUC, MNCP, enrichment at
    FDR, maximum enrichment) to stdout and, when ``outfile`` is set, hands
    the ROC curve coordinates to ``roc_plot``.

    Raises KeyError when a requested id is not present in the motif file.
    """
    outputfile = args.outfile
    # Default extension for image
    if outputfile and not outputfile.endswith(".png"):
        outputfile += ".png"

    motifs = dict((m.id, m) for m in pwmfile_to_motifs(args.pwmfile))
    if args.ids:
        ids = args.ids.split(",")
    else:
        # Materialise the keys so the same list can be iterated here and
        # passed on to roc_plot.
        ids = list(motifs)
    selected = [motifs[motif_id] for motif_id in ids]

    fg_total = _best_scores_by_motif(args.sample, selected)
    bg_total = _best_scores_by_motif(args.background, selected)

    plot_x = []
    plot_y = []
    # Print the metrics.  Single-argument parenthesised print produces the
    # same output whether print is a statement or a function.
    print("Motif\tROC AUC\tMNCP\tEnr. at 5% FDR\tMax enr.")
    for motif_id in ids:
        fg_vals = fg_total[motif_id]
        bg_vals = bg_total[motif_id]

        (x, y) = ROC_values(fg_vals, bg_vals)
        plot_x.append(x)
        plot_y.append(y)

        auc = ROC_AUC(fg_vals, bg_vals)
        mncp = MNCP(fg_vals, bg_vals)
        enr_fdr = enr_at_fdr(fg_vals, bg_vals)
        # Second element of the returned pair is not reported.
        max_enr, _ = max_enrichment(fg_vals, bg_vals)
        # MNCP uses %0.3f (three decimals), matching the AUC column; the
        # previous %03f printed six decimals.
        print("%s\t%0.3f\t%0.3f\t%0.2f\t%0.2f" % (
            motif_id, auc, mncp, enr_fdr, max_enr))

    # Plot the ROC curve
    if outputfile:
        roc_plot(outputfile, plot_x, plot_y, ids=ids)
def threshold(args):
    """Print a score threshold per motif for the requested FDR.

    Reads from *args*:
        fdr       -- fraction between 0 and 1 (inclusive)
        pwmfile   -- file with the motifs
        inputfile -- sequence file that is scanned with the motifs

    For every motif the value ``[0][1]`` of each non-empty match list is
    collected (presumably the best score per sequence -- confirm against
    ``scan``) and the ``100 - 100 * fdr`` percentile of those values is
    printed to stdout, together with that value rescaled relative to the
    motif's minimum and maximum PWM score.  Motifs without any match produce
    a warning on stderr instead.

    Exits with status 1 when ``args.fdr`` is outside [0, 1] (including NaN).
    """
    # Chained comparison also rejects NaN, which "< 0 or > 1" lets through.
    if not 0 <= args.fdr <= 1:
        # Diagnostics go to stderr so stdout only ever carries the table.
        sys.stderr.write("Please specify a FDR between 0 and 1\n")
        sys.exit(1)

    motifs = pwmfile_to_motifs(args.pwmfile)
    result = scan(args.inputfile, motifs, 0.0, 1)

    # Single-argument parenthesised print produces the same output whether
    # print is a statement or a function.
    print("Motif\tScore\tCutoff")
    for motif, matches_by_seq in result.items():
        scores = [m[0][1] for m in matches_by_seq.values() if len(m) > 0]
        if not scores:
            sys.stderr.write(
                "Warning: no matches for {0}\n".format(motif.id))
            continue

        min_score = motif.pwm_min_score()
        opt_score = scoreatpercentile(scores, 100 - (100 * args.fdr))
        # Position of opt_score between the motif's min and max score.
        # NOTE(review): a motif whose max and min scores coincide would
        # divide by zero here.
        cutoff = (opt_score - min_score) / (motif.pwm_max_score() - min_score)
        print("{0}\t{1}\t{2}".format(motif.id, opt_score, cutoff))