def main(argv):
    """Estimate p values for every result file in a listing and plot them.

    ``argv[0]`` is the path of a listing file.  Each non-blank line of the
    listing holds ``<result file> <display name>`` separated by whitespace.
    The remaining entries of ``argv`` are options:

    * ``-e/--hidden PATTERN``  switch to hidden-decoy mode using PATTERN
    * ``-d/--prefix PATTERN``  decoy pattern (default ``"random"``)
    * ``-l/--label TEXT``      prefix for the axis label and output names
    * ``-t/--ties``            replaces the default ``ties=True`` with the
      option's (empty) value -- see the NOTE(review) in the option loop
    * ``-v/--verbose``         print progress messages
    * ``-h/--help``            print usage and exit

    Three plots are written through ``util``:
    ``<label>_pvalue_estimated.png``,
    ``<label>_pvalue_calibration_from_pep.png`` and
    ``<label>_pvalue_calibration.png``.

    The function leaves through ``sys.exit`` (non-zero status) when the
    arguments are invalid or a referenced file does not exist.
    """
    if len(argv) < 1:
        sys.stderr.write("Error: Number of arguments incorrect\n")
        usage()
        sys.exit(2)

    hidden_decoys = False
    hidden_pattern = ""
    decoy_pattern = "random"
    verbose = False
    ties = True
    label = ""

    try:
        # Options follow the listing file inside ``argv`` itself.  The
        # previous code read ``sys.argv[2:]``, which is the same list when
        # the script is started as ``main(sys.argv[1:])`` (presumably the
        # only caller -- TODO confirm).
        # "label=" needs the trailing "=" so that --label takes a value
        # just like -l does.
        opts, _ = getopt.getopt(
            argv[1:], "e:d:vhl:t",
            ["hidden=", "prefix=", "verbose", "help", "label=", "ties"])
    except getopt.GetoptError as err:
        # Will print something like "option -a not recognized".
        print(str(err))
        usage()
        sys.exit(2)

    for o, a in opts:
        if o in ("-v", "--verbose"):
            verbose = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-e", "--hidden"):
            hidden_decoys = True
            hidden_pattern = a
        elif o in ("-d", "--prefix"):
            decoy_pattern = a
        elif o in ("-l", "--label"):
            label = a
        elif o in ("-t", "--ties"):
            # NOTE(review): -t takes no argument, so ``a`` is always "" and
            # the flag turns the default True into a falsy value.  Kept as
            # is; confirm whether -t is meant to disable tie handling.
            ties = a
        else:
            raise AssertionError("unhandled option")

    if not os.path.isfile(argv[0]):
        sys.stderr.write("Error: file " + str(argv[0]) + " not found\n")
        sys.exit(1)
    infile = argv[0]

    colors = [
        "red", "blue", "yellow", "black", "brown", "pink", "cyan",
        "darkblue", "darkred"
    ]
    hits = []
    names = []

    ##READING ELEMENTS##
    with open(infile) as listing:
        for line in listing:
            words = line.split()
            if not words:
                # Blank line (for example a trailing newline): nothing to read.
                continue
            if len(words) < 2:
                sys.stderr.write("Error: malformed line in " + str(infile) +
                                 ": " + line.strip() + "\n")
                sys.exit(1)
            prob_file = words[0]
            names.append(words[1])
            if verbose:
                print("reading file " + str(prob_file))
            if not os.path.isfile(prob_file):
                sys.stderr.write("Error: file " + str(prob_file) +
                                 " not found\n")
                sys.exit(1)
            hits.append(util.importer(prob_file))

    pvalues = []   # per file: sorted p values derived from the PEPs
    pvalues2 = []  # per file: result of estimate_pvalues on the scores

    for file_hits in hits:
        ##ESTIMATING PVALUES FROM PEP##
        decoys = [(ele.score, ele.protein) for ele in file_hits
                  if ele.protein.find(decoy_pattern) != -1]
        if hidden_decoys:
            # Entries matching the decoy pattern are dropped before the PEP
            # conversion; entries matching the hidden pattern are kept after.
            pep_input = [(ele.pep, ele.protein) for ele in file_hits
                         if ele.protein.find(decoy_pattern) == -1]
            keep_pattern = hidden_pattern
            targets = [(ele.score, ele.protein) for ele in file_hits
                       if ele.protein.find(hidden_pattern) != -1]
        else:
            pep_input = [(ele.pep, ele.protein) for ele in file_hits]
            keep_pattern = decoy_pattern
            targets = [(ele.score, ele.protein) for ele in file_hits
                       if ele.protein.find(decoy_pattern) == -1]

        from_pep = pep2pvalue(pep_input, ties)
        # The last item of each returned entry is searched for the pattern
        # and the second-to-last item is kept as the p value.
        from_pep = [ele[-2] for ele in from_pep
                    if ele[-1].find(keep_pattern) != -1]
        from_pep = sorted(remove_zeroes(from_pep))

        pvalues.append(from_pep)
        pvalues2.append(estimate_pvalues(targets, decoys, decoy_pattern))

    if verbose:
        print("generatings plots")

    ##PLOTTING##
    util.plotHist(pvalues2, names, colors, "estimated $p$ value",
                  "#target " + label, label + "_pvalue_estimated.png")
    util.plot_pvalues_calibration(
        pvalues, names, colors, label + "_pvalue_calibration_from_pep.png")
    util.plot_pvalues_calibration(pvalues2, names, colors,
                                  label + "_pvalue_calibration.png")

    if verbose:
        print("plots generated")
def main(argv):
    """Estimate q values for every result file in a listing and plot them.

    ``argv[0]`` is the path of a listing file.  Each non-blank line of the
    listing holds ``<result file> <display name>`` separated by whitespace.
    The remaining entries of ``argv`` are options:

    * ``-e/--hidden PATTERN``  switch to hidden-decoy mode using PATTERN
    * ``-d/--prefix PATTERN``  decoy pattern (default ``"random"``)
    * ``-l/--label TEXT``      prefix for the axis label and output names
    * ``-f/--fdr FLOAT``       threshold passed to the estimators and
      plots (default ``0.01``)
    * ``-r/--tdratio FLOAT``   target/decoy ratio (default ``0.0``)
    * ``-i/--pi0 VALUE``       estimate pi0 instead of using 1.0; the
      option requires a value but the value itself is not used
    * ``-c/--countdecoys``     pass ``countdecoys=False`` to the estimators
    * ``-t/--ties``            pass ``ties=True`` to the estimators
    * ``-v/--verbose``         print progress messages
    * ``-h/--help``            print usage and exit

    Per file, the counts returned by the estimator are printed, and four
    plots are written through ``util``.

    The function leaves through ``sys.exit`` (non-zero status) when the
    arguments are invalid or a referenced file does not exist.
    """
    if len(argv) < 1:
        sys.stderr.write("Error: Number of arguments incorrect\n")
        usage()
        sys.exit(2)

    hidden_decoys = False
    hidden_pattern = ""
    decoy_pattern = "random"
    verbose = False
    ties = False
    countdecoys = True
    label = ""
    fdr = 0.01
    tdratio = 0.0
    pi0 = False

    try:
        # Options follow the listing file inside ``argv`` itself.  The
        # previous code read ``sys.argv[2:]``, which is the same list when
        # the script is started as ``main(sys.argv[1:])`` (presumably the
        # only caller -- TODO confirm).
        opts, _ = getopt.getopt(argv[1:], "e:d:vhl:f:i:r:tc", [
            "hidden=", "prefix=", "verbose", "help", "label=", "fdr=",
            "pi0=", "tdratio=", "ties", "countdecoys"
        ])
    except getopt.GetoptError as err:
        # Will print something like "option -a not recognized".
        print(str(err))
        usage()
        sys.exit(2)

    for o, a in opts:
        if o in ("-v", "--verbose"):
            verbose = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-e", "--hidden"):
            hidden_decoys = True
            hidden_pattern = a
        elif o in ("-d", "--prefix"):
            decoy_pattern = a
        elif o in ("-l", "--label"):
            label = a
        elif o in ("-f", "--fdr"):
            fdr = float(a)
        elif o in ("-r", "--tdratio"):
            tdratio = float(a)
            print("Using Target Decoy Ratio = " + str(tdratio))
        elif o in ("-i", "--pi0"):
            pi0 = True
            print("Using pi0 to adjust empirical q values")
        elif o in ("-c", "--countdecoys"):
            # The default is already True, so assigning True here made the
            # flag a no-op that contradicted the message printed below.
            countdecoys = False
            print("Not counting decoys when computing empirical q values")
        elif o in ("-t", "--ties"):
            ties = True
            print("Counting clusters as one")
        else:
            raise AssertionError("unhandled option")

    if not os.path.isfile(argv[0]):
        sys.stderr.write("Error: file " + str(argv[0]) + " not found\n")
        sys.exit(1)
    infile = argv[0]

    colors = [
        "red", "blue", "yellow", "black", "brown", "pink", "cyan",
        "darkblue", "darkred"
    ]
    hits = []
    names = []

    ##READING ELEMENTS##
    with open(infile) as listing:
        for line in listing:
            words = line.split()
            if not words:
                # Lines keep their newline, so comparing the raw line with ""
                # never detected blank lines; test the split result instead.
                continue
            if len(words) < 2:
                sys.stderr.write("Error: malformed line in " + str(infile) +
                                 ": " + line.strip() + "\n")
                sys.exit(1)
            prob_file = words[0]
            names.append(words[1])
            if not os.path.isfile(prob_file):
                sys.stderr.write("Error: file " + str(prob_file) +
                                 " not found\n")
                sys.exit(1)
            hits.append(util.importer(prob_file))
            if verbose:
                print("reading file " + str(prob_file))

    pi0s = []
    qvaluesEmps = []
    qvaluesEsts = []

    if hidden_decoys:
        print("Estimating qvalues for hidden decoys mode...")

    for index, file_hits in enumerate(hits):
        if hidden_decoys:
            # Entries matching the decoy pattern are discarded and the
            # hidden pattern takes over the role of the decoy pattern.
            file_hits = [ele for ele in file_hits
                         if ele.protein.find(decoy_pattern) == -1]
            pattern = hidden_pattern
            estimator = estimateFDR_hidden_decoys
        else:
            pattern = decoy_pattern
            estimator = estimateQvalues

        ##ESTIMATING PI0##
        pi0_value = 1.0
        if pi0:
            pi0_value = estimatePi0(getPValues(file_hits, True, pattern))
            # An estimate outside [0, 1] is replaced by 1.0.
            if pi0_value > 1.0 or pi0_value < 0.0:
                pi0_value = 1.0
        pi0s.append(pi0_value)

        ##ESTIMATING QVALUEs##
        (empirical, estimated, ndecoys, ntargets, ndecoysEmp,
         ntargetEmp) = estimator(file_hits, pi0_value, pattern, False, ties,
                                 countdecoys, fdr, tdratio)
        qvaluesEmps.append(empirical)
        qvaluesEsts.append(estimated)

        file_number = str(index + 1)
        if pi0:
            print("pi0 file " + file_number + " :" + str(pi0_value))
        print("elements with estimated qvalue below " + str(fdr) +
              " in file " + file_number + " :" + str(ntargets))
        print("elements with empirical qvalue below " + str(fdr) +
              " in file " + file_number + " :" + str(ntargetEmp))
        print("false positive elements with estimated qvalue below " +
              str(fdr) + " in file " + file_number + " :" + str(ndecoys))
        print("false positive elements with empirical qvalue below " +
              str(fdr) + " in file " + file_number + " :" + str(ndecoysEmp))

    if verbose:
        print("generatings plots")

    ##PLOTTING##
    util.plotHist(qvaluesEsts, names, colors, "estimated $q$ value",
                  "#target " + label, label + "_qvalue_estimated.png", fdr)
    util.plotHist(qvaluesEmps, names, colors, "empirical $q$ value",
                  "#target " + label, label + "_qvalue_empirical.png", fdr)
    # NOTE(review): the "_low_range" plot receives 1.0 and the other one
    # receives fdr; this looks swapped but is kept as in the original --
    # confirm against util.plotCorrelation.
    util.plotCorrelation(
        qvaluesEmps, qvaluesEsts, names, colors, "empirical $q$ value",
        "estimated $q$ value",
        label + "_qvalue_estimated_VS_qvalue_empirical_low_range.png", 1.0)
    util.plotCorrelation(
        qvaluesEmps, qvaluesEsts, names, colors, "empirical $q$ value",
        "estimated $q$ value",
        label + "_qvalue_estimated_VS_qvalue_empirical.png", fdr)

    if verbose:
        print("plots generated")
def main(argv):
    """Estimate p values for every result file in a listing and plot them.

    ``argv[0]`` is the path of a listing file.  Each non-blank line of the
    listing holds ``<result file> <display name>`` separated by whitespace.
    The remaining entries of ``argv`` are options:

    * ``-e/--hidden PATTERN``  switch to hidden-decoy mode using PATTERN
    * ``-d/--prefix PATTERN``  decoy pattern (default ``"random"``)
    * ``-l/--label TEXT``      prefix for the axis label and output names
    * ``-t/--ties``            replaces the default ``ties=True`` with the
      option's (empty) value -- see the NOTE(review) in the option loop
    * ``-v/--verbose``         print progress messages
    * ``-h/--help``            print usage and exit

    Three plots are written through ``util``:
    ``<label>_pvalue_estimated.png``,
    ``<label>_pvalue_calibration_from_pep.png`` and
    ``<label>_pvalue_calibration.png``.

    The function leaves through ``sys.exit`` (non-zero status) when the
    arguments are invalid or a referenced file does not exist.
    """
    if len(argv) < 1:
        sys.stderr.write("Error: Number of arguments incorrect\n")
        usage()
        sys.exit(2)

    hidden_decoys = False
    hidden_pattern = ""
    decoy_pattern = "random"
    verbose = False
    ties = True
    label = ""

    try:
        # Options follow the listing file inside ``argv`` itself.  The
        # previous code read ``sys.argv[2:]``, which is the same list when
        # the script is started as ``main(sys.argv[1:])`` (presumably the
        # only caller -- TODO confirm).
        # "label=" needs the trailing "=" so that --label takes a value
        # just like -l does.
        opts, _ = getopt.getopt(
            argv[1:], "e:d:vhl:t",
            ["hidden=", "prefix=", "verbose", "help", "label=", "ties"])
    except getopt.GetoptError as err:
        # Will print something like "option -a not recognized".
        print(str(err))
        usage()
        sys.exit(2)

    for o, a in opts:
        if o in ("-v", "--verbose"):
            verbose = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-e", "--hidden"):
            hidden_decoys = True
            hidden_pattern = a
        elif o in ("-d", "--prefix"):
            decoy_pattern = a
        elif o in ("-l", "--label"):
            label = a
        elif o in ("-t", "--ties"):
            # NOTE(review): -t takes no argument, so ``a`` is always "" and
            # the flag turns the default True into a falsy value.  Kept as
            # is; confirm whether -t is meant to disable tie handling.
            ties = a
        else:
            raise AssertionError("unhandled option")

    if not os.path.isfile(argv[0]):
        sys.stderr.write("Error: file " + str(argv[0]) + " not found\n")
        sys.exit(1)
    infile = argv[0]

    colors = ["red", "blue", "yellow", "black", "brown", "pink", "cyan",
              "darkblue", "darkred"]
    hits = []
    names = []

    ##READING ELEMENTS##
    with open(infile) as listing:
        for line in listing:
            words = line.split()
            if not words:
                # Blank line (for example a trailing newline): nothing to read.
                continue
            if len(words) < 2:
                sys.stderr.write("Error: malformed line in " + str(infile) +
                                 ": " + line.strip() + "\n")
                sys.exit(1)
            prob_file = words[0]
            names.append(words[1])
            if verbose:
                print("reading file " + str(prob_file))
            if not os.path.isfile(prob_file):
                sys.stderr.write("Error: file " + str(prob_file) +
                                 " not found\n")
                sys.exit(1)
            hits.append(util.importer(prob_file))

    pvalues = []   # per file: sorted p values derived from the PEPs
    pvalues2 = []  # per file: result of estimate_pvalues on the scores

    for file_hits in hits:
        ##ESTIMATING PVALUES FROM PEP##
        decoys = [(ele.score, ele.protein) for ele in file_hits
                  if ele.protein.find(decoy_pattern) != -1]
        if hidden_decoys:
            # Entries matching the decoy pattern are dropped before the PEP
            # conversion; entries matching the hidden pattern are kept after.
            pep_input = [(ele.pep, ele.protein) for ele in file_hits
                         if ele.protein.find(decoy_pattern) == -1]
            keep_pattern = hidden_pattern
            targets = [(ele.score, ele.protein) for ele in file_hits
                       if ele.protein.find(hidden_pattern) != -1]
        else:
            pep_input = [(ele.pep, ele.protein) for ele in file_hits]
            keep_pattern = decoy_pattern
            targets = [(ele.score, ele.protein) for ele in file_hits
                       if ele.protein.find(decoy_pattern) == -1]

        from_pep = pep2pvalue(pep_input, ties)
        # The last item of each returned entry is searched for the pattern
        # and the second-to-last item is kept as the p value.
        from_pep = [ele[-2] for ele in from_pep
                    if ele[-1].find(keep_pattern) != -1]
        from_pep = sorted(remove_zeroes(from_pep))

        pvalues.append(from_pep)
        pvalues2.append(estimate_pvalues(targets, decoys, decoy_pattern))

    if verbose:
        print("generatings plots")

    ##PLOTTING##
    util.plotHist(pvalues2, names, colors, "estimated $p$ value",
                  "#target " + label, label + "_pvalue_estimated.png")
    util.plot_pvalues_calibration(
        pvalues, names, colors, label + "_pvalue_calibration_from_pep.png")
    util.plot_pvalues_calibration(pvalues2, names, colors,
                                  label + "_pvalue_calibration.png")

    if verbose:
        print("plots generated")
def main(argv):
    """Estimate q values for every result file in a listing and plot them.

    ``argv[0]`` is the path of a listing file.  Each non-blank line of the
    listing holds ``<result file> <display name>`` separated by whitespace.
    The remaining entries of ``argv`` are options:

    * ``-e/--hidden PATTERN``  switch to hidden-decoy mode using PATTERN
    * ``-d/--prefix PATTERN``  decoy pattern (default ``"random"``)
    * ``-l/--label TEXT``      prefix for the axis label and output names
    * ``-f/--fdr FLOAT``       threshold passed to the estimators and
      plots (default ``0.01``)
    * ``-r/--tdratio FLOAT``   target/decoy ratio (default ``0.0``)
    * ``-i/--pi0 VALUE``       estimate pi0 instead of using 1.0; the
      option requires a value but the value itself is not used
    * ``-c/--countdecoys``     pass ``countdecoys=False`` to the estimators
    * ``-t/--ties``            pass ``ties=True`` to the estimators
    * ``-v/--verbose``         print progress messages
    * ``-h/--help``            print usage and exit

    Per file, the counts returned by the estimator are printed, and four
    plots are written through ``util``.

    The function leaves through ``sys.exit`` (non-zero status) when the
    arguments are invalid or a referenced file does not exist.
    """
    if len(argv) < 1:
        sys.stderr.write("Error: Number of arguments incorrect\n")
        usage()
        sys.exit(2)

    hidden_decoys = False
    hidden_pattern = ""
    decoy_pattern = "random"
    verbose = False
    ties = False
    countdecoys = True
    label = ""
    fdr = 0.01
    tdratio = 0.0
    pi0 = False

    try:
        # Options follow the listing file inside ``argv`` itself.  The
        # previous code read ``sys.argv[2:]``, which is the same list when
        # the script is started as ``main(sys.argv[1:])`` (presumably the
        # only caller -- TODO confirm).
        opts, _ = getopt.getopt(
            argv[1:], "e:d:vhl:f:i:r:tc",
            ["hidden=", "prefix=", "verbose", "help", "label=", "fdr=",
             "pi0=", "tdratio=", "ties", "countdecoys"])
    except getopt.GetoptError as err:
        # Will print something like "option -a not recognized".
        print(str(err))
        usage()
        sys.exit(2)

    for o, a in opts:
        if o in ("-v", "--verbose"):
            verbose = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-e", "--hidden"):
            hidden_decoys = True
            hidden_pattern = a
        elif o in ("-d", "--prefix"):
            decoy_pattern = a
        elif o in ("-l", "--label"):
            label = a
        elif o in ("-f", "--fdr"):
            fdr = float(a)
        elif o in ("-r", "--tdratio"):
            tdratio = float(a)
            print("Using Target Decoy Ratio = " + str(tdratio))
        elif o in ("-i", "--pi0"):
            pi0 = True
            print("Using pi0 to adjust empirical q values")
        elif o in ("-c", "--countdecoys"):
            # The default is already True, so assigning True here made the
            # flag a no-op that contradicted the message printed below.
            countdecoys = False
            print("Not counting decoys when computing empirical q values")
        elif o in ("-t", "--ties"):
            ties = True
            print("Counting clusters as one")
        else:
            raise AssertionError("unhandled option")

    if not os.path.isfile(argv[0]):
        sys.stderr.write("Error: file " + str(argv[0]) + " not found\n")
        sys.exit(1)
    infile = argv[0]

    colors = ["red", "blue", "yellow", "black", "brown", "pink", "cyan",
              "darkblue", "darkred"]
    hits = []
    names = []

    ##READING ELEMENTS##
    with open(infile) as listing:
        for line in listing:
            words = line.split()
            if not words:
                # Lines keep their newline, so comparing the raw line with ""
                # never detected blank lines; test the split result instead.
                continue
            if len(words) < 2:
                sys.stderr.write("Error: malformed line in " + str(infile) +
                                 ": " + line.strip() + "\n")
                sys.exit(1)
            prob_file = words[0]
            names.append(words[1])
            if not os.path.isfile(prob_file):
                sys.stderr.write("Error: file " + str(prob_file) +
                                 " not found\n")
                sys.exit(1)
            hits.append(util.importer(prob_file))
            if verbose:
                print("reading file " + str(prob_file))

    pi0s = []
    qvaluesEmps = []
    qvaluesEsts = []

    if hidden_decoys:
        print("Estimating qvalues for hidden decoys mode...")

    for index, file_hits in enumerate(hits):
        if hidden_decoys:
            # Entries matching the decoy pattern are discarded and the
            # hidden pattern takes over the role of the decoy pattern.
            file_hits = [ele for ele in file_hits
                         if ele.protein.find(decoy_pattern) == -1]
            pattern = hidden_pattern
            estimator = estimateFDR_hidden_decoys
        else:
            pattern = decoy_pattern
            estimator = estimateQvalues

        ##ESTIMATING PI0##
        pi0_value = 1.0
        if pi0:
            pi0_value = estimatePi0(getPValues(file_hits, True, pattern))
            # An estimate outside [0, 1] is replaced by 1.0.
            if pi0_value > 1.0 or pi0_value < 0.0:
                pi0_value = 1.0
        pi0s.append(pi0_value)

        ##ESTIMATING QVALUEs##
        (empirical, estimated, ndecoys, ntargets, ndecoysEmp,
         ntargetEmp) = estimator(file_hits, pi0_value, pattern, False, ties,
                                 countdecoys, fdr, tdratio)
        qvaluesEmps.append(empirical)
        qvaluesEsts.append(estimated)

        file_number = str(index + 1)
        if pi0:
            print("pi0 file " + file_number + " :" + str(pi0_value))
        print("elements with estimated qvalue below " + str(fdr) +
              " in file " + file_number + " :" + str(ntargets))
        print("elements with empirical qvalue below " + str(fdr) +
              " in file " + file_number + " :" + str(ntargetEmp))
        print("false positive elements with estimated qvalue below " +
              str(fdr) + " in file " + file_number + " :" + str(ndecoys))
        print("false positive elements with empirical qvalue below " +
              str(fdr) + " in file " + file_number + " :" + str(ndecoysEmp))

    if verbose:
        print("generatings plots")

    ##PLOTTING##
    util.plotHist(qvaluesEsts, names, colors, "estimated $q$ value",
                  "#target " + label, label + "_qvalue_estimated.png", fdr)
    util.plotHist(qvaluesEmps, names, colors, "empirical $q$ value",
                  "#target " + label, label + "_qvalue_empirical.png", fdr)
    # NOTE(review): the "_low_range" plot receives 1.0 and the other one
    # receives fdr; this looks swapped but is kept as in the original --
    # confirm against util.plotCorrelation.
    util.plotCorrelation(
        qvaluesEmps, qvaluesEsts, names, colors, "empirical $q$ value",
        "estimated $q$ value",
        label + "_qvalue_estimated_VS_qvalue_empirical_low_range.png", 1.0)
    util.plotCorrelation(
        qvaluesEmps, qvaluesEsts, names, colors, "empirical $q$ value",
        "estimated $q$ value",
        label + "_qvalue_estimated_VS_qvalue_empirical.png", fdr)

    if verbose:
        print("plots generated")