def analyze_simulation(edges, refhistoryid, historyScores, datout_fh, stats_fh, breaks_fh):
    """Score predicted events against the reference (simulated) history.

    Every edge is put into one of three classes, depending on whether
    ``refhistoryid`` is among its histories:

    * ``isTrue = 1``  (TP): in the reference history and at least one other.
    * ``isTrue = -1`` (FN): in the reference history only.
    * ``isTrue = 0``  (FP): not in the reference history.

    FN edges are afterwards handed to ``checkForCancellingEdges``; the counts
    it returns are reported as TN and subtracted from the FN counts.

    The four-slot counters are written under the header
    ``total / Amp / Del / Adj``; slot 0 is the total and the remaining slot is
    chosen by ``EdgeSimulationData(edge).type``.
    NOTE(review): this assumes ``.type`` is an int in 1..3 - confirm.

    Side effects on each edge: ``edge.histories`` is filled from
    ``edge.histRanges`` when empty, ``edge.likelihood`` is set, and for TP
    edges ``edge.compute_timing_wmeansd`` is called with the reference
    history temporarily removed.

    Args:
        edges: iterable of event/edge objects to evaluate.
        refhistoryid: id of the history treated as ground truth.
        historyScores: 2-D numpy array of history scores; column 0 holds the
            history id (it is compared against ``refhistoryid``).
        datout_fh: writable handle for the per-event table, or a falsy value
            to skip it.
        stats_fh: writable handle for the TP/FP/FN/TN summary and F1 score,
            or a falsy value to skip it.
        breaks_fh: writable handle for the breakpoint listing, or a falsy
            value to skip it.

    Returns:
        None. All results are written to the supplied handles or stored on
        the edge objects.
    """
    # Zero every column of the reference history's row so that it is not
    # included in the likelihood calculation.
    myhistScores = np.copy(historyScores)
    myhistScores[np.where(historyScores[:, 0] == refhistoryid), :] = 0
    totalp = histseg.compute_likelihood_histories(myhistScores[:, 0], myhistScores)

    TP = [0, 0, 0, 0]
    FP = [0, 0, 0, 0]
    TN = [0, 0, 0, 0]
    FN = [0, 0, 0, 0]
    FNedges = []
    types = histseg.Global_EVENTTYPES
    myEdgeSimData = []  # one EdgeSimulationData per edge, in input order

    for edge in edges:
        if not edge.histories:
            edge.histories = histseg.listout_ranges(edge.histRanges)
        myedgesim = EdgeSimulationData(edge)
        evtype = myedgesim.type

        if refhistoryid in edge.histories:
            refindex = edge.histories.index(refhistoryid)
            myedgesim.refindex = refindex
            if len(edge.histories) > 1:
                # True positive: also found in a non-reference history.
                TP[0] += 1
                TP[evtype] += 1
                myedgesim.isTrue = 1
                # Score the edge without the reference history, then put the
                # reference entries back even if scoring raises.
                edge.histories.pop(refindex)
                myedgesim.refpreval = edge.prevals.pop(refindex)
                myedgesim.reforder = edge.orders.pop(refindex)
                try:
                    edge.likelihood = histseg.compute_likelihood_histories(
                        edge.histories, myhistScores, totalp)
                    edge.compute_timing_wmeansd(myhistScores)
                finally:
                    edge.histories.insert(refindex, refhistoryid)
                    edge.prevals.insert(refindex, myedgesim.refpreval)
                    edge.orders.insert(refindex, myedgesim.reforder)
                # Average cost over the non-reference histories only; slices
                # avoid mutating the edge's cost lists.
                othercosts = (edge.uppercosts[:refindex]
                              + edge.uppercosts[refindex + 1:]
                              + edge.lowercosts[:refindex]
                              + edge.lowercosts[refindex + 1:])
                myedgesim.avecost = np.mean(np.array(othercosts))
            else:
                # False negative: seen only in the reference history.
                FN[0] += 1
                FN[evtype] += 1
                FNedges.append(myedgesim)
                myedgesim.isTrue = -1
                edge.likelihood = 1
                myedgesim.avecost = np.mean(np.array(edge.uppercosts + edge.lowercosts))
                myedgesim.refpreval = edge.prevals[refindex]
                myedgesim.reforder = edge.orders[refindex]
        else:
            # False positive: never seen in the reference history.
            FP[0] += 1
            FP[evtype] += 1
            edge.likelihood = histseg.compute_likelihood_histories(
                edge.histories, myhistScores, totalp)
            if edge.likelihood > 1:
                sys.stderr.write("bad lscore: %s\t%s\t%d\n" % (str(edge.likelihood), str(totalp), len(edge.costs)))
            myedgesim.isTrue = 0
            myedgesim.avecost = np.mean(np.array(edge.uppercosts + edge.lowercosts))
        myEdgeSimData.append(myedgesim)

    if FNedges:
        # This will also modify the isTrue value of the FN edges.
        TN = checkForCancellingEdges(FNedges)
        for i, cancelled in enumerate(TN):
            FN[i] -= cancelled

    if datout_fh:
        datout_fh.write("event_id\tevent_type\tavecost\tLscore\tCNval\ttrue\tlength\tprevals\torders\tnumhists\n")
        for edgesim in myEdgeSimData:
            edge = edgesim.edge
            prevals = ",".join(map(str, [edgesim.refpreval, edge.prevalmean, edge.prevalsd]))
            orders = ",".join(map(str, [edgesim.reforder, edge.ordermean, edge.ordersd]))
            evtype = edge.determineEventType()
            length = edge.get_Event_length()
            fields = [edge.id, types[evtype], edgesim.avecost, edge.likelihood,
                      edge.cnval, edgesim.isTrue, length, prevals, orders,
                      len(edge.histories)]
            datout_fh.write("\t".join(map(str, fields)) + "\n")

    if stats_fh:
        stats_fh.write("type\ttotal\tAmp\tDel\tAdj\n")
        stats_fh.write("TP\t%s\nFP\t%s\nFN\t%s\nTN\t%s\n" % (
            "\t".join(map(str, TP)),
            "\t".join(map(str, FP)),
            "\t".join(map(str, FN)),
            "\t".join(map(str, TN))))
        # F1 is undefined when nothing was classified; report 0.0 instead of
        # raising ZeroDivisionError.
        denominator = 2 * TP[0] + FN[0] + FP[0]
        if denominator:
            f1score = float(2 * TP[0]) / float(denominator)
        else:
            f1score = 0.0
        stats_fh.write("F1Score:\t%s\n" % (str(f1score)))

    if breaks_fh:
        breakpoints = histseg.get_breakpoints(edges, refhistoryid)
        for loc, (n, t) in breakpoints.items():
            breaks_fh.write("%s\t%d\t%d\n" % (loc, n, t))
        breaks_fh.write("Breakpoints: %d\n" % len(breakpoints))
refevents=get_eventcounts_for_history(historyid, events) for evnt in events: if otherid in evnt.histories: otherevents+=1 if historyid in evnt.histories: inboth+=1 if refhistoryid2: if historyid in evnt.histories: refevents+=1 outfh.write("%d\t%d\t%d\t%d\t%d\n" % (historyid, otherid, refevents, otherevents, inboth)) if __name__ == '__main__': import argparse parser=argparse.ArgumentParser(description='compute the number of events that are different across a set of histories') parser.add_argument('--braneyfile', help='a HISTORIES*.braney file') parser.add_argument('--pevntsfile', help='a *.pevnts file') parser.add_argument('--refhistoryid', help='the historyid that you want to take as step 0.', default=2500, type=int) parser.add_argument('--refhistoryid2', help='a second historyid that you want to take as step 0.', type=int) parser.add_argument('--numsteps', help='the number of steps to take.', default=0, type=int) parser.add_argument('--stepsize', help='the step size from the reference id(s) in the mcmc that you want to go.', default=1, type=int) parser.add_argument('--stepsize2', help='the step size from the second history id(s) in the mcmc that you want to go.', default=1, type=int) args=parser.parse_args() if args.braneyfile and not args.pevntsfile: events=histseg.make_events_from_braneyfn(args.braneyfile) elif args.pevntsfile: events=pickle.load(open(args.pevntsfile, 'rb')) for e in events: e.histories=histseg.listout_ranges(e.histRanges) get_history_distances_between_mcmc_steps(events, args.refhistoryid, args.refhistoryid2, args.numsteps, args.stepsize, args.stepsize2, sys.stdout)