def create_seghists_from_eventsegs(esegs, histScores):
    """Build SegmentHistory objects from event segments and attach CN profiles.

    The event segments are swept in order.  A working list holds the
    histories that a later edge may still merge with; histories that come
    before the current edge are moved to the finished list.  An edge whose
    start equals its end is treated as a break and is merged only with a
    history at the same locus; an edge with start < end is merged only with
    overlapping histories that also have start < end.

    Side effects:
        * writes progress messages to stderr;
        * writes one tab-separated line per history (chr, start, end, number
          of esegs) to "seghists.dat" in the current working directory;
        * calls create_CNprofiles_from_Edges() on every history.

    Args:
        esegs: sequence of event segments.  Note that the edges must be
            sorted by segstr.  An empty sequence produces an empty result
            and an empty "seghists.dat".
        histScores: history scores, passed to ecycles.compute_totalp() and to
            create_CNprofiles_from_Edges().

    Returns:
        The list of SegmentHistory objects that were built.
    """
    totalp = ecycles.compute_totalp(histScores)
    myseghists = []
    working_seghists = []
    lastseg = None  # keep track of the last location for segments
    if len(esegs) > 0:
        firstseg = SegmentHistory(esegs[0])
        working_seghists = [firstseg]
        if firstseg.start < firstseg.end:
            lastseg = firstseg
    for e in esegs[1:]:
        tmpseghists = []
        merged = False
        for seghist in working_seghists:
            # If it's a break the start and end will be equal.
            # We don't want to merge breaks with segments, only with
            # equivalent breaks.
            if ((e.start == e.end and seghist.samelocus(e)) or
                    ((e.start < e.end) and (seghist.start < seghist.end)
                     and seghist.overlaps(e))):
                tmpseghists += merge_in_edge(e, seghist)
                merged = True
            elif seghist.overlaps(e) or seghist.comes_after(e):
                # Still reachable by later edges: keep it in the working set.
                tmpseghists.append(seghist)
            elif seghist.comes_before(e):
                # No later edge can touch it any more: it is finished.
                myseghists.append(seghist)
        if merged:
            if (e.end > e.start) and (e.end > lastseg.end):
                # The edge reaches past the last segment seen so far; cover
                # the remaining stretch with a new history that starts right
                # after that segment.
                newseg = SegmentHistory(e)
                newseg.start = lastseg.end + 1
                tmpseghists.append(newseg)
                lastseg = newseg
        else:
            newseg = SegmentHistory(e)
            tmpseghists.append(newseg)
            if (e.start < e.end) and (lastseg is None or lastseg.comes_before(e)):
                lastseg = newseg
        working_seghists = tmpseghists
    myseghists += working_seghists
    sys.stderr.write("done making seghists, now creating the CN profiles.\n")
    # The context manager closes the file even if building a profile raises.
    with open("seghists.dat", "w") as datfh:
        for s in myseghists:
            datfh.write("%s\t%d\t%d\t%d\n" % (s.chr, s.start, s.end, len(s.esegs)))
            create_CNprofiles_from_Edges(s, histScores, totalp)
        sys.stderr.write("done making CN profiles\n")
    return myseghists
def _edge_merges_with(e, seghist):
    """Return True when edge `e` should be merged into `seghist`."""
    # If it's a break the start and end will be equal.
    # We don't want to merge breaks with segments, only with equivalent breaks.
    return (e.start == e.end and seghist.samelocus(e)) or (
        (e.start < e.end) and (seghist.start < seghist.end) and seghist.overlaps(e)
    )


def create_seghists_from_eventsegs(esegs, histScores):
    """Group event segments into SegmentHistory objects and build CN profiles.

    Walks `esegs` in order while maintaining two lists: the working histories
    (which a later edge may still merge with) and the finished histories
    (those that come before the current edge).  Each edge is merged into
    every working history it matches; if it matches none, a new history is
    started for it.

    Side effects:
        * progress messages are written to stderr;
        * "seghists.dat" is (re)written in the current working directory with
          one tab-separated line per history: chr, start, end, len(esegs);
        * create_CNprofiles_from_Edges() is called on every history.

    Args:
        esegs: sequence of event segments.  Note that the edges must be
            sorted by segstr.  An empty sequence gives an empty result and an
            empty "seghists.dat".
        histScores: history scores, forwarded to ecycles.compute_totalp() and
            create_CNprofiles_from_Edges().

    Returns:
        List of the SegmentHistory objects created.
    """
    totalp = ecycles.compute_totalp(histScores)
    myseghists = []
    working_seghists = []
    lastseg = None  # keep track of the last location for segments
    if len(esegs) > 0:
        firstseg = SegmentHistory(esegs[0])
        working_seghists.append(firstseg)
        if firstseg.start < firstseg.end:
            lastseg = firstseg

    for e in esegs[1:]:
        tmpseghists = []
        merged = False
        for seghist in working_seghists:
            if _edge_merges_with(e, seghist):
                tmpseghists += merge_in_edge(e, seghist)
                merged = True
            elif seghist.overlaps(e) or seghist.comes_after(e):
                # A later edge may still reach this history.
                tmpseghists.append(seghist)
            elif seghist.comes_before(e):
                # Nothing later can touch this history; it is done.
                myseghists.append(seghist)

        if not merged:
            newseg = SegmentHistory(e)
            tmpseghists.append(newseg)
            if (e.start < e.end) and (lastseg is None or lastseg.comes_before(e)):
                lastseg = newseg
        elif (e.end > e.start) and (e.end > lastseg.end):
            # The merged edge extends beyond the last segment seen so far:
            # add a history for the part after that segment's end.
            newseg = SegmentHistory(e)
            newseg.start = lastseg.end + 1
            tmpseghists.append(newseg)
            lastseg = newseg
        working_seghists = tmpseghists

    myseghists += working_seghists
    sys.stderr.write("done making seghists, now creating the CN profiles.\n")
    # `with` guarantees the file is closed even when profile creation fails.
    with open("seghists.dat", "w") as datfh:
        for s in myseghists:
            datfh.write("%s\t%d\t%d\t%d\n" % (s.chr, s.start, s.end, len(s.esegs)))
            create_CNprofiles_from_Edges(s, histScores, totalp)
        sys.stderr.write("done making CN profiles\n")
    return myseghists
def create_CNprofiles_from_Edges(self, histScores, totalp=0):
    """Build the CNprofile list for a segment history and store it on `self`.

    Profile matrices are built from `self.esegs` with
    create_profile_matrices(), the first matrix is reduced with
    get_unique_rows(), and one CNprofile is created for every unique row
    that has at least one non-zero entry.  The result is assigned to
    `self.CNprofiles`; nothing is returned.

    Args:
        self: object providing `esegs` (read) and receiving `CNprofiles`
            (written).  Despite the name, this is called as a plain function.
        histScores: history scores; indexed here as
            histScores[row_mask, 0], so its rows must line up with the rows
            of the profile matrices.
            NOTE(review): column 0 is presumably the history id -- confirm.
        totalp: precomputed total passed to
            ecycles.compute_likelihood_histories().  When 0 (the default) it
            is computed from `histScores`.
    """
    edgelist = self.esegs
    if totalp == 0:
        # No total supplied: derive it from the scores that were passed in.
        # (This previously referenced the undefined name `historyScores`.)
        totalp = ecycles.compute_totalp(histScores)
    (hprofiles, pprofiles) = create_profile_matrices(edgelist, histScores)
    (ridx, profiles) = get_unique_rows(hprofiles)
    # Keep only the unique rows that contain at least one non-zero value.
    goodi = np.where(np.sum(profiles != 0, axis=1) > 0)[0]
    mycnprofs = []
    for i in goodi:
        members = ridx == i  # rows whose unique-row index is i
        cnvals = profiles[i, :]
        nonzero = cnvals != 0  # only non-zero columns are reported
        pvals = pprofiles[members, :]
        mycnp = CNprofile()
        mycnp.likelihood = ecycles.compute_likelihood_histories(
            histScores[members, 0], histScores, totalp)
        mycnp.numhists = np.sum(members)
        mycnp.pvals = np.mean(pvals, axis=0)[nonzero].tolist()
        mycnp.pvalsd = np.std(pvals, axis=0)[nonzero].tolist()
        mycnp.cnvals = cnvals[nonzero].tolist()
        mycnprofs.append(mycnp)
    self.CNprofiles = mycnprofs
def create_CNprofiles_from_Edges(self, histScores, totalp=0):
    """Compute the CN profiles of a segment history and attach them to it.

    Steps, as performed below:
      1. build two profile matrices from `self.esegs` via
         create_profile_matrices();
      2. collapse the first matrix with get_unique_rows();
      3. for every unique row with a non-zero entry, create a CNprofile
         holding its likelihood, the number of rows mapped to it, and the
         mean / standard deviation of the second matrix over those rows,
         restricted to the columns where the unique row is non-zero.
    The resulting list is stored in `self.CNprofiles`; there is no return
    value.

    Args:
        self: object exposing `esegs`; receives the `CNprofiles` attribute.
        histScores: history scores, indexed as histScores[row_mask, 0].
            NOTE(review): assumes its rows align with the profile matrix
            rows -- confirm against create_profile_matrices().
        totalp: total forwarded to ecycles.compute_likelihood_histories();
            computed from `histScores` when left at 0.
    """
    if totalp == 0:
        # Use the `histScores` argument; the former `historyScores` was an
        # undefined name and raised NameError on this path.
        totalp = ecycles.compute_totalp(histScores)
    (hprofiles, pprofiles) = create_profile_matrices(self.esegs, histScores)
    (ridx, profiles) = get_unique_rows(hprofiles)
    # Indices of unique rows with at least one non-zero value.
    goodi = np.where(np.sum(profiles != 0, axis=1) > 0)[0]
    mycnprofs = []
    for i in goodi:
        in_group = ridx == i  # mask of rows assigned to unique row i
        cnvals = profiles[i, :]
        keep = cnvals != 0  # columns with a non-zero value in this row
        pvals = pprofiles[in_group, :]
        pvalsd = np.std(pvals, axis=0)
        pvalsm = np.mean(pvals, axis=0)

        mycnp = CNprofile()
        mycnp.likelihood = ecycles.compute_likelihood_histories(
            histScores[in_group, 0], histScores, totalp)
        mycnp.numhists = np.sum(in_group)
        mycnp.pvals = pvalsm[keep].tolist()
        mycnp.pvalsd = pvalsd[keep].tolist()
        mycnp.cnvals = cnvals[keep].tolist()
        mycnprofs.append(mycnp)
    self.CNprofiles = mycnprofs