def halTreeTurnoverParams(halPath, obsPath, rootName, allInternals, maxIt, step, retries):
    """Estimate and print turnover parameters for subtrees of the HAL tree.

    The tree is walked breadth-first starting at rootName.  Parameters are
    estimated for rootName itself and, when allInternals is True, for every
    other visited genome that has at least one child.  For each selected
    genome the observations of all genomes below it are gathered with
    getValuesBelowRoot, handed to estimateParamsFromList, and the first
    three entries of the result are printed, followed by printComparison.

    Args:
        halPath: passed through to the HAL helper functions.
        obsPath: passed together with halPath to readTurnoverFile.
        rootName: name of the genome where the traversal starts.
        allInternals: only the exact value True enables estimation for
            non-root genomes that have children.
        maxIt, step, retries: forwarded unchanged to estimateParamsFromList.

    Returns:
        An empty list.  Nothing is ever appended to it; all results are
        reported on standard output only.
    """
    observations = readTurnoverFile(halPath, obsPath)
    pending = deque([rootName])
    # NOTE(review): never filled in; kept so the return value is unchanged.
    output = []
    while pending:
        genome = pending.popleft()
        # Query the children once per genome: the same list serves both the
        # "has children" test below and the traversal itself.
        children = getHalChildrenNames(halPath, genome)
        if genome == rootName or (allInternals is True and len(children) > 0):
            obsVals = getValuesBelowRoot(halPath, genome, observations)
            result = estimateParamsFromList(obsVals, maxIt, step, retries)
            # A single parenthesised argument prints identically whether
            # print is a statement or a function.
            print("%s: lr=%f gr=%f dsq=%f" % (genome, result[0], result[1], result[2]))
            printComparison(halPath, obsVals, observations, result)
        pending.extend(children)
    return output
def getHalTreeTurnover(halPath, args, rootName=None):
    """Print conservation gain/loss statistics for branches below rootName.

    Recurses depth-first through every descendant of rootName (the HAL root
    when rootName is None).  For each child whose parent is not the HAL
    root, the child's and the parent's conserved-element BED files are
    compared with compareConservationOverBranch and one summary line is
    printed.  Branches hanging directly off the HAL root are skipped, but
    their subtrees are still visited.

    Args:
        halPath: passed through to the HAL helper functions.
        args: object providing workDir and conservedBedName (a format
            string taking a genome name).
        rootName: genome to start from; None selects the HAL root.

    Side effects:
        Intermediate BED paths (<child>_pa.bed, _pslice.bed, _pm.bed,
        _int.bed, _al.bed, _gain.bed, _loss.bed under args.workDir) are
        handed to compareConservationOverBranch.
    """
    root = rootName
    if root is None:
        root = getHalRootName(halPath)
    for child in getHalChildrenNames(halPath, root):
        if root != getHalRootName(halPath):
            workDir = args.workDir
            consFile = os.path.join(workDir, args.conservedBedName % child)
            checkFile(consFile)
            pconsFile = os.path.join(workDir, args.conservedBedName % root)
            checkFile(pconsFile)
            outMappedAlignedBed = os.path.join(workDir, child + "_pa.bed")
            outParentSlicedBed = os.path.join(workDir, child + "_pslice.bed")
            outMappedGenomeBed = os.path.join(workDir, child + "_pm.bed")
            outConservationBed = os.path.join(workDir, child + "_int.bed")
            outAlignedBed = os.path.join(workDir, child + "_al.bed")
            outGainBed = os.path.join(workDir, child + "_gain.bed")
            outLossBed = os.path.join(workDir, child + "_loss.bed")
            (conLen, gainLen, lossLen, unconLen) = compareConservationOverBranch(
                halPath, child, consFile, pconsFile,
                outMappedAlignedBed, outParentSlicedBed, outMappedGenomeBed,
                outConservationBed, outAlignedBed, outGainBed, outLossBed)

            # Each rate is guarded by its OWN denominator.  The guards used
            # to be swapped, which could divide by zero or leave a
            # well-defined rate at 0.
            gainDenominator = unconLen + gainLen
            gainRate = 0
            if gainDenominator > 0:
                gainRate = float(gainLen) / gainDenominator
            lossDenominator = conLen + lossLen
            lossRate = 0
            if lossDenominator > 0:
                lossRate = float(lossLen) / lossDenominator

            branchLength = getBranchLength(halPath, child)
            # A single parenthesised argument prints identically whether
            # print is a statement or a function.
            print("%s: cons %d ucons %d gain %d (%f) loss %d (%f) bl %f" % (
                child, conLen, unconLen, gainLen, gainRate,
                lossLen, lossRate, branchLength))
        getHalTreeTurnover(halPath, args, child)
def getHalTreeBackground(halPath, args, rootName=None):
    """Run the background-BED shell command for every genome below rootName.

    Visits each descendant of rootName depth-first (starting from the HAL
    root when rootName is None); rootName itself is not processed.  For
    each genome the command is echoed to standard output and then executed
    with runShellCommand.

    Args:
        halPath: passed through to the HAL helpers and into the command.
        args: object providing workDir, backgroundBedName (a format string
            taking a genome name), ar, and -- when ar is True -- arExtend
            and arExtendPct.
        rootName: genome to start below; None selects the HAL root.
    """
    parent = getHalRootName(halPath) if rootName is None else rootName
    for genome in getHalChildrenNames(halPath, parent):
        bedPath = os.path.join(args.workDir, args.backgroundBedName % genome)
        if args.ar is not True:
            # Default: halStats output is redirected into the BED file.
            command = "halStats %s --bedSequences %s > %s" % (halPath, genome, bedPath)
        else:
            # args.ar is exactly True: the BED path is given as --maskFile.
            command = "halMaskExtract %s %s --maskFile %s --extend %d --extendPct %f" % (
                halPath, genome, bedPath, args.arExtend, args.arExtendPct)
        print(command)
        runShellCommand(command)
        getHalTreeBackground(halPath, args, genome)
def getValuesBelowRoot(halPath, rootName, observations):
    """Collect the observations of every genome below rootName.

    The subtree is walked breadth-first.  rootName itself is skipped; for
    every other visited genome its entry in observations is appended to the
    result, or a warning is written to standard error when it has none.

    Args:
        halPath: passed through to getHalChildrenNames.
        rootName: genome whose descendants are collected.
        observations: mapping from genome name to its observation.

    Returns:
        List of observation values in breadth-first visiting order.
    """
    collected = []
    pending = deque([rootName])
    while pending:
        genome = pending.popleft()
        if genome != rootName:
            if genome not in observations:
                sys.stderr.write("Warning, no observation for %s\n" % genome)
            else:
                collected.append(observations[genome])
        pending.extend(getHalChildrenNames(halPath, genome))
    return collected
def getHalTreeBackground(halPath, args, rootName=None):
    """Build and run one background-BED command per genome under rootName.

    Every descendant of rootName is handled depth-first; when rootName is
    None the traversal starts at the HAL root.  The starting genome itself
    gets no command.  Each command is printed and then passed to
    runShellCommand.

    Args:
        halPath: passed through to the HAL helpers and into the command.
        args: object providing workDir, backgroundBedName (a format string
            taking a genome name), ar, and -- when ar is True -- arExtend
            and arExtendPct.
        rootName: genome to start below; None selects the HAL root.
    """
    start = rootName
    if start is None:
        start = getHalRootName(halPath)
    for child in getHalChildrenNames(halPath, start):
        bgFile = os.path.join(args.workDir, args.backgroundBedName % child)
        if args.ar is True:
            # The BED path is handed to halMaskExtract as --maskFile.
            template = "halMaskExtract %s %s --maskFile %s --extend %d --extendPct %f"
            fields = (halPath, child, bgFile, args.arExtend, args.arExtendPct)
        else:
            # halStats writes to stdout; the shell redirects it to the BED.
            template = "halStats %s --bedSequences %s > %s"
            fields = (halPath, child, bgFile)
        command = template % fields
        # A single parenthesised argument prints identically whether print
        # is a statement or a function.
        print(command)
        runShellCommand(command)
        getHalTreeBackground(halPath, args, child)
def getHalTreeConservation(halPath, args, events, rootName=None):
    """Write a conserved-interval BED for every genome below rootName.

    For each child of rootName (the HAL root when rootName is None) the
    background and mutations BED files are checked with checkFiles, a
    BedConservation object computes the background rate and writes the
    conserved intervals to the child's conserved BED file, the child's own
    subtree is processed recursively, and finally a summary line for the
    child is printed.

    Args:
        halPath: passed through to the HAL helper functions.
        args: object providing workDir, backgroundBedName,
            mutationsBedName, conservedBedName (format strings taking a
            genome name), pval and cutoff (both converted with float()).
        events: forwarded to BedConservation.computeBackgroundRate.
        rootName: genome to start below; None selects the HAL root.
    """
    root = rootName
    if root is None:
        root = getHalRootName(halPath)
    for child in getHalChildrenNames(halPath, root):
        bgFile = os.path.join(args.workDir, args.backgroundBedName % child)
        muFile = os.path.join(args.workDir, args.mutationsBedName % child)
        checkFiles(bgFile, muFile)
        outPath = os.path.join(args.workDir, args.conservedBedName % child)
        # Close (and thereby flush) the output before descending.  The
        # handle used to stay open, so one leaked per recursion level and
        # the data was not guaranteed to be on disk.
        with open(outPath, "w") as outFile:
            bc = BedConservation()
            bc.computeBackgroundRate(muFile, bgFile, events)
            bc.identifyConservedIntervals(muFile, outFile, float(args.pval),
                                          float(args.cutoff))
        getHalTreeConservation(halPath, args, events, child)
        # NOTE(review): the figure labelled "pct of genome" is the raw
        # fraction writtenBases / genomeLength, not multiplied by 100.
        print("%s: %d segments with %d bases (%f pct of genome) found. bgrate= %f minDist=%d" % (
            child,
            bc.writtenCount,
            bc.writtenBases,
            float(bc.writtenBases) / float(genomeLength(halPath, child)),
            bc.rate,
            bc.minDistance(float(args.pval))))
def getHalTreeTurnover(halPath, args, rootName=None):
    """Report per-branch conservation turnover for the tree under rootName.

    Every descendant of rootName (the HAL root when rootName is None) is
    visited depth-first.  Unless the parent is the HAL root, the child's
    conserved BED is compared against the parent's with
    compareConservationOverBranch, gain and loss rates are derived from the
    four returned lengths, and one line is printed.  Children of the HAL
    root produce no line but are still recursed into.

    Args:
        halPath: passed through to the HAL helper functions.
        args: object providing workDir and conservedBedName (a format
            string taking a genome name).
        rootName: genome to start from; None selects the HAL root.

    Side effects:
        Paths for the intermediate BED files (<child>_pa.bed, _pslice.bed,
        _pm.bed, _int.bed, _al.bed, _gain.bed, _loss.bed in args.workDir)
        are passed to compareConservationOverBranch.
    """
    root = rootName
    if root is None:
        root = getHalRootName(halPath)

    def branchBed(genome, suffix):
        # Path of one per-branch intermediate BED inside the work dir.
        return os.path.join(args.workDir, genome + suffix)

    for child in getHalChildrenNames(halPath, root):
        if root != getHalRootName(halPath):
            consFile = os.path.join(args.workDir, args.conservedBedName % child)
            checkFile(consFile)
            pconsFile = os.path.join(args.workDir, args.conservedBedName % root)
            checkFile(pconsFile)
            (conLen, gainLen, lossLen, unconLen) = compareConservationOverBranch(
                halPath, child, consFile, pconsFile,
                branchBed(child, "_pa.bed"),
                branchBed(child, "_pslice.bed"),
                branchBed(child, "_pm.bed"),
                branchBed(child, "_int.bed"),
                branchBed(child, "_al.bed"),
                branchBed(child, "_gain.bed"),
                branchBed(child, "_loss.bed"))

            # Test the denominator that is actually used.  The two guards
            # were previously exchanged, so a zero denominator could slip
            # through and raise ZeroDivisionError.
            gainRate = 0
            if unconLen + gainLen > 0:
                gainRate = float(gainLen) / (unconLen + gainLen)
            lossRate = 0
            if conLen + lossLen > 0:
                lossRate = float(lossLen) / (conLen + lossLen)

            branchLength = getBranchLength(halPath, child)
            # A single parenthesised argument prints identically whether
            # print is a statement or a function.
            print("%s: cons %d ucons %d gain %d (%f) loss %d (%f) bl %f" % (
                child, conLen, unconLen, gainLen, gainRate,
                lossLen, lossRate, branchLength))
        getHalTreeTurnover(halPath, args, child)
def getHalTreeConservation(halPath, args, events, rootName=None):
    """Compute conserved intervals for each genome beneath rootName.

    Starting from rootName (the HAL root when it is None), every child has
    its background and mutations BED files validated with checkFiles.  A
    BedConservation instance then computes the background rate and writes
    the conserved intervals to the child's conserved BED file.  The
    function recurses into the child before printing the child's summary.

    Args:
        halPath: passed through to the HAL helper functions.
        args: object providing workDir, backgroundBedName,
            mutationsBedName, conservedBedName (format strings taking a
            genome name), pval and cutoff (both converted with float()).
        events: forwarded to BedConservation.computeBackgroundRate.
        rootName: genome to start below; None selects the HAL root.
    """
    root = rootName
    if root is None:
        root = getHalRootName(halPath)
    pval = float(args.pval)
    for child in getHalChildrenNames(halPath, root):
        bgFile = os.path.join(args.workDir, args.backgroundBedName % child)
        muFile = os.path.join(args.workDir, args.mutationsBedName % child)
        checkFiles(bgFile, muFile)
        outPath = os.path.join(args.workDir, args.conservedBedName % child)
        # The context manager closes the output even if an error occurs;
        # the handle was previously never closed and stayed open through
        # the whole recursive descent.
        with open(outPath, "w") as outFile:
            bc = BedConservation()
            bc.computeBackgroundRate(muFile, bgFile, events)
            bc.identifyConservedIntervals(muFile, outFile, pval, float(args.cutoff))
        getHalTreeConservation(halPath, args, events, child)
        # NOTE(review): the figure labelled "pct of genome" is the raw
        # fraction writtenBases / genomeLength, not multiplied by 100.
        coveredFraction = float(bc.writtenBases) / float(genomeLength(halPath, child))
        print("%s: %d segments with %d bases (%f pct of genome) found. bgrate= %f minDist=%d" % (
            child,
            bc.writtenCount,
            bc.writtenBases,
            coveredFraction,
            bc.rate,
            bc.minDistance(pval),
        ))