def getHalTreeMutations(halPath, args, rootName=None):
    """Walk the HAL tree below a node, processing every branch on the way.

    For each child of the starting node, getHalBranchMutations is invoked
    on that child and then the walk recurses into the child's own subtree
    (pre-order, depth-first).

    Args:
        halPath: path of the HAL file, forwarded to every helper call.
        args: options object forwarded untouched to getHalBranchMutations.
        rootName: name of the node to start from. When None, the name
            returned by getHalRootName(halPath) is used.
    """
    if rootName is None:
        parentName = getHalRootName(halPath)
    else:
        parentName = rootName

    for childName in getHalChildrenNames(halPath, parentName):
        # Handle the branch leading to this child before descending, so
        # branches are visited in pre-order.
        getHalBranchMutations(halPath, childName, args)
        getHalTreeMutations(halPath, args, rootName=childName)
def main(argv=None):
    """Command-line entry point: validate the inputs, then run computeTreePhyloP.

    Args:
        argv: full argument vector with the program name at index 0.
            Defaults to ``sys.argv``.

    Raises:
        RuntimeError: if the input hal or mod file does not exist, if the
            output directory still does not exist after trying to create
            it, or if the requested root / subtree genome is not among the
            genomes returned by getHalGenomes.
    """
    if argv is None:
        argv = sys.argv

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Compute PhyloP scores (in wig format) for each genome in"
        " an alignment.  Scores are computed once per column with "
        "halPhyloPMP.py and iteratively lifted down the tree using "
        "halWiggleLiftover (starting at the root).")

    parser.add_argument("hal", help="input hal")
    parser.add_argument("mod", help="model file for PhyloP.  Can be created "
                        "with halPhyloPTrain.py")
    parser.add_argument("outWigDir", help="directory where output wig files"
                        " will be written")
    parser.add_argument("--root", help="Name of root.  If not specified the"
                        " HAL root will be used", default=None)
    parser.add_argument("--numProc",
                        help="Maximum number of processes.",
                        type=int, default=1)
    parser.add_argument("--bigWig",
                        help="Run wigToBigWig on each generated wiggle",
                        action="store_true", default=False)
    parser.add_argument("--subtree",
                        help="Run clade-specific acceleration/conservation on subtree below this node",
                        default=None)
    parser.add_argument("--prec",
                        help="Number of decimal places in wig output",
                        type=int, default=None)

    # need phyloP options here:

    # Parse the vector we were given (minus the program name) so that the
    # argv parameter is honoured instead of silently re-reading sys.argv.
    args = parser.parse_args(argv[1:])

    if not os.path.isfile(args.hal):
        raise RuntimeError("Input hal file %s not found" % args.hal)
    if not os.path.isfile(args.mod):
        raise RuntimeError("Input mod file %s not found" % args.mod)
    if not os.path.isdir(args.outWigDir):
        os.makedirs(args.outWigDir)
    if not os.path.isdir(args.outWigDir):
        raise RuntimeError("%s not found" % args.outWigDir)

    args.halGenomes = getHalGenomes(args.hal)
    if args.root is None:
        args.root = getHalRootName(args.hal)

    if args.root not in args.halGenomes:
        raise RuntimeError("Root genome %s not found." % args.root)

    # Validate the subtree name itself; checking args.root here (as before)
    # could never fail after the test above and let bad --subtree through.
    if args.subtree is not None and args.subtree not in args.halGenomes:
        raise RuntimeError("Subtree root %s not found." % args.subtree)

    # make a little id tag for temporary maf slices
    S = string.ascii_uppercase + string.digits
    args.tempID = 'halTreePhyloP' + ''.join(random.choice(S) for _ in range(5))

    computeTreePhyloP(args)
def runParallelSlices(options):
    """Split the reference into slices, run one command per slice, then merge.

    One command is built with getHal2MafCmd for every slice produced by
    computeSlices; the commands are executed through
    runParallelShellCommands using ``options.numProc`` and the per-slice
    results are handed to concatenateSlices.

    Two slicing modes exist:
      * sequence coordinates, when ``options.splitBySequence`` is True or
        ``options.refSequence`` is set: each selected reference sequence
        is sliced on its own;
      * genome coordinates otherwise: the whole reference genome is sliced
        directly.

    Args:
        options: parsed options object. ``smallFile`` and ``firstSmallFile``
            are (re)initialised on it, and ``firstSmallFile`` is cleared
            once a non-empty small sequence has been processed. All other
            per-slice settings are written to deep copies only.
    """
    refGenome = options.refGenome
    if refGenome is None:
        refGenome = getHalRootName(options.halFile)
    refSequenceStats = getHalSequenceStats(options.halFile, refGenome)
    options.smallFile = False
    options.firstSmallFile = True
    sliceCmds = []
    sliceOpts = []

    # we are going to deal with sequence coordinates
    if options.splitBySequence is True or options.refSequence is not None:
        for sequence, seqLen, _nt, _nb in refSequenceStats:
            if (options.refSequence is not None
                    and sequence != options.refSequence):
                continue
            seqOpts = copy.deepcopy(options)
            if seqLen < options.smallSize:
                seqOpts.smallFile = True
            seqOpts.refGenome = refGenome
            seqOpts.refSequence = sequence
            for sStart, sLen, sIdx in computeSlices(seqOpts, seqLen):
                seqOpts.start = sStart
                seqOpts.length = sLen
                seqOpts.sliceNumber = sIdx
                sliceCmds.append(getHal2MafCmd(seqOpts))
                # snapshot: seqOpts is mutated again on the next slice
                sliceOpts.append(copy.deepcopy(seqOpts))
            # Later sequences copy `options`, so clearing the flag here
            # makes every subsequent small sequence see it as False.
            if seqOpts.smallFile is True and seqLen > 0:
                options.firstSmallFile = False
    # we are slicing the genome coordinates directly
    else:
        seqOpts = copy.deepcopy(options)
        assert seqOpts.splitBySequence is False
        genomeLen = getHalGenomeLength(seqOpts.halFile, refGenome)
        # auto compute slice size from numprocs
        if seqOpts.sliceSize is None and seqOpts.numProc > 1:
            refLen = genomeLen
            if seqOpts.length is not None and seqOpts.length > 0:
                refLen = seqOpts.length
            # float() forces true division so ceil really rounds up even
            # where `/` on two ints would floor.
            seqOpts.sliceSize = int(
                math.ceil(refLen / float(seqOpts.numProc)))
        for sStart, sLen, sIdx in computeSlices(seqOpts, genomeLen):
            seqOpts.start = sStart
            seqOpts.length = sLen
            seqOpts.sliceNumber = sIdx
            sliceCmds.append(getHal2MafCmd(seqOpts))
            sliceOpts.append(copy.deepcopy(seqOpts))

    # run in parallel
    runParallelShellCommands(sliceCmds, options.numProc)

    # concatenate into output if desired
    concatenateSlices(sliceOpts, sliceCmds)
def runParallelSlices(options):
    """Slice the reference, run one PhyloP command per slice, then merge.

    The range to slice is the single sequence named by
    ``options.refSequence`` when that is set, otherwise the whole reference
    genome. One command is built with getHalPhyloPCmd for every slice
    produced by computeSlices; the commands are executed through
    runParallelShellCommands using ``options.numProc``, the results are
    passed to concatenateSlices, and finally writeChromSizes(options) is
    called.

    Args:
        options: parsed options object. ``smallFile`` and ``firstSmallFile``
            are (re)initialised on it; per-slice settings are written to
            deep copies only.

    Raises:
        RuntimeError: if ``options.refSequence`` is set but does not match
            exactly one sequence of the reference genome.
    """
    refGenome = options.refGenome
    if refGenome is None:
        refGenome = getHalRootName(options.halFile)
    refSequenceStats = getHalSequenceStats(options.halFile, refGenome)
    options.smallFile = False
    options.firstSmallFile = True
    sliceCmds = []
    sliceOpts = []

    if options.refSequence is not None:
        # Stats rows are indexed as (name, length, ...): match on the name
        # in field 0 and read the length from field 1.
        refStat = [x for x in refSequenceStats
                   if x[0] == options.refSequence]
        if len(refStat) != 1:
            # Report the resolved genome; options.refGenome may be None.
            raise RuntimeError("Sequence %s not found in genome %s" % (
                options.refSequence, refGenome))
        totalLength = int(refStat[0][1])
    else:
        totalLength = getHalGenomeLength(options.halFile, refGenome)

    seqOpts = copy.deepcopy(options)

    # auto compute slice size from numprocs
    if seqOpts.sliceSize is None and seqOpts.numProc > 1:
        refLen = totalLength
        if seqOpts.length is not None and seqOpts.length > 0:
            refLen = seqOpts.length
        # float() forces true division so ceil really rounds up even
        # where `/` on two ints would floor.
        seqOpts.sliceSize = int(math.ceil(refLen / float(seqOpts.numProc)))

    for sStart, sLen, sIdx in computeSlices(seqOpts, totalLength):
        seqOpts.start = sStart
        seqOpts.length = sLen
        seqOpts.sliceNumber = sIdx
        sliceCmds.append(getHalPhyloPCmd(seqOpts))
        # snapshot: seqOpts is mutated again on the next slice
        sliceOpts.append(copy.deepcopy(seqOpts))

    # run in parallel
    runParallelShellCommands(sliceCmds, options.numProc)

    # concatenate into output if desired
    concatenateSlices(sliceOpts, sliceCmds)

    writeChromSizes(options)
def runParallelSlices(options):
    """Slice the reference, run one PhyloP command per slice, then merge.

    The range to slice is the single sequence named by
    ``options.refSequence`` when that is set, otherwise the whole reference
    genome. One command is built with getHalPhyloPCmd for every slice
    produced by computeSlices; the commands are executed through
    runParallelShellCommands using ``options.numProc``, the results are
    passed to concatenateSlices, and finally writeChromSizes(options) is
    called.

    Args:
        options: parsed options object. ``smallFile`` and ``firstSmallFile``
            are (re)initialised on it; per-slice settings are written to
            deep copies only.

    Raises:
        RuntimeError: if ``options.refSequence`` is set but does not match
            exactly one sequence of the reference genome.
    """
    refGenome = options.refGenome
    if refGenome is None:
        refGenome = getHalRootName(options.halFile)
    refSequenceStats = getHalSequenceStats(options.halFile, refGenome)
    options.smallFile = False
    options.firstSmallFile = True
    sliceCmds = []
    sliceOpts = []

    if options.refSequence is not None:
        # Stats rows are indexed as (name, length, ...): match on the name
        # in field 0 and read the length from field 1.
        refStat = [x for x in refSequenceStats
                   if x[0] == options.refSequence]
        if len(refStat) != 1:
            # Report the resolved genome; options.refGenome may be None.
            raise RuntimeError("Sequence %s not found in genome %s" %
                               (options.refSequence, refGenome))
        totalLength = int(refStat[0][1])
    else:
        totalLength = getHalGenomeLength(options.halFile, refGenome)

    seqOpts = copy.deepcopy(options)

    # auto compute slice size from numprocs
    if seqOpts.sliceSize is None and seqOpts.numProc > 1:
        refLen = totalLength
        if seqOpts.length is not None and seqOpts.length > 0:
            refLen = seqOpts.length
        # float() forces true division so ceil really rounds up even
        # where `/` on two ints would floor.
        seqOpts.sliceSize = int(math.ceil(refLen / float(seqOpts.numProc)))

    for sStart, sLen, sIdx in computeSlices(seqOpts, totalLength):
        seqOpts.start = sStart
        seqOpts.length = sLen
        seqOpts.sliceNumber = sIdx
        sliceCmds.append(getHalPhyloPCmd(seqOpts))
        # snapshot: seqOpts is mutated again on the next slice
        sliceOpts.append(copy.deepcopy(seqOpts))

    # run in parallel
    runParallelShellCommands(sliceCmds, options.numProc)

    # concatenate into output if desired
    concatenateSlices(sliceOpts, sliceCmds)

    writeChromSizes(options)
def runParallelSlices(options):
    """Partition the work, run the slice commands in parallel, then merge.

    The partitioning helper is chosen from the options:
      * partitionRefTargets when ``options.refTargets`` is truthy;
      * partitionBySeqCoords when ``options.splitBySequence`` is True or
        ``options.refSequence`` is set;
      * partitionByGenomeCoords otherwise.

    After the commands have run, any file named by a slice's
    ``refTargets`` attribute that exists on disk is deleted, and the slice
    results are passed to concatenateSlices.

    Args:
        options: parsed options object. ``smallFile`` is reset to False and
            ``firstSmallFile`` to True on it before partitioning.
    """
    if options.refGenome is None:
        genomeName = getHalRootName(options.halFile)
    else:
        genomeName = options.refGenome

    options.smallFile = False
    options.firstSmallFile = True

    if options.refTargets:
        partition = partitionRefTargets(options)
    elif options.splitBySequence is True or options.refSequence is not None:
        partition = partitionBySeqCoords(options, genomeName)
    else:
        partition = partitionByGenomeCoords(options, genomeName)
    sliceCmds, sliceOpts = partition

    # run in parallel
    runParallelShellCommands(sliceCmds, options.numProc)

    # clean up temporary bed files (if present)
    for perSlice in sliceOpts:
        bedPath = perSlice.refTargets
        if not bedPath:
            continue
        if os.path.isfile(bedPath):
            os.remove(bedPath)

    # concatenate into output if desired
    concatenateSlices(sliceOpts, sliceCmds)