def runParallelSlices(options):
    """Generate one hal2maf command per reference slice, run them, and merge.

    The reference genome defaults to the HAL root when ``options.refGenome``
    is None.  Two slicing modes are supported:

    * sequence mode (``options.splitBySequence`` is True or
      ``options.refSequence`` is set): every selected reference sequence is
      sliced independently in its own coordinates;
    * genome mode (otherwise): the whole genome is sliced directly, and the
      slice size is derived from ``options.numProc`` when not given.

    Side effects on ``options``: ``smallFile`` is reset to False and
    ``firstSmallFile`` to True on entry; ``firstSmallFile`` is cleared once a
    non-empty "small" sequence has been queued.

    :param options: parsed command-line options; it is deep-copied for every
        slice, so each queued command has its own start/length/sliceNumber.
    """
    refGenome = options.refGenome
    if refGenome is None:
        refGenome = getHalRootName(options.halFile)
    refSequenceStats = getHalSequenceStats(options.halFile, refGenome)
    options.smallFile = False
    options.firstSmallFile = True
    sliceCmds = []
    sliceOpts = []

    # we are going to deal with sequence coordinates
    if options.splitBySequence is True or options.refSequence is not None:
        # NOTE(review): if options.refSequence matches no sequence, nothing
        # is queued and no error is reported -- confirm this is intended.
        for sequence, seqLen, nt, nb in refSequenceStats:
            if options.refSequence is None or sequence == options.refSequence:
                seqOpts = copy.deepcopy(options)
                if seqLen < options.smallSize:
                    seqOpts.smallFile = True
                seqOpts.refGenome = refGenome
                seqOpts.refSequence = sequence
                for sStart, sLen, sIdx in computeSlices(seqOpts, seqLen):
                    seqOpts.start = sStart
                    seqOpts.length = sLen
                    seqOpts.sliceNumber = sIdx
                    sliceCmds.append(getHal2MafCmd(seqOpts))
                    # snapshot: seqOpts is mutated again on the next slice
                    sliceOpts.append(copy.deepcopy(seqOpts))
                # later deep copies of options inherit the cleared flag
                if seqOpts.smallFile is True and seqLen > 0:
                    options.firstSmallFile = False
    # we are slicing the genome coordinates directly
    else:
        seqOpts = copy.deepcopy(options)
        assert seqOpts.splitBySequence is False
        genomeLen = getHalGenomeLength(seqOpts.halFile, refGenome)
        # auto compute slice size from numprocs
        if seqOpts.sliceSize is None and seqOpts.numProc > 1:
            refLen = genomeLen
            if seqOpts.length is not None and seqOpts.length > 0:
                refLen = seqOpts.length
            # integer ceiling division: exact for large genomes and not
            # dependent on whether "/" is true or floor division
            seqOpts.sliceSize = (
                (refLen + seqOpts.numProc - 1) // seqOpts.numProc)
        for sStart, sLen, sIdx in computeSlices(seqOpts, genomeLen):
            seqOpts.start = sStart
            seqOpts.length = sLen
            seqOpts.sliceNumber = sIdx
            sliceCmds.append(getHal2MafCmd(seqOpts))
            # snapshot: seqOpts is mutated again on the next slice
            sliceOpts.append(copy.deepcopy(seqOpts))

    # run in parallel
    runParallelShellCommands(sliceCmds, options.numProc)

    # concatenate into output if desired
    concatenateSlices(sliceOpts, sliceCmds)
def runParallelSlices(options):
    """Generate one halPhyloP command per reference slice, run them, merge.

    The reference genome defaults to the HAL root when ``options.refGenome``
    is None.  The range to slice is the length of ``options.refSequence``
    when one is given, otherwise the length of the whole reference genome.
    When ``options.sliceSize`` is None and more than one process is
    requested, the slice size is derived from ``options.numProc``.

    Side effects on ``options``: ``smallFile`` is set to False and
    ``firstSmallFile`` to True.

    :param options: parsed command-line options; it is deep-copied for every
        slice, so each queued command has its own start/length/sliceNumber.
    :raises RuntimeError: if ``options.refSequence`` is given but does not
        match exactly one sequence of the reference genome.
    """
    refGenome = options.refGenome
    if refGenome is None:
        refGenome = getHalRootName(options.halFile)
    refSequenceStats = getHalSequenceStats(options.halFile, refGenome)
    options.smallFile = False
    options.firstSmallFile = True
    sliceCmds = []
    sliceOpts = []

    if options.refSequence is not None:
        # assumes each stats entry is (sequenceName, length, ...), so the
        # name is field 0 and the length is field 1 -- TODO confirm against
        # getHalSequenceStats
        refStat = [x for x in refSequenceStats
                   if x[0] == options.refSequence]
        if len(refStat) != 1:
            # report the resolved genome name; options.refGenome may be None
            raise RuntimeError("Sequence %s not found in genome %s" % (
                options.refSequence, refGenome))
        totalLength = int(refStat[0][1])
    else:
        totalLength = getHalGenomeLength(options.halFile, refGenome)

    seqOpts = copy.deepcopy(options)

    # auto compute slice size from numprocs
    if seqOpts.sliceSize is None and seqOpts.numProc > 1:
        refLen = totalLength
        if seqOpts.length is not None and seqOpts.length > 0:
            refLen = seqOpts.length
        # integer ceiling division: exact for large genomes and not
        # dependent on whether "/" is true or floor division
        seqOpts.sliceSize = (refLen + seqOpts.numProc - 1) // seqOpts.numProc

    for sStart, sLen, sIdx in computeSlices(seqOpts, totalLength):
        seqOpts.start = sStart
        seqOpts.length = sLen
        seqOpts.sliceNumber = sIdx
        sliceCmds.append(getHalPhyloPCmd(seqOpts))
        # snapshot: seqOpts is mutated again on the next slice
        sliceOpts.append(copy.deepcopy(seqOpts))

    # run in parallel
    runParallelShellCommands(sliceCmds, options.numProc)

    # concatenate into output if desired
    concatenateSlices(sliceOpts, sliceCmds)
    writeChromSizes(options)
def runParallelSlices(options):
    """Slice the reference, run halPhyloP on each slice in parallel, merge.

    Steps:
      1. Resolve the reference genome (the HAL root if none was given).
      2. Determine the total length to cover: the length of
         ``options.refSequence`` if set, else the whole genome length.
      3. If no slice size was given and ``options.numProc`` > 1, pick a
         slice size that spreads the range over ``numProc`` slices.
      4. Queue one command per slice, run them, then concatenate the
         outputs and write the chrom sizes.

    Side effects on ``options``: ``smallFile`` is set to False and
    ``firstSmallFile`` to True.

    :param options: parsed command-line options; a deep copy is stored for
        every slice alongside its command.
    :raises RuntimeError: if ``options.refSequence`` is given but does not
        match exactly one sequence of the reference genome.
    """
    refGenome = options.refGenome
    if refGenome is None:
        refGenome = getHalRootName(options.halFile)
    refSequenceStats = getHalSequenceStats(options.halFile, refGenome)
    options.smallFile = False
    options.firstSmallFile = True
    sliceCmds = []
    sliceOpts = []

    if options.refSequence is not None:
        # assumes each stats entry is (sequenceName, length, ...), so the
        # name is field 0 and the length is field 1 -- TODO confirm against
        # getHalSequenceStats
        refStat = [x for x in refSequenceStats
                   if x[0] == options.refSequence]
        if len(refStat) != 1:
            # report the resolved genome name; options.refGenome may be None
            raise RuntimeError("Sequence %s not found in genome %s" %
                               (options.refSequence, refGenome))
        totalLength = int(refStat[0][1])
    else:
        totalLength = getHalGenomeLength(options.halFile, refGenome)

    seqOpts = copy.deepcopy(options)

    # auto compute slice size from numprocs
    if seqOpts.sliceSize is None and seqOpts.numProc > 1:
        refLen = totalLength
        if seqOpts.length is not None and seqOpts.length > 0:
            refLen = seqOpts.length
        # integer ceiling division: exact for large genomes and not
        # dependent on whether "/" is true or floor division
        seqOpts.sliceSize = (refLen + seqOpts.numProc - 1) // seqOpts.numProc

    for sStart, sLen, sIdx in computeSlices(seqOpts, totalLength):
        seqOpts.start = sStart
        seqOpts.length = sLen
        seqOpts.sliceNumber = sIdx
        sliceCmds.append(getHalPhyloPCmd(seqOpts))
        # snapshot: seqOpts is mutated again on the next slice
        sliceOpts.append(copy.deepcopy(seqOpts))

    # run in parallel
    runParallelShellCommands(sliceCmds, options.numProc)

    # concatenate into output if desired
    concatenateSlices(sliceOpts, sliceCmds)
    writeChromSizes(options)