示例#1
0
def runParallelSlices(options):
    """Build one hal2maf command per reference slice, run them, concatenate.

    The reference genome is ``options.refGenome`` or, when that is None, the
    value returned by ``getHalRootName(options.halFile)``.  Slices come from
    ``computeSlices``; each slice gets its own deep-copied options object and
    a command built by ``getHal2MafCmd``.  The commands are handed to
    ``runParallelShellCommands`` and the per-slice options/commands are then
    passed to ``concatenateSlices``.

    Args:
        options: option object.  Attributes read here: ``halFile``,
            ``refGenome``, ``refSequence``, ``splitBySequence``,
            ``smallSize``, ``sliceSize``, ``numProc`` and ``length``.
            ``smallFile`` and ``firstSmallFile`` are (re)set on it as a side
            effect.
    """
    refGenome = options.refGenome
    if refGenome is None:
        refGenome = getHalRootName(options.halFile)
    refSequenceStats = getHalSequenceStats(options.halFile, refGenome)
    options.smallFile = False
    options.firstSmallFile = True
    sliceCmds = []
    sliceOpts = []
    # we are going to deal with sequence coordinates
    if options.splitBySequence is True or options.refSequence is not None:
        for sequence, seqLen, _nt, _nb in refSequenceStats:
            # when a single reference sequence was requested, skip the others
            if (options.refSequence is not None and
                    sequence != options.refSequence):
                continue
            # fresh copy per sequence; it inherits the current value of
            # options.firstSmallFile
            seqOpts = copy.deepcopy(options)
            if seqLen < options.smallSize:
                seqOpts.smallFile = True
            seqOpts.refGenome = refGenome
            seqOpts.refSequence = sequence
            for sStart, sLen, sIdx in computeSlices(seqOpts, seqLen):
                seqOpts.start = sStart
                seqOpts.length = sLen
                seqOpts.sliceNumber = sIdx
                sliceCmds.append(getHal2MafCmd(seqOpts))
                # snapshot: seqOpts is mutated again on the next slice
                sliceOpts.append(copy.deepcopy(seqOpts))
            # once a non-empty small sequence has been handled, copies made
            # for later sequences carry firstSmallFile == False
            if seqOpts.smallFile is True and seqLen > 0:
                options.firstSmallFile = False
    # we are slicing the genome coordinates directly
    else:
        seqOpts = copy.deepcopy(options)
        assert seqOpts.splitBySequence is False
        genomeLen = getHalGenomeLength(seqOpts.halFile, refGenome)
        # auto compute slice size from numprocs
        if seqOpts.sliceSize is None and seqOpts.numProc > 1:
            refLen = genomeLen
            if seqOpts.length is not None and seqOpts.length > 0:
                refLen = seqOpts.length
            # Ceiling division done with floor division on the negated value:
            # "/" followed by math.ceil silently floors under Python 2
            # integer division, which made the slices too small.
            seqOpts.sliceSize = int(-(-refLen // seqOpts.numProc))

        for sStart, sLen, sIdx in computeSlices(seqOpts, genomeLen):
            seqOpts.start = sStart
            seqOpts.length = sLen
            seqOpts.sliceNumber = sIdx
            sliceCmds.append(getHal2MafCmd(seqOpts))
            # snapshot: seqOpts is mutated again on the next slice
            sliceOpts.append(copy.deepcopy(seqOpts))

    # run in parallel
    runParallelShellCommands(sliceCmds, options.numProc)

    # concatenate into output if desired
    concatenateSlices(sliceOpts, sliceCmds)
示例#2
0
def runParallelSlices(options):
    """Build one hal2maf command per reference slice, run them, concatenate.

    The reference genome is ``options.refGenome`` or, when that is None, the
    value returned by ``getHalRootName(options.halFile)``.  Slices come from
    ``computeSlices``; each slice gets its own deep-copied options object and
    a command built by ``getHal2MafCmd``.  The commands are handed to
    ``runParallelShellCommands`` and the per-slice options/commands are then
    passed to ``concatenateSlices``.

    Args:
        options: option object.  Attributes read here: ``halFile``,
            ``refGenome``, ``refSequence``, ``splitBySequence``,
            ``smallSize``, ``sliceSize``, ``numProc`` and ``length``.
            ``smallFile`` and ``firstSmallFile`` are (re)set on it as a side
            effect.
    """
    refGenome = options.refGenome
    if refGenome is None:
        refGenome = getHalRootName(options.halFile)
    refSequenceStats = getHalSequenceStats(options.halFile, refGenome)
    options.smallFile = False
    options.firstSmallFile = True
    sliceCmds = []
    sliceOpts = []
    # we are going to deal with sequence coordinates
    if options.splitBySequence is True or options.refSequence is not None:
        for sequence, seqLen, _nt, _nb in refSequenceStats:
            # when a single reference sequence was requested, skip the others
            if (options.refSequence is not None and
                    sequence != options.refSequence):
                continue
            # fresh copy per sequence; it inherits the current value of
            # options.firstSmallFile
            seqOpts = copy.deepcopy(options)
            if seqLen < options.smallSize:
                seqOpts.smallFile = True
            seqOpts.refGenome = refGenome
            seqOpts.refSequence = sequence
            for sStart, sLen, sIdx in computeSlices(seqOpts, seqLen):
                seqOpts.start = sStart
                seqOpts.length = sLen
                seqOpts.sliceNumber = sIdx
                sliceCmds.append(getHal2MafCmd(seqOpts))
                # snapshot: seqOpts is mutated again on the next slice
                sliceOpts.append(copy.deepcopy(seqOpts))
            # once a non-empty small sequence has been handled, copies made
            # for later sequences carry firstSmallFile == False
            if seqOpts.smallFile is True and seqLen > 0:
                options.firstSmallFile = False
    # we are slicing the genome coordinates directly
    else:
        seqOpts = copy.deepcopy(options)
        assert seqOpts.splitBySequence is False
        genomeLen = getHalGenomeLength(seqOpts.halFile, refGenome)
        # auto compute slice size from numprocs
        if seqOpts.sliceSize is None and seqOpts.numProc > 1:
            refLen = genomeLen
            if seqOpts.length is not None and seqOpts.length > 0:
                refLen = seqOpts.length
            # Ceiling division done with floor division on the negated value:
            # "/" followed by math.ceil silently floors under Python 2
            # integer division, which made the slices too small.
            seqOpts.sliceSize = int(-(-refLen // seqOpts.numProc))

        for sStart, sLen, sIdx in computeSlices(seqOpts, genomeLen):
            seqOpts.start = sStart
            seqOpts.length = sLen
            seqOpts.sliceNumber = sIdx
            sliceCmds.append(getHal2MafCmd(seqOpts))
            # snapshot: seqOpts is mutated again on the next slice
            sliceOpts.append(copy.deepcopy(seqOpts))

    # run in parallel
    runParallelShellCommands(sliceCmds, options.numProc)

    # concatenate into output if desired
    concatenateSlices(sliceOpts, sliceCmds)
示例#3
0
def runParallelSlices(options):
    """Build one halPhyloP command per reference slice, run them, concatenate.

    The reference genome is ``options.refGenome`` or, when that is None, the
    value returned by ``getHalRootName(options.halFile)``.  The range to
    slice is the length of ``options.refSequence`` when one is given,
    otherwise the value of ``getHalGenomeLength``.  Each slice from
    ``computeSlices`` gets a command built by ``getHalPhyloPCmd``; the
    commands are handed to ``runParallelShellCommands``, the per-slice
    options/commands go to ``concatenateSlices``, and finally
    ``writeChromSizes(options)`` is called.

    Args:
        options: option object.  Attributes read here: ``halFile``,
            ``refGenome``, ``refSequence``, ``sliceSize``, ``numProc`` and
            ``length``.  ``smallFile`` and ``firstSmallFile`` are (re)set on
            it as a side effect.

    Raises:
        RuntimeError: if ``options.refSequence`` is given but does not match
            exactly one entry of the sequence statistics.
    """
    refGenome = options.refGenome
    if refGenome is None:
        refGenome = getHalRootName(options.halFile)
    refSequenceStats = getHalSequenceStats(options.halFile, refGenome)
    options.smallFile = False
    options.firstSmallFile = True
    sliceCmds = []
    sliceOpts = []
    if options.refSequence is not None:
        # NOTE(review): assumes each stats row starts with
        # (sequenceName, sequenceLength, ...) -- confirm against
        # getHalSequenceStats.  The name is matched on column 0 and the
        # length is read from column 1 of the single matching row.
        refStat = [x for x in refSequenceStats
                   if x[0] == options.refSequence]
        if len(refStat) != 1:
            # report the resolved genome name; options.refGenome may be None
            raise RuntimeError("Sequence %s not found in genome %s" % (
                options.refSequence, refGenome))
        totalLength = int(refStat[0][1])
    else:
        totalLength = getHalGenomeLength(options.halFile, refGenome)

    seqOpts = copy.deepcopy(options)

    # auto compute slice size from numprocs
    if seqOpts.sliceSize is None and seqOpts.numProc > 1:
        refLen = totalLength
        if seqOpts.length is not None and seqOpts.length > 0:
            refLen = seqOpts.length
        # Ceiling division done with floor division on the negated value:
        # "/" followed by math.ceil silently floors under Python 2 integer
        # division, which made the slices too small.
        seqOpts.sliceSize = int(-(-refLen // seqOpts.numProc))

    for sStart, sLen, sIdx in computeSlices(seqOpts, totalLength):
        seqOpts.start = sStart
        seqOpts.length = sLen
        seqOpts.sliceNumber = sIdx
        sliceCmds.append(getHalPhyloPCmd(seqOpts))
        # snapshot: seqOpts is mutated again on the next slice
        sliceOpts.append(copy.deepcopy(seqOpts))

    # run in parallel
    runParallelShellCommands(sliceCmds, options.numProc)

    # concatenate into output if desired
    concatenateSlices(sliceOpts, sliceCmds)

    writeChromSizes(options)
示例#4
0
def runParallelSlices(options):
    """Build one halPhyloP command per reference slice, run them, concatenate.

    The reference genome is ``options.refGenome`` or, when that is None, the
    value returned by ``getHalRootName(options.halFile)``.  The range to
    slice is the length of ``options.refSequence`` when one is given,
    otherwise the value of ``getHalGenomeLength``.  Each slice from
    ``computeSlices`` gets a command built by ``getHalPhyloPCmd``; the
    commands are handed to ``runParallelShellCommands``, the per-slice
    options/commands go to ``concatenateSlices``, and finally
    ``writeChromSizes(options)`` is called.

    Args:
        options: option object.  Attributes read here: ``halFile``,
            ``refGenome``, ``refSequence``, ``sliceSize``, ``numProc`` and
            ``length``.  ``smallFile`` and ``firstSmallFile`` are (re)set on
            it as a side effect.

    Raises:
        RuntimeError: if ``options.refSequence`` is given but does not match
            exactly one entry of the sequence statistics.
    """
    refGenome = options.refGenome
    if refGenome is None:
        refGenome = getHalRootName(options.halFile)
    refSequenceStats = getHalSequenceStats(options.halFile, refGenome)
    options.smallFile = False
    options.firstSmallFile = True
    sliceCmds = []
    sliceOpts = []
    if options.refSequence is not None:
        # NOTE(review): assumes each stats row starts with
        # (sequenceName, sequenceLength, ...) -- confirm against
        # getHalSequenceStats.  The name is matched on column 0 and the
        # length is read from column 1 of the single matching row.
        refStat = [x for x in refSequenceStats
                   if x[0] == options.refSequence]
        if len(refStat) != 1:
            # report the resolved genome name; options.refGenome may be None
            raise RuntimeError("Sequence %s not found in genome %s" %
                               (options.refSequence, refGenome))
        totalLength = int(refStat[0][1])
    else:
        totalLength = getHalGenomeLength(options.halFile, refGenome)

    seqOpts = copy.deepcopy(options)

    # auto compute slice size from numprocs
    if seqOpts.sliceSize is None and seqOpts.numProc > 1:
        refLen = totalLength
        if seqOpts.length is not None and seqOpts.length > 0:
            refLen = seqOpts.length
        # Ceiling division done with floor division on the negated value:
        # "/" followed by math.ceil silently floors under Python 2 integer
        # division, which made the slices too small.
        seqOpts.sliceSize = int(-(-refLen // seqOpts.numProc))

    for sStart, sLen, sIdx in computeSlices(seqOpts, totalLength):
        seqOpts.start = sStart
        seqOpts.length = sLen
        seqOpts.sliceNumber = sIdx
        sliceCmds.append(getHalPhyloPCmd(seqOpts))
        # snapshot: seqOpts is mutated again on the next slice
        sliceOpts.append(copy.deepcopy(seqOpts))

    # run in parallel
    runParallelShellCommands(sliceCmds, options.numProc)

    # concatenate into output if desired
    concatenateSlices(sliceOpts, sliceCmds)

    writeChromSizes(options)