Пример #1
0
    def _mainLoop(self):
        """
        Split the reference into chunks and farm each chunk out as a unit
        of work.

        For every id yielded by ``reference.enumerateIds`` the chunks come
        from ``reference.fancyEnumerateChunks`` when
        ``options.fancyChunking`` is set, and from
        ``reference.enumerateChunks`` otherwise.  Each chunk is placed on
        ``self._workQueue`` with a blocking ``put`` bounded by
        ``options.queueTimeout``.  After all chunks are queued, one ``None``
        sentinel per worker (``options.numWorkers``) is written to mark the
        end of the work stream.

        The method returns early, without writing any sentinels, when
        ``self._aborting`` is set or when a chunk cannot be enqueued.
        """
        logging.debug("Starting main loop.")
        ids = reference.enumerateIds(options.referenceWindows)
        for _id in ids:
            if options.fancyChunking:
                chunks = reference.fancyEnumerateChunks(self._inCmpH5,
                                                        _id,
                                                        options.referenceChunkSize,
                                                        options.minCoverage,
                                                        options.minMapQV,
                                                        options.referenceWindows)
            else:
                chunks = reference.enumerateChunks(_id,
                                                   options.referenceChunkSize,
                                                   options.referenceWindows)
            for chunk in chunks:
                if self._aborting:
                    return
                try:
                    self._workQueue.put(chunk, True, options.queueTimeout)
                except Exception:
                    # Presumably the put timed out on a full queue (the queue
                    # type is not visible here).  Stop feeding work, but leave
                    # a trace instead of failing silently.  Unlike a bare
                    # ``except:``, KeyboardInterrupt and SystemExit are not
                    # swallowed.
                    logging.warning(
                        "Could not enqueue work chunk; leaving main loop early.",
                        exc_info=True)
                    return

        # Write sentinels ("end-of-work-stream"), one per worker.
        for _ in xrange(options.numWorkers):
            self._workQueue.put(None)
Пример #2
0
    def _mainLoop(self):
        """
        Split the reference into chunks and farm each chunk out as a unit
        of work on ``self._workQueue``.

        Chunks for each id from ``reference.enumerateIds`` are produced by
        ``reference.fancyEnumerateChunks`` when ``options.fancyChunking`` is
        set, otherwise by ``reference.enumerateChunks``.  When every chunk
        has been queued, one ``None`` sentinel per worker
        (``options.numWorkers``) is queued to signal end-of-work-stream.

        Returns early, without writing sentinels, as soon as
        ``self._aborting`` is observed to be set.
        """
        logging.debug("Starting main loop.")
        for refId in reference.enumerateIds(options.referenceWindows):
            if not options.fancyChunking:
                workUnits = reference.enumerateChunks(
                    refId,
                    options.referenceChunkSize,
                    options.referenceWindows)
            else:
                workUnits = reference.fancyEnumerateChunks(
                    self._inAlnFile,
                    refId,
                    options.referenceChunkSize,
                    options.minCoverage,
                    options.minMapQV,
                    options.referenceWindows)

            for workUnit in workUnits:
                if self._aborting:
                    return
                self._workQueue.put(workUnit)

        # End-of-work-stream markers: one None per worker.
        for _ in xrange(options.numWorkers):
            self._workQueue.put(None)
Пример #3
0
def getReads(cmpH5, reference, interval, paddedTemplateWidth, depthLimit, real_quiver=False):
    """
    Gather the reads spanning ``interval`` and return them, together with a
    POA consensus template and per-read QV tracks, as zero-padded arrays.

    Parameters:
      cmpH5               -- alignment container; indexed both by a single
                             row and by a list of rows.
      reference           -- object providing ``enumerateIds()`` and
                             ``byId``; only the first enumerated id is used.
      interval            -- ``(start, end)`` pair on that reference.
      paddedTemplateWidth -- length of the padded axis of the output arrays.
                             Clipped alignments whose ``alignedLength``
                             exceeds ``paddedTemplateWidth - 7`` are dropped.
      depthLimit          -- forwarded to ``readsInWindow``.
      real_quiver         -- selects the form of the return value.

    Returns a 4-tuple.  With ``real_quiver`` true:
      ``(template, templateLength, fwdSequences, qvInfo)`` -- the consensus
      and the read sequences as returned by the underlying calls.
    Otherwise:
      ``(tmplSeq, templateLength, readSeqs, qvInfo)`` where ``tmplSeq`` is a
      uint8 array of length ``paddedTemplateWidth`` and ``readSeqs`` a uint8
      array of shape ``(paddedTemplateWidth, nReads)``, both holding
      character ordinals padded with zeros.
    In both cases ``qvInfo`` is a uint8 array of shape
    ``(8, paddedTemplateWidth, nReads)``; planes 0-4 hold InsertionQV,
    MergeQV, DeletionQV, DeletionTag and SubstitutionQV, planes 5-7 stay
    zero.
    """
    minMapQV = 10
    maxPoaCoverage = 11
    readStumpinessThreshold = 0.1

    # Only the first reference id is considered.
    refId = list(reference.enumerateIds())[0]
    refWindow = (refId, 0, reference.byId[refId].length)

    intStart, intEnd = interval
    subWin = subWindow(refWindow, interval)

    rows = readsInWindow(cmpH5, subWin,
                         depthLimit=depthLimit,
                         minMapQV=minMapQV,
                         strategy="longest",
                         stratum=None,
                         barcode=None)

    # Keep only alignments covering the whole interval, clip them to it,
    # and drop those too long for the padded arrays.
    spanningRows = [row for row in rows
                    if cmpH5[row].spansReferenceRange(intStart, intEnd)]
    # NOTE(review): the margin of 7 is unexplained in this function -- confirm
    # against the consumer of these arrays.
    maxAlignedLength = paddedTemplateWidth - 7
    clipped = [aln.clippedTo(*interval) for aln in cmpH5[spanningRows]]
    shortEnough = [aln for aln in clipped
                   if aln.alignedLength <= maxAlignedLength]
    clippedAlns = filterAlns(subWin, shortEnough, readStumpinessThreshold)

    # Compute the POA consensus, which is our initial guess, from at most
    # maxPoaCoverage reads.
    fwdSequences = [a.read(orientation="genomic", aligned=False)
                    for a in clippedAlns]
    template = cc.PoaConsensus.FindConsensus(
        fwdSequences[:maxPoaCoverage]).Sequence()

    # Template as zero-padded character ordinals.
    tmplOrds = [ord(base) for base in template]
    templateLength = len(tmplOrds)
    tmplSeq = np.zeros(paddedTemplateWidth, dtype=np.uint8)
    tmplSeq[:templateLength] = tmplOrds

    numReads = len(clippedAlns)

    # Axis 0: read position, axis 1: read.
    readSeqs = np.zeros((paddedTemplateWidth, numReads), dtype=np.uint8)
    for i, seq in enumerate(fwdSequences):
        alnOrds = [ord(base) for base in seq]
        readSeqs[:len(alnOrds), i] = alnOrds

    # Axis 0: metric, axis 1: read position, axis 2: read.
    qvInfo = np.zeros((8, paddedTemplateWidth, numReads), dtype=np.uint8)
    for i, aln in enumerate(clippedAlns):
        readLength = aln.readLength
        qvInfo[0, :readLength, i] = aln.InsertionQV(orientation="genomic", aligned=False)
        qvInfo[1, :readLength, i] = aln.MergeQV(orientation="genomic", aligned=False)
        qvInfo[2, :readLength, i] = aln.DeletionQV(orientation="genomic", aligned=False)
        qvInfo[3, :readLength, i] = aln.DeletionTag(orientation="genomic", aligned=False)
        qvInfo[4, :readLength, i] = aln.SubstitutionQV(orientation="genomic", aligned=False)

    if real_quiver:
        return template, templateLength, fwdSequences, qvInfo
    return tmplSeq, templateLength, readSeqs, qvInfo
Пример #4
0
def getReads(cmpH5,
             reference,
             interval,
             paddedTemplateWidth,
             depthLimit,
             real_quiver=False):
    """
    Collect the reads that span ``interval`` and pack them, along with a POA
    consensus template and per-read QV tracks, into zero-padded arrays.

    Args:
        cmpH5: alignment container, indexable by one row or a list of rows.
        reference: provides ``enumerateIds()`` and ``byId``; only the first
            enumerated id is used.
        interval: ``(start, end)`` pair on that reference.
        paddedTemplateWidth: size of the padded axis of the returned arrays;
            clipped alignments with ``alignedLength`` greater than
            ``paddedTemplateWidth - 7`` are discarded.
        depthLimit: passed through to ``readsInWindow``.
        real_quiver: when true, return the raw consensus and read sequences
            instead of their padded ordinal arrays.

    Returns:
        ``(template, n, fwdSequences, qvInfo)`` if ``real_quiver`` is true,
        else ``(tmplSeq, n, readSeqs, qvInfo)``, where ``n`` is the length
        of the consensus, ``tmplSeq`` is a uint8 array of length
        ``paddedTemplateWidth``, ``readSeqs`` is a uint8 array of shape
        ``(paddedTemplateWidth, nReads)`` and ``qvInfo`` is a uint8 array of
        shape ``(8, paddedTemplateWidth, nReads)``.  Only the first five
        planes of ``qvInfo`` are filled; the rest remain zero.
    """
    minMapQV = 10
    maxPoaCoverage = 11
    readStumpinessThreshold = 0.1
    # Per-read accessors, listed in the order of the qvInfo planes they fill.
    qvAccessors = ("InsertionQV", "MergeQV", "DeletionQV", "DeletionTag",
                   "SubstitutionQV")

    # Only the first reference id is considered.
    refId = list(reference.enumerateIds())[0]
    refWindow = (refId, 0, reference.byId[refId].length)

    intStart, intEnd = interval
    subWin = subWindow(refWindow, interval)

    rows = readsInWindow(cmpH5,
                         subWin,
                         depthLimit=depthLimit,
                         minMapQV=minMapQV,
                         strategy="longest",
                         stratum=None,
                         barcode=None)
    spanningRows = [
        row for row in rows
        if cmpH5[row].spansReferenceRange(intStart, intEnd)
    ]

    # Clip to the interval, then drop alignments too long for the padding.
    # NOTE(review): the margin of 7 is not explained here -- confirm with the
    # consumer of these arrays.
    lengthCap = paddedTemplateWidth - 7
    clippedToInterval = [
        aln.clippedTo(*interval) for aln in cmpH5[spanningRows]
    ]
    withinCap = [
        aln for aln in clippedToInterval if aln.alignedLength <= lengthCap
    ]
    clippedAlns = filterAlns(subWin, withinCap, readStumpinessThreshold)

    # The POA consensus of at most maxPoaCoverage reads is the initial guess
    # for the template.
    fwdSequences = [
        a.read(orientation="genomic", aligned=False) for a in clippedAlns
    ]
    p = cc.PoaConsensus.FindConsensus(fwdSequences[:maxPoaCoverage])
    template = p.Sequence()

    tmplOrds = [ord(ch) for ch in template]
    tmplSeq = np.zeros(paddedTemplateWidth, dtype=np.uint8)
    tmplSeq[:len(tmplOrds)] = tmplOrds

    nReads = len(clippedAlns)
    # readSeqs axes: (read position, read).
    readSeqs = np.zeros((paddedTemplateWidth, nReads), dtype=np.uint8)
    # qvInfo axes: (metric, read position, read).
    qvInfo = np.zeros((8, paddedTemplateWidth, nReads), dtype=np.uint8)

    for col, (aln, seq) in enumerate(zip(clippedAlns, fwdSequences)):
        seqOrds = [ord(ch) for ch in seq]
        readSeqs[:len(seqOrds), col] = seqOrds
        for plane, accessor in enumerate(qvAccessors):
            track = getattr(aln, accessor)(orientation="genomic",
                                           aligned=False)
            qvInfo[plane, :aln.readLength, col] = track

    if real_quiver:
        return template, len(tmplOrds), fwdSequences, qvInfo
    return tmplSeq, len(tmplOrds), readSeqs, qvInfo
Пример #5
0
# Machine-specific absolute input paths used by the setup below.
_parametersFilePath = "/home/nick/workspace/btry6790_project/venv/lib/python2.7/site-packages/GenomicConsensus/quiver/resources/2013-09/GenomicConsensus/QuiverParameters.ini"
_cmpH5Path = '/home/nick/workspace/btry6790_project/PXO99A_ref_wo_one_copy_212kb_repeat.cmp.h5'
_referenceFastaPath = "/home/nick/workspace/btry6790_project/ref_PXO99A_genome_reference_wo_one_copy_212k_repeat/sequence/ref_PXO99A_genome_reference_wo_one_copy_212k_repeat.fasta"

# Upper bound on read depth, used by the code that follows.
depthLimit = 100

# Settings object handed to configure() together with the alignment file.
options = dummy()
options.diploid = False
options.parametersFile = _parametersFilePath
options.parameterSet = "best"
options.refineDinucleotideRepeats = True
options.noEvidenceConsensusCall = "nocall"
options.minMapQV = 10
options.fastMode = False

cmpH5 = CmpH5Reader(_cmpH5Path)
quiverConfig = configure(options, cmpH5)

# Load the reference, then describe its first id as a (id, start, end) window
# covering the whole sequence.
reference.loadFromFile(_referenceFastaPath, cmpH5)
refId = [x for x in reference.enumerateIds()][0]
_refContig = reference.byId[refId]
refSeq = _refContig.sequence
refWindow = (refId, 0, _refContig.length)

def run_real_quiver(cmpH5, quiverConfig, interval, depthLimit, refSeq, refWindow, seedConsensus):
    
    intStart, intEnd = interval
    subWin = subWindow(refWindow, interval)
    
    windowRefSeq = refSeq[intStart:intEnd]
    rows = readsInWindow(cmpH5, subWin,
                           depthLimit = depthLimit,
                           minMapQV = quiverConfig.minMapQV,
                           strategy = "longest",
                           stratum = None,
                           barcode = None)