Example #1
    def extractMappedRead(self, aln, windowStart):
        """
        Given a clipped alignment, convert its coordinates into template
        space (starts with 0), bundle it up with its features as a
        MappedRead.
        """
        if isinstance(aln, CmpH5Alignment):
            die("Arrow does not support CmpH5 files!")

        assert aln.referenceSpan > 0

        def baseFeature(featureName):
            if aln.reader.hasBaseFeature(featureName):
                rawFeature = aln.baseFeature(featureName, aligned=False, orientation="native")
                return rawFeature.clip(0,255).astype(np.uint8)
            else:
                return np.zeros((aln.readLength,), dtype=np.uint8)

        name = aln.readName
        chemistry = aln.sequencingChemistry
        strand = cc.StrandType_REVERSE if aln.isReverseStrand else cc.StrandType_FORWARD
        read = cc.Read(name,
                       aln.read(aligned=False, orientation="native"),
                       cc.Uint8Vector(baseFeature("Ipd").tolist()),
                       cc.Uint8Vector(baseFeature("PulseWidth").tolist()),
                       cc.SNR(aln.hqRegionSnr),
                       chemistry)
        return cc.MappedRead(read,
                             strand,
                             int(aln.referenceStart - windowStart),
                             int(aln.referenceEnd   - windowStart))
Example #2
def uniqueSingleBaseMutations(templateSequence, positions=None):
    """
    Return an iterator over all single-base mutations of a
    templateSequence that result in unique mutated sequences.
    """
    allBases = "ACGT"
    positions = positions or xrange(0, len(templateSequence))
    for tplStart in positions:
        tplBase     = templateSequence[tplStart]
        prevTplBase = templateSequence[tplStart-1] if (tplStart > 0) else None
        # snvs
        for subsBase in allBases:
            if subsBase != tplBase:
                yield cc.Mutation(cc.MutationType_SUBSTITUTION,
                                  tplStart,
                                  subsBase)
        # Insertions---only allowing insertions that are not cognate
        # with the previous base.
        for insBase in allBases:
            if insBase != prevTplBase:
                yield cc.Mutation(cc.MutationType_INSERTION,
                                  tplStart,
                                  insBase)
        # Deletions---only allowed if tplBase does not match the previous tpl base
        if tplBase != prevTplBase:
            yield cc.Mutation(cc.MutationType_DELETION, tplStart)
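A dependency-free sketch of the same enumeration on a plain string (hypothetical helper, not part of the module) shows why filtering cognate insertions and deletions avoids generating duplicate mutated sequences:

def uniqueEditsOfString(tpl, bases="ACGT"):
    # Enumerate the distinct strings reachable by one substitution,
    # insertion, or deletion, mirroring the filtering rules above.
    edits = set()
    for i, tplBase in enumerate(tpl):
        prevBase = tpl[i - 1] if i > 0 else None
        for b in bases:
            if b != tplBase:                 # substitution
                edits.add(tpl[:i] + b + tpl[i + 1:])
            if b != prevBase:                # non-cognate insertion
                edits.add(tpl[:i] + b + tpl[i:])
        if tplBase != prevBase:              # non-cognate deletion
            edits.add(tpl[:i] + tpl[i + 1:])
    return edits

# e.g. uniqueEditsOfString("GATT") contains "GAT" exactly once, even though
# deleting either T of "GATT" would produce it.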
Example #3
def configure(options, alnFile):
    if alnFile.readType != "standard":
        raise U.IncompatibleDataException(
            "The Arrow algorithm requires a BAM file containing standard (non-CCS) reads."
        )

    if options.diploid:
        logging.warn("Diploid analysis not yet supported under Arrow model.")

    # load parameters from file
    if options.parametersFile:
        logging.info("Loading model parameters from: ({0})".format(
            options.parametersFile))
        if not cc.LoadModels(options.parametersFile):
            die("Arrow: unable to load parameters from: ({0})".format(
                options.parametersFile))

    # test available chemistries
    supp = set(cc.SupportedChemistries())
    logging.info("Found consensus models for: ({0})".format(", ".join(
        sorted(supp))))

    used = set([])
    if options.parametersSpec != "auto":
        logging.info("Overriding model selection with: ({0})".format(
            options.parametersSpec))
        if not cc.OverrideModel(options.parametersSpec):
            die("Arrow: unable to override model with: ({0})".format(
                options.parametersSpec))
        used.add(options.parametersSpec)
    else:
        used.update(alnFile.sequencingChemistry)
        unsupp = used - supp
        if unsupp:
            die("Arrow: unsupported chemistries found: ({0})".format(", ".join(
                sorted(unsupp))))

    # All Arrow models require the IPD and PulseWidth base features,
    # except P6-C4 and the first S/P1-C1 chemistry.
    for readGroup in alnFile.readGroupTable:
        if readGroup["SequencingChemistry"] not in ("P6-C4", "S/P1-C1/beta"):
            if ("Ipd" not in readGroup["BaseFeatures"]
                    or "PulseWidth" not in readGroup["BaseFeatures"]):
                die("Arrow model requires the IPD and PulseWidth base features, "
                    "which are missing")

    logging.info("Using consensus models for: ({0})".format(", ".join(
        sorted(used))))

    return M.ArrowConfig(minMapQV=options.minMapQV,
                         noEvidenceConsensus=options.noEvidenceConsensusCall,
                         computeConfidence=(not options.fastMode),
                         minReadScore=options.minReadScore,
                         minHqRegionSnr=options.minHqRegionSnr,
                         minZScore=options.minZScore,
                         minAccuracy=options.minAccuracy)
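A minimal standalone sketch of the chemistry-compatibility check above, with hypothetical chemistry names, illustrates the set arithmetic:

# Hypothetical values; in configure() they come from
# cc.SupportedChemistries() and the BAM read groups.
supported = {"P6-C4", "S/P1-C1/beta"}
used = {"P6-C4", "S/P2-C2/5.0"}

unsupported = used - supported
if unsupported:
    raise SystemExit("Arrow: unsupported chemistries found: ({0})".format(
        ", ".join(sorted(unsupported))))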
Example #4
def refineConsensus(ai, arrowConfig):
    """
    Given a MultiReadMutationScorer, identify and apply favorable
    template mutations.  Return (consensus, didConverge) :: (str, bool)
    """
    cfg = cc.PolishConfig(arrowConfig.maxIterations,
                          arrowConfig.mutationSeparation,
                          arrowConfig.mutationNeighborhood)
    isConverged, nTested, nApplied = cc.Polish(ai, cfg)
    return str(ai), isConverged
Example #5
def variantsFromConsensus(refWindow, refSequenceInWindow, cssSequenceInWindow,
                          cssQvInWindow=None, siteCoverage=None, aligner="affine",
                          ai=None):
    """
    Compare the consensus and the reference in this window, returning
    a list of variants.

    Uses the integrator to identify heterozygous variants.
    """
    assert (cssQvInWindow is None) == (siteCoverage is None)  # Both or none

    refId, refStart, refEnd = refWindow

    if ai is not None:
        #
        # Hunting diploid variants:
        # 1. find confident heterozygous sites;
        # 2. build a "diploid consensus" using IUPAC encoding
        #    for het sites; mark cssQv accordingly
        # 3. align diploid consensus to reference
        # 4. extract and decorate variants
        #
        assert str(ai) == cssSequenceInWindow
        iupacMutations = []  # List of (Mutation, confidence)
        for pos in xrange(0, ai.Length()):
            ds = cc.IsSiteHeterozygous(scoresForPosition(ai, pos), 40)
            if ds:
                muts = [None] + list(allSingleBaseMutations(cssSequenceInWindow, positions=[pos]))
                mut0 = muts[ds.Allele0]
                mut1 = muts[ds.Allele1]
                cssBase = cssSequenceInWindow[pos]
                packedMut = packMuts(cssBase, mut0, mut1)
                iupacMutations.append((packedMut, 40))

        # Create diploidCss by applying mutations, meanwhile updating the
        # confidence vector accordingly.
        diploidCss = cc.ApplyMutations([pair[0] for pair in iupacMutations],
                                       cssSequenceInWindow)

        diploidQv  = list(cssQvInWindow) if cssQvInWindow is not None else None

        runningLengthDiff = 0
        for (mut, conf) in iupacMutations:
            start = mut.Start() + runningLengthDiff
            end   = mut.End() + runningLengthDiff
            diploidQv[start:end] = [conf]
        assert len(diploidCss) == len(diploidQv)

        cssSequenceInWindow = diploidCss
        cssQvInWindow = diploidQv

    vars = variantsFromAlignment(refWindow,
                                 refSequenceInWindow, cssSequenceInWindow,
                                 cssQvInWindow, siteCoverage)
    return vars
Example #6
def refineConsensus(ai, arrowConfig, polishDiploid=False):
    """
    Given a MultiReadMutationScorer, identify and apply favorable
    template mutations.  Return (consensus, didConverge) :: (str, bool)
    """
    cfg = cc.PolishConfig(arrowConfig.maxIterations,
                          arrowConfig.mutationSeparation,
                          arrowConfig.mutationNeighborhood, polishDiploid)
    if arrowConfig.maskRadius:
        _ = cc.Polish(ai, cfg)
        ai.MaskIntervals(arrowConfig.maskRadius, arrowConfig.maskErrorRate)
    polishResult = cc.Polish(ai, cfg)
    return str(ai), polishResult.hasConverged
Example #7
    def onChunk(self, workChunk):
        referenceWindow  = workChunk.window
        refId, refStart, refEnd = referenceWindow

        refSeqInWindow = reference.sequenceInWindow(referenceWindow)

        # Quick cutout for no-coverage case
        if not workChunk.hasCoverage:
            noCallCss = ArrowConsensus.noCallConsensus(self.arrowConfig.noEvidenceConsensus,
                                                       referenceWindow, refSeqInWindow)
            return (referenceWindow, (noCallCss, []))

        # General case
        eWindow = reference.enlargedReferenceWindow(referenceWindow,
                                                    options.referenceChunkOverlap)
        _, eStart, eEnd = eWindow

        # We call consensus on the enlarged window and then map back
        # to the reference and clip the consensus at the implied
        # bounds.  This seems to be more reliable than cutting the
        # consensus bluntly.
        refContig = reference.byName[refId].sequence
        refSequenceInEnlargedWindow = refContig[eStart:eEnd]

        #
        # Get the consensus for the enlarged window.
        #
        css_, variants_ = \
            consensusAndVariantsForWindow(self._inAlnFile, eWindow,
                                          refContig, options.coverage, self.arrowConfig)

        #
        # Restrict the consensus and variants to the reference window.
        #
        ga = cc.Align(refSequenceInEnlargedWindow, css_.sequence)
        targetPositions = cc.TargetToQueryPositions(ga)
        cssStart = targetPositions[refStart-eStart]
        cssEnd   = targetPositions[refEnd-eStart]

        cssSequence    = css_.sequence[cssStart:cssEnd]
        cssQv          = css_.confidence[cssStart:cssEnd]
        variants       = [ v for v in variants_
                           if refStart <= v.refStart < refEnd ]

        consensusObj = Consensus(referenceWindow,
                                 cssSequence,
                                 cssQv)

        return (referenceWindow, (consensusObj, variants))
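A small sketch with made-up numbers shows how the target-to-query position table is used to clip the enlarged-window consensus back to the requested window:

# Hypothetical numbers: the enlarged window starts at eStart = 100 and
# the requested window is [110, 115).  targetPositions[i] is the
# consensus (query) coordinate aligned to reference (target) offset i.
eStart = 100
refStart, refEnd = 110, 115
targetPositions = [0, 1, 2, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16]

cssStart = targetPositions[refStart - eStart]   # -> 9
cssEnd   = targetPositions[refEnd - eStart]     # -> 14
# css_.sequence[cssStart:cssEnd] would then be the window's consensus.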
Example #8
def extractMappedRead(aln, windowStart):
    """
    Given a clipped alignment, convert its coordinates into template
    space (starts with 0), bundle it up with its features as a
    MappedRead.
    """
    assert aln.referenceSpan > 0
    name = aln.readName
    chemistry = aln.sequencingChemistry
    strand = cc.StrandEnum_REVERSE if aln.isReverseStrand else cc.StrandEnum_FORWARD
    read = cc.Read(name, ArrowConfig.extractFeatures(aln), chemistry)
    return (cc.MappedRead(read,
                          strand,
                          int(aln.referenceStart - windowStart),
                          int(aln.referenceEnd   - windowStart)),
            cc.SNR(aln.hqRegionSnr))
Example #9
def consensusConfidence(ai, positions=None):
    """
    Returns an array of QV values reflecting the consensus confidence
    at each position specified.  If the `positions` argument is
    omitted, confidence values are returned for all positions in the
    consensus (str(ai)).
    """
    # NOTE: `positions` is currently ignored; QVs are computed for every
    # position in the consensus.
    return np.array(np.clip(cc.ConsensusQVs(ai), 0, 93), dtype=np.uint8)
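The clip to 93 is presumably there so the confidence values can be emitted as single printable FASTQ quality characters (ASCII 33 + QV, capped at '~'); a standalone sketch of the same clipping:

import numpy as np

rawQvs = [0.0, 12.7, 150.0, 93.0]                    # made-up raw QVs
qvs = np.array(np.clip(rawQvs, 0, 93), dtype=np.uint8)
fastqQual = "".join(chr(int(q) + 33) for q in qvs)   # "!-~~"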
Example #10
def allSingleBaseMutations(templateSequence, positions=None):
    """
    Same as ``uniqueSingleBaseMutations``, but no filtering as to
    whether the mutated sequences are unique.
    """
    allBases = "ACGT"
    positions = positions or xrange(0, len(templateSequence))
    for tplStart in positions:
        tplBase = templateSequence[tplStart]
        # snvs
        for subsBase in allBases:
            if subsBase != tplBase:
                yield cc.Mutation_Substitution(tplStart, subsBase)
        # Insertions
        for insBase in allBases:
            yield cc.Mutation_Insertion(tplStart, insBase)
        # Deletion
        yield cc.Mutation_Deletion(tplStart, 1)
Example #11
def poaConsensus(fwdSequences, arrowConfig):
    seqLens = [len(seq) for seq in fwdSequences]
    median = np.median(seqLens)
    ordSeqs = sorted(fwdSequences, key=lambda seq: abs(len(seq) - median))
    ordSeqs = ordSeqs[:arrowConfig.maxPoaCoverage]
    cov = len(ordSeqs)
    minCov = 1 if cov < 5 else ((cov + 1) / 2 - 1)
    poaConfig = cc.DefaultPoaConfig(cc.AlignMode_GLOBAL)
    return cc.PoaConsensus.FindConsensus(ordSeqs, poaConfig, minCov)
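A standalone sketch of the read-selection heuristic above: sequences are ranked by how close their length is to the median, so the coverage cap keeps the most typical-length reads for the POA.

import numpy as np

# Hypothetical input sequences
fwdSequences = ["ACGTACGT", "ACG", "ACGTACGTACGTACGT", "ACGTACG", "ACGTA"]
maxPoaCoverage = 3

median = np.median([len(seq) for seq in fwdSequences])
ordSeqs = sorted(fwdSequences, key=lambda seq: abs(len(seq) - median))
selected = ordSeqs[:maxPoaCoverage]   # the three reads nearest the median length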
Example #12
def lifted(queryPositions, mappedRead):
    """
    Lift a mappedRead into a new coordinate system by using the
    position translation table `queryPositions`
    """
    newStart = queryPositions[mappedRead.TemplateStart]
    newEnd = queryPositions[mappedRead.TemplateEnd]
    copy = cc.MappedRead(mappedRead)
    copy.TemplateStart = newStart
    copy.TemplateEnd = newEnd
    return copy
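A sketch with a made-up translation table: each endpoint of the read's template interval is looked up in queryPositions to obtain its coordinate on the new template.

# Hypothetical table (as produced by cc.TargetToQueryPositions): old
# template position i aligns to new template position queryPositions[i].
queryPositions = [0, 1, 1, 2, 3, 5, 6]

oldStart, oldEnd = 2, 5
newStart = queryPositions[oldStart]   # -> 1
newEnd   = queryPositions[oldEnd]     # -> 5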
Example #13
def ScoreCas9Site(seq):
    maxKey = None
    maxAcc = None
    for key, rna in GUIDES.iteritems():
        query = rna + "NGG"
        aln = cc.Align(seq, query, cfg)
        Ns = sum(1 for b in aln.Query() if b == 'N')
        acc = (aln.Matches() + Ns) / float(len(query))
        if maxAcc is None or acc > maxAcc:
            maxKey = key
            maxAcc = acc
    return (maxKey, maxAcc)
Example #14
def packMuts(cssBase, mut1, mut2):
    # Turn two muts (with the same Start, End, LengthDiff) into a single
    # mutation using an IUPAC ambiguity code.  The no-op mutation is coded as None.
    #
    # Example1: (_, Subs A, Subs T) -> Subs W
    # Example2: (_, Ins A, Ins T)   -> Ins W
    # Example3: (A, None, Subs T)   -> Subs W
    #
    nonNullMut = mut1 or mut2
    start   = nonNullMut.Start()
    mutType = nonNullMut.Type()
    newBase1 = mut1.Bases() if mut1 else cssBase
    newBase2 = mut2.Bases() if mut2 else cssBase
    newBasePacked = packIUPAC((newBase1, newBase2))
    return cc.Mutation(mutType, start, newBasePacked)
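packMuts relies on a packIUPAC helper defined elsewhere in the package; a hypothetical sketch of what such a two-allele packing could look like, using the standard IUPAC ambiguity codes:

# Hypothetical helper, for illustration only.
IUPAC_PACK = {
    frozenset("AG"): "R", frozenset("CT"): "Y",
    frozenset("CG"): "S", frozenset("AT"): "W",
    frozenset("GT"): "K", frozenset("AC"): "M",
}

def packIUPACSketch(basePair):
    b1, b2 = basePair
    if b1 == b2:
        return b1
    return IUPAC_PACK[frozenset((b1, b2))]

# packIUPACSketch(("A", "T")) -> "W", matching the examples in the comment above.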
Example #15
def sufficientlyAccurate(mappedRead, poaCss, minAccuracy):
    if minAccuracy <= 0.0:
        return True
    s, e = mappedRead.TemplateStart, mappedRead.TemplateEnd
    tpl = poaCss[s:e]
    if mappedRead.Strand == cc.StrandType_FORWARD:
        pass
    elif mappedRead.Strand == cc.StrandType_REVERSE:
        tpl = reverseComplement(tpl)
    else:
        return False
    aln = cc.AlignLinear(tpl, mappedRead.Seq)
    nErrors = sum(1 for t in aln.Transcript() if t != 'M')
    tlen = len(tpl)
    acc = 1.0 - 1.0 * min(nErrors, tlen) / tlen
    return acc >= minAccuracy
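The accuracy measure reduces to counting non-match columns in the alignment transcript; a standalone sketch with a hypothetical transcript string ('M' marks a match):

def accuracyFromTranscript(transcript, tplLen):
    # Fraction of the template not consumed by alignment errors; the
    # error count is capped at the template length so accuracy >= 0.
    nErrors = sum(1 for t in transcript if t != 'M')
    return 1.0 - 1.0 * min(nErrors, tplLen) / tplLen

accuracyFromTranscript("MMMRMMMDMM", 10)   # -> 0.8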
Example #16
from resources.genomes import decodeGenome

MIN_ACC = 0.8

if len(sys.argv) < 4:
    print "ERROR:\tExpected at least 3 arguments but got {0}".format(
        len(sys.argv) - 1)
    print "Usage:\tloadingDiagnostic OUTPUT_PREFIX HG19.FASTA ALIGN_BAM [ALIGN_BAM ..]"
    raise SystemExit

outputPrefix = sys.argv[1]
genomeName = sys.argv[2]
indexedFasta = sys.argv[3]
inputFiles = sys.argv[4:]

cfg = cc.AlignConfig(cc.AlignParams.Default(), 1)

GUIDES = {
    "FMR1": "AGAGGCCGAACTGGGATAAC",
    "FMR1_201": "CGCGCGTCTGTCTTTCGACC",
    "HTT": "AGCGGGCCCAAACTCACGGT",
    "HTT_SQ1": "CTTATTAACAGCAGAGAACT"
}


def ScoreCas9Site(seq):
    maxKey = None
    maxAcc = None
    for key, rna in GUIDES.iteritems():
        query = rna + "NGG"
        aln = cc.Align(seq, query, cfg)
Example #17
def consensusForAlignments(refWindow, refSequence, alns, arrowConfig):
    """
    Call consensus on this interval---without subdividing the interval
    further.

    Testable!

    Clipping has already been done!
    """
    _, refStart, refEnd = refWindow

    # Compute the POA consensus, which is our initial guess, and
    # should typically be > 99.5% accurate
    fwdSequences = [
        a.read(orientation="genomic", aligned=False) for a in alns
        if a.spansReferenceRange(refStart, refEnd)
    ]
    assert len(fwdSequences) >= arrowConfig.minPoaCoverage

    try:
        p = cc.PoaConsensus.FindConsensus(
            fwdSequences[:arrowConfig.maxPoaCoverage])
    except Exception:
        logging.info("%s: POA could not be generated" % (refWindow, ))
        return ArrowConsensus.noCallConsensus(arrowConfig.noEvidenceConsensus,
                                              refWindow, refSequence)
    ga = cc.Align(refSequence, p.Sequence)
    numPoaVariants = ga.Errors()
    poaCss = p.Sequence

    # Extract reads into ConsensusCore2-compatible objects, and map them into the
    # coordinates relative to the POA consensus
    mappedReads = [
        arrowConfig.extractMappedRead(aln, refStart) for aln in alns
    ]
    queryPositions = cc.TargetToQueryPositions(ga)
    mappedReads = [(lifted(queryPositions, mr), snr)
                   for (mr, snr) in mappedReads]

    # Load the mapped reads into the mutation scorer, and iterate
    # until convergence.
    ai = cc.MultiMolecularIntegrator(
        poaCss, cc.IntegratorConfig(arrowConfig.minZScore))
    coverage = 0
    for (mr, snr) in mappedReads:
        if (mr.TemplateEnd <= mr.TemplateStart
                or mr.TemplateEnd - mr.TemplateStart < 2 or mr.Length() < 2):
            continue
        coverage += 1 if ai.AddRead(mr, snr) == cc.AddReadResult_SUCCESS else 0

    # TODO(lhepler, dalexander): propagate coverage around somehow

    # Iterate until convergence
    try:
        assert coverage >= arrowConfig.minPoaCoverage, \
            "Insufficient coverage (%d) to call consensus (%d)" \
            % (coverage, arrowConfig.minPoaCoverage)

        _, converged = refineConsensus(ai, arrowConfig)
        assert converged, "Arrow did not converge to MLE"
        arrowCss = str(ai)
        if arrowConfig.computeConfidence:
            confidence = consensusConfidence(ai)
        else:
            confidence = np.zeros(shape=len(arrowCss), dtype=int)
        return ArrowConsensus(refWindow, arrowCss, confidence, ai)
    except Exception:
        traceback = ''.join(format_exception(*sys.exc_info()))
        logging.info("%s: %s" % (refWindow, traceback))
        return ArrowConsensus.noCallConsensus(arrowConfig.noEvidenceConsensus,
                                              refWindow, refSequence)
Example #18
def variantsFromAlignment(refWindow,
                          refSeq,
                          cssSeq,
                          cssQV=None,
                          refCoverage=None):
    """
    Extract the variants implied by a pairwise alignment of cssSeq to
    the refSeq reference.  If cssQV and refCoverage are provided, they
    will be used to decorate the variants with those attributes.

    Arguments:
      - cssQV: QV array, same length as cssSeq
      - refCoverage: coverage array, same length as the reference window

    This is trickier than in the haploid case.  We have to break out
    diploid variants as single bases, in order to avoid implying
    phase.
    """
    variants = []
    refId, refStart, refEnd = refWindow

    aln = cc.AlignAffineIupac(refSeq, cssSeq)
    alnTarget = aln.Target()
    alnQuery = aln.Query()

    assert (cssQV is None) == (refCoverage is None)  # Both or none
    assert len(refSeq) == refEnd - refStart
    assert cssQV is None or len(cssSeq) == len(cssQV)
    assert refCoverage is None or len(refSeq) == len(refCoverage)

    transcript = [
        X if (Q != "N" and T != "N") else "N"
        for (X, T, Q) in zip(aln.Transcript(), alnTarget, alnQuery)
    ]
    variants = []
    runStart = -1
    runStartRefPos = None
    runX = None
    refPos = refStart
    for pos, (X, T, Q) in enumerate(zip(transcript, alnTarget, alnQuery)):
        if X != runX or isHeterozygote(Q):
            if runStart >= 0 and runX not in "MN":
                # Package up the run and dump a variant
                ref = alnTarget[runStart:pos].replace("-", "")
                read = alnQuery[runStart:pos].replace("-", "")
                if isHeterozygote(read):
                    allele1, allele2 = unpackIUPAC(read)
                    var = Variant(refId, runStartRefPos, refPos, ref, allele1,
                                  allele2)
                else:
                    var = Variant(refId, runStartRefPos, refPos, ref, read)
                variants.append(var)
            runStart = pos
            runStartRefPos = refPos
            runX = X
        if T != "-": refPos += 1

    # This might be better handled within the loop above, just keeping
    # track of Qpos, Tpos
    if cssQV is not None:
        cssPosition = cc.TargetToQueryPositions(aln)
        for v in variants:
            # HACK ALERT: we are not really handling the confidence or
            # coverage for variants at last position of the window
            # correctly here.
            refPos_ = min(v.refStart - refStart, len(refCoverage) - 1)
            cssPos_ = min(cssPosition[v.refStart - refStart], len(cssQV) - 1)

            if refCoverage is not None: v.coverage = refCoverage[refPos_]
            if cssQV is not None: v.confidence = cssQV[cssPos_]

    return variants
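unpackIUPAC and isHeterozygote are helpers defined elsewhere in the package; a hypothetical sketch of the unpacking direction, the inverse of the IUPAC packing sketched earlier:

# Hypothetical helpers, for illustration only.
IUPAC_UNPACK = {"R": ("A", "G"), "Y": ("C", "T"), "S": ("C", "G"),
                "W": ("A", "T"), "K": ("G", "T"), "M": ("A", "C")}

def isHeterozygoteSketch(read):
    return any(b in IUPAC_UNPACK for b in read)

def unpackIUPACSketch(read):
    # Split a read containing ambiguity codes into its two alleles.
    pairs = [IUPAC_UNPACK.get(b, (b, b)) for b in read]
    return ("".join(p[0] for p in pairs),
            "".join(p[1] for p in pairs))

# unpackIUPACSketch("AW") -> ("AA", "AT")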
Example #19
def consensusForAlignments(refWindow,
                           refSequence,
                           alns,
                           arrowConfig,
                           draft=None,
                           polish=True,
                           alnsUsed=None):
    """
    Call consensus on this interval---without subdividing the interval
    further.

    Returns an ArrowConsensus object.

    Requires that clipping has already been done.

    If `draft` is provided, it will serve as the starting
    point for polishing.  If not, the POA will be used to generate a
    draft starting point.

    If `polish` is False, the arrow polishing procedure will not be
    used, and the draft consensus will be returned.

    `alnsUsed` is an output parameter; if not None, it should be an
    empty list on entry; on return from this function, the list will
    contain the alns objects that were actually used to compute the
    consensus (those not filtered out).
    """
    _, refStart, refEnd = refWindow

    if alnsUsed is not None:
        assert alnsUsed == []

    if draft is None:
        # Compute the POA consensus, which is our initial guess, and
        # should typically be > 99.5% accurate
        fwdSequences = [
            a.read(orientation="genomic", aligned=False) for a in alns
            if a.spansReferenceRange(refStart, refEnd)
        ]
        assert len(fwdSequences) >= arrowConfig.minPoaCoverage

        try:
            p = poaConsensus(fwdSequences, arrowConfig)
        except Exception:
            logging.info("%s: POA could not be generated" % (refWindow, ))
            return ArrowConsensus.noCallConsensus(
                arrowConfig.noEvidenceConsensus, refWindow, refSequence)
        draft = p.Sequence

    ga = cc.Align(refSequence, draft)

    # Extract reads into ConsensusCore2-compatible objects, and map them into the
    # coordinates relative to the POA consensus
    mappedReads = [
        arrowConfig.extractMappedRead(aln, refStart) for aln in alns
    ]
    queryPositions = cc.TargetToQueryPositions(ga)
    mappedReads = [lifted(queryPositions, mr) for mr in mappedReads]

    # Load the mapped reads into the mutation scorer, and iterate
    # until convergence.
    ai = cc.Integrator(draft, cc.IntegratorConfig(arrowConfig.minZScore))
    coverage = 0
    for i, mr in enumerate(mappedReads):
        if (mr.TemplateEnd <= mr.TemplateStart
                or mr.TemplateEnd - mr.TemplateStart < 2 or mr.Length() < 2):
            continue
        if not sufficientlyAccurate(mr, draft, arrowConfig.minAccuracy):
            tpl = draft[mr.TemplateStart:mr.TemplateEnd]
            if mr.Strand == cc.StrandType_FORWARD:
                pass
            elif mr.Strand == cc.StrandType_REVERSE:
                tpl = reverseComplement(tpl)
            else:
                tpl = "INACTIVE/UNMAPPED"
            logging.debug(
                "%s: skipping read '%s' due to insufficient accuracy, (poa, read): ('%s', '%s')"
                % (refWindow, mr.Name, tpl, mr.Seq))
            continue
        if ai.AddRead(mr) == cc.State_VALID:
            coverage += 1
            if alnsUsed is not None:
                alnsUsed.append(alns[i])

    if coverage < arrowConfig.minPoaCoverage:
        logging.info("%s: Inadequate coverage to call consensus" %
                     (refWindow, ))
        return ArrowConsensus.noCallConsensus(arrowConfig.noEvidenceConsensus,
                                              refWindow, refSequence)

    if not polish:
        confidence = np.zeros(len(draft), dtype=int)
        return ArrowConsensus(refWindow, draft, confidence, ai)

    # Iterate until convergence
    _, converged = refineConsensus(ai, arrowConfig, polishDiploid=False)
    if converged:
        arrowCss = str(ai)
        if arrowConfig.computeConfidence:
            confidence = consensusConfidence(ai)
        else:
            confidence = np.zeros(shape=len(arrowCss), dtype=int)
    else:
        logging.info("%s: Arrow did not converge to MLE" % (refWindow, ))
        return ArrowConsensus.noCallConsensus(arrowConfig.noEvidenceConsensus,
                                              refWindow, refSequence)

    if arrowConfig.polishDiploid:
        # additional rounds of diploid polishing
        _, converged = refineConsensus(ai, arrowConfig, polishDiploid=True)
        if converged:
            arrowCss = str(ai)
            if arrowConfig.computeConfidence:
                confidence = consensusConfidence(ai)
            else:
                confidence = np.zeros(shape=len(arrowCss), dtype=int)
        else:
            logging.info(
                "%s: Arrow (diploid) did not converge to optimal solution" %
                (refWindow, ))

    return ArrowConsensus(refWindow, arrowCss, confidence, ai)