Пример #1
0
def scoresForPosition(mms, pos):
    muts = allSingleBaseMutations(mms.Template(), positions=[pos])
    noMutScore = [0] * mms.NumReads()
    mutScores_ = [ mms.Scores(mut)
                   for mut in muts ]
    mutScores = np.column_stack([noMutScore] + mutScores_).astype(np.float32)
    return mutScores
Пример #2
0
def variantsFromConsensus(refWindow, refSequenceInWindow, cssSequenceInWindow,
                          cssQvInWindow=None, siteCoverage=None, aligner="affine",
                          mms=None):
    """
    Compare the consensus and the reference in this window, returning
    a list of variants.

    Uses the mms to identify heterozygous variants.
    """
    assert (cssQvInWindow is None) == (siteCoverage is None)  # Both or none

    refId, refStart, refEnd = refWindow

    if mms is not None:
        #
        # Hunting diploid variants:
        # 1. find confident heterozygous sites;
        # 2. build a "diploid consensus" using IUPAC encoding
        #    for het sites; mark cssQv accordingly
        # 3. align diploid consensus to reference
        # 4. extract and decorate variants
        #
        assert mms.Template() == cssSequenceInWindow
        iupacMutations = []  # List of (Mutation, confidence)
        for pos in xrange(0, mms.TemplateLength()):
            ds = cc.IsSiteHeterozygous(scoresForPosition(mms, pos), 40)
            if ds:
                muts = [None] + list(allSingleBaseMutations(cssSequenceInWindow, positions=[pos]))
                mut0 = muts[ds.Allele0]
                mut1 = muts[ds.Allele1]
                cssBase = cssSequenceInWindow[pos]
                packedMut = packMuts(cssBase, mut0, mut1)
                iupacMutations.append((packedMut, 40))

        # Create diploidCss by applying mutations, meanwhile updating the
        # confidence vector accordingly.
        diploidCss = cc.ApplyMutations([pair[0] for pair in iupacMutations],
                                       cssSequenceInWindow)

        diploidQv  = list(cssQvInWindow) if cssQvInWindow is not None else None

        runningLengthDiff = 0
        for (mut, conf) in iupacMutations:
            start = mut.Start() + runningLengthDiff
            end   = mut.End() + runningLengthDiff
            diploidQv[start:end] = [conf]
        assert len(diploidCss) == len(diploidQv)

        cssSequenceInWindow = diploidCss
        cssQvInWindow = diploidQv

    vars = variantsFromAlignment(refWindow,
                                 refSequenceInWindow, cssSequenceInWindow,
                                 cssQvInWindow, siteCoverage)
    return vars
Пример #3
0
def scoresForPosition(mms, pos):
    muts = allSingleBaseMutations(mms.Template(), positions=[pos])
    noMutScore = [0] * mms.NumReads()
    mutScores_ = [mms.Scores(mut) for mut in muts]
    mutScores = np.column_stack([noMutScore] + mutScores_).astype(np.float32)
    return mutScores
Пример #4
0
def variantsFromConsensus(refWindow,
                          refSequenceInWindow,
                          cssSequenceInWindow,
                          cssQvInWindow=None,
                          siteCoverage=None,
                          aligner="affine",
                          mms=None):
    """
    Compare the consensus and the reference in this window, returning
    a list of variants.

    Uses the mms to identify heterozygous variants.
    """
    assert (cssQvInWindow is None) == (siteCoverage is None)  # Both or none

    refId, refStart, refEnd = refWindow

    if mms is not None:
        #
        # Hunting diploid variants:
        # 1. find confident heterozygous sites;
        # 2. build a "diploid consensus" using IUPAC encoding
        #    for het sites; mark cssQv accordingly
        # 3. align diploid consensus to reference
        # 4. extract and decorate variants
        #
        assert mms.Template() == cssSequenceInWindow
        iupacMutations = []  # List of (Mutation, confidence)
        for pos in xrange(0, mms.TemplateLength()):
            ds = cc.IsSiteHeterozygous(scoresForPosition(mms, pos), 40)
            if ds:
                muts = [None] + list(
                    allSingleBaseMutations(cssSequenceInWindow,
                                           positions=[pos]))
                mut0 = muts[ds.Allele0]
                mut1 = muts[ds.Allele1]
                cssBase = cssSequenceInWindow[pos]
                packedMut = packMuts(cssBase, mut0, mut1)
                iupacMutations.append((packedMut, 40))

        # Create diploidCss by applying mutations, meanwhile updating the
        # confidence vector accordingly.
        diploidCss = cc.ApplyMutations([pair[0] for pair in iupacMutations],
                                       cssSequenceInWindow)

        diploidQv = list(cssQvInWindow) if cssQvInWindow is not None else None

        runningLengthDiff = 0
        for (mut, conf) in iupacMutations:
            start = mut.Start() + runningLengthDiff
            end = mut.End() + runningLengthDiff
            diploidQv[start:end] = [conf]
        assert len(diploidCss) == len(diploidQv)

        cssSequenceInWindow = diploidCss
        cssQvInWindow = diploidQv

    vars = variantsFromAlignment(refWindow, refSequenceInWindow,
                                 cssSequenceInWindow, cssQvInWindow,
                                 siteCoverage)
    return vars