Пример #1
0
    def scoreMods(self, modCalls):
        """
        Compute a quality score for every modification in the winning configuration.

        For each modification in the best scoring configuration, score a
        config excluding the current mod against the winning config and use
        this value as the Qmod for the deleted modification.

        Args:
            modCalls: mapping of template position -> modified-base code.
                Each code is written into the working sequence at its
                position and is looked up in canonicalBaseMap and modNames.

        Returns:
            dict keyed by position. Every entry holds 'modification', 'QMod',
            'LLR' and 'Mask'. When self.methylFractionFlag is set, the
            position is present in self.rawKinetics and its "coverage"
            exceeds self.methylMinCov, the entry additionally holds the FRAC,
            FRAClow and FRACup keys (estimate and the two interval limits
            returned by estimateMethylatedFractions).
        """

        qvModCalls = dict()

        modSeq = a.array('c')
        modSeq.fromstring(self.sequence)

        # Apply the found modifications to the raw sequence
        for (pos, call) in modCalls.items():
            modSeq[pos] = call

        for (pos, call) in modCalls.items():
            # Window of template positions re-scored for this modification
            regionStart = pos - self.post
            regionEnd = pos + self.pre

            # Decide once per site whether a methylated-fraction estimate is
            # wanted; the same gate guards both context-mean lookups and the
            # mixture estimation below. `in` replaces dict.has_key(), which
            # no longer exists in Python 3.
            estimateFraction = (
                self.methylFractionFlag
                and pos in self.rawKinetics
                and self.rawKinetics[pos]["coverage"] > self.methylMinCov)

            # Score the modified template at all positions affected by this mod
            modScore = self.scoreRegion(regionStart, regionEnd, modSeq)
            modScores = self.getRegionScores(regionStart, regionEnd, modSeq)
            if estimateFraction:
                modifiedMeanVectors = self.getContextMeans(
                    regionStart, regionEnd, modSeq)

            # Switch back to the unmodified base and re-score
            modSeq[pos] = canonicalBaseMap[call]
            noModScore = self.scoreRegion(regionStart, regionEnd, modSeq)
            noModScores = self.getRegionScores(regionStart, regionEnd, modSeq)
            if estimateFraction:
                unModifiedMeanVectors = self.getContextMeans(
                    regionStart, regionEnd, modSeq)

            # Put back the modified base so later positions are scored
            # against the full winning configuration
            modSeq[pos] = call

            # Compute score difference
            llr = modScore - noModScore

            # Convert from LLR to phred-scaled probability of modification
            # NOTE(review): presumably log10e is log10(e), making this
            # 10 * log10(1 + e**llr) -- confirm against its definition.
            qModScore = 10 * llr * log10e + 10 * log1p(exp(-llr)) * log10e

            # Figure out which secondary peaks were likely generated by this modification
            # What is the posterior that the peak was generated by this mod?
            maskPos = self.findMaskPositions(pos, modScores, noModScores)

            # FIXME:  Without this, currently, the identificationQv score is too low for many Ca5C sites
            # if self.useLDA:
            #     if self.rawKinetics.has_key(pos):
            #         if self.rawKinetics[pos].has_key('Ca5C'):
            #             llr = -self.rawKinetics[pos]['Ca5C']
            #             qModScore = 100 * llr * log10e + 100*log1p(exp(-llr))*log10e

            # Fields stored for every call, with or without a fraction estimate
            result = {
                'modification': modNames[call],
                'QMod': qModScore,
                'LLR': llr,
                'Mask': maskPos
            }

            if estimateFraction:
                # Instantiate mixture estimation methods:
                mixture = MixtureEstimationMethods(self.gbmModel.post,
                                                   self.gbmModel.pre,
                                                   self.rawKinetics,
                                                   self.methylMinCov)

                # Use modifiedMeanVectors and unModifiedMeanVectors to calculate
                # the mixing proportion and its lower/upper limits.
                methylFracEst, methylFracLow, methylFracUpp = mixture.estimateMethylatedFractions(
                    pos, unModifiedMeanVectors, modifiedMeanVectors,
                    ModificationPeakMask[modNames[call]])

                result[FRAC] = methylFracEst
                result[FRAClow] = methylFracLow
                result[FRACup] = methylFracUpp

            qvModCalls[pos] = result

        return qvModCalls
    def scoreMods(self, modCalls):
        """
        Compute a quality score for every modification in the winning configuration.

        For each modification in the best scoring configuration, score a config excluding the current mod
        against the winning config and use this value as the Qmod for the deleted modification.

        Args:
            modCalls: mapping of template position -> modified-base code. Each code is written into the
                working sequence at its position and is looked up in canonicalBaseMap and modNames.

        Returns:
            dict keyed by position. Every entry holds 'modification', 'QMod', 'LLR' and 'Mask'. When
            self.methylFractionFlag is set, the position is present in self.rawKinetics and its "coverage"
            exceeds self.methylMinCov, the entry additionally holds the FRAC, FRAClow and FRACup keys
            (estimate and the two interval limits returned by estimateMethylatedFractions).
        """

        qvModCalls = dict()

        modSeq = a.array('c')
        modSeq.fromstring(self.sequence)

        # Apply the found modifications to the raw sequence
        for (pos, call) in modCalls.items():
            modSeq[pos] = call

        for (pos, call) in modCalls.items():
            # Window of template positions re-scored for this modification
            regionStart = pos - self.post
            regionEnd = pos + self.pre

            # Decide once per site whether a methylated-fraction estimate is wanted; the same gate guards
            # both context-mean lookups and the mixture estimation below. `in` replaces dict.has_key(),
            # which no longer exists in Python 3.
            estimateFraction = (self.methylFractionFlag and
                                pos in self.rawKinetics and
                                self.rawKinetics[pos]["coverage"] > self.methylMinCov)

            # Score the modified template at all positions affected by this mod
            modScore = self.scoreRegion(regionStart, regionEnd, modSeq)
            modScores = self.getRegionScores(regionStart, regionEnd, modSeq)
            if estimateFraction:
                modifiedMeanVectors = self.getContextMeans(regionStart, regionEnd, modSeq)

            # Switch back to the unmodified base and re-score
            modSeq[pos] = canonicalBaseMap[call]
            noModScore = self.scoreRegion(regionStart, regionEnd, modSeq)
            noModScores = self.getRegionScores(regionStart, regionEnd, modSeq)
            if estimateFraction:
                unModifiedMeanVectors = self.getContextMeans(regionStart, regionEnd, modSeq)

            # Put back the modified base so later positions are scored against the full winning configuration
            modSeq[pos] = call

            # Compute score difference
            llr = modScore - noModScore

            # Convert from LLR to phred-scaled probability of modification
            # NOTE(review): presumably log10e is log10(e), making this 10 * log10(1 + e**llr) -- confirm.
            qModScore = 10 * llr * log10e + 10 * log1p(exp(-llr)) * log10e

            # Figure out which secondary peaks were likely generated by this modification
            # What is the posterior that the peak was generated by this mod?
            maskPos = self.findMaskPositions(pos, modScores, noModScores)

            # FIXME:  Without this, currently, the identificationQv score is too low for many Ca5C sites
            # if self.useLDA:
            #     if self.rawKinetics.has_key(pos):
            #         if self.rawKinetics[pos].has_key('Ca5C'):
            #             llr = -self.rawKinetics[pos]['Ca5C']
            #             qModScore = 100 * llr * log10e + 100*log1p(exp(-llr))*log10e

            # Fields stored for every call, with or without a fraction estimate
            result = {'modification': modNames[call], 'QMod': qModScore, 'LLR': llr, 'Mask': maskPos}

            if estimateFraction:
                # Instantiate mixture estimation methods:
                mixture = MixtureEstimationMethods(self.gbmModel.post, self.gbmModel.pre, self.rawKinetics, self.methylMinCov)

                # Use modifiedMeanVectors and unModifiedMeanVectors to calculate the mixing proportion and its lower/upper limits.
                methylFracEst, methylFracLow, methylFracUpp = mixture.estimateMethylatedFractions(pos, unModifiedMeanVectors, modifiedMeanVectors, ModificationPeakMask[modNames[call]])

                result[FRAC] = methylFracEst
                result[FRAClow] = methylFracLow
                result[FRACup] = methylFracUpp

            qvModCalls[pos] = result

        return qvModCalls