Exemplos de Analytics.getPRMLadder em Python, exemplos de Analytics.getPRMLadder, speech_hmm em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: AlignDeNovoSequenceToReferenceNoSpectrumXDFGet.py Projeto: adevabhaktuni/LADS

def compareSequences(deNovoPep, deNovoUnmodPep, refPep, hashedUnimodDict, unimodDict, paramsDict, deNovoAmbigEdges = None, epsilon = 0.02):
    """Align a de novo peptide to a reference peptide and explain each differing region.

    The unmodified de novo sequence is aligned to the reference, the alignment is
    split into intervals by getConnectedDisagreementRegions, and every interval
    is annotated either through resolveInterval or as a plain
    'Insertion' / 'Deletion'.

    Parameters:
        deNovoPep: de novo peptide (with modification symbols) used to build the
            de novo PRM ladder.
        deNovoUnmodPep: de novo peptide without modifications; used for the
            alignment and for slicing sub-sequences.
        refPep: reference peptide; any 'X' characters are removed first.
        hashedUnimodDict, unimodDict, paramsDict: passed through to resolveInterval.
        deNovoAmbigEdges: ambiguous edges forwarded to An.getPRMLadder and
            getAccAndPrecForModRefPeptide; None (the default) means an empty list.
        epsilon: tolerance forwarded to resolveInterval.

    Returns:
        (modList, newRefEndInds, alignment, acc, prec), or None when building the
        reference PRM ladder raises KeyError. modList maps each interval to
        (mod, deNovoSubSeq, refSubSeq); newRefEndInds holds the 'start'/'end'
        trimming offsets chosen for the reference.
    """
    # Avoid sharing one list object between calls (mutable default argument).
    if deNovoAmbigEdges is None:
        deNovoAmbigEdges = []

    if 'X' in refPep:
        # str.replace behaves the same on Python 2 and 3, unlike translate(None, ...).
        refPep = refPep.replace('X', '')

    # KLUDGE (from the original author): remove when this function is rewritten.

    alignment = getAlignment(deNovoUnmodPep, refPep, AAMap, scoreMatrix)
    alignedIndsMap = getAlignedIndsMap(alignment)

    # 1 where both aligned strings carry the same character, 0 otherwise.
    # NOTE(review): the variable name suggests the opposite encoding; confirm
    # what getConnectedDisagreementRegions expects.
    disagreeArr = [1 if alignment[0][i] == alignment[1][i] else 0 for i in range(len(alignment[0]))]
    intervals = getConnectedDisagreementRegions(disagreeArr)

    try:
        refPRMLadder = An.getPRMLadder(refPep)
    except KeyError:
        return None

    # A KeyError raised here is deliberately not caught and propagates to the caller.
    deNovoPRMLadder = An.getPRMLadder(deNovoPep, ambigEdges=deNovoAmbigEdges)

    modList = {}
    newRefEndInds = {'start': 0, 'end': 0}

    deNovoInds = alignedIndsMap['De Novo']
    refInds = alignedIndsMap['Ref']
    deNovoLength = len(deNovoUnmodPep)

    # rough check of whether or not intervals can be easily explained
    for interval in intervals:
        deNovoStart, deNovoEnd = deNovoInds[interval[0]], deNovoInds[interval[1]]
        refStart, refEnd = refInds[interval[0]], refInds[interval[1]]

        deNovoSubSeq = deNovoUnmodPep[deNovoStart:deNovoEnd]
        refSubSeq = refPep[refStart:refEnd]

        if deNovoStart == 0:
            term = 'N-term'
        elif deNovoEnd == deNovoLength:
            term = 'C-term'
        else:
            term = None

        if deNovoSubSeq != '' and refSubSeq != '':
            deNovoMass = deNovoPRMLadder[deNovoEnd] - deNovoPRMLadder[deNovoStart]
            if term is None:
                refMass = refPRMLadder[refEnd] - refPRMLadder[refStart]
                modList[interval] = resolveInterval(refMass, deNovoMass, refSubSeq, hashedUnimodDict, unimodDict, paramsDict, term=term, epsilon=epsilon), deNovoSubSeq, refSubSeq
            else:
                # Terminal interval: try trimming 0..len-1 reference residues from
                # the overhanging end and keep the best explanation. If no
                # candidate is accepted this 1-tuple placeholder is stored as is.
                minSizedMod = ((None, None, 10000000,),)
                for i in range(len(refSubSeq)):
                    if term == 'N-term':
                        refMass = refPRMLadder[refEnd] - refPRMLadder[refStart + i]
                        subRefSubSeq = refSubSeq[i:]
                    else:
                        refMass = refPRMLadder[refEnd - i] - refPRMLadder[refStart]
                        # refSubSeq[:-i] would be '' for i == 0 even though
                        # refMass then covers the whole interval.
                        subRefSubSeq = refSubSeq[:len(refSubSeq) - i]
                    mod = resolveInterval(refMass, deNovoMass, subRefSubSeq, hashedUnimodDict, unimodDict, paramsDict, term=term, epsilon=epsilon)
                    if 'TX' in deNovoUnmodPep:
                        # Debug trace kept from the original; single-argument
                        # print gives the same output on Python 2 and 3.
                        print(' '.join(str(item) for item in (deNovoSubSeq, refSubSeq, subRefSubSeq, mod)))
                    bestMod = minSizedMod[0]
                    if (abs(bestMod[2]) > abs(mod[2]) and (bestMod[0] is None or 'Isobaric' not in bestMod[0])) or 'Isobaric' in mod[0]:
                        if mod[1] is not None or bestMod[1] is None or ('Isobaric' in mod[0] and 'Isobaric' not in bestMod[0]):
                            minSizedMod = mod, deNovoSubSeq, subRefSubSeq
                            if term == 'N-term':
                                newRefEndInds['start'] = i
                            else:
                                newRefEndInds['end'] = -i
                modList[interval] = minSizedMod

        else:
            # Make sure that lack of sequence is due to overhang of reference peptide
            if deNovoEnd == 0:
                newRefEndInds['start'] = len(refSubSeq)
            elif deNovoStart == deNovoLength:
                newRefEndInds['end'] = -len(refSubSeq)
            elif term is None:
                if deNovoSubSeq == '':
                    refMass = refPRMLadder[refEnd] - refPRMLadder[refStart]
                    modList[interval] = ('Deletion', refMass, 0, -refMass), deNovoSubSeq, refSubSeq
                else:
                    deNovoMass = deNovoPRMLadder[deNovoEnd] - deNovoPRMLadder[deNovoStart]
                    modList[interval] = ('Insertion', deNovoMass, 0, deNovoMass), deNovoSubSeq, refSubSeq
            # Any other terminal interval with an empty side is left unannotated.

    acc, prec = getAccAndPrecForModRefPeptide(modList, newRefEndInds, deNovoPep, deNovoUnmodPep, refPep, alignedIndsMap, deNovoAmbigEdges)

    return modList, newRefEndInds, alignment, acc, prec

Exemplo n.º 2

0

Exibir arquivo

Arquivo: PostLADResultsAndWriteAccuracySVM.py Projeto: adevabhaktuni/LADS

def getSpectrumAndPSMFeatureDict(LADSSeqInfo, seqEntry, scanFDict, pairConfig, PNet):
    """Build one feature list per (scan, PSM) pair of a light/heavy scan cluster.

    seqEntry[0] and seqEntry[1] are the light and heavy scan identifiers;
    LADSSeqInfo[seqEntry] is the sequence of PSMs for that cluster, each indexed
    as PSM[0] (score), PSM[1] (sequence) and PSM[2] (ambiguous edges).

    Every feature list is the concatenation of cluster-level features,
    PSM-level features and spectrum-level features, in that order.

    Returns:
        dict keyed by (scan, PSM[:2]) whose values are the feature lists.
    """
    featureList = []
    lightScans, heavyScans = seqEntry[0], seqEntry[1]

    lightSpecs = [DataFile.getMassIntPairs(scanFDict[int(scanF)]['dta']) for scanF in lightScans]
    heavySpecs = [DataFile.getMassIntPairs(scanFDict[int(scanF)]['dta']) for scanF in heavyScans]
    lightPrecMasses = [scanFDict[scanF]['precMass'] for scanF in lightScans]
    avgLightPrecMass = np.average(np.array(lightPrecMasses))

    epSTD = options.ppmstd * 10**-6 * avgLightPrecMass
    specEpsilon = 2*epSTD

    # Light spectra first, then heavy spectra: later code indexes specs as
    # [light..., heavy...].
    specs = []
    for scanF, massIntPairs in zip(lightScans, lightSpecs):
        specs.append(PN.Spectrum(PNet, scanFDict[scanF]['precMass'], Nmod=0.0, Cmod=0.0, epsilon=specEpsilon, spectrum=massIntPairs))
    for scanF, massIntPairs in zip(heavyScans, heavySpecs):
        specs.append(PN.Spectrum(PNet, scanFDict[scanF]['precMass'], Nmod=pairConfig['NMod'], Cmod=pairConfig['CMod'], epsilon=specEpsilon, spectrum=massIntPairs))
    for spec in specs:
        spec.initializeNoiseModel()

    clusterPairingStats = Discriminator.getClusterPairingStats(lightSpecs, heavySpecs, avgLightPrecMass, pairConfig, epSTD=epSTD)
    GLFD.addClusterPairingStatsToFeatureList(clusterPairingStats, featureList)

    scoreStats, truePMs, prmLadders = {}, {}, {}
    for PSM in LADSSeqInfo[seqEntry]:
        psmKey, psmAmbigEdges = PSM[:2], PSM[2]
        lightSeq = An.preprocessSequence(PSM[1], seqMap, ambigEdges=psmAmbigEdges)
        scoreStats[psmKey] = Discriminator.getScoreStats(specs, lightSeq, ambigEdges=psmAmbigEdges)

        ladderWithEnds = An.getPRMLadder(lightSeq, ambigEdges=psmAmbigEdges, addEnds=True)
        # Last ladder entry is stored as the PSM's mass; the interior entries
        # (both ends dropped) are kept as its PRM ladder.
        truePMs[psmKey] = ladderWithEnds[-1]
        prmLadders[psmKey] = ladderWithEnds[1:-1]

    PSMList = scoreStats.keys()
    spectrumOrderedScoreStats, clusterScoreStats = GLFD.compileScoreStats(scoreStats, specs, PSMList)

    spectrumAndPSMSpecificFeatureDict = {}
    PSMIndexDict = dict((psmKey, idx) for idx, psmKey in enumerate(PSMList))

    def spectrumFeatures(baseFeatures, psmKey, peptLength, pairedPRMLadder, scan, specIdx, expectedPrecMass):
        """Return baseFeatures extended with the features of one spectrum."""
        psmIdx = PSMIndexDict[psmKey]
        specStats = spectrumOrderedScoreStats[specIdx]
        features = copy.copy(baseFeatures)

        # Path score (and normalized variants), delta rank, delta score,
        # number of negative PRMs, and minimum node score for the spectrum
        pathScore = specStats['Path Scores'][psmIdx]
        numNegativePRMs = specStats['Num Negative PRMs'][psmIdx]
        features += [pathScore, pathScore/peptLength, pathScore/scoreStats[psmKey]['Maximum Path Score'], -specStats['PSM Rankings'][psmIdx], specStats['Delta Scores'][psmIdx], numNegativePRMs, numNegativePRMs/float(peptLength-1), specStats['Min Node Scores'][psmIdx]]

        # Mass deviation from the expected precursor mass
        precMass = scanFDict[scan]['precMass']
        features += [abs(expectedPrecMass - precMass)]

        peakAnnotationMassOffsetStats = Discriminator.getPeakAnnotationAndMassOffsetStats(DataFile.getMassIntPairs(scanFDict[scan]['dta']), specs[specIdx], prmLadders[psmKey], pairedPRMLadder, PNet)
        GLFD.addPeakAnnotationStatsToFeatureList(PNet, peakAnnotationMassOffsetStats, features, peptLength)
        GLFD.addMassOffsetStatsToFeatureList(peakAnnotationMassOffsetStats, features)

        features += [precMass, GLFD.getChargeStateFromDTAFName(scanFDict[scan]['dta']), peptLength]
        return features

    for rank, PSM in enumerate(LADSSeqInfo[seqEntry]):
        psmKey = PSM[:2]
        psmStats = scoreStats[psmKey]
        psmIdx = PSMIndexDict[psmKey]
        ladder = prmLadders[psmKey]

        PSMSpecificFeatureList = copy.copy(featureList)
        peptLength = len(ladder) + 1

        # LADS PScore (and normalized variants), delta rank, delta score (LADS PScore)
        pScore = PSM[0]
        PSMSpecificFeatureList += [pScore, pScore/peptLength, pScore/len(specs), -rank, pScore-LADSSeqInfo[seqEntry][0][0]]

        # Total path score (and normalized variants), delta rank, delta score
        # (total path score) and total minimum node score
        totalPathScore = psmStats['Total Path Score']
        PSMSpecificFeatureList += [totalPathScore, totalPathScore/peptLength, totalPathScore/len(specs), -clusterScoreStats['PSM Rankings'][psmIdx], totalPathScore-clusterScoreStats['Max Cluster Path Score'], psmStats['Total Minimum Node Score']]

        # Minimum / maximum path score (and normalized variants) and their ratio
        minPathScore = psmStats['Minimum Path Score']
        maxPathScore = psmStats['Maximum Path Score']
        PSMSpecificFeatureList += [minPathScore, minPathScore/peptLength, maxPathScore, maxPathScore/peptLength, minPathScore/maxPathScore]

        # Difference between minimum and maximum ranking of the PSM across the cluster
        rankingsForPSM = [spectrumOrderedScoreStats[specIdx]['PSM Rankings'][psmIdx] for specIdx in spectrumOrderedScoreStats]
        PSMSpecificFeatureList += [min(rankingsForPSM) - max(rankingsForPSM)]

        # Number of forbidden node pairs (and normalized variant)
        numForbiddenPairs = Discriminator.getNumForbiddenPairs(ladder, avgLightPrecMass)
        PSMSpecificFeatureList += [numForbiddenPairs, 2.0*numForbiddenPairs/(peptLength-1)]

        # Number of ambiguous edges
        PSMSpecificFeatureList += [len(PSM[2])]

        # PRM evidence over the cluster: each statistic followed by its normalized variant
        prmEvidence = psmStats['Aggregate PRM Score Statistics']
        evidenceFeatures = []
        for evidenceKey in ('All Evidence', 'Majority Evidence', 'None Evidence'):
            evidenceFeatures += [prmEvidence[evidenceKey], prmEvidence[evidenceKey]/float(peptLength-1)]
        PSMSpecificFeatureList += evidenceFeatures

        # Stats for paired PRMs and their corresponding ion types
        pairedPRMStats = Discriminator.getPairedPRMStats(ladder, clusterPairingStats['Light Merged Spec'], clusterPairingStats['Heavy Merged Spec'], lightSpecs, heavySpecs, clusterPairingStats['Cluster Paired PRM Information'], epSTD=epSTD)
        GLFD.addPairedPRMStatsToFeatureList(pairedPRMStats, PSMSpecificFeatureList, len(ladder))

        pairedPRMLadder = pairedPRMStats['Paired PRM Ladder']

        for specIdx, scan in enumerate(lightScans):
            expectedPrecMass = truePMs[psmKey] + Constants.mods['H2O'] + Constants.mods['H+']
            spectrumAndPSMSpecificFeatureDict[(scan, psmKey)] = spectrumFeatures(PSMSpecificFeatureList, psmKey, peptLength, pairedPRMLadder, scan, specIdx, expectedPrecMass)

        # Heavy spectra follow the light ones in specs, hence the index offset;
        # their expected mass additionally includes the pair's N/C-term offsets.
        for specIdx, scan in enumerate(heavyScans, len(lightScans)):
            expectedPrecMass = truePMs[psmKey] + pairConfig['NMod'] + pairConfig['CMod'] + Constants.mods['H2O'] + Constants.mods['H+']
            spectrumAndPSMSpecificFeatureDict[(scan, psmKey)] = spectrumFeatures(PSMSpecificFeatureList, psmKey, peptLength, pairedPRMLadder, scan, specIdx, expectedPrecMass)

    return spectrumAndPSMSpecificFeatureDict

Exemplo n.º 3

0

Exibir arquivo

Arquivo: AlignDeNovoSequenceToReferenceNoSpectrumXDFGet.py Projeto: adevabhaktuni/LADS

def alignDeNovoToDBSequence(deNovoPeptWithMods, deNovoPept, dbPept, hashedUnimodDict, unimodDict, paramsDict, deNovoAmbigEdges = None, tagLength=2, isobaricPenalty=-0.5, defModPenalty=-1, inDelPenalty=-2, undefModPenalty=-3, defaultScore=0):
    """Align a de novo peptide to a database peptide through a graph of shared tags.

    Tags shared by the two sequences form a graph; nodes are visited in
    topological order and every edge (prevTag -> tag) is scored as the tag's
    node score plus a penalty that depends on how resolveInterval explains the
    gap between the two tags. The best-scoring tag whose position is 'end' is
    handed to getBestAlignment.

    Parameters:
        deNovoPeptWithMods: de novo peptide used to build the de novo PRM ladder.
        deNovoPept: de novo peptide used for tag generation and slicing.
        dbPept: database peptide.
        hashedUnimodDict, unimodDict, paramsDict: passed through to resolveInterval.
        deNovoAmbigEdges: ambiguous edges forwarded to An.getPRMLadder.
        tagLength: forwarded to generateSequenceTags.
        isobaricPenalty, defModPenalty, inDelPenalty, undefModPenalty: penalty
            applied to an edge according to the type of its modification(s).
        defaultScore: score given to a predecessor node that has none yet.

    Returns:
        The result of getBestAlignment, or (None, None, None) when no 'end'
        tag was reached.
    """
    deNovoPRMLadder = An.getPRMLadder(deNovoPeptWithMods, ambigEdges = deNovoAmbigEdges, addEnds=True)

    # Progress trace kept from the original; single-argument print yields the
    # same output on Python 2 and 3.
    print('De Novo %s' % (deNovoPept,))
    print('DB %s' % (dbPept,))

    dbPRMLadder = An.getPRMLadder(dbPept, addEnds=True)

    startTags, endTags = generateStartAndEndTags(deNovoPept, dbPept)
    sequenceTags = generateSequenceTags(deNovoPept, dbPept, tagLength=tagLength)

    tagGraph = getSequenceTagGraph(startTags, endTags, sequenceTags)

    maxScore = None
    maxScoringTag = None

    for tag in nx.topological_sort(tagGraph):
        tagNode = tagGraph.node[tag]
        nodeScore = tag[0][1] - tag[0][0]
        for prevTag in tagGraph.predecessors(tag):
            prevNode = tagGraph.node[prevTag]
            nModSymbol = None
            # Define terminus of peptide for modification annotation
            if prevNode['position'] == 'start':
                term = 'N-term'
            elif tagNode['position'] == 'end':
                term = 'C-term'
            else:
                term = None

            # Mass and sequence of the gap between the two tags, on both peptides
            refMass = dbPRMLadder[tag[1][0]] - dbPRMLadder[prevTag[1][1]]
            deNovoMass = deNovoPRMLadder[tag[0][0]] - deNovoPRMLadder[prevTag[0][1]]
            refSubSeq = dbPept[prevTag[1][1]:tag[1][0]]
            deNovoSubSeq = deNovoPept[prevTag[0][1]:tag[0][0]]

            mods = resolveInterval(refMass, deNovoMass, refSubSeq, deNovoSubSeq, hashedUnimodDict, unimodDict, paramsDict, term=term, nModSymbol=nModSymbol)

            # No modification costs nothing; otherwise start from the default
            # penalty and let the last special-cased modification type win.
            modPenalty = defModPenalty if mods else 0
            for mod in mods:
                if 'Isobaric Substitution' == mod[0]:
                    modPenalty = isobaricPenalty
                elif 'Insertion' == mod[0] or 'Deletion' == mod[0]:
                    modPenalty = inDelPenalty
                elif 'Undefined Mass Shift' == mod[0]:
                    modPenalty = undefModPenalty

            edgeData = tagGraph.edge[prevTag][tag]
            edgeData['edgeScore'] = nodeScore + modPenalty
            edgeData['mods'] = mods

            print(' '.join(str(item) for item in (prevTag, tag, deNovoSubSeq, refSubSeq, mods)))

            if 'score' not in prevNode:
                prevNode['score'] = defaultScore

            # Keep the best score over all predecessors seen so far.
            candidateScore = prevNode['score'] + nodeScore + modPenalty
            if 'score' in tagNode:
                tagNode['score'] = max(tagNode['score'], candidateScore)
            else:
                tagNode['score'] = candidateScore

            # Explicit None check: ordering None against a number only works
            # on Python 2 and raises TypeError on Python 3.
            if tagNode['position'] == 'end' and (maxScore is None or tagNode['score'] > maxScore):
                maxScore = tagNode['score']
                maxScoringTag = tag

    if maxScoringTag is None:
        return None, None, None
    return getBestAlignment(tagGraph, dbPept, maxScore, maxScoringTag)

Exemplo n.º 4

0

Exibir arquivo

Arquivo: AlignDeNovoSequenceToReferenceNoSpectrumXDFGet.py Projeto: adevabhaktuni/LADS

    outFile.write('\t'.join([col for col in cols]) + '\n')

    for entry in DataFile.getScanInfo(options.comp, delimiter='\t'):
        scanData = {}
        scanData['ScanF'] = entry[infoDict['ScanF']]
        scanData['Peptide'] = entry[infoDict['Peptide']]
        scanData['Unmod Peptide'] = An.stripModifications(scanData['Peptide'], noRemove=[])
        scanData['Score'] = entry[infoDict['Score']]
        scanData['Alignment Score'] = None
        
        if 'Ambig Edges' in infoDict:
            ambigEdges = eval(entry[infoDict['Ambig Edges']])
            scanData['Ambig Edges'] = ambigEdges
        else:
            ambigEdges = []
        deNovoPRMLadder = An.getPRMLadder(scanData['Peptide'], ambigEdges=ambigEdges)
                
        refList = eval(entry[infoDict['References']])
        subjSequence = getSequence(options.fasta, refList[0][0])[refList[0][1]-1:refList[0][2]]

        if scanData['Unmod Peptide'] == subjSequence:
            scanData['Modifications'] = []
            
            refSeqDict = {}
            for reference in refList:
                protSeq = getSequence(options.fasta, reference[0])

                # Sequence isn't found: can happen if there are duplicate identifiers in a fasta database and wrong sequence is retreived
                try:
                    refSeqDict[reference] = getReferenceSequence(protSeq, subjSequence, start= [reference[1]-1], end= [reference[2]])
                except IndexError:

Exemplo n.º 5

0

Exibir arquivo

Arquivo: GenerateSVMFormDiscriminantTrainingData.py Projeto: adevabhaktuni/LADS

            for i, massIntPairs in enumerate(heavySpecs):
                specs += [PN.Spectrum(PNet, scanFDict[heavyScans[i]]['precMass'], Nmod=pairConfig['NMod'], Cmod=pairConfig['CMod'], epsilon=2*epSTD, spectrum=massIntPairs)]
            for spec in specs:
                spec.initializeNoiseModel()

            clusterPairingStats = Discriminator.getClusterPairingStats(lightSpecs, heavySpecs, avgLightPrecMass, pairConfig, epSTD=epSTD)
            addClusterPairingStatsToFeatureList(clusterPairingStats, featureList)
            
            scoreStats = {}
            truePMs = {}
            prmLadders = {}
            for PSM in LADSSeqInfo[seqEntry]:
                lightSeq = An.preprocessSequence(PSM[1], seqMap, ambigEdges=PSM[2])
                scoreStats[PSM[:2]] = Discriminator.getScoreStats(specs, lightSeq, ambigEdges=PSM[2])

                prmLadderWithEnds = An.getPRMLadder(lightSeq, ambigEdges=PSM[2], addEnds=True)
                truePMs[PSM[:2]] = prmLadderWithEnds[-1]
                prmLadders[PSM[:2]] = prmLadderWithEnds[1:-1]
            
            PSMList = scoreStats.keys()
            spectrumOrderedScoreStats, clusterScoreStats = compileScoreStats(scoreStats, specs, PSMList)

            PSMIndexDict = dict([(PSM, i) for i, PSM in enumerate(PSMList)])
            for i, PSM in enumerate(LADSSeqInfo[seqEntry]):
                PSMSpecificFeatureList = copy.copy(featureList)
                lightSeq = An.preprocessSequence(PSM[1], seqMap, ambigEdges=PSM[2])
                heavySeq = An.preprocessSequence(PSM[1], heavySeqMaps['silac_light_heavy'], replaceExistingTerminalMods=True, ambigEdges=PSM[2])
                
                peptLength = len(prmLadders[PSM[:2]]) + 1

                # Add LADS PScore (and normalized variants)  and delta rank, delta score (LADS PScore) to feature list

Exemplo n.º 6

0

Exibir arquivo

Arquivo: AlignDeNovoSequenceToReference.py Projeto: adevabhaktuni/LADS

def alignDeNovoToDBSequence(deNovoPeptWithMods, deNovoPept, dbPept, spec, hashedUnimodDict, unimodDict, paramsDict, deNovoAmbigEdges = None, tagLength=2, isobaricPenalty=-0.5, defModPenalty=-1, inDelPenalty=-2, undefModPenalty=-3, defaultScore=0):
    """Align a de novo peptide to a database peptide, scoring tags against a spectrum.

    Tags shared by the two sequences form a graph; nodes are visited in
    topological order. Each edge (prevTag -> tag) is scored as
    nodeScore + modScore + modPenalty, where nodeScore comes from
    getScoreFromPRMs for 'internal' tags (0 otherwise) and modScore /
    modPenalty depend on how resolveInterval explains the gap between the two
    tags. The best-scoring tag whose position is 'end' is handed to
    getBestAlignment.

    Parameters:
        deNovoPeptWithMods: de novo peptide used to build the de novo PRM ladder.
        deNovoPept: de novo peptide used for tag generation and slicing.
        dbPept: database peptide.
        spec: spectrum object forwarded to getScoreFromPRMs / getTagScore.
        hashedUnimodDict, unimodDict, paramsDict: passed through to resolveInterval.
        deNovoAmbigEdges: ambiguous edges forwarded to An.getPRMLadder.
        tagLength: forwarded to generateSequenceTags.
        isobaricPenalty, defModPenalty, inDelPenalty, undefModPenalty: penalty
            applied to an edge according to the type of its first modification.
        defaultScore: score given to a predecessor node that has none yet.

    Returns:
        The result of getBestAlignment, or (None, None, None) when no 'end'
        tag was reached.
    """
    deNovoPRMLadder = An.getPRMLadder(deNovoPeptWithMods, ambigEdges = deNovoAmbigEdges, addEnds=True)

    dbPRMLadder = An.getPRMLadder(dbPept, addEnds=True)

    startTags, endTags = generateStartAndEndTags(deNovoPept, dbPept)
    sequenceTags = generateSequenceTags(deNovoPept, dbPept, tagLength=tagLength)

    tagGraph = getSequenceTagGraph(startTags, endTags, sequenceTags)

    maxScore = None
    maxScoringTag = None

    for tag in nx.topological_sort(tagGraph):
        tagNode = tagGraph.node[tag]
        if tagNode['position'] == 'internal':
            nodeScore = getScoreFromPRMs(spec, deNovoPRMLadder[tag[0][0]:tag[0][1]+1], deNovoTerm = getDeNovoTerm(tag, len(deNovoPept)))
        else:
            nodeScore = 0

        for prevTag in tagGraph.predecessors(tag):
            prevNode = tagGraph.node[prevTag]
            nModSymbol = None
            # Define terminus of peptide for modification annotation
            if prevNode['position'] == 'start':
                term = 'N-term'
            elif tagNode['position'] == 'end':
                term = 'C-term'
            else:
                term = None

            # Mass and sequence of the gap between the two tags, on both peptides
            gapStartMass = deNovoPRMLadder[prevTag[0][1]]
            refMass = dbPRMLadder[tag[1][0]] - dbPRMLadder[prevTag[1][1]]
            deNovoMass = deNovoPRMLadder[tag[0][0]] - gapStartMass
            refSubSeq = dbPept[prevTag[1][1]:tag[1][0]]
            deNovoSubSeq = deNovoPept[prevTag[0][1]:tag[0][0]]

            mods = resolveInterval(refMass, deNovoMass, refSubSeq, deNovoSubSeq, hashedUnimodDict, unimodDict, paramsDict, term=term, nModSymbol=nModSymbol)

            modPenalty = 0
            modScore = 0

            if len(mods) > 0:
                # Only the type of the first modification selects the branch.
                modType = mods[0][0]
                if 'Isobaric Substitution' == modType:
                    modPenalty = isobaricPenalty
                    modScore = getTagScore(spec, refSubSeq, startMass= gapStartMass, deNovoTerm = None, addTerminalNodes=False, verbose=True)
                    # Debug trace kept from the original; single-argument print
                    # yields the same output on Python 2 and 3.
                    print('%s %s' % (modScore, refSubSeq))
                elif 'Insertion' == modType:
                    modPenalty = inDelPenalty
                    modScore = getScoreFromPRMs(spec, deNovoPRMLadder[prevTag[0][1]:tag[0][0]+1], deNovoTerm = None, addTerminalNodes=False)

                elif 'Deletion' == modType:
                    # NOTE(review): the penalty scales with the de novo gap
                    # length; if a deletion means the de novo gap is empty this
                    # is always 0 -- confirm whether len(refSubSeq) was intended.
                    modPenalty = inDelPenalty * len(deNovoSubSeq)

                elif 'Undefined Mass Shift' == modType:
                    modPenalty = undefModPenalty

                    # Score every candidate placement of the mass shift and keep the best.
                    modPepts = getModPeptides(mods[0], refSubSeq, term, unimodDict)
                    modScores = []
                    for pept in modPepts:
                        modScores += [(getTagScore(spec, pept[0], startMass= gapStartMass, ambigEdges=pept[1], deNovoTerm = None, addTerminalNodes=False), pept)]

                    # NOTE(review): max() raises ValueError if getModPeptides
                    # returns no candidates -- confirm that cannot happen.
                    modScore, modPept = max(modScores)
                    mods = (mods[0][:-1] + (modPept[0],),)
                else:
                    modPenalty = defModPenalty

                    # Score every candidate peptide of every proposed
                    # modification and keep the best (modification, peptide) pair.
                    modScores = []
                    for modData in mods:
                        modPepts = getModPeptides(modData, refSubSeq, term, unimodDict)
                        for pept in modPepts:
                            modScores += [(getTagScore(spec, pept[0], startMass= gapStartMass, ambigEdges=pept[1], deNovoTerm = None, addTerminalNodes=False), (modData, pept))]
                    # NOTE(review): same empty-candidate caveat as above.
                    modScore, modPept = max(modScores)
                    mods = (modPept[0][:-1] + (modPept[1][0],),)

            edgeScore = nodeScore + modScore + modPenalty
            edgeData = tagGraph.edge[prevTag][tag]
            edgeData['edgeScore'] = edgeScore
            edgeData['mods'] = mods

            if 'score' not in prevNode:
                prevNode['score'] = defaultScore

            # Keep the best score over all predecessors seen so far.
            candidateScore = prevNode['score'] + edgeScore
            if 'score' in tagNode:
                tagNode['score'] = max(tagNode['score'], candidateScore)
            else:
                tagNode['score'] = candidateScore

            # Explicit None check: ordering None against a number only works
            # on Python 2 and raises TypeError on Python 3.
            if tagNode['position'] == 'end' and (maxScore is None or tagNode['score'] > maxScore):
                maxScore = tagNode['score']
                maxScoringTag = tag

    if maxScoringTag is None:
        return None, None, None
    return getBestAlignment(tagGraph, dbPept, maxScore, maxScoringTag)

Exemplo n.º 7

0

Exibir arquivo

Arquivo: AlignDeNovoSequenceToReference.py Projeto: adevabhaktuni/LADS

        scanData['Peptide'] = entry[infoDict['Peptide']]
        scanData['Unmod Peptide'] = An.stripModifications(scanData['Peptide'])
        scanData['Score'] = entry[infoDict['Score']]
        scanData['Alignment Score'] = None
        if 'Ambig Edges' in infoDict:
            ambigEdges = eval(entry[infoDict['Ambig Edges']])
            scanData['Ambig Edges'] = ambigEdges
        else:
            ambigEdges = []

        massIntPairs = DataFile.getMassIntPairs(scanFDict[int(scanData['ScanF'])]['dta'])
        spec = PN.Spectrum(PNet, precMass, epsilon=2*epSTD, spectrum=massIntPairs)

        try:
            #Ignore de novo peptides with noncanonical amino acids for now
            epsilon = 2 * 10**-6 * options.ppmstd * An.getPRMLadder(scanData['Peptide'], ambigEdges=ambigEdges)[-1]
        except KeyError:
            continue
        
        refList = eval(entry[infoDict['References']])
        subjSequence = seqDict[refList[0][0]][refList[0][1]-1:refList[0][2]]

        if scanData['Unmod Peptide'] == subjSequence:
            scanData['Modifications'] = []
            
            refSeqDict = {}
            for reference in refList:
                refSeqDict[reference] = getReferenceSequence(seqDict[reference[0]], subjSequence, start=reference[1]-1, end=reference[2])

            scanData['DB Peptide'] = refSeqDict
            scanData['References'] = [ref[0] for ref in refList]