def compareSequences(deNovoPep, deNovoUnmodPep, refPep, hashedUnimodDict, unimodDict, paramsDict, deNovoAmbigEdges=None, epsilon=0.02):
    """Align a de novo peptide to a reference peptide and annotate where they differ.

    The unmodified de novo sequence is aligned to refPep with getAlignment.
    Each interval returned by getConnectedDisagreementRegions is then
    compared by mass, using the PRM ladders of both peptides, and recorded
    in a dict keyed by interval.

    Parameters:
        deNovoPep: de novo peptide, passed to An.getPRMLadder together with
            deNovoAmbigEdges.
        deNovoUnmodPep: de novo peptide used for the alignment and for
            slicing sub-sequences.
        refPep: reference peptide; any 'X' characters are removed first.
        hashedUnimodDict, unimodDict, paramsDict: forwarded unchanged to
            resolveInterval.
        deNovoAmbigEdges: ambiguous edges of the de novo peptide; None
            (the default) is treated as an empty list.
        epsilon: tolerance forwarded to resolveInterval.

    Returns:
        None if An.getPRMLadder(refPep) raises KeyError, otherwise the tuple
        (modList, newRefEndInds, alignment, acc, prec), where
        modList maps interval -> (mod, deNovoSubSeq, refSubSeq) and
        newRefEndInds holds 'start'/'end' offsets by which the reference
        is trimmed at its termini.
    """
    # Avoid sharing one list object between calls (mutable default argument).
    if deNovoAmbigEdges is None:
        deNovoAmbigEdges = []

    # KLUDGE: REMOVE WHEN REWRITE
    # str.replace works for byte and unicode strings alike, unlike the
    # Python 2-only str.translate(None, 'X') form.
    if 'X' in refPep:
        refPep = refPep.replace('X', '')

    alignment = getAlignment(deNovoUnmodPep, refPep, AAMap, scoreMatrix)
    alignedIndsMap = getAlignedIndsMap(alignment)
    # NOTE(review): despite its name this array holds 1 where the two aligned
    # rows AGREE and 0 where they differ - confirm that this is the encoding
    # getConnectedDisagreementRegions expects.
    disagreeArr = [1 if deNovoChar == refChar else 0
                   for deNovoChar, refChar in zip(alignment[0], alignment[1])]
    intervals = getConnectedDisagreementRegions(disagreeArr)

    try:
        refPRMLadder = An.getPRMLadder(refPep)
    except KeyError:
        return None
    deNovoPRMLadder = An.getPRMLadder(deNovoPep, ambigEdges=deNovoAmbigEdges)

    modList = {}
    newRefEndInds = {'start': 0, 'end': 0}

    # rough check of whether or not intervals can be easily explained
    for interval in intervals:
        deNovoStart = alignedIndsMap['De Novo'][interval[0]]
        deNovoEnd = alignedIndsMap['De Novo'][interval[1]]
        refStart = alignedIndsMap['Ref'][interval[0]]
        refEnd = alignedIndsMap['Ref'][interval[1]]

        deNovoSubSeq = deNovoUnmodPep[deNovoStart:deNovoEnd]
        refSubSeq = refPep[refStart:refEnd]

        if deNovoStart == 0:
            term = 'N-term'
        elif deNovoEnd == len(deNovoUnmodPep):
            term = 'C-term'
        else:
            term = None

        if deNovoSubSeq != '' and refSubSeq != '':
            deNovoMass = deNovoPRMLadder[deNovoEnd] - deNovoPRMLadder[deNovoStart]
            if term is None:
                refMass = refPRMLadder[refEnd] - refPRMLadder[refStart]
                modList[interval] = resolveInterval(refMass, deNovoMass, refSubSeq, hashedUnimodDict, unimodDict, paramsDict, term=term, epsilon=epsilon), deNovoSubSeq, refSubSeq
            else:
                # Terminal interval: try dropping i residues from the outer
                # end of the reference and keep the preferred explanation.
                minSizedMod = ((None, None, 10000000,),)
                for i in range(len(refSubSeq)):
                    if term == 'N-term':
                        refMass = refPRMLadder[refEnd] - refPRMLadder[refStart + i]
                        subRefSubSeq = refSubSeq[i:]
                    else:
                        refMass = refPRMLadder[refEnd - i] - refPRMLadder[refStart]
                        # refSubSeq[:-i] would be '' for i == 0; slice by
                        # explicit length so the untrimmed case keeps the
                        # whole sub-sequence that refMass was computed for.
                        subRefSubSeq = refSubSeq[:len(refSubSeq) - i]

                    mod = resolveInterval(refMass, deNovoMass, subRefSubSeq, hashedUnimodDict, unimodDict, paramsDict, term=term, epsilon=epsilon)
                    if 'TX' in deNovoUnmodPep:
                        print(' '.join(str(item) for item in (deNovoSubSeq, refSubSeq, subRefSubSeq, mod)))

                    best = minSizedMod[0]
                    # presumably mod[0] is a label string and mod[2] a mass
                    # error - TODO confirm against resolveInterval
                    smallerThanBest = abs(best[2]) > abs(mod[2]) and (best[0] is None or 'Isobaric' not in best[0])
                    if smallerThanBest or 'Isobaric' in mod[0]:
                        if mod[1] is not None or best[1] is None or ('Isobaric' in mod[0] and 'Isobaric' not in best[0]):
                            minSizedMod = mod, deNovoSubSeq, subRefSubSeq
                            if term == 'N-term':
                                newRefEndInds['start'] = i
                            else:
                                newRefEndInds['end'] = -i

                modList[interval] = minSizedMod
        else:
            # Make sure that lack of sequence is due to overhang of reference peptide
            if deNovoEnd == 0:
                newRefEndInds['start'] = len(refSubSeq)
            elif deNovoStart == len(deNovoUnmodPep):
                newRefEndInds['end'] = -len(refSubSeq)
            elif term is None:
                if deNovoSubSeq == '':
                    refMass = refPRMLadder[refEnd] - refPRMLadder[refStart]
                    modList[interval] = ('Deletion', refMass, 0, -refMass), deNovoSubSeq, refSubSeq
                else:
                    deNovoMass = deNovoPRMLadder[deNovoEnd] - deNovoPRMLadder[deNovoStart]
                    modList[interval] = ('Insertion', deNovoMass, 0, deNovoMass), deNovoSubSeq, refSubSeq
            # Any other terminal gap is left unannotated.

    acc, prec = getAccAndPrecForModRefPeptide(modList, newRefEndInds, deNovoPep, deNovoUnmodPep, refPep, alignedIndsMap, deNovoAmbigEdges)
    return modList, newRefEndInds, alignment, acc, prec
def getSpectrumAndPSMFeatureDict(LADSSeqInfo, seqEntry, scanFDict, pairConfig, PNet):
    """Build one feature list per (scan, PSM) pair of a light/heavy cluster.

    seqEntry is indexed as (lightScans, heavyScans) and is also the key
    under which the PSMs of the cluster are stored in LADSSeqInfo. Each PSM
    is indexed as PSM[0] (LADS PScore), PSM[1] (sequence) and PSM[2]
    (ambiguous edges).

    Returns a dict mapping (scan, PSM[:2]) to a flat feature list made of
    cluster-level, PSM-level and spectrum-level features, in that order.
    """
    lightScans, heavyScans = seqEntry[0], seqEntry[1]

    lightSpecs = [DataFile.getMassIntPairs(scanFDict[int(scanF)]['dta']) for scanF in lightScans]
    heavySpecs = [DataFile.getMassIntPairs(scanFDict[int(scanF)]['dta']) for scanF in heavyScans]

    lightPrecMasses = [scanFDict[scanF]['precMass'] for scanF in lightScans]
    avgLightPrecMass = np.average(np.array(lightPrecMasses))
    epSTD = options.ppmstd * 10**-6 * avgLightPrecMass

    # Light spectra first, then heavy ones; spectrum index i below refers to
    # a position in this combined list.
    specs = [PN.Spectrum(PNet, scanFDict[scanF]['precMass'], Nmod=0.0, Cmod=0.0, epsilon=2*epSTD, spectrum=pairs)
             for scanF, pairs in zip(lightScans, lightSpecs)]
    specs.extend(PN.Spectrum(PNet, scanFDict[scanF]['precMass'], Nmod=pairConfig['NMod'], Cmod=pairConfig['CMod'], epsilon=2*epSTD, spectrum=pairs)
                 for scanF, pairs in zip(heavyScans, heavySpecs))
    for spectrum in specs:
        spectrum.initializeNoiseModel()

    # Cluster-level features shared by every PSM and spectrum.
    featureList = []
    clusterPairingStats = Discriminator.getClusterPairingStats(lightSpecs, heavySpecs, avgLightPrecMass, pairConfig, epSTD=epSTD)
    GLFD.addClusterPairingStatsToFeatureList(clusterPairingStats, featureList)

    psmEntries = LADSSeqInfo[seqEntry]

    scoreStats = {}
    truePMs = {}
    prmLadders = {}
    for psm in psmEntries:
        psmKey = psm[:2]
        lightSeq = An.preprocessSequence(psm[1], seqMap, ambigEdges=psm[2])
        scoreStats[psmKey] = Discriminator.getScoreStats(specs, lightSeq, ambigEdges=psm[2])
        ladderWithEnds = An.getPRMLadder(lightSeq, ambigEdges=psm[2], addEnds=True)
        truePMs[psmKey] = ladderWithEnds[-1]
        prmLadders[psmKey] = ladderWithEnds[1:-1]

    PSMList = scoreStats.keys()
    spectrumOrderedScoreStats, clusterScoreStats = GLFD.compileScoreStats(scoreStats, specs, PSMList)
    PSMIndexDict = {psmKey: position for position, psmKey in enumerate(PSMList)}

    # (index into specs, scan, isHeavy) for every spectrum of the cluster.
    numLight = len(lightScans)
    spectrumSlots = [(offset, scan, False) for offset, scan in enumerate(lightScans)]
    spectrumSlots += [(numLight + offset, scan, True) for offset, scan in enumerate(heavyScans)]

    spectrumAndPSMSpecificFeatureDict = {}
    for rank, psm in enumerate(psmEntries):
        psmKey = psm[:2]
        psmStats = scoreStats[psmKey]
        psmLadder = prmLadders[psmKey]
        psmFeatures = list(featureList)
        peptLength = len(psmLadder) + 1
        numLadderSteps = peptLength - 1

        # LADS PScore (and normalized variants), delta rank, delta score
        pScore = psm[0]
        psmFeatures += [pScore, pScore/peptLength, pScore/len(specs), -rank, pScore-psmEntries[0][0]]

        # Total path score (and normalized variants), delta rank, delta score
        # and total minimum node score
        psmIndex = PSMIndexDict[psmKey]
        totalPathScore = psmStats['Total Path Score']
        psmFeatures += [totalPathScore,
                        totalPathScore/peptLength,
                        totalPathScore/len(specs),
                        -clusterScoreStats['PSM Rankings'][psmIndex],
                        totalPathScore-clusterScoreStats['Max Cluster Path Score'],
                        psmStats['Total Minimum Node Score']]

        # Minimum / maximum path score (and normalized variants) and their ratio
        psmFeatures += [psmStats['Minimum Path Score'],
                        psmStats['Minimum Path Score']/peptLength,
                        psmStats['Maximum Path Score'],
                        psmStats['Maximum Path Score']/peptLength,
                        psmStats['Minimum Path Score']/psmStats['Maximum Path Score']]

        # Spread of this PSM's ranking across the spectra of the cluster
        rankingsForPSM = [spectrumOrderedScoreStats[specKey]['PSM Rankings'][psmIndex] for specKey in spectrumOrderedScoreStats]
        psmFeatures += [min(rankingsForPSM) - max(rankingsForPSM)]

        # Number of forbidden node pairs (and normalized variant)
        numForbiddenPairs = Discriminator.getNumForbiddenPairs(psmLadder, avgLightPrecMass)
        psmFeatures += [numForbiddenPairs, 2.0*numForbiddenPairs/numLadderSteps]

        # Number of ambiguous edges
        psmFeatures += [len(psm[2])]

        # PRM evidence over the cluster (and normalized variants)
        evidence = psmStats['Aggregate PRM Score Statistics']
        psmFeatures += [evidence['All Evidence'],
                        evidence['All Evidence']/float(numLadderSteps),
                        evidence['Majority Evidence'],
                        evidence['Majority Evidence']/float(numLadderSteps),
                        evidence['None Evidence'],
                        evidence['None Evidence']/float(numLadderSteps)]

        # Paired PRMs and their corresponding ion types
        pairedPRMStats = Discriminator.getPairedPRMStats(psmLadder, clusterPairingStats['Light Merged Spec'], clusterPairingStats['Heavy Merged Spec'], lightSpecs, heavySpecs, clusterPairingStats['Cluster Paired PRM Information'], epSTD=epSTD)
        GLFD.addPairedPRMStatsToFeatureList(pairedPRMStats, psmFeatures, len(psmLadder))
        pairedPRMLadder = pairedPRMStats['Paired PRM Ladder']

        for specIndex, scan, isHeavy in spectrumSlots:
            perSpectrumStats = spectrumOrderedScoreStats[specIndex]
            spectrumFeatures = list(psmFeatures)

            # Path score (and normalized variants), delta rank, delta score,
            # number of negative PRMs and minimum node score for this spectrum
            pathScore = perSpectrumStats['Path Scores'][psmIndex]
            numNegativePRMs = perSpectrumStats['Num Negative PRMs'][psmIndex]
            spectrumFeatures += [pathScore,
                                 pathScore/peptLength,
                                 pathScore/psmStats['Maximum Path Score'],
                                 -perSpectrumStats['PSM Rankings'][psmIndex],
                                 perSpectrumStats['Delta Scores'][psmIndex],
                                 numNegativePRMs,
                                 numNegativePRMs/float(numLadderSteps),
                                 perSpectrumStats['Min Node Scores'][psmIndex]]

            # Mass deviation from the expected precursor mass; heavy spectra
            # additionally carry the pair's terminal modification masses.
            precMass = scanFDict[scan]['precMass']
            if isHeavy:
                peptideMass = truePMs[psmKey] + pairConfig['NMod'] + pairConfig['CMod']
            else:
                peptideMass = truePMs[psmKey]
            spectrumFeatures += [abs(peptideMass + Constants.mods['H2O'] + Constants.mods['H+'] - precMass)]

            peakAnnotationMassOffsetStats = Discriminator.getPeakAnnotationAndMassOffsetStats(DataFile.getMassIntPairs(scanFDict[scan]['dta']), specs[specIndex], psmLadder, pairedPRMLadder, PNet)
            GLFD.addPeakAnnotationStatsToFeatureList(PNet, peakAnnotationMassOffsetStats, spectrumFeatures, peptLength)
            GLFD.addMassOffsetStatsToFeatureList(peakAnnotationMassOffsetStats, spectrumFeatures)

            spectrumFeatures += [precMass, GLFD.getChargeStateFromDTAFName(scanFDict[scan]['dta']), peptLength]
            spectrumAndPSMSpecificFeatureDict[(scan, psmKey)] = spectrumFeatures

    return spectrumAndPSMSpecificFeatureDict
def alignDeNovoToDBSequence(deNovoPeptWithMods, deNovoPept, dbPept, hashedUnimodDict, unimodDict, paramsDict, deNovoAmbigEdges = None, tagLength=2, isobaricPenalty=-0.5, defModPenalty=-1, inDelPenalty=-2, undefModPenalty=-3, defaultScore=0):
    """Align a de novo peptide to a database peptide through a graph of shared sequence tags.

    Start, end and internal sequence tags shared by deNovoPept and dbPept
    are linked in a graph by getSequenceTagGraph. The graph is traversed in
    topological order; each edge is scored as the length of the tag it
    leads to plus a penalty chosen from the modifications that
    resolveInterval reports for the gap between the two tags. Every node
    keeps the best cumulative score over its incoming edges.

    Parameters:
        deNovoPeptWithMods: de novo peptide used to build the PRM ladder.
        deNovoPept: de novo peptide used for tag generation and slicing.
        dbPept: database peptide.
        hashedUnimodDict, unimodDict, paramsDict: forwarded to resolveInterval.
        deNovoAmbigEdges: forwarded to An.getPRMLadder.
        tagLength: forwarded to generateSequenceTags.
        isobaricPenalty, defModPenalty, inDelPenalty, undefModPenalty:
            penalties added to an edge whose gap is explained by,
            respectively, an isobaric substitution, any other reported
            modification, an insertion or deletion, or an undefined mass shift.
        defaultScore: score given to a predecessor tag that has no score yet.

    Returns:
        The result of getBestAlignment for the best-scoring 'end' tag, or
        (None, None, None) when no 'end' tag was scored.
    """
    # NOTE(review): tagGraph.node / tagGraph.edge are networkx 1.x attributes;
    # they are not available on current networkx releases.
    deNovoPRMLadder = An.getPRMLadder(deNovoPeptWithMods, ambigEdges=deNovoAmbigEdges, addEnds=True)
    print(' '.join(str(item) for item in ('De Novo', deNovoPept)))
    print(' '.join(str(item) for item in ('DB', dbPept)))
    dbPRMLadder = An.getPRMLadder(dbPept, addEnds=True)

    startTags, endTags = generateStartAndEndTags(deNovoPept, dbPept)
    sequenceTags = generateSequenceTags(deNovoPept, dbPept, tagLength=tagLength)
    tagGraph = getSequenceTagGraph(startTags, endTags, sequenceTags)

    maxScore = None
    maxScoringTag = None

    for tag in nx.topological_sort(tagGraph):
        # tag[0] holds the (start, end) indices of the tag in the de novo
        # peptide and tag[1] those in the database peptide, as shown by the
        # ladder and slice indexing below.
        nodeScore = tag[0][1] - tag[0][0]

        for prevTag in tagGraph.predecessors(tag):
            nModSymbol = None
            # Define terminus of peptide for modification annotation
            if tagGraph.node[prevTag]['position'] == 'start':
                term = 'N-term'
            elif tagGraph.node[tag]['position'] == 'end':
                term = 'C-term'
            else:
                term = None

            refMass = dbPRMLadder[tag[1][0]] - dbPRMLadder[prevTag[1][1]]
            deNovoMass = deNovoPRMLadder[tag[0][0]] - deNovoPRMLadder[prevTag[0][1]]
            refSubSeq = dbPept[prevTag[1][1]:tag[1][0]]
            deNovoSubSeq = deNovoPept[prevTag[0][1]:tag[0][0]]

            mods = resolveInterval(refMass, deNovoMass, refSubSeq, deNovoSubSeq, hashedUnimodDict, unimodDict, paramsDict, term=term, nModSymbol=nModSymbol)

            # The last mod with a recognised label decides the penalty; an
            # empty result means the gap needs no explanation.
            modPenalty = defModPenalty
            for mod in mods:
                if 'Isobaric Substitution' == mod[0]:
                    modPenalty = isobaricPenalty
                elif 'Insertion' == mod[0] or 'Deletion' == mod[0]:
                    modPenalty = inDelPenalty
                elif 'Undefined Mass Shift' == mod[0]:
                    modPenalty = undefModPenalty
            if not mods:
                modPenalty = 0

            tagGraph.edge[prevTag][tag]['edgeScore'] = nodeScore + modPenalty
            tagGraph.edge[prevTag][tag]['mods'] = mods
            print(' '.join(str(item) for item in (prevTag, tag, deNovoSubSeq, refSubSeq, mods)))

            if 'score' not in tagGraph.node[prevTag]:
                tagGraph.node[prevTag]['score'] = defaultScore

            candidateScore = tagGraph.node[prevTag]['score'] + nodeScore + modPenalty
            if 'score' in tagGraph.node[tag]:
                tagGraph.node[tag]['score'] = max(tagGraph.node[tag]['score'], candidateScore)
            else:
                tagGraph.node[tag]['score'] = candidateScore

            # Compare against None explicitly: ordering a number against
            # None only works by accident on Python 2 and raises TypeError
            # on Python 3.
            if tagGraph.node[tag]['position'] == 'end' and (maxScore is None or tagGraph.node[tag]['score'] > maxScore):
                maxScore = tagGraph.node[tag]['score']
                maxScoringTag = tag

    if maxScoringTag is not None:
        return getBestAlignment(tagGraph, dbPept, maxScore, maxScoringTag)
    else:
        return None, None, None
outFile.write('\t'.join([col for col in cols]) + '\n') for entry in DataFile.getScanInfo(options.comp, delimiter='\t'): scanData = {} scanData['ScanF'] = entry[infoDict['ScanF']] scanData['Peptide'] = entry[infoDict['Peptide']] scanData['Unmod Peptide'] = An.stripModifications(scanData['Peptide'], noRemove=[]) scanData['Score'] = entry[infoDict['Score']] scanData['Alignment Score'] = None if 'Ambig Edges' in infoDict: ambigEdges = eval(entry[infoDict['Ambig Edges']]) scanData['Ambig Edges'] = ambigEdges else: ambigEdges = [] deNovoPRMLadder = An.getPRMLadder(scanData['Peptide'], ambigEdges=ambigEdges) refList = eval(entry[infoDict['References']]) subjSequence = getSequence(options.fasta, refList[0][0])[refList[0][1]-1:refList[0][2]] if scanData['Unmod Peptide'] == subjSequence: scanData['Modifications'] = [] refSeqDict = {} for reference in refList: protSeq = getSequence(options.fasta, reference[0]) # Sequence isn't found: can happen if there are duplicate identifiers in a fasta database and wrong sequence is retreived try: refSeqDict[reference] = getReferenceSequence(protSeq, subjSequence, start= [reference[1]-1], end= [reference[2]]) except IndexError:
for i, massIntPairs in enumerate(heavySpecs): specs += [PN.Spectrum(PNet, scanFDict[heavyScans[i]]['precMass'], Nmod=pairConfig['NMod'], Cmod=pairConfig['CMod'], epsilon=2*epSTD, spectrum=massIntPairs)] for spec in specs: spec.initializeNoiseModel() clusterPairingStats = Discriminator.getClusterPairingStats(lightSpecs, heavySpecs, avgLightPrecMass, pairConfig, epSTD=epSTD) addClusterPairingStatsToFeatureList(clusterPairingStats, featureList) scoreStats = {} truePMs = {} prmLadders = {} for PSM in LADSSeqInfo[seqEntry]: lightSeq = An.preprocessSequence(PSM[1], seqMap, ambigEdges=PSM[2]) scoreStats[PSM[:2]] = Discriminator.getScoreStats(specs, lightSeq, ambigEdges=PSM[2]) prmLadderWithEnds = An.getPRMLadder(lightSeq, ambigEdges=PSM[2], addEnds=True) truePMs[PSM[:2]] = prmLadderWithEnds[-1] prmLadders[PSM[:2]] = prmLadderWithEnds[1:-1] PSMList = scoreStats.keys() spectrumOrderedScoreStats, clusterScoreStats = compileScoreStats(scoreStats, specs, PSMList) PSMIndexDict = dict([(PSM, i) for i, PSM in enumerate(PSMList)]) for i, PSM in enumerate(LADSSeqInfo[seqEntry]): PSMSpecificFeatureList = copy.copy(featureList) lightSeq = An.preprocessSequence(PSM[1], seqMap, ambigEdges=PSM[2]) heavySeq = An.preprocessSequence(PSM[1], heavySeqMaps['silac_light_heavy'], replaceExistingTerminalMods=True, ambigEdges=PSM[2]) peptLength = len(prmLadders[PSM[:2]]) + 1 # Add LADS PScore (and normalized variants) and delta rank, delta score (LADS PScore) to feature list
def alignDeNovoToDBSequence(deNovoPeptWithMods, deNovoPept, dbPept, spec, hashedUnimodDict, unimodDict, paramsDict, deNovoAmbigEdges = None, tagLength=2, isobaricPenalty=-0.5, defModPenalty=-1, inDelPenalty=-2, undefModPenalty=-3, defaultScore=0):
    """Align a de novo peptide to a database peptide through a spectrum-scored tag graph.

    Start, end and internal sequence tags shared by deNovoPept and dbPept
    are linked in a graph by getSequenceTagGraph. The graph is traversed in
    topological order. Internal tags are scored against spec with
    getScoreFromPRMs; other tags score 0. For each edge the gap between the
    two tags is explained with resolveInterval, and the edge score is
    nodeScore + modScore + modPenalty. Every node keeps the best cumulative
    score over its incoming edges.

    Parameters:
        deNovoPeptWithMods: de novo peptide used to build the PRM ladder.
        deNovoPept: de novo peptide used for tag generation and slicing.
        dbPept: database peptide.
        spec: spectrum object handed to getScoreFromPRMs / getTagScore.
        hashedUnimodDict, unimodDict, paramsDict: forwarded to resolveInterval.
        deNovoAmbigEdges: forwarded to An.getPRMLadder.
        tagLength: forwarded to generateSequenceTags.
        isobaricPenalty, defModPenalty, inDelPenalty, undefModPenalty:
            penalties applied according to the label of the first
            modification reported for a gap.
        defaultScore: score given to a predecessor tag that has no score yet.

    Returns:
        The result of getBestAlignment for the best-scoring 'end' tag, or
        (None, None, None) when no 'end' tag was scored.
    """
    # NOTE(review): tagGraph.node / tagGraph.edge are networkx 1.x attributes;
    # they are not available on current networkx releases.
    deNovoPRMLadder = An.getPRMLadder(deNovoPeptWithMods, ambigEdges=deNovoAmbigEdges, addEnds=True)
    dbPRMLadder = An.getPRMLadder(dbPept, addEnds=True)

    startTags, endTags = generateStartAndEndTags(deNovoPept, dbPept)
    sequenceTags = generateSequenceTags(deNovoPept, dbPept, tagLength=tagLength)
    tagGraph = getSequenceTagGraph(startTags, endTags, sequenceTags)

    maxScore = None
    maxScoringTag = None

    for tag in nx.topological_sort(tagGraph):
        # tag[0] holds the (start, end) indices of the tag in the de novo
        # peptide and tag[1] those in the database peptide, as shown by the
        # ladder and slice indexing below.
        if tagGraph.node[tag]['position'] == 'internal':
            nodeScore = getScoreFromPRMs(spec, deNovoPRMLadder[tag[0][0]:tag[0][1]+1], deNovoTerm=getDeNovoTerm(tag, len(deNovoPept)))
        else:
            nodeScore = 0

        for prevTag in tagGraph.predecessors(tag):
            nModSymbol = None
            # Define terminus of peptide for modification annotation
            if tagGraph.node[prevTag]['position'] == 'start':
                term = 'N-term'
            elif tagGraph.node[tag]['position'] == 'end':
                term = 'C-term'
            else:
                term = None

            gapStartMass = deNovoPRMLadder[prevTag[0][1]]
            refMass = dbPRMLadder[tag[1][0]] - dbPRMLadder[prevTag[1][1]]
            deNovoMass = deNovoPRMLadder[tag[0][0]] - deNovoPRMLadder[prevTag[0][1]]
            refSubSeq = dbPept[prevTag[1][1]:tag[1][0]]
            deNovoSubSeq = deNovoPept[prevTag[0][1]:tag[0][0]]

            mods = resolveInterval(refMass, deNovoMass, refSubSeq, deNovoSubSeq, hashedUnimodDict, unimodDict, paramsDict, term=term, nModSymbol=nModSymbol)

            modPenalty = 0
            modScore = 0
            if len(mods) > 0:
                # The label of the first reported mod selects the handling.
                modLabel = mods[0][0]
                if 'Isobaric Substitution' == modLabel:
                    modPenalty = isobaricPenalty
                    modScore = getTagScore(spec, refSubSeq, startMass=gapStartMass, deNovoTerm=None, addTerminalNodes=False, verbose=True)
                    print(' '.join(str(item) for item in (modScore, refSubSeq)))
                elif 'Insertion' == modLabel:
                    modPenalty = inDelPenalty
                    modScore = getScoreFromPRMs(spec, deNovoPRMLadder[prevTag[0][1]:tag[0][0]+1], deNovoTerm=None, addTerminalNodes=False)
                elif 'Deletion' == modLabel:
                    # NOTE(review): the penalty scales with the de novo
                    # sub-sequence length; if 'Deletion' means residues
                    # missing from the de novo peptide this length may be 0
                    # and the penalty vanishes - confirm whether
                    # len(refSubSeq) was intended.
                    modPenalty = inDelPenalty * len(deNovoSubSeq)
                elif 'Undefined Mass Shift' == modLabel:
                    modPenalty = undefModPenalty
                    # Score every candidate placement and keep the best one.
                    modScores = []
                    for pept in getModPeptides(mods[0], refSubSeq, term, unimodDict):
                        modScores += [(getTagScore(spec, pept[0], startMass=gapStartMass, ambigEdges=pept[1], deNovoTerm=None, addTerminalNodes=False), pept)]
                    modScore, modPept = max(modScores)
                    mods = (mods[0][:-1] + (modPept[0],),)
                else:
                    modPenalty = defModPenalty
                    # Several defined mods may explain the gap: score every
                    # candidate peptide of every mod and keep the best one.
                    modScores = []
                    for modData in mods:
                        for pept in getModPeptides(modData, refSubSeq, term, unimodDict):
                            modScores += [(getTagScore(spec, pept[0], startMass=gapStartMass, ambigEdges=pept[1], deNovoTerm=None, addTerminalNodes=False), (modData, pept))]
                    modScore, modPept = max(modScores)
                    mods = (modPept[0][:-1] + (modPept[1][0],),)

            edgeScore = nodeScore + modScore + modPenalty
            tagGraph.edge[prevTag][tag]['edgeScore'] = edgeScore
            tagGraph.edge[prevTag][tag]['mods'] = mods

            if 'score' not in tagGraph.node[prevTag]:
                tagGraph.node[prevTag]['score'] = defaultScore

            candidateScore = tagGraph.node[prevTag]['score'] + edgeScore
            if 'score' in tagGraph.node[tag]:
                tagGraph.node[tag]['score'] = max(tagGraph.node[tag]['score'], candidateScore)
            else:
                tagGraph.node[tag]['score'] = candidateScore

            # Compare against None explicitly: ordering a number against
            # None only works by accident on Python 2 and raises TypeError
            # on Python 3.
            if tagGraph.node[tag]['position'] == 'end' and (maxScore is None or tagGraph.node[tag]['score'] > maxScore):
                maxScore = tagGraph.node[tag]['score']
                maxScoringTag = tag

    if maxScoringTag is not None:
        return getBestAlignment(tagGraph, dbPept, maxScore, maxScoringTag)
    else:
        return None, None, None
scanData['Peptide'] = entry[infoDict['Peptide']] scanData['Unmod Peptide'] = An.stripModifications(scanData['Peptide']) scanData['Score'] = entry[infoDict['Score']] scanData['Alignment Score'] = None if 'Ambig Edges' in infoDict: ambigEdges = eval(entry[infoDict['Ambig Edges']]) scanData['Ambig Edges'] = ambigEdges else: ambigEdges = [] massIntPairs = DataFile.getMassIntPairs(scanFDict[int(scanData['ScanF'])]['dta']) spec = PN.Spectrum(PNet, precMass, epsilon=2*epSTD, spectrum=massIntPairs) try: #Ignore de novo peptides with noncanonical amino acids for now epsilon = 2 * 10**-6 * options.ppmstd * An.getPRMLadder(scanData['Peptide'], ambigEdges=ambigEdges)[-1] except KeyError: continue refList = eval(entry[infoDict['References']]) subjSequence = seqDict[refList[0][0]][refList[0][1]-1:refList[0][2]] if scanData['Unmod Peptide'] == subjSequence: scanData['Modifications'] = [] refSeqDict = {} for reference in refList: refSeqDict[reference] = getReferenceSequence(seqDict[reference[0]], subjSequence, start=reference[1]-1, end=reference[2]) scanData['DB Peptide'] = refSeqDict scanData['References'] = [ref[0] for ref in refList]