def getUniquePeptDict(scanDict, scoreKey, peptideKey, scanKey = 'ScanF', nullVal = 'None', noStrip=['#'], datasets=None):
    """Pick the best-scoring item for every modification-stripped peptide.

    Parameters:
        scanDict   -- mapping of dataset name -> iterable of items; every item
                      is indexed by peptideKey, scoreKey and scanKey.
        scoreKey   -- item key whose value is compared after float() conversion.
        peptideKey -- item key holding the peptide string.
        scanKey    -- item key holding the scan identifier that is recorded in
                      the second return value.
        nullVal    -- items whose peptide equals this value are skipped.
        noStrip    -- forwarded unchanged as noRemove= to An.stripModifications.
                      The default list is shared between calls; it is only
                      passed along here, never mutated.
        datasets   -- iterable of dataset names to process; None means every
                      key of scanDict.

    Returns:
        A tuple (uniquePeptDict, scanFDict).
        uniquePeptDict -- stripped peptide -> item with the highest score
                          (on a tie the item encountered first is kept).
        scanFDict      -- defaultdict mapping stripped peptide ->
                          {dataset: [scan ids]} with one list per processed
                          dataset.  Looking up a peptide that was never seen
                          creates an entry of empty lists.
    """
    if datasets is None:
        datasets = scanDict.keys()
    # Snapshot the dataset names: the default factory below walks them again on
    # every missing-key lookup, which would silently yield empty dicts if the
    # caller handed in a one-shot iterator or generator.
    datasets = list(datasets)

    scanFDict = defaultdict(lambda: dict((dataset, []) for dataset in datasets))
    uniquePeptDict = {}

    for dataset in datasets:
        for item in scanDict[dataset]:
            if item[peptideKey] == nullVal:
                continue

            strippedPept = An.stripModifications(item[peptideKey], noRemove=noStrip)
            # Keep the first item seen for a peptide, replace it only on a
            # strictly better score.
            if (strippedPept not in uniquePeptDict
                    or float(item[scoreKey]) > float(uniquePeptDict[strippedPept][scoreKey])):
                uniquePeptDict[strippedPept] = item

            # Every non-null item contributes its scan id, best-scoring or not.
            scanFDict[strippedPept][dataset].append(item[scanKey])

    return uniquePeptDict, scanFDict
with open(options.unimoddict) as fin: unimodDict = pickle.load(fin) hashedUnimodDict = hashUnimodDict(unimodDict) outFile = open(options.output, 'w') cols = ['ScanF', 'Score', 'Peptide', 'Unmod Peptide', 'References', 'Modifications', 'DB Peptide', 'Alignment Score'] if 'Ambig Edges' in infoDict: cols.insert(2, 'Ambig Edges') outFile.write('\t'.join([col for col in cols]) + '\n') for entry in DataFile.getScanInfo(options.comp, delimiter='\t'): scanData = {} scanData['ScanF'] = entry[infoDict['ScanF']] scanData['Peptide'] = entry[infoDict['Peptide']] scanData['Unmod Peptide'] = An.stripModifications(scanData['Peptide'], noRemove=[]) scanData['Score'] = entry[infoDict['Score']] scanData['Alignment Score'] = None if 'Ambig Edges' in infoDict: ambigEdges = eval(entry[infoDict['Ambig Edges']]) scanData['Ambig Edges'] = ambigEdges else: ambigEdges = [] deNovoPRMLadder = An.getPRMLadder(scanData['Peptide'], ambigEdges=ambigEdges) refList = eval(entry[infoDict['References']]) subjSequence = getSequence(options.fasta, refList[0][0])[refList[0][1]-1:refList[0][2]] if scanData['Unmod Peptide'] == subjSequence: scanData['Modifications'] = []
pairConfig = paramsDict['Pair Configurations'][pairConfigName] delta = pairConfig['NMod'] + pairConfig['CMod'] deltaPairs = An.findDeltaPairsClusters(precMassClusters, scanFDict, delta, ppm=options.ppmstd) pairsOut = open(options.output + '_' + pairConfigName + '.txt', 'w') for pair in deltaPairs: epSTD = options.ppmstd * 10 ** -6 * scanFDict[precMassClusters[pair[0]][0]]['precMass'] # Get all possible true pairings from database search results uniquePeptideDict = defaultdict(lambda: {'light': [], 'heavy': []}) for scanF in precMassClusters[pair[0]]: if scanF in processedInfo[progName]: uniquePeptideDict[An.stripModifications(processedInfo[progName][scanF][infoMap[progDict[progName]]['Peptide']], noRemove=['#'])]['light'] += [scanF] for scanF in precMassClusters[pair[1]]: if scanF in processedInfo[progName]: uniquePeptideDict[An.stripModifications(processedInfo[progName][scanF][infoMap[progDict[progName]]['Peptide']], noRemove=['#'])]['heavy'] += [scanF] mergedSpecDictLight = {} mergedSpecDictHeavy = {} for peptide in uniquePeptideDict: if len(uniquePeptideDict[peptide]['light']) > 0: mergedSpecDictLight[peptide] = SA.mergeSpectra([DataFile.getMassIntPairs(scanFDict[lightScanF]['dta']) for lightScanF in uniquePeptideDict[peptide]['light']], epsilon = 2*epSTD) if len(uniquePeptideDict[peptide]['heavy']) > 0: mergedSpecDictHeavy[peptide] = SA.mergeSpectra([DataFile.getMassIntPairs(scanFDict[heavyScanF]['dta']) for heavyScanF in uniquePeptideDict[peptide]['heavy']], epsilon = 2*epSTD) for lightPept in mergedSpecDictLight:
unimodDict = pickle.load(fin) hashedUnimodDict = hashUnimodDict(unimodDict) outFile = open(options.output, 'w') cols = ['ScanF', 'Score', 'Peptide', 'Unmod Peptide', 'References', 'Modifications', 'DB Peptide', 'Alignment Score'] if 'Ambig Edges' in infoDict: cols.insert(2, 'Ambig Edges') outFile.write('\t'.join([col for col in cols]) + '\n') for entry in DataFile.getScanInfo(options.comp, delimiter='\t'): scanData = {} print "New scan", entry scanData['ScanF'] = entry[infoDict['ScanF']] scanData['Peptide'] = entry[infoDict['Peptide']] scanData['Unmod Peptide'] = An.stripModifications(scanData['Peptide']) scanData['Score'] = entry[infoDict['Score']] scanData['Alignment Score'] = None if 'Ambig Edges' in infoDict: ambigEdges = eval(entry[infoDict['Ambig Edges']]) scanData['Ambig Edges'] = ambigEdges else: ambigEdges = [] massIntPairs = DataFile.getMassIntPairs(scanFDict[int(scanData['ScanF'])]['dta']) spec = PN.Spectrum(PNet, precMass, epsilon=2*epSTD, spectrum=massIntPairs) try: #Ignore de novo peptides with noncanonical amino acids for now epsilon = 2 * 10**-6 * options.ppmstd * An.getPRMLadder(scanData['Peptide'], ambigEdges=ambigEdges)[-1] except KeyError: