Пример #1
0
def getUniquePeptDict(scanDict, scoreKey, peptideKey, scanKey = 'ScanF', nullVal = 'None', noStrip=['#'], datasets=None):
    """Pick the best-scoring entry per stripped peptide and group its scans by dataset.

    Parameters:
        scanDict   -- mapping of dataset name -> iterable of entries; each entry
                      is indexed with peptideKey, scoreKey and scanKey.
        scoreKey   -- key of the score field; values are compared as floats and
                      the entry with the larger score wins.
        peptideKey -- key of the peptide field.
        scanKey    -- key of the scan identifier field (default 'ScanF').
        nullVal    -- peptide value marking an entry to skip (default 'None').
        noStrip    -- forwarded unchanged as noRemove= to An.stripModifications.
                      NOTE: the default is a shared list; it is only passed
                      through here and never mutated by this function.
        datasets   -- iterable of dataset names to process; defaults to every
                      key of scanDict. One-shot iterables are accepted.

    Returns:
        (uniquePeptDict, scanFDict) where
        uniquePeptDict maps stripped peptide -> the entry with the highest
        score (the first one seen is kept on ties), and
        scanFDict is a defaultdict mapping stripped peptide ->
        {dataset: [scan ids]}, holding a list for every processed dataset.

    Raises:
        KeyError if a requested dataset is missing from scanDict or an entry
        lacks one of the keys; ValueError if a score cannot be cast to float.
    """
    if datasets is None:
        datasets = scanDict.keys()
    # Materialize once: the names are walked by the loop below AND by the
    # defaultdict factory for every new peptide, so a one-shot iterator would
    # otherwise be drained part-way through processing.
    datasets = list(datasets)

    # Every new peptide starts with an empty scan list for each dataset.
    scanFDict = defaultdict(lambda: dict((dataset, []) for dataset in datasets))
    uniquePeptDict = {}

    for dataset in datasets:
        for item in scanDict[dataset]:
            if item[peptideKey] == nullVal:
                continue

            strippedPept = An.stripModifications(item[peptideKey], noRemove=noStrip)

            # Keep the first entry seen, replace it only on a strictly higher score.
            if (strippedPept not in uniquePeptDict
                    or float(item[scoreKey]) > float(uniquePeptDict[strippedPept][scoreKey])):
                uniquePeptDict[strippedPept] = item

            scanFDict[strippedPept][dataset].append(item[scanKey])

    return uniquePeptDict, scanFDict
    with open(options.unimoddict) as fin:
        unimodDict = pickle.load(fin)
    hashedUnimodDict = hashUnimodDict(unimodDict)

    outFile = open(options.output, 'w')
    cols = ['ScanF', 'Score', 'Peptide', 'Unmod Peptide', 'References', 'Modifications', 'DB Peptide', 'Alignment Score']
    if 'Ambig Edges' in infoDict:
        cols.insert(2, 'Ambig Edges')
        
    outFile.write('\t'.join([col for col in cols]) + '\n')

    for entry in DataFile.getScanInfo(options.comp, delimiter='\t'):
        scanData = {}
        scanData['ScanF'] = entry[infoDict['ScanF']]
        scanData['Peptide'] = entry[infoDict['Peptide']]
        scanData['Unmod Peptide'] = An.stripModifications(scanData['Peptide'], noRemove=[])
        scanData['Score'] = entry[infoDict['Score']]
        scanData['Alignment Score'] = None
        
        if 'Ambig Edges' in infoDict:
            ambigEdges = eval(entry[infoDict['Ambig Edges']])
            scanData['Ambig Edges'] = ambigEdges
        else:
            ambigEdges = []
        deNovoPRMLadder = An.getPRMLadder(scanData['Peptide'], ambigEdges=ambigEdges)
                
        refList = eval(entry[infoDict['References']])
        subjSequence = getSequence(options.fasta, refList[0][0])[refList[0][1]-1:refList[0][2]]

        if scanData['Unmod Peptide'] == subjSequence:
            scanData['Modifications'] = []
        pairConfig = paramsDict['Pair Configurations'][pairConfigName]

        delta = pairConfig['NMod'] + pairConfig['CMod']
        deltaPairs = An.findDeltaPairsClusters(precMassClusters, scanFDict, delta, ppm=options.ppmstd)

        pairsOut = open(options.output + '_' + pairConfigName + '.txt', 'w')

        for pair in deltaPairs:

            epSTD = options.ppmstd * 10 ** -6 * scanFDict[precMassClusters[pair[0]][0]]['precMass']
            
            # Get all possible true pairings from database search results
            uniquePeptideDict = defaultdict(lambda: {'light': [], 'heavy': []})
            for scanF in precMassClusters[pair[0]]:
                if scanF in processedInfo[progName]:
                    uniquePeptideDict[An.stripModifications(processedInfo[progName][scanF][infoMap[progDict[progName]]['Peptide']], noRemove=['#'])]['light'] += [scanF]

            for scanF in precMassClusters[pair[1]]:
                if scanF in processedInfo[progName]:
                    uniquePeptideDict[An.stripModifications(processedInfo[progName][scanF][infoMap[progDict[progName]]['Peptide']], noRemove=['#'])]['heavy'] += [scanF]


            mergedSpecDictLight = {}
            mergedSpecDictHeavy = {}
            for peptide in uniquePeptideDict:
                if len(uniquePeptideDict[peptide]['light']) > 0:
                    mergedSpecDictLight[peptide] = SA.mergeSpectra([DataFile.getMassIntPairs(scanFDict[lightScanF]['dta']) for lightScanF in uniquePeptideDict[peptide]['light']], epsilon = 2*epSTD)
                if len(uniquePeptideDict[peptide]['heavy']) > 0:
                    mergedSpecDictHeavy[peptide] = SA.mergeSpectra([DataFile.getMassIntPairs(scanFDict[heavyScanF]['dta']) for heavyScanF in uniquePeptideDict[peptide]['heavy']], epsilon = 2*epSTD)                

            for lightPept in mergedSpecDictLight:
        unimodDict = pickle.load(fin)
    hashedUnimodDict = hashUnimodDict(unimodDict)

    outFile = open(options.output, 'w')
    cols = ['ScanF', 'Score', 'Peptide', 'Unmod Peptide', 'References', 'Modifications', 'DB Peptide', 'Alignment Score']
    if 'Ambig Edges' in infoDict:
        cols.insert(2, 'Ambig Edges')
        
    outFile.write('\t'.join([col for col in cols]) + '\n')

    for entry in DataFile.getScanInfo(options.comp, delimiter='\t'):
        scanData = {}
        print "New scan", entry
        scanData['ScanF'] = entry[infoDict['ScanF']]
        scanData['Peptide'] = entry[infoDict['Peptide']]
        scanData['Unmod Peptide'] = An.stripModifications(scanData['Peptide'])
        scanData['Score'] = entry[infoDict['Score']]
        scanData['Alignment Score'] = None
        if 'Ambig Edges' in infoDict:
            ambigEdges = eval(entry[infoDict['Ambig Edges']])
            scanData['Ambig Edges'] = ambigEdges
        else:
            ambigEdges = []

        massIntPairs = DataFile.getMassIntPairs(scanFDict[int(scanData['ScanF'])]['dta'])
        spec = PN.Spectrum(PNet, precMass, epsilon=2*epSTD, spectrum=massIntPairs)

        try:
            #Ignore de novo peptides with noncanonical amino acids for now
            epsilon = 2 * 10**-6 * options.ppmstd * An.getPRMLadder(scanData['Peptide'], ambigEdges=ambigEdges)[-1]
        except KeyError: