def getClusterPairingStats(lightSpecs, heavySpecs, lightPrecMass, pairConfig, epSTD = 0.01):
    """Merge a light and a heavy spectrum cluster and summarize their paired ions.

    lightSpecs / heavySpecs are handed to SA.mergeSpectra with a tolerance of
    2*epSTD.  The merged spectra are then matched by getAllPairedIonsDict,
    whose result maps a collection of heavy ions to the collection of light
    ions paired with it.  Each ion is indexed as ion[0] (row in the merged
    spectrum) and ion[1] (ion-type key into PN.ProbNetwork.deltaRules).

    Returns a dict with the keys:
        'Cluster Paired PRM Information' -- pair-type string -> list of
            (mean delta-rule value, (light ions, heavy ions)) tuples
        'Shared Peaks Ratio' -- paired ion count divided by the total number
            of rows in both merged spectra
        'Pair Type Stats' -- pair-type string -> number of pairings seen
        'Light Merged Spec', 'Heavy Merged Spec' -- the merged spectra
        'Num Paired Ions' -- total count of light plus heavy paired ions

    Pair-type strings are the light ion types and heavy ion types joined as
    '<light>_<heavy>'; every string produced must already be in the
    module-level `pairTypes`, otherwise a KeyError is raised.
    """
    mergeTolerance = 2*epSTD
    lightMergedSpec = SA.mergeSpectra(lightSpecs, epsilon=mergeTolerance)
    heavyMergedSpec = SA.mergeSpectra(heavySpecs, epsilon=mergeTolerance)
    allPairedIonsDict = getAllPairedIonsDict(lightMergedSpec, heavyMergedSpec, lightPrecMass, pairConfig, epSTD)

    # Start every known pair type with an empty record and a zero count.
    specPairedPRMs = {}
    pairTypeCount = {}
    for knownType in pairTypes:
        specPairedPRMs[knownType] = []
        pairTypeCount[knownType] = 0

    # First argument of every delta rule: the light precursor mass with the
    # 'H+' and 'H2O' entries of Constants.mods subtracted.
    refMass = lightPrecMass - Constants.mods['H+'] - Constants.mods['H2O']
    deltaRules = PN.ProbNetwork.deltaRules

    numLightInds = 0
    numHeavyInds = 0
    for heavyIons in allPairedIonsDict:
        lightIons = allPairedIonsDict[heavyIons]

        # Light ions are evaluated with zero terminal mods, heavy ions with
        # the pair configuration's NMod / CMod.
        deltaMasses = [deltaRules[ion[1]](refMass, lightMergedSpec[ion[0]][0], 0, 0)
                       for ion in lightIons]
        deltaMasses.extend([deltaRules[ion[1]](refMass, heavyMergedSpec[ion[0]][0], pairConfig['NMod'], pairConfig['CMod'])
                            for ion in heavyIons])

        lightTypes = [ion[1] for ion in lightIons]
        heavyTypes = [ion[1] for ion in heavyIons]
        pairTypeString = ''.join(lightTypes) + '_' + ''.join(heavyTypes)

        specPairedPRMs[pairTypeString].append((sum(deltaMasses)/len(deltaMasses), (lightIons, heavyIons)))
        pairTypeCount[pairTypeString] += 1

        numLightInds += len(lightIons)
        numHeavyInds += len(heavyIons)

    numPairedIons = numLightInds + numHeavyInds
    totalPeaks = lightMergedSpec.shape[0] + heavyMergedSpec.shape[0]
    sharedPeaksRatio = float(numPairedIons)/totalPeaks

    return {
        'Cluster Paired PRM Information': specPairedPRMs,
        'Shared Peaks Ratio': sharedPeaksRatio,
        'Pair Type Stats': pairTypeCount,
        'Light Merged Spec': lightMergedSpec,
        'Heavy Merged Spec': heavyMergedSpec,
        'Num Paired Ions': numPairedIons,
    }
def getPairs(pairs, xVals):
    """Compute SVM classification data for each cluster pair and put it on xVals.

    pairs -- iterable of items whose [0] and [1] entries index the
        module-level samePeptideClusters (light and heavy cluster).
    xVals -- object exposing put(); each result is put as a one-element list.
        Presumably a queue shared with a worker thread -- confirm with caller.

    Reads the module-level scanFDict, samePeptideClusters, paramsDict and
    pairConfig.  Returns the same xVals object that was passed in.
    """
    for pair in pairs:
        lightScans = samePeptideClusters[pair[0]]
        lightSpecs = [DataFile.getMassIntPairs(scanFDict[scanF]['dta']) for scanF in lightScans]

        heavyScans = samePeptideClusters[pair[1]]
        heavySpecs = [DataFile.getMassIntPairs(scanFDict[scanF]['dta']) for scanF in heavyScans]

        # Average precursor mass over the scans of the light cluster.
        lightPrecMasses = [scanFDict[scanF]['precMass'] for scanF in lightScans]
        lightPrecMass = np.average(np.array(lightPrecMasses))

        # Configured 'ppmstd' value scaled by 10 ** -6 and the precursor mass;
        # twice that value is the tolerance given to every SA call below.
        epSTD = (float(paramsDict['ppmstd']['value'])) * 10 ** -6 * lightPrecMass
        tolerance = 2*epSTD

        lightMergedSpec = SA.mergeSpectra(lightSpecs, epsilon=tolerance)
        heavyMergedSpec = SA.mergeSpectra(heavySpecs, epsilon=tolerance)
        svmClassificationData = SA.getSpectraPairInfoForSVMClassification(
            lightMergedSpec,
            heavyMergedSpec,
            lightPrecMass,
            NMod=pairConfig['NMod'],
            CMod=pairConfig['CMod'],
            epsilon=tolerance)

        xVals.put([svmClassificationData])

    return xVals
# NOTE(review): this fragment's original line breaks and indentation were lost;
# the layout below is reconstructed from syntax.  pairConfig, paramsDict,
# parent, pairs, scanFDict, samePeptideClusters, options, t1, seqMap and
# pairConfigName are all defined outside this fragment.

# Endpoint-init function built from the pair configuration's 'NStatic' /
# 'CStatic' entries and the enzyme specificity stored in paramsDict.
addEnds = DNS.getSpectrumGraphEndpointInitFunction(pairConfig['NStatic'], pairConfig['CStatic'], paramsDict['Enzyme']['specificity'])
termModHash = Constants.getTermModHashForPairConfig(pairConfig)

# The SVM model path is parent + pairConfig['Model']; the range file has the
# same base name with its extension replaced by '.range'.
svmModel = svmutil.svm_load_model(parent + pairConfig['Model'])
svmRange = svmutil.load_ranges(parent + os.path.splitext(pairConfig['Model'])[0] + '.range')

# Collect one classification record per candidate pair.
xVals = []
# xVals = getPairsThread(pairs)
for pair in pairs:
    # pair[0] / pair[1] index samePeptideClusters for the light / heavy scans.
    lightSpecs = [DataFile.getMassIntPairs(scanFDict[lightScanF]['dta']) for lightScanF in samePeptideClusters[pair[0]]]
    heavySpecs = [DataFile.getMassIntPairs(scanFDict[heavyScanF]['dta']) for heavyScanF in samePeptideClusters[pair[1]]]
    # Average precursor mass over the scans of the light cluster.
    lightPrecMass = np.average(np.array([scanFDict[lightScanF]['precMass'] for lightScanF in samePeptideClusters[pair[0]]]))
    # options.ppmstd scaled by 10 ** -6 and the precursor mass (presumably a
    # ppm value turned into an absolute mass tolerance -- confirm); 2*epSTD is
    # the epsilon passed to every SA call below.
    epSTD = options.ppmstd * 10 ** -6 * lightPrecMass
    lightMergedSpec = SA.mergeSpectra(lightSpecs, epsilon=2*epSTD)
    heavyMergedSpec = SA.mergeSpectra(heavySpecs, epsilon=2*epSTD)
    svmClassificationData = SA.getSpectraPairInfoForSVMClassification(lightMergedSpec, heavyMergedSpec, lightPrecMass, NMod=pairConfig['NMod'], CMod=pairConfig['CMod'], epsilon=2*epSTD)
    xVals += [svmClassificationData]

# Normalize the records with the loaded ranges, then predict.  The label list
# passed to svm_predict is all zeros, and only element [0] of the result is
# kept (presumably the predicted labels, as in LIBSVM's svm_predict -- confirm).
xValsNorm = svmutil.normalize_instances(xVals, svmRange)
pLab = svmutil.svm_predict([0]*len(xValsNorm), xValsNorm, svmModel)[0]
print 'Pairs found. Time taken:', time.time() - t1, '\n'

# Deep-copy the 'LADS Unit Test' sequence map so the terminal mod symbols can
# be overwritten for this pair configuration without touching seqMap itself.
heavySeqMap = copy.deepcopy(seqMap['LADS Unit Test'])
heavySeqMap['Mods']['N-Term'] = paramsDict['Pair Configurations'][pairConfigName]['NModSymbol']
heavySeqMap['Mods']['C-Term'] = paramsDict['Pair Configurations'][pairConfigName]['CModSymbol']
# Get all possible true pairings from database search results uniquePeptideDict = defaultdict(lambda: {'light': [], 'heavy': []}) for scanF in precMassClusters[pair[0]]: if scanF in processedInfo[progName]: uniquePeptideDict[An.stripModifications(processedInfo[progName][scanF][infoMap[progDict[progName]]['Peptide']], noRemove=['#'])]['light'] += [scanF] for scanF in precMassClusters[pair[1]]: if scanF in processedInfo[progName]: uniquePeptideDict[An.stripModifications(processedInfo[progName][scanF][infoMap[progDict[progName]]['Peptide']], noRemove=['#'])]['heavy'] += [scanF] mergedSpecDictLight = {} mergedSpecDictHeavy = {} for peptide in uniquePeptideDict: if len(uniquePeptideDict[peptide]['light']) > 0: mergedSpecDictLight[peptide] = SA.mergeSpectra([DataFile.getMassIntPairs(scanFDict[lightScanF]['dta']) for lightScanF in uniquePeptideDict[peptide]['light']], epsilon = 2*epSTD) if len(uniquePeptideDict[peptide]['heavy']) > 0: mergedSpecDictHeavy[peptide] = SA.mergeSpectra([DataFile.getMassIntPairs(scanFDict[heavyScanF]['dta']) for heavyScanF in uniquePeptideDict[peptide]['heavy']], epsilon = 2*epSTD) for lightPept in mergedSpecDictLight: for heavyPept in mergedSpecDictHeavy: xVal = 1 if lightPept == heavyPept else -1 SVMClassificationInfo = SA.getSpectraPairInfoForSVMClassification(mergedSpecDictLight[lightPept], mergedSpecDictHeavy[heavyPept], scanFDict[precMassClusters[pair[0]][0]]['precMass'], pairConfig['NMod'], pairConfig['CMod'], epsilon=2*epSTD) pairsOut.write(' '.join([str(xVal)] + ['%i:%f' % (key, SVMClassificationInfo[key]) for key in sorted(SVMClassificationInfo)]) + ' # light peptide: %s, %s - heavy scans: %s, %s\n' % (lightPept, str(uniquePeptideDict[lightPept]['light']), heavyPept, str(uniquePeptideDict[heavyPept]['heavy']))) pairsOut.close()