def perform_peptide_match(self):
    """Match peptide patterns against every MS1 scan and collect the results.

    Resets and then fills four attributes on the instance:

    * ``retention_times`` -- the ``'retentionTime'`` entry of each scan in
      ``ms1_indices``, in iteration order.
    * ``match_data`` -- the value returned by
      ``match_peptide_patterns_to_ms1profile`` for each scan, in the same
      order.
    * ``matched_peptides`` -- ``{peptide: {z: 1}}`` for every peptide/charge
      key that appeared in any scan's match result.
    * ``peptide_ms1_profiles`` -- ``{peptide: {z: profile}}`` where the
      profile is the cropped scan profile, combined (``mspy.combine``) with
      the crops from earlier scans that reported the same peptide/charge.

    Reads ``ms1_indices``, ``scan_list``, ``profiles`` and
    ``peptide_indexed_iso_dist`` from the instance. Prints the elapsed
    wall-clock time when finished.
    """
    self.matched_peptides = {}
    self.peptide_ms1_profiles = {}
    self.retention_times = []
    self.match_data = []

    started = timer.time()
    for scan_number in self.ms1_indices:
        self.retention_times.append(self.scan_list[scan_number]['retentionTime'])
        spectrum = self.profiles[scan_number]
        scan_matches = self.match_peptide_patterns_to_ms1profile(spectrum)
        self.match_data.append(scan_matches)

        for peptide, charge_hits in scan_matches.items():
            # First sighting of this peptide: create its per-charge containers.
            if peptide not in self.matched_peptides:
                self.matched_peptides[peptide] = {}
                self.peptide_ms1_profiles[peptide] = {}
            seen_charges = self.matched_peptides[peptide]
            merged_profiles = self.peptide_ms1_profiles[peptide]

            for z in charge_hits:
                seen_charges[z] = 1
                pattern = self.peptide_indexed_iso_dist[peptide][z]
                # Crop the scan to the span between the first and last pattern
                # entries, padded by 0.1 on each side (units are not stated
                # here; presumably m/z -- confirm).
                window = mspy.crop(spectrum, pattern[0][0] - 0.1, pattern[-1][0] + 0.1)
                if z in merged_profiles:
                    merged_profiles[z] = mspy.combine(merged_profiles[z], window)
                else:
                    merged_profiles[z] = window

    elapsed = timer.time() - started
    # Single-argument parenthesised form prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print('Matched MS1 scans in %s ' % elapsed)
def match_peptides_in_scans(
    peptides, PatternObjects, ScanList, ms1ScanList, profiles, charge_min, charge_max, mz_min, mz_max, files
):
    """Match expected isotopic distributions of peptides in MS1 spectra.

    For every peptide, every charge state in ``charge_min..charge_max``
    (inclusive) and every file, each MS1 scan is checked against the
    peptide's pattern with ``mspy.checkpattern_fast``.

    Args:
        peptides: Iterable of peptide keys.
        PatternObjects: ``PatternObjects[peptide][z]`` is an indexable
            pattern; the first element of its first and last entries is
            compared with ``mz_min`` / ``mz_max`` (presumably m/z values --
            confirm).
        ScanList: ``ScanList[file][scan_number]["retentionTime"]`` is read
            for every MS1 scan.
        ms1ScanList: ``ms1ScanList[file]`` yields the scan numbers to check.
        profiles: ``profiles[file][scan_number]`` is the signal handed to
            ``mspy``.
        charge_min: Lowest charge state (inclusive).
        charge_max: Highest charge state (inclusive).
        mz_min: Patterns starting below this value are skipped.
        mz_max: Patterns ending above this value are skipped.
        files: Iterable of file keys.

    Returns:
        dict with the keys ``"RetentionTime"``, ``"Basepeak"``, ``"Rmsd"``
        and ``"Profile"`` (each nested ``[peptide][z][file]``) and
        ``"IntIntensity"`` (nested ``[peptide][file]``). Per scan, ``Rmsd``
        and ``Basepeak`` hold the check result's values, or ``1`` and ``0``
        when the check returned ``None``. ``Profile`` is ``None`` unless at
        least one scan had ``rmsd < RmsdThreshold``, in which case it is the
        ``mspy.reduce`` of the combined cropped scans. ``IntIntensity``
        starts at 1 per file and accumulates the basepeak of every scan
        below the threshold, across all charge states.

    ``RmsdThreshold`` is a module-level name that is not defined in this
    function.
    """
    started = timer.time()

    retention_times = {}
    basepeaks = {}
    rmsds = {}
    merged_profiles = {}
    integrated = {}

    for peptide in peptides:
        rt_by_charge = retention_times[peptide] = {}
        basepeak_by_charge = basepeaks[peptide] = {}
        rmsd_by_charge = rmsds[peptide] = {}
        profile_by_charge = merged_profiles[peptide] = {}
        # Missing files default to 1 rather than 0.
        intensity_by_file = integrated[peptide] = defaultdict(lambda: 1)

        for z in range(charge_min, charge_max + 1):
            rt_by_file = rt_by_charge[z] = {}
            basepeak_by_file = basepeak_by_charge[z] = {}
            rmsd_by_file = rmsd_by_charge[z] = {}
            profile_by_file = profile_by_charge[z] = {}

            for file_iter in files:
                scan_rts = rt_by_file[file_iter] = []
                scan_basepeaks = basepeak_by_file[file_iter] = []
                scan_rmsds = rmsd_by_file[file_iter] = []
                profile_by_file[file_iter] = None
                # Adding zero materialises the defaultdict entry (value 1)
                # even when no scan ever contributes for this file.
                intensity_by_file[file_iter] += 0

                pattern = PatternObjects[peptide][z]
                # Skip patterns that extend outside the requested range.
                if pattern[0][0] < mz_min or pattern[-1][0] > mz_max:
                    continue

                # Crop window: pattern span padded by 0.1 on each side.
                crop_low = pattern[0][0] - 0.1
                crop_high = pattern[-1][0] + 0.1

                accumulated = None
                for scan_number in ms1ScanList[file_iter]:
                    scan_rts.append(ScanList[file_iter][scan_number]["retentionTime"])
                    spectrum = profiles[file_iter][scan_number]
                    hit = mspy.checkpattern_fast(signal=spectrum, pattern=pattern)

                    if hit is None:
                        # Placeholders keep these lists aligned with the
                        # retention-time list.
                        scan_rmsds.append(1)
                        scan_basepeaks.append(0)
                        continue

                    scan_rmsds.append(hit.rmsd)
                    scan_basepeaks.append(hit.basepeak)

                    if hit.rmsd < RmsdThreshold:
                        window = mspy.crop(spectrum, crop_low, crop_high)
                        if accumulated is None:
                            accumulated = window
                        else:
                            accumulated = mspy.combine(accumulated, window)
                        intensity_by_file[file_iter] += hit.basepeak

                if accumulated is not None:
                    profile_by_file[file_iter] = mspy.reduce(accumulated)

    elapsed = timer.time() - started
    # NOTE(review): the message says "Loaded files" although this function
    # performs matching; the text is kept verbatim. The single-argument
    # parenthesised form prints the same under Python 2 and Python 3.
    print("Loaded files in %s " % elapsed)

    return {
        "RetentionTime": retention_times,
        "Basepeak": basepeaks,
        "Rmsd": rmsds,
        "Profile": merged_profiles,
        "IntIntensity": integrated,
    }