def extract_peaks(file, time_range):
    """Average the spectra of an mzML file per filter string and save them.

    Every spectrum that has a 'filter string', a 'total ion current' and a
    'scan time' inside ``time_range`` (inclusive on both ends) is accumulated
    with the other spectra sharing the same filter string.  Each accumulated
    signal is then passed through ``mspy.reduce``, scaled by
    ``1 / scan_count`` and written with ``np.save`` to the path
    ``file + <filter string> + '.np'``.

    Args:
        file: Path of the input file, handed to ``pymzml.run.Reader``.  It is
            also used as the prefix of every output file name.
        time_range: Two-item sequence ``(start, end)`` compared against the
            'scan time' value of each spectrum.

    Returns:
        None.  The results are only written to disk.
    """
    run = pymzml.run.Reader(file)
    print(file)

    # filter string -> {'scan_count': int, 'scan': accumulated peak array}
    scans = {}
    for spec in run:
        filter_string = spec.get('filter string')
        if filter_string is None or spec.get('total ion current') is None:
            continue
        scan_time = spec.get('scan time')
        # A missing scan time cannot be inside the window.  Skipping it
        # explicitly avoids a TypeError on Python 3, where None cannot be
        # ordered against a number.
        if scan_time is None:
            continue
        if not (time_range[0] <= scan_time <= time_range[1]):
            continue

        peaks = np.array(spec.peaks)
        entry = scans.get(filter_string)
        if entry is None:
            scans[filter_string] = {'scan_count': 1, 'scan': peaks}
        else:
            entry['scan_count'] += 1
            entry['scan'] = mspy.combine(entry['scan'], peaks)

    for scan, entry in scans.items():
        averaged = mspy.reduce(entry['scan'])
        # Scale the accumulated intensities by the number of scans combined.
        averaged = mspy.multiply(averaged, y=1.0 / entry['scan_count'])
        # np.save is given an open file object, so the name is used verbatim.
        with open(file + scan + '.np', 'wb') as f:
            np.save(f, averaged)
def integrate_match_data(self):
    """Build per-peptide intensity timelines and integrated intensities.

    Populates two attributes:

    * ``self.peptide_intensities[peptide][z]`` -- a list with one entry per
      item of ``self.retention_times``: the ``basepeak`` of the match
      recorded in ``self.match_data[i][peptide][z]``, or ``0.0`` when that
      data point has no match for the peptide / charge state.
    * ``self.peptide_intensities_int[peptide]`` -- the sum of those basepeak
      values over all data points and all charge states of the peptide.

    As a side effect every ``self.peptide_ms1_profiles[peptide][z]`` is
    replaced by the result of ``mspy.reduce`` applied to it.
    """
    self.peptide_intensities = {}
    self.peptide_intensities_int = {}

    # Preparing data structures
    for peptide, charge_states in self.matched_peptides.items():
        self.peptide_intensities[peptide] = {}
        self.peptide_intensities_int[peptide] = 0.0
        for z in charge_states:
            self.peptide_intensities[peptide][z] = []
            self.peptide_ms1_profiles[peptide][z] = mspy.reduce(
                self.peptide_ms1_profiles[peptide][z])

    # Summing over each data point
    for i in range(len(self.retention_times)):
        for peptide, charge_states in self.matched_peptides.items():
            timeline_by_charge = self.peptide_intensities[peptide]
            for z in charge_states:
                scan_matches = self.match_data[i]
                # Direct dict membership: `in d.keys()` is a linear list
                # scan on Python 2.
                if peptide in scan_matches and z in scan_matches[peptide]:
                    basepeak = scan_matches[peptide][z].basepeak
                    timeline_by_charge[z].append(basepeak)
                    self.peptide_intensities_int[peptide] += basepeak
                else:
                    timeline_by_charge[z].append(0.0)
def match_peptides_in_scans(
    peptides, PatternObjects, ScanList, ms1ScanList, profiles, charge_min, charge_max, mz_min, mz_max, files
):
    """Match expected isotopic patterns of peptides against MS1 spectra.

    For every peptide, every charge state in ``charge_min..charge_max``
    (inclusive) and every file, each MS1 scan is tested with
    ``mspy.checkpattern_fast`` against the expected pattern.

    Args:
        peptides: Iterable of peptide keys.
        PatternObjects: ``PatternObjects[peptide][z]`` is the expected
            pattern; the m/z of a point is read from its index 0, and the
            first / last points give the pattern's m/z span.
        ScanList: ``ScanList[file][scan_number]["retentionTime"]`` is the
            retention time recorded for a scan.
        ms1ScanList: ``ms1ScanList[file]`` is the iterable of MS1 scan
            numbers to test.
        profiles: ``profiles[file][scan_number]`` is the signal of a scan.
        charge_min: Lowest charge state to test.
        charge_max: Highest charge state to test (inclusive).
        mz_min: Patterns starting below this m/z are not matched.
        mz_max: Patterns ending above this m/z are not matched.
        files: Iterable of file keys.

    Returns:
        dict with the keys "RetentionTime", "Basepeak", "Rmsd", "Profile"
        (each indexed ``[peptide][z][file]``) and "IntIntensity" (indexed
        ``[peptide][file]``).  The first three hold one list entry per
        tested scan; a scan without a pattern result gets rmsd ``1`` and
        basepeak ``0``.  "Profile" is ``None`` or the ``mspy.reduce``-d
        combination of the cropped signals of all scans whose rmsd is below
        the module-level ``RmsdThreshold``.  "IntIntensity" starts at ``1``
        and accumulates the basepeak of those same scans.
    """
    t0 = timer.time()

    RetentionTimeData = {}
    BasepeakData = {}
    RmsdData = {}
    ProfileData = {}
    IntIntensity = {}

    for peptide in peptides:
        RetentionTimeData[peptide] = {}
        BasepeakData[peptide] = {}
        RmsdData[peptide] = {}
        ProfileData[peptide] = {}
        IntIntensity[peptide] = defaultdict(lambda: 1)

        for z in range(charge_min, charge_max + 1):
            RetentionTimeData[peptide][z] = {}
            BasepeakData[peptide][z] = {}
            RmsdData[peptide][z] = {}
            ProfileData[peptide][z] = {}

            for file_iter in files:
                retention_times = []
                basepeaks = []
                rmsds = []
                RetentionTimeData[peptide][z][file_iter] = retention_times
                BasepeakData[peptide][z][file_iter] = basepeaks
                RmsdData[peptide][z][file_iter] = rmsds
                ProfileData[peptide][z][file_iter] = None
                # Adding 0 materialises the defaultdict entry (initial
                # value 1) even when nothing is matched for this file.
                IntIntensity[peptide][file_iter] += 0

                pattern = PatternObjects[peptide][z]
                if pattern[0][0] < mz_min or pattern[-1][0] > mz_max:
                    # Pattern is not fully inside the m/z window.
                    continue

                # Crop window: the pattern's m/z span padded by 0.1.
                crop_low = pattern[0][0] - 0.1
                crop_high = pattern[-1][0] + 0.1
                matched_profile = None

                for scan_number in ms1ScanList[file_iter]:
                    retention_times.append(ScanList[file_iter][scan_number]["retentionTime"])
                    signal = profiles[file_iter][scan_number]
                    result = mspy.checkpattern_fast(signal=signal, pattern=pattern)

                    if result is None:
                        # Placeholders keep the lists aligned with the
                        # retention times.
                        rmsds.append(1)
                        basepeaks.append(0)
                        continue

                    rmsds.append(result.rmsd)
                    basepeaks.append(result.basepeak)

                    if result.rmsd < RmsdThreshold:
                        cropped = mspy.crop(signal, crop_low, crop_high)
                        if matched_profile is None:
                            matched_profile = cropped
                        else:
                            matched_profile = mspy.combine(matched_profile, cropped)
                        IntIntensity[peptide][file_iter] += result.basepeak

                if matched_profile is not None:
                    ProfileData[peptide][z][file_iter] = mspy.reduce(matched_profile)

    t1 = timer.time() - t0
    # Single-argument call form prints the same text on Python 2 and 3.
    print("Loaded files in %s " % t1)

    MatchData = {}
    MatchData["RetentionTime"] = RetentionTimeData
    MatchData["Basepeak"] = BasepeakData
    MatchData["Rmsd"] = RmsdData
    MatchData["Profile"] = ProfileData
    MatchData["IntIntensity"] = IntIntensity
    return MatchData