def perform_peptide_match(self):
    """Match peptide patterns against every MS1 scan and collect the results.

    Resets and then fills four attributes on ``self``:

    * ``retention_times`` -- the ``'retentionTime'`` entry of each scan in
      ``self.ms1_indices``, in iteration order.
    * ``match_data`` -- one entry per scan: whatever
      ``self.match_peptide_patterns_to_ms1profile`` returned for that scan's
      profile (indexed below as ``{peptide: {charge: ...}}``).
    * ``matched_peptides`` -- ``{peptide: {charge: 1}}`` for every
      peptide/charge pair that appeared in at least one scan's match data.
    * ``peptide_ms1_profiles`` -- ``{peptide: {charge: profile}}`` where the
      profile is the scan profile cropped around the pattern stored in
      ``self.peptide_indexed_iso_dist``; crops from successive matching
      scans are merged with ``mspy.combine``.

    Prints the elapsed wall-clock time when done. Returns ``None``.
    """
    self.matched_peptides = {}
    self.peptide_ms1_profiles = {}
    self.retention_times = []
    self.match_data = []
    t0 = timer.time()
    for scan_number in self.ms1_indices:
        self.retention_times.append(self.scan_list[scan_number]['retentionTime'])
        scan_profile = self.profiles[scan_number]
        scan_matches = self.match_peptide_patterns_to_ms1profile(scan_profile)
        self.match_data.append(scan_matches)
        for peptide in scan_matches:
            # Both per-peptide dicts are created together on first sight, so
            # one code path serves new and already-seen peptides alike.
            matched_charges = self.matched_peptides.setdefault(peptide, {})
            charge_profiles = self.peptide_ms1_profiles.setdefault(peptide, {})
            for z in scan_matches[peptide]:
                matched_charges[z] = 1
                pattern = self.peptide_indexed_iso_dist[peptide][z]
                # Crop window: first and last pattern entries' leading value
                # (presumably m/z -- confirm), widened by 0.1 on each side.
                cropped = mspy.crop(
                    scan_profile,
                    pattern[0][0] - 0.1,
                    pattern[-1][0] + 0.1)
                if z in charge_profiles:
                    charge_profiles[z] = mspy.combine(charge_profiles[z], cropped)
                else:
                    charge_profiles[z] = cropped
    t1 = timer.time() - t0
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print('Matched MS1 scans in %s ' % t1)
Пример #2
0
def match_peptides_in_scans(
    peptides, PatternObjects, ScanList, ms1ScanList, profiles, charge_min, charge_max, mz_min, mz_max, files
):
    # Matches expected isotopic distributions of peptides in MS1 spectra

    t0 = timer.time()
    RetentionTimeData = {}
    BasepeakData = {}
    RmsdData = {}
    ProfileData = {}
    IntIntensity = {}
    # Iterate through peptide and initiate data structures
    for peptide in peptides:
        RetentionTimeData[peptide] = {}
        BasepeakData[peptide] = {}
        RmsdData[peptide] = {}
        ProfileData[peptide] = {}
        IntIntensity[peptide] = defaultdict(lambda: 1)
        # Iterate through charge states and initiate data structures
        for z in range(charge_min, charge_max + 1):
            RetentionTimeData[peptide][z] = {}
            BasepeakData[peptide][z] = {}
            RmsdData[peptide][z] = {}
            ProfileData[peptide][z] = {}

            # Iterate through files and initiate data structures
            for file_iter in files:
                RetentionTimeData[peptide][z][file_iter] = []
                BasepeakData[peptide][z][file_iter] = []
                RmsdData[peptide][z][file_iter] = []
                ProfileData[peptide][z][file_iter] = None
                IntIntensity[peptide][file_iter] += 0
                if PatternObjects[peptide][z][0][0] < mz_min or PatternObjects[peptide][z][-1][0] > mz_max:
                    continue
                # Iterate through scans
                for scan_number in ms1ScanList[file_iter]:
                    RetentionTimeData[peptide][z][file_iter].append(ScanList[file_iter][scan_number]["retentionTime"])
                    checkPatternResult = mspy.checkpattern_fast(
                        signal=profiles[file_iter][scan_number], pattern=PatternObjects[peptide][z]
                    )
                    if checkPatternResult is not None:
                        RmsdData[peptide][z][file_iter].append(checkPatternResult.rmsd)
                        BasepeakData[peptide][z][file_iter].append(checkPatternResult.basepeak)
                        if checkPatternResult.rmsd < RmsdThreshold:
                            if ProfileData[peptide][z][file_iter] is None:
                                ProfileData[peptide][z][file_iter] = mspy.crop(
                                    profiles[file_iter][scan_number],
                                    PatternObjects[peptide][z][0][0] - 0.1,
                                    PatternObjects[peptide][z][-1][0] + 0.1,
                                )
                            else:
                                ProfileData[peptide][z][file_iter] = mspy.combine(
                                    ProfileData[peptide][z][file_iter],
                                    mspy.crop(
                                        profiles[file_iter][scan_number],
                                        PatternObjects[peptide][z][0][0] - 0.1,
                                        PatternObjects[peptide][z][-1][0] + 0.1,
                                    ),
                                )
                            IntIntensity[peptide][file_iter] += checkPatternResult.basepeak
                    else:
                        RmsdData[peptide][z][file_iter].append(1)
                        BasepeakData[peptide][z][file_iter].append(0)
                if ProfileData[peptide][z][file_iter] is not None:
                    ProfileData[peptide][z][file_iter] = mspy.reduce(ProfileData[peptide][z][file_iter])
    t1 = timer.time() - t0

    print "Loaded files in %s " % t1

    MatchData = {}
    MatchData["RetentionTime"] = RetentionTimeData
    MatchData["Basepeak"] = BasepeakData
    MatchData["Rmsd"] = RmsdData
    MatchData["Profile"] = ProfileData
    MatchData["IntIntensity"] = IntIntensity
    return MatchData