def test_penalized_msdeconv(self): scorer = PenalizedMSDeconVFitter(20, 2.0) # score = scorer.evaluate(None, experimental, theoretical) scores = [ scorer.evaluate(None, experimental, theoretical) for i in range(10) ] score = scores[0] assert all([np.isclose(s, score) for s in scores[1:]]), scores self.assertAlmostEqual(score, 293.47483621051316, 3) score = scorer(None, experimental, theoretical) self.assertAlmostEqual(score, 293.47483621051316, 3)
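# PenalizedMSDeconVFitter is constructed above with two parameters the library
# describes as a minimum-score threshold and a penalty factor (those parameter
# names are an assumption, not taken from this file). The property under test
# is that evaluate() is a pure function of its inputs. A minimal reusable
# sketch of that determinism check, assuming the same module-level
# `experimental`/`theoretical` fixtures the test uses:

def assert_scorer_deterministic(scorer, experimental, theoretical, n=10):
    # Score the identical experimental/theoretical pairing repeatedly and
    # require the results to agree to floating-point tolerance.
    scores = [scorer.evaluate(None, experimental, theoretical) for _ in range(n)]
    assert all(np.isclose(s, scores[0]) for s in scores[1:]), scores
    return scores[0]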
def test_graph_deconvolution(self):
    """Deconvolute a negative-mode scan against a composition list and
    diagnose any expected ion that fails to produce a solution."""
    scan = self.make_scan()
    scan.pick_peaks()
    self.assertIsNotNone(scan.peak_set)
    algorithm_type = CompositionListPeakDependenceGraphDeconvoluter
    decon_config = {
        "composition_list": self.compositions,
        "scorer": PenalizedMSDeconVFitter(5., 2.),
        "use_subtraction": True
    }
    deconresult = deconvolute_peaks(
        scan.peak_set, decon_config, charge_range=(-1, -8),
        deconvoluter_type=algorithm_type)
    dpeaks = deconresult.peak_set
    n_cases = sum(map(len, self.charges))
    if not (len(dpeaks) == n_cases):
        # Before failing, report which theoretical ions went unmatched and
        # re-run the elementary fitting steps on each one for diagnostics.
        tids, ions = self.make_tids()
        tids, ions = zip(*sorted(zip(tids, ions),
                                 key=lambda x: x[0].monoisotopic_mz))
        seen = set()
        for i, dp in enumerate(sorted(dpeaks, key=lambda x: x.mz)):
            ix = self.get_nearest_index(dp.mz, tids)
            logger.warning("%0.3f %d %0.3f %r (Matched %d)", dp.neutral_mass,
                           dp.charge, dp.score, dp.solution, ix)
            seen.add(ix)
        indices = set(range(len(ions)))
        missed = list(indices - seen)
        deconvoluter = algorithm_type(scan.peak_set.clone(), **decon_config)
        for ix in missed:
            tid = deconvoluter.generate_theoretical_isotopic_cluster(*ions[ix])
            # Theoretical distributions are generated with unit total intensity
            assert np.isclose(sum(p.intensity for p in tid), 1.0)
            monoisotopic_peak = deconvoluter.peaklist.has_peak(tid[0].mz, 2e-5)
            if monoisotopic_peak is not None:
                tid = deconvoluter.recalibrate_theoretical_mz(
                    tid, monoisotopic_peak.mz)
            eid = deconvoluter.match_theoretical_isotopic_distribution(
                tid.peaklist, 2e-5)
            missed_peaks = count_placeholders(eid)
            deconvoluter.scale_theoretical_distribution(tid, eid)
            score = deconvoluter.scorer.evaluate(
                deconvoluter.peaklist, eid, tid.peaklist)
            fit_record = deconvoluter.fit_composition_at_charge(*ions[ix])
            eid = fit_record.experimental
            tid = fit_record.theoretical
            rep_eid = drop_placeholders(eid)
            # Conditions under which a fit of this shape would likely be rejected
            validation = ((len(rep_eid) < 2),
                          (len(rep_eid) < len(tid) / 2.),
                          (len(rep_eid) == 1 and fit_record.charge > 1))
            composition, charge = ions[ix]
            logger.warning(
                "Missed %r %d (%d missed peaks, score = %0.3f, record = %r, "
                "validation = %r)", composition, charge, missed_peaks, score,
                fit_record, validation)
        assert not missed
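# The diagnostic branch above spells out the elementary fit pipeline the graph
# deconvoluter runs for every candidate: build a theoretical isotopic
# distribution, align it to experimental peaks, scale, then score. A condensed
# sketch of that sequence using only calls that appear in the test (the
# optional monoisotopic m/z recalibration step is omitted, and 2e-5 is the
# same assumed error tolerance used above):

def fit_composition_sketch(deconvoluter, composition, charge,
                           error_tolerance=2e-5):
    # Theoretical isotopic distribution for this composition at this charge
    tid = deconvoluter.generate_theoretical_isotopic_cluster(composition, charge)
    # Pair each theoretical peak with an experimental peak; unmatched
    # positions are filled with placeholder peaks
    eid = deconvoluter.match_theoretical_isotopic_distribution(
        tid.peaklist, error_tolerance)
    # Scale the unit-intensity theoretical pattern onto the observed signal
    deconvoluter.scale_theoretical_distribution(tid, eid)
    # Score the aligned experimental/theoretical pair
    return deconvoluter.scorer.evaluate(deconvoluter.peaklist, eid, tid.peaklist)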
def test_retrieve_deconvolution_solution(self):
    bunch = self.make_scan()
    scan = bunch.precursor
    scan.pick_peaks()
    ms1_deconvolution_args = {
        "averagine": glycopeptide,
        "scorer": PenalizedMSDeconVFitter(20., 2.),
    }
    # Use each product's precursor m/z as a priority query into the MS1 scan
    priorities = []
    for product in bunch.products:
        priorities.append(scan.has_peak(product.precursor_information.mz))
    algorithm_type = AveraginePeakDependenceGraphDeconvoluter
    deconresult = deconvolute_peaks(
        scan.peak_set, ms1_deconvolution_args, priority_list=priorities,
        deconvoluter_type=algorithm_type)
    dpeaks = deconresult.peak_set
    deconvoluter = deconresult.deconvoluter
    priority_results = deconresult.priorities
    reference_deconvoluter = algorithm_type(scan.peak_set.clone(),
                                            **ms1_deconvolution_args)
    for i, result in enumerate(priority_results):
        query = priorities[i].mz
        if result is None:
            # A priority query came back empty; gather enough context to
            # explain why before failing.
            logger.warning("Query %d (%f) had no result", i, query)
            raw_peaks = scan.peak_set.between(query - 2, query + 3)
            anchor_peak = scan.peak_set.has_peak(query)
            deconvoluted_peaks = dpeaks.between(query - 2, query + 3,
                                                use_mz=True)
            possible_solutions = reference_deconvoluter._fit_all_charge_states(
                anchor_peak)
            sols = []
            logger.warning("Possible Solutions %r", possible_solutions)
            if not possible_solutions:
                # Manually construct fits at a few plausible charge states
                for charge in [3, 4, 5]:
                    tid = reference_deconvoluter.averagine.isotopic_cluster(
                        anchor_peak.mz, charge)
                    assert np.isclose(tid.monoisotopic_mz, anchor_peak.mz)
                    assert np.isclose(sum(p.intensity for p in tid), 1.0)
                    eid = reference_deconvoluter.match_theoretical_isotopic_distribution(
                        tid.peaklist, error_tolerance=2e-5)
                    assert len(eid) == len(tid)
                    record = reference_deconvoluter._evaluate_theoretical_distribution(
                        eid, tid, anchor_peak, charge)
                    sols.append(record)
                logger.warning("Manually Generated Solutions %r", sols)
            assert (anchor_peak is not None and raw_peaks and
                    (possible_solutions or sols) and not deconvoluted_peaks)
            assert deconvoluter.peak_dependency_network.find_solution_for(
                anchor_peak) is not None
            assert dpeaks.has_peak(query, use_mz=True)
            # Unconditionally fail here: a priority query must yield a result
            assert result is not None
        else:
            assert 0 <= abs(result.mz - query) < 1
            anchor_peak = scan.peak_set.has_peak(query)
            assert deconvoluter.peak_dependency_network.find_solution_for(
                anchor_peak) is not None
def test_processor(self):
    proc = processor.ScanProcessor(self.mzml_path, ms1_deconvolution_args={
        "averagine": glycopeptide,
        "scorer": PenalizedMSDeconVFitter(5., 2.)
    })
    for scan_bunch in iter(proc):
        self.assertIsNotNone(scan_bunch)
        self.assertIsNotNone(scan_bunch.precursor)
        self.assertIsNotNone(scan_bunch.products)
def test_deconvolution(self):
    scan = self.make_scan()
    algorithm_type = AveragineDeconvoluter
    deconresult = deconvolute_peaks(
        scan.peak_set, {
            "averagine": peptide,
            "scorer": PenalizedMSDeconVFitter(5., 1.),
            "use_subtraction": False,
        },
        left_search_limit=3,
        deconvoluter_type=algorithm_type)
    dpeaks = deconresult.peak_set
    assert len(dpeaks) == 6
    for point in points:
        peak = dpeaks.has_peak(neutral_mass(point[0], point[1]))
        self.assertIsNotNone(peak)
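# The imported `neutral_mass(mz, charge)` helper used in the assertions above
# converts an m/z value at a known charge back to an uncharged mass. A minimal
# sketch of that standard conversion, assuming a proton charge carrier (the
# constant and function name here are illustrative, not the library's own):
PROTON = 1.00727646677  # approximate proton mass in Daltons

def neutral_mass_sketch(mz, z, charge_carrier=PROTON):
    # Multiplying by |z| undoes the division by charge that m/z implies;
    # subtracting z charge carriers removes their mass. For negative charges
    # (negative mode), -(z * charge_carrier) adds the carrier mass back.
    return (mz * abs(z)) - (z * charge_carrier)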
def test_missing_charge_processing(self):
    proc = processor.ScanProcessor(self.missing_charge_mzml,
                                   ms1_deconvolution_args={
                                       "averagine": glycopeptide,
                                       "scorer": PenalizedMSDeconVFitter(5., 2.)
                                   })
    for scan_bunch in iter(proc):
        self.assertIsNotNone(scan_bunch)
        self.assertIsNotNone(scan_bunch.precursor)
        self.assertIsNotNone(scan_bunch.products)
        for product in scan_bunch.products:
            # A defaulted precursor means no isotopic fit could be extracted;
            # that should only happen when no peaks lie near the reported m/z.
            if product.precursor_information.defaulted:
                candidates = scan_bunch.precursor.peak_set.between(
                    product.precursor_information.mz - 1,
                    product.precursor_information.mz + 1)
                assert len(candidates) == 0
def test_complex_processor(self):
    proc = processor.ScanProcessor(self.complex_compressed_mzml,
                                   ms1_deconvolution_args={
                                       "averagine": glycopeptide,
                                       "scorer": PenalizedMSDeconVFitter(20., 2.),
                                       "truncate_after": 0.95
                                   },
                                   msn_deconvolution_args={
                                       "averagine": peptide,
                                       "scorer": MSDeconVFitter(10.),
                                       "truncate_after": 0.8
                                   })
    bunch = next(proc)
    assert len(bunch.products) == 5
    for product in bunch.products:
        assert not product.precursor_information.defaulted
    recalculated_precursors = {
        'scanId=1740086': 4640.00074242012,
        'scanId=1740149': 4786.05878475792,
        'scanId=1740226': 4640.007868154431,
        'scanId=1740344': 4348.90894554512,
        'scanId=1740492': 5005.1329902247435
    }
    for product in bunch.products:
        mass = product.precursor_information.extracted_neutral_mass
        self.assertAlmostEqual(mass, recalculated_precursors[product.id], 2)
    # Seek to a later scan in the run and repeat the precursor mass checks
    proc.start_from_scan("scanId=1760847")
    bunch = next(proc)
    recalculated_precursors = {
        'scanId=1761168': 4640.01972225792,
        'scanId=1761235': 4640.019285920238,
        'scanId=1761325': 4786.07251976387,
        'scanId=1761523': 4696.016295197582,
        'scanId=1761804': 986.58798612896
    }
    for product in bunch.products:
        mass = product.precursor_information.extracted_neutral_mass
        self.assertAlmostEqual(mass, recalculated_precursors[product.id], 2)
def test_graph_deconvolution(self):
    scan = self.make_scan()
    scan.pick_peaks()
    self.assertIsNotNone(scan.peak_set)
    algorithm_type = AveraginePeakDependenceGraphDeconvoluter
    deconresult = deconvolute_peaks(
        scan.peak_set, {
            "averagine": peptide,
            "scorer": PenalizedMSDeconVFitter(5., 1.)
        },
        deconvoluter_type=algorithm_type)
    dpeaks = deconresult.peak_set
    assert len(dpeaks) == 2
    deconvoluter = deconresult.deconvoluter
    for point in points:
        peak = dpeaks.has_peak(neutral_mass(point[0], point[1]))
        self.assertIsNotNone(peak)
        fp = scan.has_peak(peak.mz)
        self.assertAlmostEqual(
            deconvoluter.peak_dependency_network.find_solution_for(fp).mz,
            peak.mz, 3)