def post_process_normal(spectrum_in: SpectrumType,
                        min_peaks: int = 10) -> Union[SpectrumType, None]:
    """Normal processing of spectra for Spec2Vec.

    Works on a clone of the input, so ``spectrum_in`` itself is not modified
    by this function.

    Parameters
    ----------
    spectrum_in:
        Input spectrum. ``None`` is passed straight through.
    min_peaks:
        Minimum number of peaks to pass the spectrum (otherwise -> None).
        The same value is the floor for the low-intensity clean-up: the
        clean-up is only accepted when at least ``min_peaks`` peaks survive.

    Returns
    -------
    The processed spectrum with losses added, or ``None`` when the input is
    ``None``, the normalized intensities contain NaN, or the peak-count
    filters reject the spectrum.
    """
    if spectrum_in is None:
        return None

    s = spectrum_in.clone()
    s = normalize_intensities(s)
    if any(np.isnan(s.peaks[1])):
        return None  # remove spectra that have all intensities 0

    s = select_by_mz(s, mz_from=0, mz_to=1000)
    s = require_minimum_number_of_peaks(s, n_required=min_peaks)
    s = reduce_to_number_of_peaks(s, n_required=min_peaks, ratio_desired=0.5)
    if s is None:
        return None

    # Remove low peaks unless fewer than min_peaks peaks would be left.
    # (Previously hard-coded to 10, which ignored a non-default min_peaks.)
    s_remove_low_peaks = select_by_relative_intensity(s, intensity_from=0.001)
    if len(s_remove_low_peaks.peaks) >= min_peaks:
        s = s_remove_low_peaks

    # add losses to normally processed spectra
    s = add_losses(s, loss_mz_from=5.0, loss_mz_to=200.0)
    return s
def test_reduce_to_number_of_peaks_ratio_given_but_no_parent_mass():
    """Passing ratio_desired for a spectrum without parent_mass must leave it untouched."""
    peak_mz = numpy.array([10, 20, 30, 40], dtype="float")
    peak_intensities = numpy.array([0, 1, 10, 100], dtype="float")
    original = Spectrum(mz=peak_mz, intensities=peak_intensities)

    reduced = reduce_to_number_of_peaks(original, n_required=4, ratio_desired=0.1)

    assert reduced == original, "Expected the spectrum to remain unchanged."
def test_reduce_to_number_of_peaks_no_changes(metadata):
    """With default arguments a four-peak spectrum must come back unchanged.

    ``metadata`` is supplied from outside this function (presumably a pytest
    fixture or parametrization -- not visible here).
    """
    builder = SpectrumBuilder()
    builder = builder.with_mz(numpy.array([10, 20, 30, 40], dtype="float"))
    builder = builder.with_intensities(numpy.array([0, 1, 10, 100], dtype="float"))
    original = builder.with_metadata(metadata).build()

    reduced = reduce_to_number_of_peaks(original)

    assert reduced == original, "Expected no changes."
def test_reduce_to_number_of_peaks_no_params():
    """Calling the filter with only its defaults must not alter the spectrum."""
    original = Spectrum(
        mz=numpy.array([10, 20, 30, 40], dtype="float"),
        intensities=numpy.array([0, 1, 10, 100], dtype="float"),
    )

    reduced = reduce_to_number_of_peaks(original)

    assert reduced == original, "Expected no changes."
def post_process_md(spectrum_in: SpectrumType,
                    low_int_cutoff: float = 0.05,
                    min_peaks: int = 10,
                    max_peaks: int = 30) -> Union[SpectrumType, None]:
    """Processing of spectra that are used for mass difference extraction.

    Works on a clone of the input, so ``spectrum_in`` itself is not modified
    by this function.

    Parameters
    ----------
    spectrum_in:
        Input spectrum. ``None`` is passed straight through.
    low_int_cutoff:
        Lower intensity cutoff for the peaks selected for MD.
    min_peaks:
        Minimum number of peaks to pass the spectrum (otherwise -> None).
        The same value is the floor for both intensity clean-up steps: a
        clean-up is only accepted when at least ``min_peaks`` peaks survive.
    max_peaks:
        Maximum number of peaks allowed in the spectrum (ranked on intensity).

    Returns
    -------
    The processed spectrum, or ``None`` when the input is ``None``, the
    normalized intensities contain NaN, or a peak-count filter rejects the
    spectrum.
    """
    if spectrum_in is None:
        return None

    s = spectrum_in.clone()
    # remove precursor_mz from spectra so neutral losses don't end up in MDs
    s = remove_precursor_mz_peak(s)
    s = normalize_intensities(s)
    if any(np.isnan(s.peaks[1])):
        return None  # remove spectra that have all intensities 0

    s = select_by_mz(s, mz_from=0, mz_to=1000)
    s = require_minimum_number_of_peaks(s, n_required=min_peaks)
    s = reduce_to_number_of_peaks(s, n_required=min_peaks, ratio_desired=0.5)
    if s is None:
        return None

    # Remove low peaks unless fewer than min_peaks peaks would be left.
    # (Previously hard-coded to 10, which ignored a non-default min_peaks.)
    s_remove_low_peaks = select_by_relative_intensity(s, intensity_from=0.001)
    if len(s_remove_low_peaks.peaks) >= min_peaks:
        s = s_remove_low_peaks

    # do an additional removal step with a different intensity cutoff,
    # again only accepted when at least min_peaks peaks survive
    s_second_peak_removal = select_by_relative_intensity(
        s, intensity_from=low_int_cutoff)
    if len(s_second_peak_removal.peaks) >= min_peaks:
        s = s_second_peak_removal

    # keep at most max_peaks peaks
    s = reduce_to_number_of_peaks(s, n_required=min_peaks, n_max=max_peaks)
    return s
def test_reduce_to_number_of_peaks_n_max_4():
    """With n_max=4, a five-peak spectrum is expected to keep mz 20, 30, 40 and 50."""
    original = Spectrum(
        mz=numpy.array([10, 20, 30, 40, 50], dtype="float"),
        intensities=numpy.array([1, 1, 10, 20, 100], dtype="float"),
    )

    reduced = reduce_to_number_of_peaks(original, n_max=4)

    peak_count = len(reduced.peaks)
    assert peak_count == 4, "Expected that only 4 peaks remain."
    remaining_mz = reduced.peaks.mz.tolist()
    assert remaining_mz == [20., 30., 40., 50.], "Expected different peaks to remain."
def test_reduce_to_number_of_peaks_desired_5_check_sorting():
    """After reducing six peaks to n_max=5, mz and intensities must stay aligned and in mz order."""
    original = Spectrum(
        mz=numpy.array([10, 20, 30, 40, 50, 60], dtype="float"),
        intensities=numpy.array([5, 1, 4, 3, 100, 2], dtype="float"),
    )

    reduced = reduce_to_number_of_peaks(original, n_max=5)

    remaining_intensities = reduced.peaks.intensities.tolist()
    assert remaining_intensities == [5., 4., 3., 100., 2.], "Expected different intensities."
    remaining_mz = reduced.peaks.mz.tolist()
    assert remaining_mz == [10., 30., 40., 50., 60.], "Expected different peaks to remain."
def apply_my_filters(s):
    """Example of a user-defined pre- and post-processing pipeline.

    Every filter receives the result of the previous one; the result of the
    last filter is returned.
    """
    with_defaults = default_filters(s)
    with_parent_mass = add_parent_mass(with_defaults)
    normalized = normalize_intensities(with_parent_mass)
    reduced = reduce_to_number_of_peaks(normalized, n_required=10, ratio_desired=0.5)
    in_mz_window = select_by_mz(reduced, mz_from=0, mz_to=1000)
    with_losses = add_losses(in_mz_window, loss_mz_from=10.0, loss_mz_to=200.0)
    return require_minimum_number_of_peaks(with_losses, n_required=5)
def spectrum_processing(s):
    """Example of a typical pre- and post-processing pipeline.

    Every filter receives the result of the previous one; the result of the
    last filter is returned.
    """
    with_defaults = default_filters(s)
    with_precursor = add_precursor_mz(with_defaults)
    normalized = normalize_intensities(with_precursor)
    reduced = reduce_to_number_of_peaks(
        normalized, n_required=5, ratio_desired=0.5, n_max=500)
    in_mz_window = select_by_mz(reduced, mz_from=0, mz_to=1000)
    with_losses = add_losses(in_mz_window, loss_mz_from=10.0, loss_mz_to=200.0)
    return require_minimum_number_of_peaks(with_losses, n_required=5)
def test_reduce_to_number_of_peaks_no_params_w_parent_mass():
    """Default arguments must not alter a spectrum that carries a parent_mass."""
    original = Spectrum(
        mz=numpy.array([10, 20, 30, 40], dtype="float"),
        intensities=numpy.array([0, 1, 10, 100], dtype="float"),
        metadata={"parent_mass": 50},
    )

    reduced = reduce_to_number_of_peaks(original)

    assert reduced == original, "Expected no changes."
def test_reduce_to_number_of_peaks_required_2_desired_3():
    """Four peaks, parent_mass 20, n_required=3, n_max=4, ratio_desired=0.1.

    Expects the three peaks at mz 20, 30 and 40 to remain.
    """
    original = Spectrum(
        mz=numpy.array([10, 20, 30, 40], dtype="float"),
        intensities=numpy.array([0, 1, 10, 100], dtype="float"),
        metadata={"parent_mass": 20},
    )

    reduced = reduce_to_number_of_peaks(
        original, n_required=3, n_max=4, ratio_desired=0.1)

    peak_count = len(reduced.peaks)
    assert peak_count == 3, "Expected that only 3 peaks remain."
    remaining_mz = reduced.peaks.mz.tolist()
    assert remaining_mz == [20., 30., 40.], "Expected different peaks to remain."
def test_reduce_to_number_of_peaks_ratio_given_but_no_parent_mass():
    """Passing ratio_desired for a spectrum without parent_mass must raise with a specific message."""
    original = Spectrum(
        mz=numpy.array([10, 20, 30, 40], dtype="float"),
        intensities=numpy.array([0, 1, 10, 100], dtype="float"),
    )

    with pytest.raises(Exception) as raised:
        reduce_to_number_of_peaks(original, n_required=4, ratio_desired=0.1)

    expected_msg = "Cannot use ratio_desired for spectrum without parent_mass."
    actual_msg = str(raised.value)
    assert expected_msg in actual_msg, "Expected specific exception message."
def test_reduce_to_number_of_peaks(mz, intensities, metadata, params, expected):
    """Check which peaks remain after reduce_to_number_of_peaks for one case.

    All arguments are supplied from outside this function (presumably by a
    pytest parametrization that is not visible here).

    Parameters
    ----------
    mz, intensities, metadata:
        Passed to ``SpectrumBuilder`` to build the input spectrum.
    params:
        Three-item sequence unpacked as ``(n_required, n_max, ratio_desired)``.
    expected:
        The mz values expected to remain, compared against
        ``spectrum.peaks.mz.tolist()``.
    """
    spectrum_in = SpectrumBuilder().with_mz(mz).with_intensities(
        intensities).with_metadata(metadata).build()
    n_required, n_max, ratio_desired = params

    spectrum = reduce_to_number_of_peaks(spectrum_in,
                                         n_required=n_required,
                                         n_max=n_max,
                                         ratio_desired=ratio_desired)

    # The expected count differs per case, so the failure message is built
    # from it instead of hard-coding "4".
    assert len(spectrum.peaks) == len(expected), \
        "Expected that only {} peaks remain.".format(len(expected))
    assert spectrum.peaks.mz.tolist() == expected, \
        "Expected different peaks to remain."
def post_process(s):
    """Normalize, filter and clean a spectrum, then add losses.

    Parameters
    ----------
    s:
        Input spectrum.

    Returns
    -------
    The processed spectrum, or ``None`` when the spectrum is ``None`` after
    the peak-count filters.
    """
    s = normalize_intensities(s)
    s = select_by_mz(s, mz_from=0, mz_to=1000)
    s = require_minimum_number_of_peaks(s, n_required=10)
    try:
        s = reduce_to_number_of_peaks(s, n_required=10, ratio_desired=0.5)
    except Exception:
        # Deliberate best-effort: when the reduction fails the spectrum is
        # kept as it was. Catching Exception instead of using a bare except
        # lets KeyboardInterrupt and SystemExit propagate.
        pass
    if s is None:
        return None

    # remove low peaks unless fewer than 10 peaks would be left
    s_remove_low_peaks = select_by_relative_intensity(s, intensity_from=0.001)
    if len(s_remove_low_peaks.peaks) >= 10:
        s = s_remove_low_peaks

    s = add_losses(s, loss_mz_from=5.0, loss_mz_to=200.0)
    return s
def test_reduce_to_number_of_peaks_desired_5_check_sorting():
    """After reducing six peaks to n_max=5, mz and intensities must stay aligned and in mz order."""
    builder = SpectrumBuilder()
    builder = builder.with_mz(numpy.array([10, 20, 30, 40, 50, 60], dtype="float"))
    builder = builder.with_intensities(numpy.array([5, 1, 4, 3, 100, 2], dtype="float"))
    original = builder.with_metadata({"parent_mass": 20}).build()

    reduced = reduce_to_number_of_peaks(original, n_max=5)

    remaining_intensities = reduced.peaks.intensities.tolist()
    assert remaining_intensities == [5., 4., 3., 100., 2.], "Expected different intensities."
    remaining_mz = reduced.peaks.mz.tolist()
    assert remaining_mz == [10., 30., 40., 50., 60.], "Expected different peaks to remain."
def test_reduce_to_number_of_peaks_n_max_4():
    """With n_max=4, a five-peak spectrum is expected to keep mz 20, 30, 40 and 50."""
    builder = SpectrumBuilder()
    builder = builder.with_mz(numpy.array([10, 20, 30, 40, 50], dtype="float"))
    builder = builder.with_intensities(numpy.array([1, 1, 10, 20, 100], dtype="float"))
    original = builder.build()

    reduced = reduce_to_number_of_peaks(original, n_max=4)

    expected = numpy.array([20, 30, 40, 50], dtype="float")
    peak_count = len(reduced.peaks)
    assert peak_count == len(expected), "Expected that only 4 peaks remain."
    numpy.testing.assert_array_equal(
        reduced.peaks.mz, expected,
        err_msg="Expected different peaks to remain.")
def test_reduce_to_number_of_peaks_set_to_none():
    """Test if spectrum is set to None if not enough peaks.

    Also checks that exactly one INFO record is logged on the 'matchms'
    logger for the rejected spectrum.
    """
    set_matchms_logger_level("INFO")
    try:
        mz = numpy.array([10, 20], dtype="float")
        intensities = numpy.array([0.5, 1], dtype="float")
        spectrum_in = SpectrumBuilder().with_mz(mz).with_intensities(
            intensities).with_metadata({"parent_mass": 50}).build()

        with LogCapture() as log:
            spectrum = reduce_to_number_of_peaks(spectrum_in, n_required=5)

        assert spectrum is None, "Expected spectrum to be set to None."
        log.check(
            ('matchms', 'INFO', "Spectrum with 2 (<5) peaks was set to None."))
    finally:
        # Reset the logger even when an assertion above fails, so the INFO
        # level set here does not carry over into other tests.
        reset_matchms_logger()
def spectrum_processing_s2v(
        spectrum: SpectrumType,
        **settings: Union[int, float]) -> Union[SpectrumType]:
    """Spectrum processing required for computing Spec2Vec scores.

    The keyword settings are completed by ``set_spec2vec_defaults``; the
    defaults listed below are the ones stated by the original documentation
    of this function (the actual values come from that helper, which is not
    visible here).

    Parameters
    ----------
    spectrum:
        Spectrum to process.
    mz_from:
        Peaks below this value are removed. Default = 10.0
    mz_to:
        Peaks above this value are removed. Default = 1000.0
    n_required:
        Number of minimal required peaks for a spectrum to be considered.
    n_max:
        Maximum number of peaks to be kept per spectrum. Default is 1000.
    loss_mz_from:
        Minimum allowed m/z value for losses. Default is 0.0.
    loss_mz_to:
        Maximum allowed m/z value for losses. Default is 1000.0.

    Returns
    -------
    The processed spectrum (never ``None``).

    Raises
    ------
    AssertionError
        If the spectrum is ``None`` after processing.
    """
    settings = set_spec2vec_defaults(**settings)
    spectrum = select_by_mz(spectrum,
                            mz_from=settings["mz_from"],
                            mz_to=settings["mz_to"])
    spectrum = reduce_to_number_of_peaks(spectrum,
                                         n_required=settings["n_required"],
                                         n_max=settings["n_max"])
    spectrum = add_losses(spectrum,
                          loss_mz_from=settings["loss_mz_from"],
                          loss_mz_to=settings["loss_mz_to"])
    # Raised explicitly instead of using an ``assert`` statement so the check
    # still runs under ``python -O``; the exception type is unchanged.
    if spectrum is None:
        raise AssertionError(
            "Expects Spectrum that has high enough quality and is not None")
    return spectrum
def test_empty_spectrum():
    """A None input must be handed back as None."""
    result = reduce_to_number_of_peaks(None)
    assert result is None, "Expected different handling of None spectrum."