Exemplo n.º 1
0
def post_process_normal(spectrum_in: SpectrumType, min_peaks: int = 10) \
        -> Union[SpectrumType, None]:
    """Apply the standard Spec2Vec processing chain to a spectrum.

    The input is cloned, so ``spectrum_in`` itself is not passed to any
    filter. The clone is normalized, restricted with ``select_by_mz``
    (``mz_from=0, mz_to=1000``), checked against ``min_peaks``, reduced in
    peak count, optionally stripped of low relative-intensity peaks and
    finally extended with losses (``loss_mz_from=5.0, loss_mz_to=200.0``).

    Parameters
    ----------
    spectrum_in:
        Input spectrum, may be None.
    min_peaks:
        Minimum number of peaks to pass the spectrum (otherwise -> None)

    Returns
    -------
    The processed spectrum, or None when the input is None, when NaN values
    are found after normalization, or when a filter step yields None.
    """
    if spectrum_in is None:
        return None

    spectrum = normalize_intensities(spectrum_in.clone())
    # Per the original author's note, NaN intensities after normalization
    # mark spectra whose intensities were all 0; such spectra are dropped.
    has_nan = any(np.isnan(spectrum.peaks[1]))
    if has_nan:
        return None

    spectrum = select_by_mz(spectrum, mz_from=0, mz_to=1000)
    spectrum = require_minimum_number_of_peaks(spectrum, n_required=min_peaks)
    spectrum = reduce_to_number_of_peaks(
        spectrum, n_required=min_peaks, ratio_desired=0.5)
    if spectrum is None:
        return None

    # Only accept the low-peak-filtered variant if it still has >= 10 peaks.
    # NOTE(review): the threshold is a literal 10, not min_peaks - confirm
    # whether that is intended.
    without_low_peaks = select_by_relative_intensity(
        spectrum, intensity_from=0.001)
    kept = spectrum if len(without_low_peaks.peaks) < 10 else without_low_peaks

    # Losses are added as the last step of the normal processing.
    return add_losses(kept, loss_mz_from=5.0, loss_mz_to=200.0)
def test_reduce_to_number_of_peaks_ratio_given_but_no_parent_mass():
    """Passing ratio_desired for a spectrum without parent_mass must change nothing."""
    spectrum_in = Spectrum(
        mz=numpy.array([10, 20, 30, 40], dtype="float"),
        intensities=numpy.array([0, 1, 10, 100], dtype="float"),
    )

    result = reduce_to_number_of_peaks(spectrum_in,
                                       n_required=4,
                                       ratio_desired=0.1)

    assert result == spectrum_in, "Expected the spectrum to remain unchanged."
Exemplo n.º 3
0
def test_reduce_to_number_of_peaks_no_changes(metadata):
    """Default parameters must return a spectrum equal to the input.

    ``metadata`` is supplied from outside this function (fixture or
    parametrization not visible here).
    """
    builder = SpectrumBuilder()
    builder = builder.with_mz(numpy.array([10, 20, 30, 40], dtype="float"))
    builder = builder.with_intensities(
        numpy.array([0, 1, 10, 100], dtype="float"))
    spectrum_in = builder.with_metadata(metadata).build()

    result = reduce_to_number_of_peaks(spectrum_in)

    assert result == spectrum_in, "Expected no changes."
Exemplo n.º 4
0
def test_reduce_to_number_of_peaks_no_params():
    """Calling the filter with defaults only must leave the spectrum equal."""
    spectrum_in = Spectrum(
        mz=numpy.array([10, 20, 30, 40], dtype="float"),
        intensities=numpy.array([0, 1, 10, 100], dtype="float"),
    )

    result = reduce_to_number_of_peaks(spectrum_in)

    assert result == spectrum_in, "Expected no changes."
Exemplo n.º 5
0
def post_process_md(spectrum_in: SpectrumType,
                    low_int_cutoff: float = 0.05,
                    min_peaks: int = 10,
                    max_peaks: int = 30) -> Union[SpectrumType, None]:
    """Prepare a spectrum for mass difference (MD) extraction.

    Works on a clone of the input. The precursor peak is removed, the
    intensities are normalized, peaks are restricted with ``select_by_mz``
    (``mz_from=0, mz_to=1000``), the peak count is checked and reduced, two
    relative-intensity filters are tried in turn, and the result is finally
    capped at ``max_peaks`` peaks.

    Parameters
    ----------
    spectrum_in:
        Input spectrum, may be None.
    low_int_cutoff:
        Lower intensity cutoff for the peaks selected for MD
    min_peaks:
        Minimum number of peaks to pass the spectrum (otherwise -> None)
    max_peaks:
        Maximum number of peaks allowed in the spectrum (ranked on intensity)

    Returns
    -------
    The processed spectrum, or None when the input is None, when NaN values
    are found after normalization, or when a filter step yields None.
    """
    if spectrum_in is None:
        return None

    # Remove the precursor m/z peak from the clone so that neutral losses
    # don't end up in the mass differences.
    spectrum = remove_precursor_mz_peak(spectrum_in.clone())
    spectrum = normalize_intensities(spectrum)
    # Per the original author's note, NaN intensities after normalization
    # mark spectra whose intensities were all 0; such spectra are dropped.
    if any(np.isnan(spectrum.peaks[1])):
        return None

    spectrum = select_by_mz(spectrum, mz_from=0, mz_to=1000)
    spectrum = require_minimum_number_of_peaks(spectrum, n_required=min_peaks)
    spectrum = reduce_to_number_of_peaks(
        spectrum, n_required=min_peaks, ratio_desired=0.5)
    if spectrum is None:
        return None

    # Two successive low-peak removals: first the fixed 0.001 cutoff, then
    # the caller-supplied one. Each is accepted only if >= 10 peaks remain.
    # NOTE(review): the threshold is a literal 10, not min_peaks - confirm
    # whether that is intended.
    for cutoff in (0.001, low_int_cutoff):
        candidate = select_by_relative_intensity(spectrum,
                                                 intensity_from=cutoff)
        if len(candidate.peaks) >= 10:
            spectrum = candidate

    # Cap the number of peaks at max_peaks.
    return reduce_to_number_of_peaks(
        spectrum, n_required=min_peaks, n_max=max_peaks)
def test_reduce_to_number_of_peaks_n_max_4():
    """With n_max=4 a five-peak spectrum must keep the peaks at m/z 20-50."""
    spectrum_in = Spectrum(
        mz=numpy.array([10, 20, 30, 40, 50], dtype="float"),
        intensities=numpy.array([1, 1, 10, 20, 100], dtype="float"),
    )

    reduced = reduce_to_number_of_peaks(spectrum_in, n_max=4)

    assert len(reduced.peaks) == 4, "Expected that only 4 peaks remain."
    remaining_mz = reduced.peaks.mz.tolist()
    assert remaining_mz == [20., 30., 40., 50.], "Expected different peaks to remain."
def test_reduce_to_number_of_peaks_desired_5_check_sorting():
    """After reducing to n_max=5, m/z and intensities must keep their order."""
    spectrum_in = Spectrum(
        mz=numpy.array([10, 20, 30, 40, 50, 60], dtype="float"),
        intensities=numpy.array([5, 1, 4, 3, 100, 2], dtype="float"),
    )

    reduced = reduce_to_number_of_peaks(spectrum_in, n_max=5)

    # The peak at m/z 20 (intensity 1) is the one expected to be missing.
    kept_intensities = reduced.peaks.intensities.tolist()
    assert kept_intensities == [5., 4., 3., 100., 2.], "Expected different intensities."
    kept_mz = reduced.peaks.mz.tolist()
    assert kept_mz == [10., 30., 40., 50., 60.], "Expected different peaks to remain."
Exemplo n.º 8
0
 def apply_my_filters(s):
     """Run an example user-defined filter pipeline on a spectrum.

     Shows how a user would typically assemble a custom pre- and
     post-processing chain: each filter receives the result of the previous
     one, and the result of the last filter is returned.
     """
     pipeline = (
         (default_filters, {}),
         (add_parent_mass, {}),
         (normalize_intensities, {}),
         (reduce_to_number_of_peaks, {"n_required": 10, "ratio_desired": 0.5}),
         (select_by_mz, {"mz_from": 0, "mz_to": 1000}),
         (add_losses, {"loss_mz_from": 10.0, "loss_mz_to": 200.0}),
         (require_minimum_number_of_peaks, {"n_required": 5}),
     )
     spectrum = s
     for apply_filter, options in pipeline:
         spectrum = apply_filter(spectrum, **options)
     return spectrum
Exemplo n.º 9
0
def spectrum_processing(s):
    """Run an example pre- and post-processing pipeline on a spectrum.

    The filters are applied one after another, each on the result of the
    previous one; the result of the last filter is returned.
    """
    pipeline = (
        (default_filters, {}),
        (add_precursor_mz, {}),
        (normalize_intensities, {}),
        (reduce_to_number_of_peaks,
         {"n_required": 5, "ratio_desired": 0.5, "n_max": 500}),
        (select_by_mz, {"mz_from": 0, "mz_to": 1000}),
        (add_losses, {"loss_mz_from": 10.0, "loss_mz_to": 200.0}),
        (require_minimum_number_of_peaks, {"n_required": 5}),
    )
    spectrum = s
    for apply_filter, options in pipeline:
        spectrum = apply_filter(spectrum, **options)
    return spectrum
Exemplo n.º 10
0
def test_reduce_to_number_of_peaks_no_params_w_parent_mass():
    """Defaults must leave a spectrum that carries a parent_mass unchanged."""
    spectrum_in = Spectrum(
        mz=numpy.array([10, 20, 30, 40], dtype="float"),
        intensities=numpy.array([0, 1, 10, 100], dtype="float"),
        metadata={"parent_mass": 50},
    )

    result = reduce_to_number_of_peaks(spectrum_in)

    assert result == spectrum_in, "Expected no changes."
def test_reduce_to_number_of_peaks_required_2_desired_3():
    """Combine n_required, n_max and ratio_desired on a four-peak spectrum.

    Here ratio_desired * parent_mass is 0.1 * 20 = 2, n_required is 3 and
    n_max is 4; three peaks (m/z 20, 30, 40) are expected to remain.
    """
    spectrum_in = Spectrum(
        mz=numpy.array([10, 20, 30, 40], dtype="float"),
        intensities=numpy.array([0, 1, 10, 100], dtype="float"),
        metadata={"parent_mass": 20},
    )

    reduced = reduce_to_number_of_peaks(spectrum_in,
                                        n_required=3,
                                        n_max=4,
                                        ratio_desired=0.1)

    assert len(reduced.peaks) == 3, "Expected that only 3 peaks remain."
    remaining_mz = reduced.peaks.mz.tolist()
    assert remaining_mz == [20., 30., 40.], "Expected different peaks to remain."
Exemplo n.º 12
0
def test_reduce_to_number_of_peaks_ratio_given_but_no_parent_mass():
    """Passing ratio_desired for a spectrum without parent_mass must raise."""
    spectrum_in = Spectrum(
        mz=numpy.array([10, 20, 30, 40], dtype="float"),
        intensities=numpy.array([0, 1, 10, 100], dtype="float"),
    )
    expected_msg = "Cannot use ratio_desired for spectrum without parent_mass."

    with pytest.raises(Exception) as raised:
        reduce_to_number_of_peaks(spectrum_in, n_required=4, ratio_desired=0.1)

    # The exception text must contain the specific explanation.
    raised_text = str(raised.value)
    assert expected_msg in raised_text, "Expected specific exception message."
Exemplo n.º 13
0
def test_reduce_to_number_of_peaks(mz, intensities, metadata, params,
                                   expected):
    """Check which peaks remain for a given parameter combination.

    All arguments are supplied from outside this function (the
    parametrization is not visible here).

    Parameters
    ----------
    mz, intensities, metadata:
        Values used to build the input spectrum.
    params:
        Triple ``(n_required, n_max, ratio_desired)`` passed to the filter.
    expected:
        List of m/z values expected to remain after filtering.
    """
    spectrum_in = SpectrumBuilder().with_mz(mz).with_intensities(
        intensities).with_metadata(metadata).build()
    n_required, n_max, ratio_desired = params

    spectrum = reduce_to_number_of_peaks(spectrum_in,
                                         n_required=n_required,
                                         n_max=n_max,
                                         ratio_desired=ratio_desired)

    # The expected count differs per parameter set, so the failure message
    # reports the actual expectation instead of a fixed number.
    assert len(spectrum.peaks) == len(expected), \
        f"Expected that only {len(expected)} peaks remain."
    assert spectrum.peaks.mz.tolist(
    ) == expected, "Expected different peaks to remain."
Exemplo n.º 14
0
def post_process(s):
    """Normalize, filter and extend a spectrum with losses.

    Steps, in order: ``normalize_intensities``, ``select_by_mz``
    (``mz_from=0, mz_to=1000``), ``require_minimum_number_of_peaks``
    (``n_required=10``), a best-effort ``reduce_to_number_of_peaks``
    (``n_required=10, ratio_desired=0.5``), an optional
    ``select_by_relative_intensity`` (``intensity_from=0.001``) and finally
    ``add_losses`` (``loss_mz_from=5.0, loss_mz_to=200.0``).

    Parameters
    ----------
    s:
        Input spectrum, may be None.

    Returns
    -------
    The processed spectrum, or None if the input is None or the spectrum is
    None after the peak-count steps.
    """
    import logging

    # Explicit guard so a missing spectrum never reaches the filters.
    if s is None:
        return None

    s = normalize_intensities(s)
    s = select_by_mz(s, mz_from=0, mz_to=1000)
    s = require_minimum_number_of_peaks(s, n_required=10)
    try:
        s = reduce_to_number_of_peaks(s, n_required=10, ratio_desired=0.5)
    except Exception:
        # Best effort: if the reduction fails, continue with the unreduced
        # spectrum. Only Exception is caught, so KeyboardInterrupt and
        # SystemExit still propagate; the failure is logged, not hidden.
        logging.getLogger(__name__).debug(
            "reduce_to_number_of_peaks failed; keeping unreduced spectrum.",
            exc_info=True)
    if s is None:
        return None

    # Drop low peaks only if at least 10 peaks are left afterwards.
    s_remove_low_peaks = select_by_relative_intensity(s, intensity_from=0.001)
    if len(s_remove_low_peaks.peaks) >= 10:
        s = s_remove_low_peaks

    s = add_losses(s, loss_mz_from=5.0, loss_mz_to=200.0)
    return s
Exemplo n.º 15
0
def test_reduce_to_number_of_peaks_desired_5_check_sorting():
    """After reducing to n_max=5, m/z and intensities must keep their order."""
    builder = SpectrumBuilder()
    builder = builder.with_mz(
        numpy.array([10, 20, 30, 40, 50, 60], dtype="float"))
    builder = builder.with_intensities(
        numpy.array([5, 1, 4, 3, 100, 2], dtype="float"))
    spectrum_in = builder.with_metadata({"parent_mass": 20}).build()

    reduced = reduce_to_number_of_peaks(spectrum_in, n_max=5)

    # The peak at m/z 20 (intensity 1) is the one expected to be missing.
    kept_intensities = reduced.peaks.intensities.tolist()
    assert kept_intensities == [5., 4., 3., 100., 2.], \
        "Expected different intensities."
    kept_mz = reduced.peaks.mz.tolist()
    assert kept_mz == [10., 30., 40., 50., 60.], \
        "Expected different peaks to remain."
Exemplo n.º 16
0
def test_reduce_to_number_of_peaks_n_max_4():
    """With n_max=4 a five-peak spectrum must keep the peaks at m/z 20-50."""
    builder = SpectrumBuilder().with_mz(
        numpy.array([10, 20, 30, 40, 50], dtype="float"))
    spectrum_in = builder.with_intensities(
        numpy.array([1, 1, 10, 20, 100], dtype="float")).build()

    reduced = reduce_to_number_of_peaks(spectrum_in, n_max=4)

    expected_mz = numpy.array([20, 30, 40, 50], dtype="float")

    n_remaining = len(reduced.peaks)
    assert n_remaining == len(expected_mz), "Expected that only 4 peaks remain."
    numpy.testing.assert_array_equal(
        reduced.peaks.mz,
        expected_mz,
        err_msg="Expected different peaks to remain.",
    )
Exemplo n.º 17
0
def test_reduce_to_number_of_peaks_set_to_none():
    """Test if spectrum is set to None if not enough peaks.

    A two-peak spectrum is filtered with ``n_required=5``; the result must
    be None and a matching INFO record must be logged under 'matchms'.
    """
    set_matchms_logger_level("INFO")
    try:
        mz = numpy.array([10, 20], dtype="float")
        intensities = numpy.array([0.5, 1], dtype="float")
        spectrum_in = SpectrumBuilder().with_mz(mz).with_intensities(
            intensities).with_metadata({
                "parent_mass": 50
            }).build()

        with LogCapture() as log:
            spectrum = reduce_to_number_of_peaks(spectrum_in, n_required=5)

        assert spectrum is None, "Expected spectrum to be set to None."
        log.check(
            ('matchms', 'INFO', "Spectrum with 2 (<5) peaks was set to None."))
    finally:
        # Always restore the logger, even when an assertion above fails, so
        # the INFO level does not leak into other tests.
        reset_matchms_logger()
Exemplo n.º 18
0
def spectrum_processing_s2v(
        spectrum: SpectrumType,
        **settings: Union[int, float]) -> Union[SpectrumType]:
    """Process a spectrum as required for computing Spec2Vec scores.

    The keyword settings are completed by ``set_spec2vec_defaults`` and then
    used for three steps: ``select_by_mz``, ``reduce_to_number_of_peaks``
    and ``add_losses``. The default values quoted below are taken from the
    original documentation; the defaults themselves live in
    ``set_spec2vec_defaults`` and are not verifiable here.

    Parameters
    ----------
    spectrum:
        Spectrum to process
    mz_from:
        Peaks below this value are removed. Default = 10.0
    mz_to:
        Peaks above this value are removed. Default = 1000.0
    n_required:
        Number of minimal required peaks for a spectrum to be considered.
    n_max:
        Maximum number of peaks to be kept per spectrum. Default is 1000.
    loss_mz_from:
        Minimum allowed m/z value for losses. Default is 0.0.
    loss_mz_to:
        Maximum allowed m/z value for losses. Default is 1000.0.

    Returns
    -------
    The processed spectrum.

    Raises
    ------
    AssertionError
        If the spectrum is None after processing.
    """
    config = set_spec2vec_defaults(**settings)

    in_mz_range = select_by_mz(spectrum,
                               mz_from=config["mz_from"],
                               mz_to=config["mz_to"])
    peak_limited = reduce_to_number_of_peaks(in_mz_range,
                                             n_required=config["n_required"],
                                             n_max=config["n_max"])
    processed = add_losses(peak_limited,
                           loss_mz_from=config["loss_mz_from"],
                           loss_mz_to=config["loss_mz_to"])

    # A None result means the spectrum did not survive the filters.
    assert processed is not None, \
        "Expects Spectrum that has high enough quality and is not None"
    return processed
Exemplo n.º 19
0
def test_empty_spectrum():
    """A None input must come back from the filter as None."""
    result = reduce_to_number_of_peaks(None)

    assert result is None, "Expected different handling of None spectrum."