def post_process_normal(spectrum_in: SpectrumType, min_peaks: int = 10) \
        -> Union[SpectrumType, None]:
    """Normal processing of spectra for Spec2Vec

    The input is cloned, its intensities are normalized, peaks are limited to
    the 0-1000 m/z window, the peak count is checked and reduced, very low
    peaks are dropped when enough peaks remain, and losses are added.

    Parameters
    ----------
    spectrum_in:
        Input spectrum.
    min_peaks:
        Minimum number of peaks to pass the spectrum (otherwise -> None).
        The same number of peaks must survive the low-intensity filter for
        the filtered spectrum to be used.

    Returns
    -------
    The processed spectrum, or None when the input is None, when the
    normalized intensities contain NaN, or when a filter step returns None.
    """
    if spectrum_in is None:
        return None

    s = spectrum_in.clone()
    s = normalize_intensities(s)
    if np.isnan(s.peaks[1]).any():
        return None  # remove spectra that have all intensities 0
    s = select_by_mz(s, mz_from=0, mz_to=1000)
    s = require_minimum_number_of_peaks(s, n_required=min_peaks)
    s = reduce_to_number_of_peaks(s, n_required=min_peaks, ratio_desired=0.5)
    if s is None:
        return None
    # Remove low peaks unless fewer than min_peaks peaks would be left; the
    # threshold follows min_peaks so the result never drops below the
    # minimum the caller asked for.
    s_remove_low_peaks = select_by_relative_intensity(s, intensity_from=0.001)
    if len(s_remove_low_peaks.peaks) >= min_peaks:
        s = s_remove_low_peaks
    # add losses to normally processed spectra
    s = add_losses(s, loss_mz_from=5.0, loss_mz_to=200.0)
    return s
示例#2
0
def test_require_minimum_number_of_peaks_required_4_ratio_none(spectrum_in: SpectrumType):
    """Test if parent_mass scaling is properly ignored when not passing ratio_required."""
    # parent_mass is set on purpose: without ratio_required it should not
    # influence the required number of peaks.
    spectrum_in.set("parent_mass", 100)

    spectrum = require_minimum_number_of_peaks(spectrum_in, n_required=4)

    # Adjacent string literals are joined verbatim, so the first part needs
    # a trailing space to keep the message readable.
    assert spectrum == spectrum_in, "Expected the spectrum to qualify because the number of peaks (4) is equal to the " \
                                    "required number (4)."
示例#3
0
 def apply_my_filters(s):
     """Pass the spectrum through each filter in turn and return the result."""
     # Steps run in order; each one receives the output of the previous one.
     pipeline = (
         default_filters,
         add_parent_mass,
         normalize_intensities,
         lambda spec: select_by_relative_intensity(spec, intensity_from=0.0, intensity_to=1.0),
         lambda spec: select_by_mz(spec, mz_from=0, mz_to=1000),
         lambda spec: require_minimum_number_of_peaks(spec, n_required=5),
     )
     for step in pipeline:
         s = step(s)
     return s
示例#4
0
def test_require_minimum_number_of_peaks_no_params():
    """A 4-peak spectrum filtered with default arguments should yield None."""
    peak_mz = numpy.array([10, 20, 30, 40], dtype="float")
    peak_intensities = numpy.array([0, 1, 10, 100], dtype="float")

    filtered = require_minimum_number_of_peaks(
        Spectrum(mz=peak_mz, intensities=peak_intensities)
    )

    assert filtered is None, "Expected None because the number of peaks (4) is less than the default threshold (10)."
示例#5
0
def test_require_minimum_number_of_peaks_required_4():
    """A 4-peak spectrum should be returned when n_required is 4."""
    mz = numpy.array([10, 20, 30, 40], dtype="float")
    intensities = numpy.array([0, 1, 10, 100], dtype="float")
    spectrum_in = Spectrum(mz=mz, intensities=intensities)

    spectrum = require_minimum_number_of_peaks(spectrum_in, n_required=4)

    # Adjacent string literals are joined verbatim, so the first part needs
    # a trailing space to keep the message readable.
    assert spectrum == spectrum_in, "Expected the spectrum to qualify because the number of peaks (4) is equal to the " \
                                    "required number (4)."
示例#6
0
 def apply_my_filters(s):
     """Example of a user-defined pre- and post-processing pipeline.

     The spectrum is passed through each step in order and the final
     result is returned.
     """
     pipeline = (
         default_filters,
         add_parent_mass,
         normalize_intensities,
         lambda spec: reduce_to_number_of_peaks(spec, n_required=10, ratio_desired=0.5),
         lambda spec: select_by_mz(spec, mz_from=0, mz_to=1000),
         lambda spec: add_losses(spec, loss_mz_from=10.0, loss_mz_to=200.0),
         lambda spec: require_minimum_number_of_peaks(spec, n_required=5),
     )
     for step in pipeline:
         s = step(s)
     return s
示例#7
0
def spectrum_processing(s):
    """Example of a typical pre- and post-processing pipeline.

    The spectrum is passed through each step in order and the final
    result is returned.
    """
    pipeline = (
        default_filters,
        add_precursor_mz,
        normalize_intensities,
        lambda spec: reduce_to_number_of_peaks(spec, n_required=5, ratio_desired=0.5, n_max=500),
        lambda spec: select_by_mz(spec, mz_from=0, mz_to=1000),
        lambda spec: add_losses(spec, loss_mz_from=10.0, loss_mz_to=200.0),
        lambda spec: require_minimum_number_of_peaks(spec, n_required=5),
    )
    for step in pipeline:
        s = step(s)
    return s
示例#8
0
def test_require_minimum_number_of_peaks_required_4_ratio_none():
    """Test if parent_mass scaling is properly ignored when not passing ratio_required."""
    mz = numpy.array([10, 20, 30, 40], dtype="float")
    intensities = numpy.array([0, 1, 10, 100], dtype="float")
    # parent_mass is present on purpose: without ratio_required it should
    # not influence the required number of peaks.
    metadata = dict(parent_mass=100)
    spectrum_in = Spectrum(mz=mz, intensities=intensities, metadata=metadata)

    spectrum = require_minimum_number_of_peaks(spectrum_in, n_required=4)

    # Adjacent string literals are joined verbatim, so the first part needs
    # a trailing space to keep the message readable.
    assert spectrum == spectrum_in, "Expected the spectrum to qualify because the number of peaks (4) is equal to the " \
                                    "required number (4)."
示例#9
0
def test_require_minimum_number_of_peaks_required_5_or_1():
    """A 4-peak spectrum should be rejected with n_required=5 and ratio_required=0.1."""
    peak_mz = numpy.array([10, 20, 30, 40], dtype="float")
    peak_intensities = numpy.array([0, 1, 10, 100], dtype="float")
    candidate = Spectrum(mz=peak_mz,
                         intensities=peak_intensities,
                         metadata=dict(parent_mass=10))

    filter_kwargs = {"n_required": 5, "ratio_required": 0.1}
    filtered = require_minimum_number_of_peaks(candidate, **filter_kwargs)

    assert filtered is None, (
        "Did not expect the spectrum to qualify because the number of peaks (4) is less "
        "than the required number (5)."
    )
示例#10
0
def test_require_minimum_number_of_peaks_required_4_or_1():
    """A 4-peak spectrum should pass with n_required=4 and ratio_required=0.1."""
    mz = numpy.array([10, 20, 30, 40], dtype="float")
    intensities = numpy.array([0, 1, 10, 100], dtype="float")
    metadata = dict(parent_mass=10)
    spectrum_in = Spectrum(mz=mz, intensities=intensities, metadata=metadata)

    spectrum = require_minimum_number_of_peaks(spectrum_in,
                                               n_required=4,
                                               ratio_required=0.1)

    # Adjacent string literals are joined verbatim, so the first part needs
    # a trailing space to keep the message readable.
    assert spectrum == spectrum_in, "Expected the spectrum to qualify because the number of peaks (4) is equal to the " \
                                    "required number (4)."
示例#11
0
def post_process(s):
    """Post-process a spectrum: normalize, filter peaks and add losses.

    Intensities are normalized, peaks are limited to the 0-1000 m/z window,
    at least 10 peaks are required, the peak count is reduced on a
    best-effort basis, very low peaks are dropped when at least 10 peaks
    remain, and losses are added.

    Returns the processed spectrum, or None when a filter step returned None.
    """
    s = normalize_intensities(s)
    s = select_by_mz(s, mz_from=0, mz_to=1000)
    s = require_minimum_number_of_peaks(s, n_required=10)
    try:
        s = reduce_to_number_of_peaks(s, n_required=10, ratio_desired=0.5)
    except Exception:
        # Best-effort step: if the reduction fails, continue with the
        # unreduced spectrum. Only Exception is caught so that
        # KeyboardInterrupt / SystemExit still propagate.
        pass
    if s is None:
        return None
    # Drop low peaks only if at least 10 peaks remain afterwards.
    s_remove_low_peaks = select_by_relative_intensity(s, intensity_from=0.001)
    if len(s_remove_low_peaks.peaks) >= 10:
        s = s_remove_low_peaks

    s = add_losses(s, loss_mz_from=5.0, loss_mz_to=200.0)
    return s
def post_process_md(spectrum_in: SpectrumType,
                    low_int_cutoff: float = 0.05,
                    min_peaks: int = 10,
                    max_peaks: int = 30) -> Union[SpectrumType, None]:
    """Processing of spectra that are used for mass difference extraction

    Parameters
    ----------
    spectrum_in:
        Input spectrum.
    low_int_cutoff:
        Lower intensity cutoff for the peaks selected for MD
    min_peaks:
        Minimum number of peaks to pass the spectrum (otherwise -> None).
        The same number of peaks must survive each intensity filter for the
        filtered spectrum to be used.
    max_peaks:
        Maximum number of peaks allowed in the spectrum (ranked on intensity)

    Returns
    -------
    The processed spectrum, or None when the input is None, when the
    normalized intensities contain NaN, or when a filter step returns None.
    """
    if spectrum_in is None:
        return None

    s = spectrum_in.clone()
    # remove precursor_mz from spectra so neutral losses don't end up in MDs
    s = remove_precursor_mz_peak(s)
    s = normalize_intensities(s)
    if np.isnan(s.peaks[1]).any():
        return None  # remove spectra that have all intensities 0
    s = select_by_mz(s, mz_from=0, mz_to=1000)
    s = require_minimum_number_of_peaks(s, n_required=min_peaks)
    s = reduce_to_number_of_peaks(s, n_required=min_peaks, ratio_desired=0.5)
    if s is None:
        return None
    # Remove low peaks unless fewer than min_peaks peaks would be left; the
    # threshold follows min_peaks so the result never drops below the
    # minimum the caller asked for.
    s_remove_low_peaks = select_by_relative_intensity(s, intensity_from=0.001)
    if len(s_remove_low_peaks.peaks) >= min_peaks:
        s = s_remove_low_peaks
    # do an additional removal step with a different intensity cutoff,
    # again only accepted when at least min_peaks peaks remain
    s_second_peak_removal = select_by_relative_intensity(
        s, intensity_from=low_int_cutoff)
    if len(s_second_peak_removal.peaks) >= min_peaks:
        s = s_second_peak_removal

    # reduce to at most max_peaks peaks
    s = reduce_to_number_of_peaks(s, n_required=min_peaks, n_max=max_peaks)
    return s
def post_process_classical(spectrum_in: SpectrumType, min_peaks: int = 10) \
        -> Union[SpectrumType, None]:
    """Prepare a spectrum for calculating classical scores

    Parameters
    ----------
    spectrum_in:
        Spectrum to process; None is passed through as None.
    min_peaks:
        Number of peaks required for the spectrum to pass (otherwise -> None)
    """
    if spectrum_in is None:
        return None

    normalized = normalize_intensities(spectrum_in.clone())
    # NaN intensities mark spectra whose intensities were all 0: drop them
    if np.isnan(normalized.peaks[1]).any():
        return None

    within_mz_window = select_by_mz(normalized, mz_from=0, mz_to=1000)
    with_enough_peaks = require_minimum_number_of_peaks(
        within_mz_window, n_required=min_peaks)
    return select_by_relative_intensity(
        with_enough_peaks, intensity_from=0.01, intensity_to=1.0)
示例#14
0
def test_require_minimum_number_of_peaks_required_5_or_10(spectrum_in: SpectrumType):
    """The fixture spectrum should be rejected with parent_mass=100, n_required=5, ratio_required=0.1."""
    spectrum_in.set("parent_mass", 100)

    filter_kwargs = {"n_required": 5, "ratio_required": 0.1}
    filtered = require_minimum_number_of_peaks(spectrum_in, **filter_kwargs)

    assert filtered is None, (
        "Did not expect the spectrum to qualify because the number of peaks (4) is less "
        "than the required number (10)."
    )
示例#15
0
def test_require_minimum_number_of_peaks_no_params(spectrum_in: SpectrumType):
    """The fixture spectrum filtered with default arguments should yield None."""
    filtered = require_minimum_number_of_peaks(spectrum_in)

    failure_message = "Expected None because the number of peaks (4) is less than the default threshold (10)."
    assert filtered is None, failure_message
示例#16
0
def test_empty_spectrum():
    """Passing None instead of a spectrum should give None back."""
    filtered = require_minimum_number_of_peaks(None)

    assert filtered is None, "Expected different handling of None spectrum."
示例#17
0
def test_require_minimum_number_of_peaks_required_4_or_1(spectrum_in: SpectrumType):
    """The fixture spectrum should pass with n_required=4 and ratio_required=0.1."""
    spectrum = require_minimum_number_of_peaks(spectrum_in, n_required=4, ratio_required=0.1)

    # Adjacent string literals are joined verbatim, so the first part needs
    # a trailing space to keep the message readable.
    assert spectrum == spectrum_in, "Expected the spectrum to qualify because the number of peaks (4) is equal to the " \
                                    "required number (4)."