示例#1
0
def safe_read_key(spectrum: Spectrum, key: str) -> Optional[float]:
    """Read a metadata entry from a spectrum and convert it to float.

    Looks up ``key`` via ``spectrum.get`` and tries to convert the result to
    a float. Returns 'None' when the key is not present (or its value is
    'None'), or when the value cannot be converted to a float.

    Parameters
    ----------
    spectrum:
        Spectrum from which to read the key.
    key:
        Key to be read from the spectrum metadata.

    Returns
    -------
        Either the key's value converted to float or 'None'.
    """
    value = spectrum.get(key, default=None)
    if value is None:
        return None

    try:
        return float(value)
    except (ValueError, TypeError):
        # ValueError: a string that does not represent a number.
        # TypeError: an object float() does not accept at all (e.g. list, tuple).
        # Both mean "not convertible", which this helper reports as None.
        return None
示例#2
0
def test_clean_inchis_harmonize_strings():
    """Check that clean_inchis brings differently written InChI strings into one style."""
    # Same InChI, once with and once without the "InChI=" prefix
    raw_inchis = ('InChI=1S/C6H12', '1S/C6H12')
    spectrums_in = [
        Spectrum(mz=np.array([], dtype='float'),
                 intensities=np.array([], dtype='float'),
                 metadata={"inchi": raw_inchi})
        for raw_inchi in raw_inchis
    ]

    spectrum1, spectrum2 = [clean_inchis(spectrum_in) for spectrum_in in spectrums_in]

    cleaned_inchi = spectrum1.get("inchi")
    assert cleaned_inchi.startswith('"InChI='), "InChI style not as expected"
    assert spectrum1 == spectrum2, 'after cleaning both spectra should be equal'
示例#3
0
def test_train_new_word2vec_model_with_logger_and_saving(tmp_path):
    """Train a dummy model with the progress logger enabled and inspect the saved file."""
    # Fake corpus: 100 documents, each built from 10 peaks starting at a different m/z
    documents = [
        SpectrumDocument(
            Spectrum(mz=numpy.linspace(start, 9+start, 10),
                     intensities=numpy.ones((10)).astype("float"),
                     metadata={}),
            n_decimals=1)
        for start in range(100)
    ]

    # Train the model; it is written to `filename` as part of the call
    filename = os.path.join(tmp_path, "test.model")
    model = train_new_word2vec_model(documents,
                                     iterations=20,
                                     filename=filename,
                                     size=20,
                                     progress_logger=True)

    # The model file must exist on disk
    assert os.path.isfile(filename), "Could not find saved model file."

    # Reload from disk and verify the stored settings
    model = gensim.models.Word2Vec.load(filename)
    assert model.sg == 0, "Expected different default value."
    assert model.negative == 5, "Expected different default value."
    assert model.window == 500, "Expected different default value."
    assert model.alpha == 0.025, "Expected different default value."
    assert model.min_alpha == 0.02, "Expected different default value."
    assert model.epochs == 20, "Expected differnt number of epochs."
    word_vectors = model.wv
    assert word_vectors.vector_size == 20, "Expected differnt vector size."
    assert len(word_vectors.vocab) == 109, "Expected different number of words in vocab."
    probe_word = documents[0].words[1]
    assert model.wv.get_vector(probe_word).shape[0] == 20, "Expected differnt vector size."
示例#4
0
def test_clean_compound_name_removing_known_non_name_parts():
    """Run clean_compound_name on difficult but representative names."""
    # (raw compound name, expected cleaned name)
    raw_and_expected = [
        ("MLS000863588-01!2-methoxy-3-methyl-9H-carbazole",
         "2-methoxy-3-methyl-9H-carbazole"),
        ("NCGC00160217-01!SOPHOCARPINE", "SOPHOCARPINE"),
        ("0072_2-Mercaptobenzothiaz", "2-Mercaptobenzothiaz"),
        (r"MassbankEU:ET110206 NPE_327.1704_12.2|N-succinylnorpheniramine",
         "N-succinylnorpheniramine"),
        ("Massbank:CE000307 Trans-Zeatin-[d5]", "Trans-Zeatin-[d5]"),
        ("HMDB:HMDB00500-718 4-Hydroxybenzoic acid", "4-Hydroxybenzoic acid"),
        ("MoNA:2346734 Piroxicam (Feldene)", "Piroxicam (Feldene)"),
        ("ReSpect:PS013405 option1|option2|option3", "option3"),
        ("ReSpect:PS013405 option1name", "option1name"),
        ("4,4-Dimethylcholest-8(9),24-dien-3.beta.-ol  231.2",
         "4,4-Dimethylcholest-8(9),24-dien-3.beta.-ol"),
    ]
    for raw_name, expected_name in raw_and_expected:
        empty_mz = numpy.array([], dtype="float")
        empty_intensities = numpy.array([], dtype="float")
        spectrum_in = Spectrum(mz=empty_mz,
                               intensities=empty_intensities,
                               metadata={"compound_name": raw_name})

        spectrum = clean_compound_name(spectrum_in)

        cleaned_name = spectrum.get("compound_name")
        assert cleaned_name == expected_name, "Expected different cleaned name."
示例#5
0
def test_modified_cosine_with_mass_shift_5():
    """Modified cosine for two spectra whose precursor m/z differ by 5."""
    mz_1 = numpy.array([100, 150, 200, 300, 500, 510, 1100], dtype="float")
    intensities_1 = numpy.array([700, 200, 100, 1000, 200, 5, 500], dtype="float")
    spectrum_1 = Spectrum(mz=mz_1,
                          intensities=intensities_1,
                          metadata={"precursor_mz": 1000.0})

    mz_2 = numpy.array([55, 105, 205, 304.5, 494.5, 515.5, 1045], dtype="float")
    intensities_2 = numpy.array([700, 200, 100, 1000, 200, 5, 500], dtype="float")
    spectrum_2 = Spectrum(mz=mz_2,
                          intensities=intensities_2,
                          metadata={"precursor_mz": 1005.0})

    normalized = [normalize_intensities(spectrum_1),
                  normalize_intensities(spectrum_2)]
    result = ModifiedCosine().pair(*normalized)

    assert result["score"] == pytest.approx(0.081966, 0.0001), "Expected different cosine score."
    assert result["matches"] == 2, "Expected 2 matching peaks."
def test_select_by_relative_intensity_with_to_parameter_too_large():
    """Passing intensity_to=10.0 is expected to raise an AssertionError."""
    spectrum_in = Spectrum(mz=numpy.array([10, 20, 30, 40], dtype="float"),
                           intensities=numpy.array([1, 10, 100, 1000], dtype="float"))

    with pytest.raises(AssertionError):
        select_by_relative_intensity(spectrum_in, intensity_to=10.0)
示例#7
0
def test_add_fingerprint_no_smiles_no_inchi():
    """With empty metadata, no fingerprint entry is expected after add_fingerprint."""
    empty_mz = numpy.array([], dtype="float")
    empty_intensities = numpy.array([], dtype="float")
    spectrum_in = Spectrum(mz=empty_mz, intensities=empty_intensities, metadata={})

    spectrum = add_fingerprint(spectrum_in)

    fingerprint = spectrum.get("fingerprint", None)
    assert fingerprint is None, "Expected None."
示例#8
0
def test_add_losses_returns_new_spectrum_instance():
    """For an empty spectrum, add_losses should return an equal but distinct object."""
    empty_mz = numpy.array([], dtype="float")
    empty_intensities = numpy.array([], dtype="float")
    spectrum_in = Spectrum(mz=empty_mz, intensities=empty_intensities)

    spectrum = add_losses(spectrum_in)

    # Same content ...
    assert spectrum == spectrum_in
    # ... but not the very same instance
    assert spectrum is not spectrum_in
示例#9
0
def test_add_losses_without_precursor_mz():
    """Without a precursor-m/z the result should equal the input but be a different object."""
    peak_mz = numpy.array([100, 150, 200, 300], dtype="float")
    peak_intensities = numpy.array([700, 200, 100, 1000], dtype="float")
    spectrum_in = Spectrum(mz=peak_mz, intensities=peak_intensities)

    spectrum = add_losses(spectrum_in)

    assert spectrum == spectrum_in
    assert spectrum is not spectrum_in
示例#10
0
def test_cosine_score_greedy_with_tolerance_2_0():
    """Score two spectra with CosineGreedyVectorial using tolerance=2.0."""
    mz_1 = numpy.array([100, 200, 299, 300, 301, 500, 510], dtype="float")
    intensities_1 = numpy.array([10, 10, 500, 100, 200, 20, 100], dtype="float")
    spectrum_1 = Spectrum(mz=mz_1, intensities=intensities_1, metadata=dict())

    mz_2 = numpy.array([100, 200, 300, 301, 500, 512], dtype="float")
    intensities_2 = numpy.array([10, 10, 500, 100, 20, 100], dtype="float")
    spectrum_2 = Spectrum(mz=mz_2, intensities=intensities_2, metadata=dict())

    reference = normalize_intensities(spectrum_1)
    query = normalize_intensities(spectrum_2)
    similarity = CosineGreedyVectorial(tolerance=2.0)
    score, n_matches = similarity(reference, query)

    assert score == pytest.approx(0.903412, 0.0001), "Expected different cosine score."
    assert n_matches == 6
示例#11
0
def test_modified_cosine_with_mass_shift_5_no_matches_expected():
    """Modified cosine on two spectra for which no peak matches are expected."""
    mz_1 = numpy.array([100, 200, 300], dtype="float")
    intensities_1 = numpy.array([10, 10, 500], dtype="float")
    spectrum_1 = Spectrum(mz=mz_1,
                          intensities=intensities_1,
                          metadata={"precursor_mz": 1000.0})

    mz_2 = numpy.array([120, 220, 320], dtype="float")
    intensities_2 = numpy.array([10, 10, 500], dtype="float")
    spectrum_2 = Spectrum(mz=mz_2,
                          intensities=intensities_2,
                          metadata={"precursor_mz": 1005})

    reference = normalize_intensities(spectrum_1)
    query = normalize_intensities(spectrum_2)
    result = ModifiedCosine(tolerance=1.0).pair(reference, query)

    assert result["score"] == pytest.approx(0.0, 1e-5), "Expected different modified cosine score."
    assert result["matches"] == 0, "Expected 0 matching peaks."
def test_harmonize_undefined_inchi_na_3():
    """An inchi of "NA" is expected to be harmonized to an empty string."""
    empty_mz = numpy.array([], dtype="float")
    empty_intensities = numpy.array([], dtype="float")
    spectrum_in = Spectrum(mz=empty_mz,
                           intensities=empty_intensities,
                           metadata={"inchi": "NA"})

    harmonized_inchi = harmonize_undefined_inchi(spectrum_in).get("inchi")
    assert harmonized_inchi == ""
示例#13
0
def test_spectra():
    """Build and return a list of two spectra.

    The peaks are taken from the first two spectra in
    100_test_spectra.pickle so that they occur in the s2v model.
    The remaining values are random.
    """
    mz_1 = np.array([808.27356, 872.289917, 890.246277, 891.272888, 894.326416,
                     904.195679, 905.224548, 908.183472, 922.178101, 923.155762],
                    dtype="float")
    intensities_1 = np.array([0.11106008, 0.12347332, 0.16352988, 0.17101522, 0.17312992,
                              0.19262333, 0.21442898, 0.42173288, 0.51071955, 1.],
                             dtype="float")
    metadata_1 = {
        'pepmass': (907.0, None),
        'spectrumid': 'CCMSLIB00000001760',
        # an alternative precursor_mz of 905.9927235480093 was left disabled here
        'precursor_mz': 907.0,
        'inchikey': 'SCYRNRIZFGMUSB-STOGWRBBSA-N',
        'charge': 1,
    }
    spectrum1 = Spectrum(mz=mz_1, intensities=intensities_1, metadata=metadata_1)

    mz_2 = np.array([538.003174, 539.217773, 556.030396, 599.352783, 851.380859,
                     852.370605, 909.424438, 953.396606, 963.686768, 964.524658],
                    dtype="float")
    intensities_2 = np.array([0.28046377, 0.28900242, 0.31933114, 0.32199162, 0.34214536,
                              0.35616456, 0.36216307, 0.41616014, 0.71323034, 1.],
                             dtype="float")
    # Unlike the first spectrum, no 'charge' entry is set here; an alternative
    # precursor_mz of 905.010782 was likewise left disabled.
    metadata_2 = {
        'pepmass': (928.0, None),
        'spectrumid': 'CCMSLIB00000001761',
        'precursor_mz': 928.0,
        'inchikey': 'SCYRNRIZFGMUSB-STOGWRBBSA-N',
    }
    spectrum2 = Spectrum(mz=mz_2, intensities=intensities_2, metadata=metadata_2)

    return [spectrum1, spectrum2]
示例#14
0
def test_derive_ionmode_positive_adduct():
    """The adduct "[M+H]" is expected to yield ionmode "positive"."""
    empty_mz = numpy.array([], dtype="float")
    empty_intensities = numpy.array([], dtype="float")
    spectrum_in = Spectrum(mz=empty_mz,
                           intensities=empty_intensities,
                           metadata={"adduct": "[M+H]"})

    derived_ionmode = derive_ionmode(spectrum_in).get("ionmode")

    assert derived_ionmode == "positive", "Expected different ionmode."
def test_harmonize_undefined_inchikey_no_data():
    """An inchikey of "no data" is expected to be harmonized to an empty string."""
    empty_mz = numpy.array([], dtype="float")
    empty_intensities = numpy.array([], dtype="float")
    spectrum_in = Spectrum(mz=empty_mz,
                           intensities=empty_intensities,
                           metadata={"inchikey": "no data"})

    harmonized_inchikey = harmonize_undefined_inchikey(spectrum_in).get("inchikey")
    assert harmonized_inchikey == ""
示例#16
0
def test_modified_cosine_without_precursor_mz():
    """Scoring spectra that lack a precursor-m/z should raise an AssertionError."""
    mz_1 = numpy.array([100, 150, 200, 300, 500, 510, 1100], dtype="float")
    intensities_1 = numpy.array([700, 200, 100, 1000, 200, 5, 500], dtype="float")
    spectrum_1 = Spectrum(mz=mz_1, intensities=intensities_1)

    mz_2 = numpy.array([100, 140, 190, 300, 490, 510, 1090], dtype="float")
    intensities_2 = numpy.array([700, 200, 100, 1000, 200, 5, 500], dtype="float")
    spectrum_2 = Spectrum(mz=mz_2, intensities=intensities_2)

    reference = normalize_intensities(spectrum_1)
    query = normalize_intensities(spectrum_2)
    similarity = ModifiedCosine()

    with pytest.raises(AssertionError) as raised:
        similarity.pair(reference, query)

    # The error text has to match exactly
    assert str(raised.value) == "Precursor_mz missing. Apply 'add_precursor_mz' filter first."
示例#17
0
def test_add_losses_without_precursor_mz():
    """add_losses on a spectrum lacking precursor-m/z: equal content, new object."""
    spectrum_in = Spectrum(
        mz=numpy.array([100, 150, 200, 300], dtype="float"),
        intensities=numpy.array([700, 200, 100, 1000], dtype="float"),
    )

    spectrum = add_losses(spectrum_in)

    assert spectrum == spectrum_in
    assert spectrum is not spectrum_in
示例#18
0
def test_modified_cosine_with_mass_shift_5_tolerance_2():
    """Modified cosine with precursor m/z differing by 5 and tolerance=2.0."""
    mz_1 = numpy.array([100, 200, 299, 300, 301, 500, 510], dtype="float")
    intensities_1 = numpy.array([10, 10, 500, 100, 200, 20, 100], dtype="float")
    spectrum_1 = Spectrum(mz=mz_1,
                          intensities=intensities_1,
                          metadata={"precursor_mz": 1000.0})

    mz_2 = numpy.array([105, 205, 305, 306, 505, 517], dtype="float")
    intensities_2 = numpy.array([10, 10, 500, 100, 20, 100], dtype="float")
    spectrum_2 = Spectrum(mz=mz_2,
                          intensities=intensities_2,
                          metadata={"precursor_mz": 1005})

    reference = normalize_intensities(spectrum_1)
    query = normalize_intensities(spectrum_2)
    result = ModifiedCosine(tolerance=2.0).pair(reference, query)

    assert result["score"] == pytest.approx(0.96788, 0.0001), "Expected different modified cosine score."
    assert result["matches"] == 6, "Expected 6 matching peaks."
示例#19
0
def test_fingerprint_similarity_parallel_cosine_empty_fingerprint():
    """Cosine score matrix when one of the fingerprints is all zeros."""
    all_zero_bits = numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    spectrum1 = Spectrum(mz=numpy.array([], dtype="float"),
                         intensities=numpy.array([], dtype="float"),
                         metadata={"fingerprint": all_zero_bits})

    mixed_bits = numpy.array([0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1])
    spectrum2 = Spectrum(mz=numpy.array([], dtype="float"),
                         intensities=numpy.array([], dtype="float"),
                         metadata={"fingerprint": mixed_bits})

    similarity_measure = FingerprintSimilarityParallel(similarity_measure="cosine")
    references = [spectrum1, spectrum2]
    queries = [spectrum1, spectrum2]
    score_matrix = similarity_measure(references, queries)

    expected_matrix = numpy.array([[0, 0],
                                   [0, 1.]])
    assert score_matrix == pytest.approx(expected_matrix, 0.001), "Expected different values."
def test_harmonize_undefined_smiles_empty_string():
    """An empty smiles string is expected to stay an empty string."""
    empty_mz = numpy.array([], dtype="float")
    empty_intensities = numpy.array([], dtype="float")
    spectrum_in = Spectrum(mz=empty_mz,
                           intensities=empty_intensities,
                           metadata={"smiles": ""})

    harmonized_smiles = harmonize_undefined_smiles(spectrum_in).get("smiles")
    assert harmonized_smiles == ""
示例#21
0
def test_cosine_greedy_with_arrays_symmetric():
    """Check the score matrix computed with is_symmetric=True."""
    mz_1 = numpy.array([100, 200, 300], dtype="float")
    intensities_1 = numpy.array([0.1, 0.2, 1.0], dtype="float")
    spectrum_1 = Spectrum(mz=mz_1, intensities=intensities_1)

    mz_2 = numpy.array([110, 190, 290], dtype="float")
    intensities_2 = numpy.array([0.5, 0.2, 1.0], dtype="float")
    spectrum_2 = Spectrum(mz=mz_2, intensities=intensities_2)

    spectrums = [spectrum_1, spectrum_2]
    scores = CosineGreedy().matrix(spectrums, spectrums, is_symmetric=True)

    # Diagonal entries should agree with each other
    assert scores[0][0][0] == pytest.approx(scores[1][1][0], 0.000001), \
        "Expected different cosine score."
    # Off-diagonal entries should mirror each other
    assert scores[0][1][0] == pytest.approx(scores[1][0][0], 0.000001), \
        "Expected different cosine score."
def test_derive_formula_from_name_examples(string_addition, expected_formula):
    """Append string_addition to a fixed compound name and compare the derived formula."""
    compound_name = "peptideXYZ [M+H+K] "+string_addition
    spectrum_in = Spectrum(mz=numpy.array([], dtype="float"),
                           intensities=numpy.array([], dtype="float"),
                           metadata={"compound_name": compound_name})

    derived_formula = derive_formula_from_name(spectrum_in).get("formula")

    assert derived_formula == expected_formula, "Expected different formula."
示例#23
0
def test_precursormz_match_missing_precursormz():
    """PrecursorMzMatch should raise when one spectrum has no precursor_mz."""
    spectrum_1 = Spectrum(mz=numpy.array([], dtype="float"),
                          intensities=numpy.array([], dtype="float"),
                          metadata={"precursor_mz": 100.0})

    # Second spectrum carries no metadata at all
    spectrum_2 = Spectrum(mz=numpy.array([], dtype="float"),
                          intensities=numpy.array([], dtype="float"),
                          metadata={})

    similarity_score = PrecursorMzMatch(tolerance=2.0)

    with pytest.raises(AssertionError) as raised:
        _ = similarity_score.pair(spectrum_1, spectrum_2)

    error_text = str(raised.value)
    assert "Missing precursor m/z." in error_text, "Expected particular error message."
def test_harmonize_undefined_smiles_alias_nan_undefined_is_na():
    """With custom aliases and undefined="n/a", a smiles of "nan" should become "n/a"."""
    spectrum_in = Spectrum(mz=numpy.array([], dtype="float"),
                           intensities=numpy.array([], dtype="float"),
                           metadata={"smiles": "nan"})

    custom_aliases = ["nodata", "NaN", "Nan", "nan"]
    spectrum = harmonize_undefined_smiles(spectrum_in,
                                          aliases=custom_aliases,
                                          undefined="n/a")

    assert spectrum.get("smiles") == "n/a"
def test_derive_formula_from_name_no_name_given():
    """With empty metadata, neither formula nor compound_name should be set."""
    empty_mz = numpy.array([], dtype="float")
    empty_intensities = numpy.array([], dtype="float")
    spectrum_in = Spectrum(mz=empty_mz, intensities=empty_intensities, metadata={})

    spectrum = derive_formula_from_name(spectrum_in)

    formula = spectrum.get("formula", None)
    assert formula is None, "Expected None for adduct."
    compound_name = spectrum.get("compound_name", None)
    assert compound_name is None, "Expected None for name."
def test_derive_formula_from_name_default():
    """A formula at the end of the name should move into the formula field."""
    empty_mz = numpy.array([], dtype="float")
    empty_intensities = numpy.array([], dtype="float")
    spectrum_in = Spectrum(mz=empty_mz,
                           intensities=empty_intensities,
                           metadata={"compound_name": "peptideXYZ [M+H+K] C5H12NO2"})

    spectrum = derive_formula_from_name(spectrum_in)

    formula = spectrum.get("formula")
    assert formula == "C5H12NO2", "Expected different formula."
    cleaned_name = spectrum.get("compound_name")
    assert cleaned_name == "peptideXYZ [M+H+K]", "Expected different cleaned name."
def test_fingerprint_similarity_pair_calculations(test_method, expected_score):
    """Score one fixed pair of fingerprints with the given similarity measure."""
    bits_1 = numpy.array([1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0])
    spectrum1 = Spectrum(mz=numpy.array([], dtype="float"),
                         intensities=numpy.array([], dtype="float"),
                         metadata={"fingerprint": bits_1})

    bits_2 = numpy.array([0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1])
    spectrum2 = Spectrum(mz=numpy.array([], dtype="float"),
                         intensities=numpy.array([], dtype="float"),
                         metadata={"fingerprint": bits_2})

    similarity_measure = FingerprintSimilarity(similarity_measure=test_method)
    score_pair = similarity_measure.pair(spectrum1, spectrum2)

    expected = pytest.approx(expected_score, 1e-6)
    assert score_pair == expected, "Expected different score."
示例#28
0
def test_reduce_to_number_of_peaks_no_params():
    """Calling reduce_to_number_of_peaks with defaults should leave the spectrum as is."""
    spectrum_in = Spectrum(mz=numpy.array([10, 20, 30, 40], dtype="float"),
                           intensities=numpy.array([0, 1, 10, 100], dtype="float"))

    spectrum = reduce_to_number_of_peaks(spectrum_in)

    assert spectrum == spectrum_in, "Expected no changes."
示例#29
0
def test_spectrum_getters_return_copies():
    """Test if getters return (deep)copies so that edits won't change the original entries.

    Three access paths are checked in turn: ``spectrum.get`` for a metadata
    entry, ``spectrum.peaks.mz`` and ``spectrum.metadata``. Each returned
    object is modified, after which the spectrum is read again and expected
    to still hold the values it was created with.
    """
    spectrum = Spectrum(mz=numpy.array([100.0, 101.0], dtype="float"),
                        intensities=numpy.array([0.4, 0.5], dtype="float"),
                        metadata={"testdata": 1})
    # Get entries and modify
    testdata = spectrum.get("testdata")
    # The stored entry is the int 1, so `+=` can only rebind the local name.
    testdata += 1
    assert spectrum.get("testdata") == 1, "Expected different entry"
    peaks_mz = spectrum.peaks.mz
    # Presumably a numpy array, in which case `+=` modifies it in place; the
    # assertion below then only holds if the getter handed out a copy.
    peaks_mz += 100.0
    assert numpy.all(spectrum.peaks.mz == numpy.array(
        [100.0, 101.0])), "Expected different peaks.mz"
    metadata = spectrum.metadata
    # Adding a key to the returned object must not show up in the spectrum.
    metadata["added_info"] = "this"
    assert spectrum.metadata == {
        'testdata': 1
    }, "Expected metadata to remain unchanged"
示例#30
0
def test_require_minimum_number_of_peaks_no_params():
    """With default parameters, a spectrum with four peaks is expected to be rejected (None)."""
    spectrum_in = Spectrum(mz=numpy.array([10, 20, 30, 40], dtype="float"),
                           intensities=numpy.array([0, 1, 10, 100], dtype="float"))

    spectrum = require_minimum_number_of_peaks(spectrum_in)

    assert spectrum is None, "Expected None because the number of peaks (4) is less than the default threshold (10)."