Python ParentmassMatch.matrix примеры использования

Язык программирования: Python

Пространство имен/Пакет: matchms.similarity

Класс/Тип: ParentmassMatch

Метод/Функция: matrix

Примеров на hotexamples.com: 3

Python ParentmassMatch.matrix - 3 примера найдено. Это лучшие примеры Python кода для matchms.similarity.ParentmassMatch.matrix, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

ParentmassMatch(5)

matrix(3)

pair(2)

Основные методы

ParentmassMatch (5)

matrix (3)

pair (2)

Пример #1

Показать файл

def test_parentmass_match_array_symmetric():
    """Score one list of spectra against itself, with and without is_symmetric.

    Four spectra with empty peak arrays and only a ``parent_mass`` entry in
    their metadata are passed to ``ParentmassMatch.matrix`` as both references
    and queries. The ``is_symmetric=True`` and ``is_symmetric=False`` results
    must be equal, and both must equal the expected boolean matrix.
    """
    def make_spectrum(parent_mass):
        # Peak arrays stay empty; the parent mass is the only metadata set.
        return Spectrum(mz=numpy.array([], dtype="float"),
                        intensities=numpy.array([], dtype="float"),
                        metadata={"parent_mass": parent_mass})

    spectrums = [make_spectrum(mass) for mass in (100.0, 101.0, 99.95, 98.0)]

    scorer = ParentmassMatch()
    symmetric_scores = scorer.matrix(spectrums, spectrums, is_symmetric=True)
    plain_scores = scorer.matrix(spectrums, spectrums, is_symmetric=False)

    # Row/column order follows the order of ``spectrums``.
    expected = numpy.array([
        [True, False, True, False],
        [False, True, False, False],
        [True, False, True, False],
        [False, False, False, True],
    ])

    assert numpy.all(symmetric_scores == plain_scores), "Expected identical scores"
    assert numpy.all(symmetric_scores == expected), "Expected different scores"

Пример #2

Показать файл

def test_parentmass_match_tolerance2_array():
    """Score two reference spectra against two query spectra with tolerance=2.

    All spectra have empty peak arrays and carry only a ``parent_mass`` in
    their metadata. The result of ``ParentmassMatch(tolerance=2.0).matrix`` is
    compared against the expected 2x2 boolean matrix.
    """
    def make_spectrum(parent_mass):
        # Peak arrays stay empty; the parent mass is the only metadata set.
        return Spectrum(mz=numpy.array([], dtype="float"),
                        intensities=numpy.array([], dtype="float"),
                        metadata={"parent_mass": parent_mass})

    references = [make_spectrum(mass) for mass in (100.0, 101.0)]
    queries = [make_spectrum(mass) for mass in (99.0, 98.0)]

    scorer = ParentmassMatch(tolerance=2.0)
    scores = scorer.matrix(references, queries)

    # Rows follow ``references``, columns follow ``queries``.
    expected = numpy.array([[True, True], [True, False]])
    assert numpy.all(scores == expected), "Expected different scores."

Пример #3

Показать файл

Файл: library_search.py Проект: lozanocelia/spec2vec_gnps_data_analysis

def library_matching(documents_query: List[SpectrumDocument],
                     documents_library: List[SpectrumDocument],
                     model,
                     presearch_based_on=["parentmass", "spec2vec-top10"],
                     ignore_non_annotated: bool = True,
                     include_scores=["spec2vec", "cosine", "modcosine"],
                     intensity_weighting_power: float = 0.5,
                     allowed_missing_percentage: float = 0,
                     cosine_tol: float = 0.005,
                     mass_tolerance: float = 1.0):
    """Select potential library matches for every query spectrum.

    Candidates are pre-selected by 1) the top-n Spec2Vec similarities and/or
    2) a parent mass match (within ``mass_tolerance``). For the selected
    candidates additional scores (cosine, modified cosine, Spec2Vec) are
    collected so that later matching routines can use them.

    Args:
    --------
    documents_query:
        List containing all spectrum documents that should be queried against
        the library.
    documents_library:
        List containing all library spectrum documents.
    model:
        Pretrained word2Vec model (handed to Spec2Vec).
    presearch_based_on:
        List of pre-selection criteria. Must contain "parentmass" and/or an
        entry of the form "spec2vec-topX", where X is the number of top
        Spec2Vec scores to keep per query. Only the first "spec2vec" entry is
        used. Default = ["parentmass", "spec2vec-top10"]. The default list is
        never modified by this function.
    ignore_non_annotated: bool, optional
        If True, only library spectra with a non-empty "smiles" entry are
        considered for matching. Default = True.
    include_scores:
        Scores to add for the found candidates. "cosine" adds the CosineGreedy
        score, "modcosine" adds the ModifiedCosine score, and "spec2vec" adds
        the Spec2Vec score when it was not already computed during the
        pre-selection. Default = ["spec2vec", "cosine", "modcosine"].
    intensity_weighting_power: float, optional
        Passed on to Spec2Vec. Default = 0.5.
    allowed_missing_percentage: float, optional
        Passed on to Spec2Vec. Default = 0.
    cosine_tol: float, optional
        Set tolerance for the cosine and modified cosine score. Default = 0.005
    mass_tolerance
        Specify tolerance for a parentmass match.

    Returns:
    --------
    List with one entry per query document. An entry is a pandas DataFrame
    indexed by the position of the candidate in ``documents_library`` with
    the columns "cosine_score", "cosine_matches", "mod_cosine_score",
    "mod_cosine_matches" (holding "not calculated" when a score was not
    requested), plus "mass_match" and "s2v_score" when available. Queries
    without any candidate get an empty list instead.
    """
    n_queries = len(documents_query)

    # Positions in documents_library of the spectra allowed as candidates.
    # dtype=int keeps the array usable as an index even when it is empty.
    if ignore_non_annotated:
        library_ids = np.asarray(
            [i for i, doc in enumerate(documents_library)
             if doc._obj.get("smiles")],
            dtype=int)
    else:
        library_ids = np.arange(len(documents_library))

    use_parentmass = "parentmass" in presearch_based_on
    spec2vec_presearch = [x for x in presearch_based_on if "spec2vec" in x]
    msg = "Presearch must be done either by 'parentmass' and/or 'spec2vec-topX'"
    assert use_parentmass or spec2vec_presearch, msg

    # 1. Search for top-n Spec2Vec matches ------------------------------------
    m_spec2vec_similarities = None
    selection_spec2vec = np.empty((0, n_queries), dtype="int")
    if spec2vec_presearch:
        top_n = int(spec2vec_presearch[0].split("top")[1])
        print("Pre-selection includes spec2vec top {}.".format(top_n))
        spec2vec = Spec2Vec(
            model=model,
            intensity_weighting_power=intensity_weighting_power,
            allowed_missing_percentage=allowed_missing_percentage)
        m_spec2vec_similarities = spec2vec.matrix(
            [documents_library[i] for i in library_ids], documents_query)

        # np.argpartition raises if asked for more entries than there are
        # rows, so never request more than the number of candidates.
        top_n = min(top_n, len(library_ids))
        if top_n > 0:
            # Select top_n similarity values (rows index into library_ids).
            selection_spec2vec = np.argpartition(m_spec2vec_similarities,
                                                 -top_n,
                                                 axis=0)[-top_n:, :]

    # 2. Search for parent mass based matches ---------------------------------
    m_mass_matches = None
    if use_parentmass:
        mass_matching = ParentmassMatch(mass_tolerance)
        m_mass_matches = mass_matching.matrix(
            [documents_library[i]._obj for i in library_ids],
            [doc._obj for doc in documents_query])
        selection_massmatch = [np.where(m_mass_matches[:, i] == 1)[0]
                               for i in range(n_queries)]
    else:
        selection_massmatch = np.empty((n_queries, 0), dtype="int")

    # The similarity measures do not depend on the query, build them once.
    cosine_similarity = None
    if "cosine" in include_scores:
        cosine_similarity = CosineGreedy(tolerance=cosine_tol)
    mod_cosine_similarity = None
    if "modcosine" in include_scores:
        mod_cosine_similarity = ModifiedCosine(tolerance=cosine_tol)
    spec2vec_similarity = None
    if m_spec2vec_similarities is None and "spec2vec" in include_scores:
        spec2vec_similarity = Spec2Vec(
            model=model,
            intensity_weighting_power=intensity_weighting_power,
            allowed_missing_percentage=allowed_missing_percentage)

    def score_columns(similarity, matched_library_ids, query_spectrum):
        """Return the (score, matches) columns for one query spectrum."""
        if similarity is None:
            n_matches = len(matched_library_ids)
            return (n_matches * ["not calculated"],
                    n_matches * ["not calculated"])
        # One reference/query pair at a time, hence pair() and not matrix().
        pairs = [similarity.pair(documents_library[match_id]._obj,
                                 query_spectrum)
                 for match_id in matched_library_ids]
        return [x[0] for x in pairs], [x[1] for x in pairs]

    # 3. Combine found matches ------------------------------------------------
    found_matches = []
    for i, query_document in enumerate(documents_query):
        # Row positions within the library_ids selection, not library indices.
        all_match_ids = np.unique(np.concatenate(
            (selection_spec2vec[:, i], selection_massmatch[i])))

        if len(all_match_ids) == 0:
            found_matches.append([])
            continue

        matched_library_ids = library_ids[all_match_ids]
        query_spectrum = query_document._obj

        cosine_score, cosine_matches = score_columns(
            cosine_similarity, matched_library_ids, query_spectrum)
        mod_cosine_score, mod_cosine_matches = score_columns(
            mod_cosine_similarity, matched_library_ids, query_spectrum)

        matches_df = pd.DataFrame(
            {
                "cosine_score": cosine_score,
                "cosine_matches": cosine_matches,
                "mod_cosine_score": mod_cosine_score,
                "mod_cosine_matches": mod_cosine_matches
            },
            index=matched_library_ids)

        if m_mass_matches is not None:
            matches_df["mass_match"] = m_mass_matches[all_match_ids, i]

        if m_spec2vec_similarities is not None:
            matches_df["s2v_score"] = m_spec2vec_similarities[
                all_match_ids, i]
        elif spec2vec_similarity is not None:
            matches_df["s2v_score"] = [
                spec2vec_similarity.pair(documents_library[match_id],
                                         query_document)
                for match_id in matched_library_ids]
        found_matches.append(matches_df.fillna(0))

    return found_matches