Exemplo n.º 1
0
def test_filter_by_name(
    elastic_repository_molecule: ElasticRepository,
    indigo_fixture: Indigo,
    loaded_sdf: IndigoRecordMolecule,
    resource_loader,
):
    """Check filtering by name, by similarity, and by both combined.

    Indexes ``molecules/composition1.mol`` and then runs three queries
    against the molecule repository: a name filter, a Tanimoto similarity
    filter with threshold 0.1, and the two filters together.
    """
    molecule = indigo_fixture.loadMoleculeFromFile(
        resource_loader("molecules/composition1.mol")
    )
    elastic_repository_molecule.index_record(
        IndigoRecordMolecule(indigo_object=molecule)
    )
    # NOTE(review): presumably waits for the new record to become
    # searchable -- confirm against the repository's refresh behaviour.
    time.sleep(1)

    # Name-only query: every hit must carry the requested name.
    for hit in elastic_repository_molecule.filter(name="Composition1"):
        assert hit.name == "Composition1"

    # Similarity-only query: exactly ten hits are expected.
    similar_hits = elastic_repository_molecule.filter(
        similarity=TanimotoSimilarityMatch(
            IndigoRecordMolecule(indigo_object=molecule), 0.1
        )
    )
    hit_count = sum(1 for _ in similar_hits)
    assert hit_count == 10

    # Combined query: similarity must not override the name constraint.
    combined_hits = elastic_repository_molecule.filter(
        similarity=TanimotoSimilarityMatch(
            IndigoRecordMolecule(indigo_object=molecule), 0.1
        ),
        name="Composition1",
    )
    for hit in combined_hits:
        assert hit.name == "Composition1"
Exemplo n.º 2
0
def test_similaririty_matches_reactions(
    elastic_repository_reaction: ElasticRepository,
    loaded_rxns,
    resource_loader,
    indigo_fixture,
) -> None:
    """Check that reaction searches return reactions with matching reactants.

    Loads ``reactions/rheadb/50353.rxn`` and queries the reaction repository
    with Tanimoto, Euclid and Tversky similarity (threshold 0.99) and with
    an exact match. Every hit must have the same reactant count as the
    query reaction.
    """
    reaction = indigo_fixture.loadReactionFromFile(
        resource_loader("reactions/rheadb/50353.rxn")
    )
    # NOTE(review): the reaction is wrapped in IndigoRecordMolecule here --
    # confirm this is intended rather than a reaction record type.
    reaction_rec = IndigoRecordMolecule(indigo_object=reaction)

    def assert_reactant_counts_match(**query) -> None:
        """Run ``filter(**query)`` and compare reactant counts of each hit."""
        for hit in elastic_repository_reaction.filter(**query):
            hit_reaction = as_iob(hit, indigo_fixture)
            assert hit_reaction.countReactants() == reaction.countReactants()

    assert_reactant_counts_match(
        similarity=TanimotoSimilarityMatch(reaction_rec, 0.99)
    )
    assert_reactant_counts_match(
        similarity=EuclidSimilarityMatch(reaction_rec, 0.99)
    )
    assert_reactant_counts_match(
        similarity=TverskySimilarityMatch(reaction_rec, 0.99)
    )
    assert_reactant_counts_match(exact=reaction_rec)
Exemplo n.º 3
0
def test_exact_match(
    elastic_repository_molecule: ElasticRepository,
    indigo_fixture: Indigo,
    loaded_sdf: IndigoRecordMolecule,
):
    """Check that an exact search returns the queried molecule first.

    The first hit of ``filter(exact=loaded_sdf)`` must have the same
    canonical SMILES as ``loaded_sdf`` itself.
    """
    hits = elastic_repository_molecule.filter(exact=loaded_sdf)

    expected_smiles = loaded_sdf.as_indigo_object(
        indigo_fixture
    ).canonicalSmiles()
    first_hit = next(hits)
    found_smiles = first_hit.as_indigo_object(indigo_fixture).canonicalSmiles()

    assert expected_smiles == found_smiles
Exemplo n.º 4
0
def test_substructure_search(
    elastic_repository_molecule: ElasticRepository,
    indigo_fixture: Indigo,
    loaded_sdf: IndigoRecordMolecule,
):
    """Check that every substructure hit equals the query molecule.

    Each record returned by ``filter(substructure=loaded_sdf)`` must have
    the same canonical SMILES as ``loaded_sdf``.
    """
    for hit in elastic_repository_molecule.filter(substructure=loaded_sdf):
        hit_smiles = hit.as_indigo_object(indigo_fixture).canonicalSmiles()
        # Recomputed per hit on purpose: nothing is evaluated when the
        # search yields no results.
        query_smiles = loaded_sdf.as_indigo_object(
            indigo_fixture
        ).canonicalSmiles()
        assert hit_smiles == query_smiles
Exemplo n.º 5
0
def test_similarity_matches(
    elastic_repository_molecule: ElasticRepository,
    indigo_fixture: Indigo,
    loaded_sdf: IndigoRecordMolecule,
):
    """Check that each similarity algorithm returns the query molecule first.

    Runs Tanimoto, Euclid and Tversky similarity queries (threshold 0.9)
    for ``loaded_sdf`` and compares the canonical SMILES of the first hit
    with that of the query.
    """
    similarity_queries = (
        TanimotoSimilarityMatch(loaded_sdf, 0.9),
        EuclidSimilarityMatch(loaded_sdf, 0.9),
        TverskySimilarityMatch(loaded_sdf, 0.9, 0.5, 0.5),
    )
    for query in similarity_queries:
        hits = elastic_repository_molecule.filter(similarity=query)
        expected_smiles = loaded_sdf.as_indigo_object(
            indigo_fixture
        ).canonicalSmiles()
        first_smiles = next(hits).as_indigo_object(
            indigo_fixture
        ).canonicalSmiles()
        assert expected_smiles == first_smiles
Exemplo n.º 6
0
def test_wildcard_search(
    elastic_repository: ElasticRepository,
    indigo_fixture: Indigo,
    loaded_sdf: IndigoRecord,
    resource_loader: Callable[[str], str],
):
    """Check that a ``Comp*`` wildcard name query only finds Composition1.

    Indexes ``resources/composition1.mol`` and asserts that every hit of
    ``filter(name=WildcardQuery("Comp*"))`` is named ``Composition1``.
    """
    molecule_path = resource_loader("resources/composition1.mol")
    molecule = indigo_fixture.loadMoleculeFromFile(molecule_path)
    record = IndigoRecord(indigo_object=molecule)
    elastic_repository.index_record(record)
    # NOTE(review): presumably waits for the new record to become
    # searchable -- confirm against the repository's refresh behaviour.
    time.sleep(1)

    for hit in elastic_repository.filter(name=WildcardQuery("Comp*")):
        assert hit.name == "Composition1"
Exemplo n.º 7
0
def test_range_search(
    elastic_repository_molecule: ElasticRepository,
    indigo_fixture: Indigo,
    resource_loader,
):
    """Check a range query over a custom numeric field.

    Every record read from ``molecules/rand_queries_small.sdf`` is tagged
    with its position in the file as ``ind_number`` and indexed. The query
    ``ind_number=RangeQuery(1, 10)`` is then expected to yield exactly ten
    hits.
    """
    sdf_path = Path(resource_loader("molecules/rand_queries_small.sdf"))
    for position, record in enumerate(iterate_file(sdf_path)):
        record.ind_number = position
        elastic_repository_molecule.index_record(record)

    # NOTE(review): unlike sibling tests there is no wait between indexing
    # and searching here -- confirm this cannot make the test flaky.
    hits = elastic_repository_molecule.filter(ind_number=RangeQuery(1, 10))
    hit_count = sum(1 for _ in hits)
    assert hit_count == 10
Exemplo n.º 8
0
def test_search_empty_fingerprint(
    elastic_repository: ElasticRepository,
    indigo_fixture: Indigo,
    resource_loader: Callable[[str], str],
):
    """Check that an exact search for ``[H][H]`` yields exactly one hit.

    Indexes ``[H][H]`` and ``[H][F]`` (both built with ``skip_errors=True``),
    then asserts that the exact search returns a record whose canonical
    SMILES is ``[H][H]`` and that the result iterator is exhausted after it.
    """
    # NOTE(review): judging by the test name these molecules presumably
    # produce empty fingerprints -- confirm.
    for smiles in ("[H][H]", "[H][F]"):
        molecule = indigo_fixture.loadMolecule(smiles)
        elastic_repository.index_record(
            IndigoRecord(indigo_object=molecule, skip_errors=True)
        )
    time.sleep(5)

    query_record = IndigoRecord(
        indigo_object=indigo_fixture.loadMolecule("[H][H]"),
        skip_errors=True,
    )
    result = elastic_repository.filter(exact=query_record)

    first_smiles = next(result).as_indigo_object(
        indigo_fixture
    ).canonicalSmiles()
    assert "[H][H]" == first_smiles

    # A second hit would mean the other molecule matched as well.
    with pytest.raises(StopIteration):
        next(result).as_indigo_object(indigo_fixture).canonicalSmiles()
Exemplo n.º 9
0
def test_custom_fields(
    elastic_repository: ElasticRepository,
    indigo_fixture: Indigo,
    loaded_sdf: IndigoRecord,
    resource_loader: Callable[[str], str],
):
    """Check that a custom record field can be stored and filtered on.

    Indexes ``resources/composition1.mol`` with a ``PUBCHEM_IUPAC_INCHIKEY``
    attribute and asserts that filtering on that attribute only returns
    records carrying the same value.
    """
    inchikey = "RDHQFKQIGNGIED-UHFFFAOYSA-N"

    molecule = indigo_fixture.loadMoleculeFromFile(
        resource_loader("resources/composition1.mol")
    )
    record = IndigoRecord(
        indigo_object=molecule,
        PUBCHEM_IUPAC_INCHIKEY=inchikey,
    )
    elastic_repository.index_record(record)
    # NOTE(review): presumably waits for the new record to become
    # searchable -- confirm against the repository's refresh behaviour.
    time.sleep(1)

    for hit in elastic_repository.filter(PUBCHEM_IUPAC_INCHIKEY=inchikey):
        assert hit.PUBCHEM_IUPAC_INCHIKEY == inchikey
Exemplo n.º 10
0
def test_create_reaction(
    elastic_repository_reaction: ElasticRepository,
    indigo_fixture,
    resource_loader,
) -> None:
    """Check that an indexed reaction can be found again by similarity.

    Loads ``reactions/rheadb/58029.rxn``, indexes it as an
    ``IndigoRecordReaction`` and runs a similarity query (threshold 0.9).
    Each hit must have the same number of products and reactants as the
    source reaction, and each of its reactants must be one of the source
    reaction's reactants (compared by canonical SMILES).
    """
    reaction = indigo_fixture.loadReactionFromFile(
        resource_loader("reactions/rheadb/58029.rxn")
    )
    indigo_reaction = IndigoRecordReaction(indigo_object=reaction)

    # Reference values taken from the source reaction before indexing.
    expected_smiles = {
        source_reactant.canonicalSmiles()
        for source_reactant in reaction.iterateReactants()
    }
    expected_reactants = reaction.countReactants()
    expected_products = reaction.countProducts()

    assert isinstance(indigo_reaction, IndigoRecordReaction)
    elastic_repository_reaction.index_record(indigo_reaction)
    # NOTE(review): presumably waits for the new record to become
    # searchable -- confirm against the repository's refresh behaviour.
    sleep(1)

    hits = elastic_repository_reaction.filter(
        similarity=SimilarityMatch(indigo_reaction, 0.9)
    )
    for hit in hits:
        hit_reaction = as_iob(hit, indigo_fixture)
        assert expected_products == hit_reaction.countProducts()
        assert expected_reactants == hit_reaction.countReactants()
        for hit_reactant in hit_reaction.iterateReactants():
            assert hit_reactant.canonicalSmiles() in expected_smiles
Exemplo n.º 11
0
from bingo_elastic.elastic import ElasticRepository
from bingo_elastic.model import helpers
from indigo import Indigo

if __name__ == "__main__":
    # Demo script: time a single similarity query against a local
    # Elasticsearch instance (127.0.0.1:9200).
    # NOTE(review): `time`, `IndigoRecord` and `SimilarityMatch` are used
    # below but are not imported in the visible part of this file --
    # confirm they are imported elsewhere.
    indigo = Indigo()
    started_at = time.time()

    repository = ElasticRepository(host="127.0.0.1", port=9200)
    # Indexing is disabled; the first timing line below therefore only
    # covers the setup above. To (re)build the index, re-enable:
    # try:
    #     sdf = helpers.iterate_sdf("../data/pubchem/Compound_000000001_000500000.sdf")
    #     repository.index_records(sdf)
    # except ValueError as e:
    #     print(e)
    #
    #
    # time.sleep(300)

    indexing_elapsed = time.time() - started_at
    print(
        "--- {} seconds for reading and indexing data ---".format(
            indexing_elapsed
        )
    )

    query_molecule = indigo.loadMolecule(
        "Cc1ccc2nc(-c3ccc(NC(C4N(C(c5cccs5)=O)CCC4)=O)cc3)sc2c1"
    )
    query_record = IndigoRecord(indigo_object=query_molecule)
    similarity_query = SimilarityMatch(query_record, 0.5)

    hits = repository.filter(similarity=similarity_query, limit=20)
    # NOTE(review): this prints whatever object filter() returns; if that
    # is a lazy iterator, no individual hits are shown -- confirm intent.
    print(hits)
    print("--- {} seconds ---".format(time.time() - started_at))