Exemplo n.º 1
0
def test_update_pos_labels():
    """Check that positive labels of a loaded model can be updated.

    Two copies of the 'IR' model are loaded. Re-applying the first copy's
    positive labels to the second must leave the classifier stats equal,
    and applying them with 'MESH:D007333' appended must yield exactly the
    three expected positive labels.
    """
    reference = load_disambiguator('IR', path=TEST_MODEL_PATH)
    updated = load_disambiguator('IR', path=TEST_MODEL_PATH)

    # Updating with the labels the model already has.
    updated.update_pos_labels(reference.pos_labels)
    assert reference.classifier.stats == updated.classifier.stats

    # Updating with one additional label; order is not checked.
    extended_labels = reference.pos_labels + ['MESH:D007333']
    updated.update_pos_labels(extended_labels)
    expected_labels = {'HGNC:6091', 'MESH:D011839', 'MESH:D007333'}
    assert set(updated.pos_labels) == expected_labels
Exemplo n.º 2
0
def generate_adeft_terms():
    """Build Term entries from the groundings of all available Adeft models.

    For every shortform in ``adeft.available_shortforms`` the matching
    disambiguator is loaded and each key of its ``names`` mapping is
    inspected. Keys equal to 'ungrounded' or lacking a ':' separator are
    skipped, as are keys whose namespace is not one of HGNC, GO, MESH,
    CHEBI, FPLX or UP (a warning is logged for those). For the rest, a
    standard name is looked up via the namespace's client (for FPLX the
    identifier itself is used).

    Returns
    -------
    list
        One ``Term`` per unique argument tuple, ordered by the tuple's
        first element (the normalized shortform).
    """
    from adeft import available_shortforms
    from adeft.disambiguate import load_disambiguator

    # Namespace -> name lookup. Lambdas keep the client attribute access
    # lazy, so a client is only touched when its namespace is seen.
    name_lookups = {
        'HGNC': lambda ident: hgnc_client.get_hgnc_name(ident),
        'GO': lambda ident: go_client.get_go_label(ident),
        'MESH': lambda ident: mesh_client.get_mesh_name(ident),
        'CHEBI': lambda ident: chebi_client.get_chebi_name_from_id(ident),
        'FPLX': lambda ident: ident,
        'UP': lambda ident: uniprot_client.get_gene_name(ident),
    }

    # A set de-duplicates identical argument tuples.
    unique_args = set()
    for shortform in available_shortforms:
        disambiguator = load_disambiguator(shortform)
        for grounding in disambiguator.names:
            if grounding == 'ungrounded' or ':' not in grounding:
                continue
            db_ns, db_id = grounding.split(':', maxsplit=1)
            lookup = name_lookups.get(db_ns)
            if lookup is None:
                logger.warning('Unknown grounding namespace from Adeft: %s' %
                               db_ns)
                continue
            standard_name = lookup(db_id)
            unique_args.add((normalize(shortform), shortform, db_ns, db_id,
                             standard_name, 'synonym', 'adeft'))

    ordered_args = sorted(unique_args, key=lambda args: args[0])
    return [Term(*args) for args in ordered_args]
Exemplo n.º 3
0
def test_dump_disambiguator():
    """Test that a model read back from SCRATCH_PATH matches the original.

    The 'IR' model is loaded from TEST_MODEL_PATH and dumped under a
    random name into SCRATCH_PATH. A second model is then loaded from
    SCRATCH_PATH and compared with the first on grounding dict, names,
    positive labels, logit coefficients and ``info()`` output.

    The dumped folder is removed in a ``finally`` clause so that it is
    cleaned up even when loading or one of the assertions fails.
    """
    ad1 = load_disambiguator('IR', path=TEST_MODEL_PATH)
    tempname = uuid.uuid4().hex
    dump_dir = os.path.join(SCRATCH_PATH, tempname)
    ad1.dump(tempname, path=SCRATCH_PATH)
    try:
        # NOTE(review): the model is dumped under ``tempname`` but read
        # back under 'IR' -- confirm this is the intended lookup.
        ad2 = load_disambiguator('IR', path=SCRATCH_PATH)

        assert ad1.grounding_dict == ad2.grounding_dict
        assert ad1.names == ad2.names
        assert ad1.pos_labels == ad2.pos_labels
        assert (array_equal(
            ad1.classifier.estimator.named_steps['logit'].coef_,
            ad2.classifier.estimator.named_steps['logit'].coef_))
        assert ad1.info() == ad2.info(), (ad1.info(), ad2.info())
    finally:
        # Best-effort cleanup: a failure to delete the folder is only
        # logged and must not mask the outcome of the test itself.
        try:
            shutil.rmtree(dump_dir)
        except Exception:
            logger.warning('Could not clean up temporary folder %s'
                           % dump_dir)
Exemplo n.º 4
0
def test_modify_groundings():
    """Check that remapping a grounding shows up throughout the model.

    After mapping 'HGNC:6091' to 'UP:P06213' (with the name
    'Insulin Receptor'), the new identifier must appear in the
    disambiguator's and the classifier's positive labels, in the
    estimator's classes, in the names mapping and among the 'IR'
    groundings.
    """
    old_id = 'HGNC:6091'
    new_id = 'UP:P06213'
    disambiguator = load_disambiguator('IR', path=TEST_MODEL_PATH)
    disambiguator.modify_groundings(
        new_groundings={old_id: new_id},
        new_names={old_id: 'Insulin Receptor'},
    )

    classifier = disambiguator.classifier
    assert new_id in disambiguator.pos_labels
    assert new_id in classifier.pos_labels
    assert new_id in classifier.estimator.classes_
    assert new_id in disambiguator.names
    assert new_id in disambiguator.grounding_dict['IR'].values()
    assert disambiguator.names[new_id] == 'Insulin Receptor'
Exemplo n.º 5
0
import logging
from indra.ontology.standardize \
    import standardize_agent_name

logger = logging.getLogger(__name__)

# Adeft is an optional dependency. When it can be imported, one
# disambiguator is loaded per available shortform; on any failure the
# mapping is reset to empty and an informational message is logged.
adeft_disambiguators = {}
try:
    from adeft import available_shortforms as available_adeft_models
    from adeft.disambiguate import load_disambiguator
    for shortform in available_adeft_models:
        adeft_disambiguators[shortform] = load_disambiguator(shortform)
except Exception:
    logger.info('Adeft will not be available for grounding disambiguation.')
    # Discard any models loaded before the failure.
    adeft_disambiguators = {}


def run_adeft_disambiguation(stmt, agent, idx):
    """Run Adeft disambiguation on an Agent in a given Statement.

    This function looks at the evidence of the given Statement and attempts
    to look up the full paper or the abstract for the evidence. If both of
    those fail, the evidence sentence itself is used for disambiguation.
    The disambiguation model corresponding to the Agent text is then called,
    and the highest scoring returned grounding is set as the Agent's new
    grounding.

    The Statement's annotations as well as the Agent are modified in place
    and no value is returned.
Exemplo n.º 6
0
def test_load_disambiguator():
    """Check a loaded 'IR' model has the expected shortforms and parts."""
    loaded = load_disambiguator('IR', path=TEST_MODEL_PATH)
    assert loaded.shortforms == ['IR']
    # Both attributes must be present on the loaded object.
    for attr_name in ('classifier', 'recognizers'):
        assert hasattr(loaded, attr_name)
Exemplo n.º 7
0
def test_modify_groundings_error():
    """Call modify_groundings with a MESH:D011839 -> HGNC:6091 remapping.

    NOTE(review): no assertion is made here; going by the test name the
    call is presumably expected to raise -- confirm how that is enforced.
    """
    disambiguator = load_disambiguator('IR', path=TEST_MODEL_PATH)
    remapping = {'MESH:D011839': 'HGNC:6091'}
    disambiguator.modify_groundings(new_groundings=remapping)
Exemplo n.º 8
0
from itertools import groupby, chain
from indra.statements import Agent
from indra.databases import uniprot_client, hgnc_client
from indra.util import read_unicode_csv, write_unicode_csv

logger = logging.getLogger(__name__)


# Adeft is an optional dependency. If it is installed, load one Adeft
# model per available shortform to disambiguate acronyms and shortforms.
# On any failure (import or model loading) fall back to an empty mapping
# so that the rest of the module works without disambiguation.
try:
    from adeft import available_shortforms as available_adeft_models
    from adeft.disambiguate import load_disambiguator
    adeft_disambiguators = {}
    for shortform in available_adeft_models:
        adeft_disambiguators[shortform] = load_disambiguator(shortform)
except Exception:
    # The message names the package actually imported above (adeft).
    logger.info('Adeft will not be available for grounding disambiguation.')
    adeft_disambiguators = {}


class GroundingMapper(object):
    """Maps grounding of INDRA Agents based on a given grounding map.

    Parameters
    ----------
    gm : dict
        The grounding map, a dictionary mapping strings (entity names) to
        a dictionary of database identifiers.
    agent_map : Optional[dict]
        A dictionary mapping strings to grounded INDRA Agents with given state.
Exemplo n.º 9
0
def load_adeft_models():
    """Load the Adeft disambiguator for every available shortform.

    Returns
    -------
    dict
        Maps each shortform in ``available_adeft_models`` to the object
        returned by ``load_disambiguator`` for that shortform.
    """
    return {
        shortform: load_disambiguator(shortform)
        for shortform in available_adeft_models
    }
Exemplo n.º 10
0
from indra.databases.hgnc_client import get_uniprot_id

from indra_db_lite import get_entrez_pmids_for_hgnc
from indra_db_lite import get_entrez_pmids_for_uniprot
from indra_db_lite import get_mesh_terms_for_grounding
from indra_db_lite import get_plaintexts_for_text_ref_ids
from indra_db_lite import get_pmids_for_mesh_term
from indra_db_lite import get_text_ref_ids_for_agent_text
from indra_db_lite import get_text_ref_ids_for_pmids

from opaque.nlp.featurize import BaselineTfidfVectorizer

# Name the logger after the module rather than the file path, so it takes
# part in the dotted logger hierarchy used when configuring logging.
logger = logging.getLogger(__name__)

# Map each model name to a shortform it handles. If several shortforms
# share a model name, the last one iterated is kept.
reverse_model_map = {
    model_name: shortform
    for shortform, model_name in available_shortforms.items()
}

# Load each model once, through the shortform recorded for it above.
# Iterating over available_shortforms directly would call
# load_disambiguator once per shortform and keep only the last result
# for each model name, which is the same mapping at a higher cost.
models = {
    model_name: load_disambiguator(shortform)
    for model_name, shortform in reverse_model_map.items()
}


def get_groundings_for_disambiguator(disamb):
    """Return the distinct groundings found in a disambiguator.

    Parameters
    ----------
    disamb : object
        Object with a ``grounding_dict`` attribute: a mapping whose values
        are themselves mappings.

    Returns
    -------
    list
        The values of all inner mappings with duplicates removed; the
        order of the list is not defined.
    """
    unique_curies = {
        curie
        for grounding_map in disamb.grounding_dict.values()
        for curie in grounding_map.values()
    }
    return list(unique_curies)