    def parse_results(content):
        """Get terms from a single MTI output.

        Parameters
        ----------
        content : str
            The MTI output. Each non-empty line is split on ``|`` and its
            second field is looked up as a MeSH topic name.

        Returns
        -------
        list of tuple
            Unique ``(mesh_id, is_concept)`` pairs, where ``mesh_id`` is the
            numeric part of the MeSH ID as an int and ``is_concept`` is True
            when the ID has the ``C`` prefix (False for ``D``). An empty list
            is returned when ``content`` is empty.
        """
        if not content:
            return []

        terms = set()

        # Split content into non-empty lines
        for line in content.splitlines():
            if not line.strip():
                continue

            fields = line.split('|')
            if len(fields) < 2:
                # No topic field on this line, so there is nothing to look up
                logger.warning('Could not find a topic in MTI line "%s"',
                               line)
                continue
            topic = fields[1]

            # Look for mesh ID of the topic
            mesh_id_str = mesh_client.get_mesh_id_name(topic)[0]
            if not mesh_id_str:
                logger.warning('Mesh ID not found for "%s"' % topic)
                # Without an ID there is nothing to add; skipping also keeps
                # the prefix check below from indexing into a missing ID.
                continue

            # Add mesh ID as a number without prefix
            assert mesh_id_str[0] in ['C', 'D'], \
                f"Supposedly impossible mesh ID found: {mesh_id_str}"
            is_concept = (mesh_id_str[0] == 'C')
            mesh_id = int(mesh_id_str[1:])
            terms.add((mesh_id, is_concept))

        return list(terms)
def get_grounding_from_name(name):
    """Return grounding given an agent name.

    The lookups are tried in order and the first hit is returned: HGNC gene
    name, the grounding map ``gm``, ChEBI name, and finally MeSH name.

    Parameters
    ----------
    name : str
        The agent name to ground.

    Returns
    -------
    tuple or None
        A ``(db_name, db_id)`` pair for the first successful lookup, or None
        if none of the lookups produced a grounding.
    """
    # See if it's a gene name
    hgnc_id = get_hgnc_id(name)
    if hgnc_id:
        return ('HGNC', hgnc_id)

    # Check if it's in the grounding map
    try:
        refs = gm[name]
    except KeyError:
        # If not, fall through and search by text
        refs = None
    if isinstance(refs, dict):
        for dbn, dbi in refs.items():
            # The TEXT entry is not a grounding, so it is skipped
            if dbn != 'TEXT':
                return (dbn, dbi)

    chebi_id = get_chebi_id_from_name(name)
    if chebi_id:
        return ('CHEBI', f'CHEBI:{chebi_id}')

    mesh_id, _ = get_mesh_id_name(name)
    if mesh_id:
        return ('MESH', mesh_id)

    return None
def test_mesh_term_name_norm():
    # Descriptor D016922 is in the INDRA resource file under the name
    # "Cellular Senescence". In offline-only mode that name is expected to
    # resolve to the descriptor ID, whereas "Cell Aging" is expected to
    # resolve only when the REST service may be used.
    descriptor_name = 'Cellular Senescence'
    found_id, found_name = mesh_client.get_mesh_id_name(descriptor_name,
                                                        offline=True)
    assert found_id == 'D016922'
    assert found_name == descriptor_name

    alternate_name = 'Cell Aging'
    # Offline: no match expected for the alternate name
    found_id, found_name = mesh_client.get_mesh_id_name(alternate_name,
                                                        offline=True)
    assert found_id is None
    assert found_name is None

    # With the REST service allowed: resolves to the same descriptor
    found_id, found_name = mesh_client.get_mesh_id_name(alternate_name,
                                                        offline=False)
    assert found_id == 'D016922'
    assert found_name == 'Cellular Senescence'
def test_mesh_term_lookups():
    # Each case is (query term, expected MeSH ID, expected MeSH name)
    cases = [
        ('Breast Cancer', 'D001943', 'Breast Neoplasms'),
        ('Neoplasms', 'D009369', 'Neoplasms'),
        ('Colorectal Cancer', 'D015179', 'Colorectal Neoplasms'),
        ('Intestinal Neoplasms', 'D007414', 'Intestinal Neoplasms'),
        ('Carcinoma, Non-Small-Cell Lung',
         'D002289', 'Carcinoma, Non-Small-Cell Lung'),
        ('Prostate Cancer', 'D011471', 'Prostatic Neoplasms'),
    ]
    for term, expected_id, expected_name in cases:
        found_id, found_name = mesh_client.get_mesh_id_name(term)
        assert found_id == expected_id
        assert found_name == expected_name
def get_db_refs_by_name(ns, name, node_data):
    """Return standard name and grounding based on a namespace and a name.

    Parameters
    ----------
    ns : str
        A name space in which the given name is interpreted.
    name : str
        The name in the given name space to get grounding for.
    node_data : dict
        Node data for logging purposes.

    Returns
    -------
    name : str
        The standardized name for the given entity. When no standardization
        applies, the input name is returned (for unmapped Entrez IDs it is
        prefixed with ``E``).
    db_refs : dict or None
        The grounding for the given entity, or None if no grounding could be
        determined for the name in the given name space.
    """
    db_refs = None
    if ns == 'HGNC':
        # Assumption: name is an HGNC symbol
        hgnc_id = hgnc_client.get_current_hgnc_id(name)
        if not hgnc_id:
            logger.info("Invalid HGNC name: %s (%s)" % (name, node_data))
            return name, None
        elif isinstance(hgnc_id, list):
            logger.info('More than one current HGNC ID for %s, choosing %s' %
                        (name, hgnc_id[0]))
            hgnc_id = hgnc_id[0]
        name = hgnc_client.get_hgnc_name(hgnc_id)
        db_refs = {'HGNC': hgnc_id}
        up_id = _get_up_id(hgnc_id)
        if up_id:
            db_refs['UP'] = up_id
        mirbase_id = mirbase_client.get_mirbase_id_from_hgnc_id(hgnc_id)
        if mirbase_id:
            db_refs['MIRBASE'] = mirbase_id

    elif ns in ('UNIPROT', 'UP'):
        up_id = None
        # This is a simple test to see if name is a valid UniProt ID,
        # if we can't get a mnemonic, we assume it's not a UP ID
        if uniprot_client.get_mnemonic(name, web_fallback=False):
            up_id = name
        # We next check if it's a mnemonic
        else:
            up_id_from_mnem = uniprot_client.get_id_from_mnemonic(name)
            if up_id_from_mnem:
                up_id = up_id_from_mnem
        if not up_id:
            logger.info('Couldn\'t get UP ID from %s' % name)
            return name, None
        db_refs = {'UP': up_id}
        hgnc_id = uniprot_client.get_hgnc_id(up_id)
        if hgnc_id:
            db_refs['HGNC'] = hgnc_id
            name = hgnc_client.get_hgnc_name(hgnc_id)
        else:
            # Without an HGNC mapping, fall back to the UniProt gene name
            name = uniprot_client.get_gene_name(up_id)
    elif ns == 'FPLX':
        db_refs = {'FPLX': name}
    elif ns in ('GO', 'GOBP', 'GOCC'):
        # This label is rewritten before the GO lookup
        if name == 'cell proliferation':
            name = 'cell population proliferation'
        go_id = go_client.get_go_id_from_label(name)
        if not go_id:
            logger.info('Could not find GO ID for %s' % name)
            return name, None
        db_refs = {'GO': go_id}
        name = go_client.get_go_label(go_id)
    elif ns in ('MESHPP', 'MESHD', 'MESH'):
        mesh_id, mesh_name = mesh_client.get_mesh_id_name(name)
        if not mesh_id:
            logger.info('Could not find MESH ID from %s' % name)
            return name, None
        name = mesh_name
        db_refs = {'MESH': mesh_id}
    # For MGI/RGD, the name is looked up in the mouse/rat tables to get a
    # UniProt ID; db_refs stays None when the name is not in the table
    # FIXME: Full implementation would look up MGI/RGD identifiers from
    # the names, and obtain corresponding Uniprot IDs
    elif ns == 'MGI':
        up_id = mouse_lookup.get(name)
        if up_id:
            db_refs = {'UP': up_id}
    elif ns == 'RGD':
        up_id = rat_lookup.get(name)
        if up_id:
            db_refs = {'UP': up_id}
    # Map Selventa families and complexes to FamPlex
    elif ns == 'SFAM':
        db_refs = {'SFAM': name}
        indra_name = bel_to_indra.get(name)
        if indra_name is None:
            logger.info('Could not find mapping for BEL/SFAM family: '
                        '%s (%s)' % (name, node_data))
        else:
            db_refs['FPLX'] = indra_name
            name = indra_name
    elif ns == 'SCOMP':
        db_refs = {'SCOMP': name}
        indra_name = bel_to_indra.get(name)
        if indra_name is None:
            logger.info('Could not find mapping for BEL/SCOMP complex: '
                        '%s (%s)' % (name, node_data))
        else:
            db_refs['FPLX'] = indra_name
            name = indra_name
    # Map Entrez genes to HGNC/UP
    elif ns in ('EGID', 'ENTREZ', 'NCBIGENE'):
        hgnc_id = hgnc_client.get_hgnc_from_entrez(name)
        db_refs = {'EGID': name}
        if hgnc_id is not None:
            db_refs['HGNC'] = hgnc_id
            name = hgnc_client.get_hgnc_name(hgnc_id)
            up_id = hgnc_client.get_uniprot_id(hgnc_id)
            if up_id:
                db_refs['UP'] = up_id
            else:
                logger.info(
                    'HGNC entity %s with HGNC ID %s has no '
                    'corresponding Uniprot ID.', name, hgnc_id)
            mirbase_id = mirbase_client.get_mirbase_id_from_hgnc_id(hgnc_id)
            if mirbase_id:
                db_refs['MIRBASE'] = mirbase_id
        else:
            logger.debug('Could not map EGID%s to HGNC.' % name)
            name = 'E%s' % name
    elif ns == 'MIRBASE':
        mirbase_id = mirbase_client.get_mirbase_id_from_mirbase_name(name)
        if not mirbase_id:
            logger.info('Could not map miRBase name %s to ID', name)
            return name, None
        db_refs = {'MIRBASE': mirbase_id}
        hgnc_id = mirbase_client.get_hgnc_id_from_mirbase_id(mirbase_id)
        if hgnc_id:
            db_refs['HGNC'] = hgnc_id
            name = hgnc_client.get_hgnc_name(hgnc_id)
    # CHEBI
    elif ns == 'CHEBI':
        # We first look up BEL's own namespace map for ChEBI names to IDs
        chebi_id = chebi_name_id.get(name)
        # If that fails, we look up INDRA's ChEBI name to ID mapping
        if not chebi_id:
            chebi_id = chebi_client.get_chebi_id_from_name(name)
        if chebi_id:
            db_refs = {'CHEBI': chebi_id}
        else:
            logger.info('CHEBI name %s not found in map.' % name)
    # These appear in the name slot but are actually IDs
    elif ns == 'CHEBIID':
        chebi_id = identifiers.ensure_chebi_prefix(name)
        db_refs = {'CHEBI': chebi_id}
        name = chebi_client.get_chebi_name_from_id(chebi_id)
    # SDIS, SCHEM, TEXT: Include the name as the ID for the namespace
    elif ns in ('SDIS', 'SCHEM', 'TEXT'):
        db_refs = {ns: name}
    elif ns == 'TAX':
        tid = taxonomy_client.get_taxonomy_id(name)
        if tid:
            db_refs = {'TAXONOMY': tid}
        else:
            logger.info('Could not get taxonomy ID for %s' % name)
    else:
        logger.info("Unhandled namespace: %s: %s (%s)" % (ns, name, node_data))
    return name, db_refs
def test_mesh_term_local_missing():
    # NOTE(review): this function name is defined again later in this file,
    # which would shadow this test - confirm and rename one of them.
    # 'XXXX' is a dummy term to make sure we don't have it offline
    lookup = mesh_client.get_mesh_id_name('XXXX', offline=True)
    found_id, found_name = lookup
    assert found_id is None
    assert found_name is None
def test_mesh_term_lookup_local():
    # Check that a term can be resolved to its MeSH ID and name by the
    # offline lookup, with the name coming back unchanged.
    mesh_term = 'Glucosylceramides'
    # NOTE(review): the call's closing arguments were missing; offline=True
    # is restored based on this test's name and the sibling offline tests -
    # confirm.
    (mesh_id, mesh_name) = mesh_client.get_mesh_id_name(mesh_term,
                                                        offline=True)
    assert mesh_id == 'D005963'
    assert mesh_name == mesh_term
def _urn_to_db_refs(urn):
    """Converts a Medscan URN to an INDRA db_refs dictionary with grounding

    urn : str
        A Medscan URN

    db_refs : dict
        A dictionary with grounding information, mapping databases to database
        identifiers. If the Medscan URN is not recognized, returns an empty
    db_name : str
        The Famplex name, if available; otherwise the HGNC name if available;
        otherwise None
    # Convert a urn to a db_refs dictionary
    if urn is None:
        return {}, None

    m = URN_PATT.match(urn)
    if m is None:
        return None, None

    urn_type, urn_id = m.groups()

    db_refs = {}
    db_name = None

    # TODO: support more types of URNs
    if urn_type == 'agi-cas':
        # Identifier is CAS, convert to CHEBI
        chebi_id = get_chebi_id_from_cas(urn_id)
        if chebi_id:
            db_refs['CHEBI'] = chebi_id
            db_name = get_chebi_name_from_id(chebi_id)
    elif urn_type == 'agi-llid':
        # This is an Entrez ID, convert to HGNC
        hgnc_id = get_hgnc_from_entrez(urn_id)
        if hgnc_id is not None:
            db_refs['HGNC'] = hgnc_id

            # Convert the HGNC ID to a Uniprot ID
            uniprot_id = get_uniprot_id(hgnc_id)
            if uniprot_id is not None:
                db_refs['UP'] = uniprot_id

            # Try to lookup HGNC name; if it's available, set it to the
            # agent name
            db_name = get_hgnc_name(hgnc_id)
    elif urn_type in [
            'agi-meshdis', 'agi-ncimorgan', 'agi-ncimtissue',
        if urn_id.startswith('C') and urn_id[1:].isdigit():
            # Identifier is probably UMLS
            db_refs['UMLS'] = urn_id
            # Identifier is MESH
            urn_mesh_name = unquote(urn_id)
            mesh_id, mesh_name = mesh_client.get_mesh_id_name(urn_mesh_name)
            if mesh_id:
                db_refs['MESH'] = mesh_id
                db_name = mesh_name
                db_name = urn_mesh_name
    elif urn_type == 'agi-gocomplex':
        # Identifier is GO
        db_refs['GO'] = 'GO:%s' % urn_id
    elif urn_type == 'agi-go':
        # Identifier is GO
        db_refs['GO'] = 'GO:%s' % urn_id

    # If we have a GO or MESH grounding, see if there is a corresponding
    # Famplex grounding
    db_sometimes_maps_to_famplex = ['GO', 'MESH']
    for db in db_sometimes_maps_to_famplex:
        if db in db_refs:
            key = (db, db_refs[db])
            if key in famplex_map:
                db_refs['FPLX'] = famplex_map[key]

    # If the urn corresponds to an eccode, groudn to famplex if that eccode
    # is in the Famplex equivalences table
    if urn.startswith('urn:agi-enz'):
        tokens = urn.split(':')
        eccode = tokens[2]
        key = ('ECCODE', eccode)
        if key in famplex_map:
            db_refs['FPLX'] = famplex_map[key]

    # If the Medscan URN itself maps to a Famplex id, add a Famplex grounding
    key = ('MEDSCAN', urn)
    if key in famplex_map:
        db_refs['FPLX'] = famplex_map[key]

    # If there is a Famplex grounding, use Famplex for entity name
    if 'FPLX' in db_refs:
        db_name = db_refs['FPLX']
    elif 'GO' in db_refs:
        db_name = go_client.get_go_label(db_refs['GO'])

    return db_refs, db_name
def test_mesh_term_local_missing():
    # NOTE(review): this function name is also defined earlier in this file;
    # this later definition shadows that one - confirm and rename one of them.
    # The offline lookup is expected to find neither an ID nor a name here.
    lookup = mesh_client.get_mesh_id_name('Rectal Tumors', offline=True)
    found_id, found_name = lookup
    assert found_id is None
    assert found_name is None
