Exemplo n.º 1
0
def get_grounding_from_name(name):
    """Look up a grounding for an agent name.

    The lookups are tried in a fixed order and the first hit wins: HGNC
    gene ID, the grounding map ``gm``, ChEBI name, and finally MeSH.

    Parameters
    ----------
    name : str
        The agent name to ground.

    Returns
    -------
    tuple or None
        A ``(namespace, identifier)`` pair, or None if none of the lookups
        produced a result.
    """
    # Gene names take precedence over everything else
    gene_id = get_hgnc_id(name)
    if gene_id:
        return ('HGNC', gene_id)

    # Next, consult the grounding map; a missing entry just means we move
    # on to the name-based searches below
    try:
        mapped = gm[name]
        if isinstance(mapped, dict):
            # Pick the first entry that is not the raw TEXT reference
            first_hit = next(
                ((namespace, identifier)
                 for namespace, identifier in mapped.items()
                 if namespace != 'TEXT'),
                None,
            )
            if first_hit is not None:
                return first_hit
    except KeyError:
        pass

    chebi_id = get_chebi_id_from_name(name)
    if chebi_id:
        return ('CHEBI', f'CHEBI:{chebi_id}')

    # Only the ID part of the (ID, name) pair is needed here
    mesh_id, _mesh_name = get_mesh_id_name(name)
    return ('MESH', mesh_id) if mesh_id else None
Exemplo n.º 2
0
def get_db_refs_by_name(ns, name, node_data):
    """Standardize a name and collect its grounding within a namespace.

    Each supported namespace is handled by its own branch, which returns
    as soon as the lookup for that namespace has succeeded or failed.

    Parameters
    ----------
    ns : str
        The namespace in which ``name`` is to be interpreted.
    name : str
        The name (for some namespaces actually an identifier) to ground.
    node_data : dict
        Node data, used only in log messages.

    Returns
    -------
    name : str
        The standardized name where the branch looks one up, otherwise
        the name as it was given.
    db_refs : dict or None
        The grounding collected for the entity, or None if the namespace
        is not handled or no grounding could be found.
    """
    if ns == 'HGNC':
        # The name is assumed to be an HGNC symbol
        gene_id = hgnc_client.get_current_hgnc_id(name)
        if not gene_id:
            logger.info("Invalid HGNC name: %s (%s)" % (name, node_data))
            return name, None
        if isinstance(gene_id, list):
            logger.info('More than one current HGNC ID for %s, choosing %s' %
                        (name, gene_id[0]))
            gene_id = gene_id[0]
        name = hgnc_client.get_hgnc_name(gene_id)
        grounding = {'HGNC': gene_id}
        uniprot_id = _get_up_id(gene_id)
        if uniprot_id:
            grounding['UP'] = uniprot_id
        mirbase_id = mirbase_client.get_mirbase_id_from_hgnc_id(gene_id)
        if mirbase_id:
            grounding['MIRBASE'] = mirbase_id
        return name, grounding

    if ns in ('UNIPROT', 'UP'):
        # If a mnemonic can be found for the name, it is taken to be a
        # valid UniProt ID; otherwise the name is tried as a mnemonic
        if uniprot_client.get_mnemonic(name, web_fallback=False):
            uniprot_id = name
        else:
            uniprot_id = uniprot_client.get_id_from_mnemonic(name)
        if not uniprot_id:
            logger.info('Couldn\'t get UP ID from %s' % name)
            return name, None
        grounding = {'UP': uniprot_id}
        gene_id = uniprot_client.get_hgnc_id(uniprot_id)
        if gene_id:
            grounding['HGNC'] = gene_id
            name = hgnc_client.get_hgnc_name(gene_id)
        else:
            name = uniprot_client.get_gene_name(uniprot_id)
        return name, grounding

    if ns == 'FPLX':
        return name, {'FPLX': name}

    if ns in ('GO', 'GOBP', 'GOCC'):
        # This one label is rewritten before the GO lookup
        if name == 'cell proliferation':
            name = 'cell population proliferation'
        go_id = go_client.get_go_id_from_label(name)
        if not go_id:
            logger.info('Could not find GO ID for %s' % name)
            return name, None
        grounding = {'GO': go_id}
        return go_client.get_go_label(go_id), grounding

    if ns in ('MESHPP', 'MESHD', 'MESH'):
        mesh_id, mesh_name = mesh_client.get_mesh_id_name(name)
        if not mesh_id:
            logger.info('Could not find MESH ID from %s' % name)
            return name, None
        return mesh_name, {'MESH': mesh_id}

    # MGI/RGD names are mapped to UniProt IDs through lookup tables; the
    # name itself is returned unchanged
    if ns == 'MGI':
        uniprot_id = mouse_lookup.get(name)
        return name, ({'UP': uniprot_id} if uniprot_id else None)

    if ns == 'RGD':
        uniprot_id = rat_lookup.get(name)
        return name, ({'UP': uniprot_id} if uniprot_id else None)

    # Selventa families and complexes are mapped to FamPlex when possible
    if ns == 'SFAM':
        grounding = {'SFAM': name}
        famplex_name = bel_to_indra.get(name)
        if famplex_name is None:
            logger.info('Could not find mapping for BEL/SFAM family: '
                        '%s (%s)' % (name, node_data))
            return name, grounding
        grounding['FPLX'] = famplex_name
        return famplex_name, grounding

    if ns == 'SCOMP':
        grounding = {'SCOMP': name}
        famplex_name = bel_to_indra.get(name)
        if famplex_name is None:
            logger.info('Could not find mapping for BEL/SCOMP complex: '
                        '%s (%s)' % (name, node_data))
            return name, grounding
        grounding['FPLX'] = famplex_name
        return famplex_name, grounding

    # Entrez genes are mapped to HGNC and from there to UP/miRBase
    if ns in ('EGID', 'ENTREZ', 'NCBIGENE'):
        gene_id = hgnc_client.get_hgnc_from_entrez(name)
        grounding = {'EGID': name}
        if gene_id is None:
            logger.debug('Could not map EGID%s to HGNC.' % name)
            return 'E%s' % name, grounding
        grounding['HGNC'] = gene_id
        name = hgnc_client.get_hgnc_name(gene_id)
        uniprot_id = hgnc_client.get_uniprot_id(gene_id)
        if uniprot_id:
            grounding['UP'] = uniprot_id
        else:
            logger.info(
                'HGNC entity %s with HGNC ID %s has no '
                'corresponding Uniprot ID.', name, gene_id)
        mirbase_id = mirbase_client.get_mirbase_id_from_hgnc_id(gene_id)
        if mirbase_id:
            grounding['MIRBASE'] = mirbase_id
        return name, grounding

    if ns == 'MIRBASE':
        mirbase_id = mirbase_client.get_mirbase_id_from_mirbase_name(name)
        if not mirbase_id:
            logger.info('Could not map miRBase name %s to ID', name)
            return name, None
        grounding = {'MIRBASE': mirbase_id}
        gene_id = mirbase_client.get_hgnc_id_from_mirbase_id(mirbase_id)
        if gene_id:
            grounding['HGNC'] = gene_id
            name = hgnc_client.get_hgnc_name(gene_id)
        return name, grounding

    if ns == 'CHEBI':
        # BEL's own ChEBI name map is consulted first, INDRA's ChEBI
        # name-to-ID mapping is the fallback
        chebi_id = (chebi_name_id.get(name)
                    or chebi_client.get_chebi_id_from_name(name))
        if not chebi_id:
            logger.info('CHEBI name %s not found in map.' % name)
            return name, None
        return name, {'CHEBI': chebi_id}

    # In this namespace the name slot actually carries an ID
    if ns == 'CHEBIID':
        chebi_id = identifiers.ensure_chebi_prefix(name)
        grounding = {'CHEBI': chebi_id}
        return chebi_client.get_chebi_name_from_id(chebi_id), grounding

    # For these namespaces the name itself serves as the ID
    if ns in ('SDIS', 'SCHEM', 'TEXT'):
        return name, {ns: name}

    if ns == 'TAX':
        taxonomy_id = taxonomy_client.get_taxonomy_id(name)
        if taxonomy_id:
            return name, {'TAXONOMY': taxonomy_id}
        logger.info('Could not get taxonomy ID for %s' % name)
        return name, None

    logger.info("Unhandled namespace: %s: %s (%s)" % (ns, name, node_data))
    return name, None
Exemplo n.º 3
0
def get_db_refs_by_name(ns, name, node_data):
    """Return standard name and grounding based on a namespace and a name.

    Parameters
    ----------
    ns : str
        A name space in which the given name is interpreted.
    name : str
        The name in the given name space to get grounding for.
    node_data : dict
        Node data for logging purposes.

    Returns
    -------
    name : str
        The standardized name for the given entity. Only the SFAM and
        Entrez branches replace the input name; all other branches return
        it unchanged.
    db_refs : dict or None
        The grounding for the given entity, or None if the namespace is
        not handled or no grounding could be found. A 2-tuple is returned
        on every path so that callers can always unpack the result.
    """
    db_refs = None
    if ns == 'HGNC':
        hgnc_id = hgnc_client.get_hgnc_id(name)
        if not hgnc_id:
            logger.info("Invalid HGNC name: %s (%s)" % (name, node_data))
            return name, None
        db_refs = {'HGNC': hgnc_id}
        up_id = _get_up_id(hgnc_id)
        if up_id:
            db_refs['UP'] = up_id
        mirbase_id = mirbase_client.get_mirbase_id_from_hgnc_id(hgnc_id)
        if mirbase_id:
            db_refs['MIRBASE'] = mirbase_id

    elif ns in ('UNIPROT', 'UP'):
        up_id = None
        # If a gene name can be looked up directly, the name is taken to
        # be a UniProt ID; otherwise it is tried as a mnemonic
        gene_name = uniprot_client.get_gene_name(name)
        if gene_name:
            up_id = name
        else:
            up_id_from_mnem = uniprot_client.get_id_from_mnemonic(name)
            if up_id_from_mnem:
                up_id = up_id_from_mnem
                gene_name = uniprot_client.get_gene_name(up_id)
        if not up_id:
            logger.info('Couldn\'t get UP ID from %s' % name)
            return name, None
        db_refs = {'UP': up_id}
        # HGNC grounding is only attempted for human entries
        if uniprot_client.is_human(up_id):
            hgnc_id = hgnc_client.get_hgnc_id(gene_name)
            if not hgnc_id:
                logger.info('Uniprot ID linked to invalid human gene '
                            'name %s' % name)
            else:
                db_refs['HGNC'] = hgnc_id
    elif ns == 'FPLX':
        db_refs = {'FPLX': name}
    elif ns in ('GO', 'GOBP', 'GOCC'):
        go_id = go_client.get_go_id_from_label(name)
        if not go_id:
            logger.info('Could not find GO ID for %s' % name)
            return name, None
        db_refs = {'GO': go_id}
    elif ns in ('MESHPP', 'MESHD', 'MESH'):
        # get_mesh_id_name returns an (ID, name) pair; unpack it so that
        # the emptiness check and the stored grounding use the ID alone
        mesh_id, _ = mesh_client.get_mesh_id_name(name)
        if not mesh_id:
            logger.info('Could not find MESH ID from %s' % name)
            return name, None
        db_refs = {'MESH': mesh_id}
    # For now, handle MGI/RGD but putting the name into the db_refs so
    # it's clear what namespace the name belongs to
    # FIXME: Full implementation would look up MGI/RGD identifiers from
    # the names, and obtain corresponding Uniprot IDs
    elif ns in ('MGI', 'RGD'):
        db_refs = {ns: name}
    # Map Selventa families to FamPlexes
    elif ns == 'SFAM':
        db_refs = {'SFAM': name}
        indra_name = bel_to_indra.get(name)
        if indra_name is None:
            logger.info('Could not find mapping for BEL/SFAM family: '
                        '%s (%s)' % (name, node_data))
        else:
            db_refs['FPLX'] = indra_name
            name = indra_name
    # Map Entrez genes to HGNC/UP
    elif ns in ('EGID', 'ENTREZ', 'NCBIGENE'):
        hgnc_id = hgnc_client.get_hgnc_from_entrez(name)
        db_refs = {'EGID': name}
        if hgnc_id is not None:
            db_refs['HGNC'] = hgnc_id
            name = hgnc_client.get_hgnc_name(hgnc_id)
            up_id = hgnc_client.get_uniprot_id(hgnc_id)
            if up_id:
                db_refs['UP'] = up_id
            else:
                logger.info('HGNC entity %s with HGNC ID %s has no '
                            'corresponding Uniprot ID.',
                            name, hgnc_id)
            mirbase_id = mirbase_client.get_mirbase_id_from_hgnc_id(hgnc_id)
            if mirbase_id:
                db_refs['MIRBASE'] = mirbase_id
        else:
            logger.info('Could not map EGID%s to HGNC.' % name)
            name = 'E%s' % name
    elif ns == 'MIRBASE':
        mirbase_id = mirbase_client.get_mirbase_id_from_mirbase_name(name)
        if not mirbase_id:
            logger.info('Could not map miRBase name %s to ID', name)
            # Return the same (name, None) pair as every other failed
            # lookup rather than a bare None
            return name, None
        db_refs = {'MIRBASE': mirbase_id}
        hgnc_id = mirbase_client.get_hgnc_id_from_mirbase_id(mirbase_id)
        if hgnc_id:
            db_refs['HGNC'] = hgnc_id
    # CHEBI
    elif ns == 'CHEBI':
        # Try the chebi_name_id map first, then fall back to the
        # ChEBI client's name-to-ID lookup
        chebi_id = chebi_name_id.get(name)
        if not chebi_id:
            chebi_id = chebi_client.get_chebi_id_from_name(name)
        if chebi_id:
            db_refs = {'CHEBI': chebi_id}
        else:
            logger.info('CHEBI name %s not found in map.' % name)
    # SDIS, SCHEM: Include the name as the ID for the namespace
    elif ns in ('SDIS', 'SCHEM'):
        db_refs = {ns: name}
    else:
        logger.info("Unhandled namespace: %s: %s (%s)" % (ns, name,
                                                          node_data))
    return name, db_refs
Exemplo n.º 4
0
def test_chebi_name_to_id():
    """Check that 'vemurafenib' maps to the prefixed ID 'CHEBI:63637'."""
    expected_id = 'CHEBI:63637'
    found_id = chebi_client.get_chebi_id_from_name('vemurafenib')
    # The looked-up value doubles as the failure message
    assert found_id == expected_id, found_id
Exemplo n.º 5
0
def test_chebi_name_to_id():
    """Check that 'vemurafenib' maps to the unprefixed ID '63637'."""
    expected_id = '63637'
    found_id = chebi_client.get_chebi_id_from_name('vemurafenib')
    # The looked-up value doubles as the failure message
    assert found_id == expected_id, found_id