Exemplo n.º 1
0
def test_cas_to_chebi():
    """Check that CAS numbers are translated to the expected ChEBI IDs."""
    lookup = chebi_client.get_chebi_id_from_cas
    # CAS registry numbers paired with the ChEBI ID each should resolve to
    expected_ids = (
        ('23261-20-3', '18035'),
        ('100-51-6', '17987'),
    )
    for cas_id, chebi_id in expected_ids:
        assert lookup(cas_id) == chebi_id
    # '-1' is expected to have no ChEBI mapping at all
    assert lookup('-1') is None
Exemplo n.º 2
0
def test_cas_to_chebi():
    """Verify the CAS -> ChEBI lookup on two mapped IDs and one unmapped ID."""
    cas_to_chebi = chebi_client.get_chebi_id_from_cas

    # Two CAS numbers that must map to a specific ChEBI identifier
    first_hit = cas_to_chebi('23261-20-3')
    assert first_hit == '18035'
    second_hit = cas_to_chebi('100-51-6')
    assert second_hit == '17987'

    # An input without a mapping must yield None
    miss = cas_to_chebi('-1')
    assert miss is None
Exemplo n.º 3
0
def _urn_to_db_refs(urn):
    """Converts a Medscan URN to an INDRA db_refs dictionary with grounding
    information.

    Parameters
    ----------
    url : str
        A Medscan URN

    Returns
    -------
    db_refs : dict
        A dictionary with grounding information, mapping databases to database
        identifiers. If the Medscan URN is not recognized, returns an empty
        dictionary.
    db_name : str
        The Famplex name, if available; otherwise the HGNC name if available;
        otherwise None
    """
    # Convert a urn to a db_refs dictionary
    if urn is None:
        return {}, None

    p = 'urn:([^:]+):([^:]+)'
    m = re.match(p, urn)
    if m is None:
        return None, None

    urn_type = m.group(1)
    urn_id = m.group(2)

    db_refs = {}
    db_name = None

    # TODO: support more types of URNs
    if urn_type == 'agi-cas':
        # Identifier is CAS, convert to CHEBI
        chebi_id = get_chebi_id_from_cas(urn_id)
        if chebi_id:
            db_refs['CHEBI'] = 'CHEBI:%s' % chebi_id
    elif urn_type == 'agi-llid':
        # This is an Entrez ID, convert to HGNC
        hgnc_id = get_hgnc_from_entrez(urn_id)
        if hgnc_id is not None:
            db_refs['HGNC'] = hgnc_id

            # Convert the HGNC ID to a Uniprot ID
            uniprot_id = get_uniprot_id(hgnc_id)
            db_refs['UP'] = uniprot_id

            # Try to lookup HGNC name; if it's available, set it to the
            # agent name
            db_name = get_hgnc_name(hgnc_id)
    elif urn_type == 'agi-ncimorgan':
        # Identifier is MESH
        db_refs['MESH'] = urn_id
    elif urn_type == 'agi-ncimcelltype':
        # Identifier is MESH
        db_refs['MESH'] = urn_id
    elif urn_type == 'agi-meshdis':
        # Identifier is MESH
        db_refs['MESHDIS'] = urn_id
    elif urn_type == 'agi-gocomplex':
        # Identifier is GO
        db_refs['GO'] = 'GO:%s' % urn_id
    elif urn_type == 'agi-go':
        # Identifier is GO
        db_refs['GO'] = 'GO:%s' % urn_id
    elif urn_type == 'agi-ncimtissue':
        # Identifier is MESH
        db_refs['MESH'] = urn_id

    # If we have a GO or MESH grounding, see if there is a corresponding
    # Famplex grounding
    db_sometimes_maps_to_famplex = ['GO', 'MESH']
    for db in db_sometimes_maps_to_famplex:
        if db in db_refs:
            key = (db, db_refs[db])
            if key in famplex_map:
                db_refs['FPLX'] = famplex_map[key]

    # If the urn corresponds to an eccode, groudn to famplex if that eccode
    # is in the Famplex equivalences table
    if urn.startswith('urn:agi-enz'):
        tokens = urn.split(':')
        eccode = tokens[2]
        key = ('ECCODE', eccode)
        if key in famplex_map:
            db_refs['FPLX'] = famplex_map[key]

    # If the Medscan URN itself maps to a Famplex id, add a Famplex grounding
    key = ('MEDSCAN', urn)
    if key in famplex_map:
        db_refs['FPLX'] = famplex_map[key]

    # If there is a Famplex grounding, use Famplex for entity name
    if 'FPLX' in db_refs:
        db_name = db_refs['FPLX']

    return db_refs, db_name
Exemplo n.º 4
0
def _urn_to_db_refs(urn):
    """Converts a Medscan URN to an INDRA db_refs dictionary with grounding
    information.

    Parameters
    ----------
    urn : str
        A Medscan URN

    Returns
    -------
    db_refs : dict
        A dictionary with grounding information, mapping databases to database
        identifiers. If the Medscan URN is not recognized, returns an empty
        dictionary.
    db_name : str
        The Famplex name, if available; otherwise the HGNC name if available;
        otherwise None
    """
    # Convert a urn to a db_refs dictionary
    if urn is None:
        return {}, None

    m = URN_PATT.match(urn)
    if m is None:
        return None, None

    urn_type, urn_id = m.groups()

    db_refs = {}
    db_name = None

    # TODO: support more types of URNs
    if urn_type == 'agi-cas':
        # Identifier is CAS, convert to CHEBI
        chebi_id = get_chebi_id_from_cas(urn_id)
        if chebi_id:
            db_refs['CHEBI'] = 'CHEBI:%s' % chebi_id
            db_name = get_chebi_name_from_id(chebi_id)
    elif urn_type == 'agi-llid':
        # This is an Entrez ID, convert to HGNC
        hgnc_id = get_hgnc_from_entrez(urn_id)
        if hgnc_id is not None:
            db_refs['HGNC'] = hgnc_id

            # Convert the HGNC ID to a Uniprot ID
            uniprot_id = get_uniprot_id(hgnc_id)
            if uniprot_id is not None:
                db_refs['UP'] = uniprot_id

            # Try to lookup HGNC name; if it's available, set it to the
            # agent name
            db_name = get_hgnc_name(hgnc_id)
    elif urn_type in ['agi-meshdis', 'agi-ncimorgan', 'agi-ncimtissue',
                      'agi-ncimcelltype']:
        if urn_id.startswith('C') and urn_id[1:].isdigit():
            # Identifier is probably UMLS
            db_refs['UMLS'] = urn_id
        else:
            # Identifier is MESH
            urn_mesh_name = unquote(urn_id)
            mesh_id, mesh_name = mesh_client.get_mesh_id_name(urn_mesh_name)
            if mesh_id:
                db_refs['MESH'] = mesh_id
                db_name = mesh_name
            else:
                db_name = urn_mesh_name
    elif urn_type == 'agi-gocomplex':
        # Identifier is GO
        db_refs['GO'] = 'GO:%s' % urn_id
    elif urn_type == 'agi-go':
        # Identifier is GO
        db_refs['GO'] = 'GO:%s' % urn_id

    # If we have a GO or MESH grounding, see if there is a corresponding
    # Famplex grounding
    db_sometimes_maps_to_famplex = ['GO', 'MESH']
    for db in db_sometimes_maps_to_famplex:
        if db in db_refs:
            key = (db, db_refs[db])
            if key in famplex_map:
                db_refs['FPLX'] = famplex_map[key]

    # If the urn corresponds to an eccode, groudn to famplex if that eccode
    # is in the Famplex equivalences table
    if urn.startswith('urn:agi-enz'):
        tokens = urn.split(':')
        eccode = tokens[2]
        key = ('ECCODE', eccode)
        if key in famplex_map:
            db_refs['FPLX'] = famplex_map[key]

    # If the Medscan URN itself maps to a Famplex id, add a Famplex grounding
    key = ('MEDSCAN', urn)
    if key in famplex_map:
        db_refs['FPLX'] = famplex_map[key]

    # If there is a Famplex grounding, use Famplex for entity name
    if 'FPLX' in db_refs:
        db_name = db_refs['FPLX']
    elif 'GO' in db_refs:
        db_name = go_client.get_go_label(db_refs['GO'])

    return db_refs, db_name