Exemplo n.º 1
0
def remap_go_ids(stmts):
    """Update the GO grounding of agents to the primary GO ID, in place.

    Parameters
    ----------
    stmts : iterable
        Statements exposing an ``agent_list()`` method. Each agent returned
        is either None or an object with a ``db_refs`` dict.

    Notes
    -----
    Only agents that have a 'GO' entry in their db_refs are looked up, and
    the entry is only overwritten if ``go_client.get_primary_id`` returns
    a truthy value. Nothing is returned.
    """
    for statement in stmts:
        for ag in statement.agent_list():
            # Skip missing agents and agents without any GO grounding
            if ag is None or 'GO' not in ag.db_refs:
                continue
            primary = go_client.get_primary_id(ag.db_refs['GO'])
            if primary:
                ag.db_refs['GO'] = primary
Exemplo n.º 2
0
 def _get_db_refs(entity_term):
     """Collect the groundings of an entity term into a db_refs dict.

     Parameters
     ----------
     entity_term : dict
         Entity term with an 'xrefs' list (each entry carrying 'namespace'
         and 'id' keys) and a 'text' entry.

     Returns
     -------
     dict
         The collected groundings, plus the entity text under 'TEXT',
         after being passed through ``standardize_db_refs``.
     """
     # Namespaces whose ID is stored unchanged under the given db_refs key.
     # "be" is handled for compatibility with older versions.
     copied_as_is = {'uniprot': 'UP', 'hgnc': 'HGNC', 'chebi': 'CHEBI',
                     'pubchem': 'PUBCHEM', 'mesh': 'MESH', 'hmdb': 'HMDB',
                     'fplx': 'FPLX', 'be': 'FPLX'}
     # Namespaces that are stored and additionally looked up in famplex_map
     famplex_mapped = {'pfam': 'PF', 'interpro': 'IP'}
     # Namespaces that are dropped without logging a warning
     skipped = ('uaz',)

     groundings = {}
     for xref in entity_term['xrefs']:
         namespace = xref['namespace']
         if namespace in copied_as_is:
             groundings[copied_as_is[namespace]] = xref['id']
         elif namespace in famplex_mapped:
             ref_key = famplex_mapped[namespace]
             family = famplex_map.get((ref_key, xref['id']))
             if family:
                 groundings['FPLX'] = family
             groundings[ref_key] = xref['id']
         elif namespace == 'go':
             # Use the primary GO ID when the lookup returns one, otherwise
             # keep the ID exactly as given
             primary = go_client.get_primary_id(xref['id'])
             groundings['GO'] = primary if primary else xref['id']
         elif namespace == 'simple_chemical':
             # Only IDs starting with "HMDB" are kept from this namespace
             if xref['id'].startswith('HMDB'):
                 groundings['HMDB'] = xref['id']
         elif namespace not in skipped:
             logger.warning('Unhandled xref namespace: %s' % namespace)
     groundings['TEXT'] = entity_term['text']
     return standardize_db_refs(groundings)
Exemplo n.º 3
0
def test_go_secondary_to_primary():
    """Check that get_primary_id maps GO:0007067 to GO:0000278."""
    queried_id = 'GO:0007067'
    expected_id = 'GO:0000278'
    assert go_client.get_primary_id(queried_id) == expected_id
Exemplo n.º 4
0
    def _get_db_refs(entity_term, organism_priority=None):
        """Collect the groundings of an entity term into a db_refs dict.

        Parameters
        ----------
        entity_term : dict
            Entity term with an 'xrefs' list (each entry carrying
            'namespace' and 'id' keys), a 'text' entry and, optionally,
            an 'alt-xrefs' list with entries of the same shape.
        organism_priority : optional
            If truthy and a UniProt grounding was found, it is passed to
            ``prioritize_organism_grounding`` together with the UniProt ID
            and the unique alt-xrefs to choose the UniProt ID to keep.

        Returns
        -------
        dict
            The collected groundings, plus the entity text under 'TEXT',
            after being passed through ``standardize_db_refs``.
        """
        # Namespaces whose ID is stored unchanged under the given db_refs
        # key. For "uniprot" this covers both full protein and protein chain
        # IDs so that organism prioritization can be applied uniformly;
        # chains are separated out further below. "be" is handled for
        # compatibility with older versions.
        copied_as_is = {'uniprot': 'UP', 'hgnc': 'HGNC', 'chebi': 'CHEBI',
                        'pubchem': 'PUBCHEM', 'mesh': 'MESH', 'hmdb': 'HMDB',
                        'fplx': 'FPLX', 'be': 'FPLX', 'proonto': 'PR'}
        # Namespaces that are stored and additionally looked up in
        # famplex_map
        famplex_mapped = {'pfam': 'PF', 'interpro': 'IP'}
        # Namespaces that are dropped without logging a warning
        skipped = ('uaz',)

        groundings = {}
        for xref in entity_term['xrefs']:
            namespace = xref['namespace']
            if namespace in copied_as_is:
                groundings[copied_as_is[namespace]] = xref['id']
            elif namespace in famplex_mapped:
                ref_key = famplex_mapped[namespace]
                family = famplex_map.get((ref_key, xref['id']))
                if family:
                    groundings['FPLX'] = family
                groundings[ref_key] = xref['id']
            elif namespace == 'go':
                # Use the primary GO ID when the lookup returns one,
                # otherwise keep the ID exactly as given
                primary = go_client.get_primary_id(xref['id'])
                groundings['GO'] = primary if primary else xref['id']
            elif namespace == 'simple_chemical':
                # Only IDs starting with "HMDB" are kept from this namespace
                if xref['id'].startswith('HMDB'):
                    groundings['HMDB'] = xref['id']
            elif namespace not in skipped:
                logger.warning('Unhandled xref namespace: %s' % namespace)
        groundings['TEXT'] = entity_term['text']

        uniprot_id = groundings.get('UP')
        if uniprot_id:
            if organism_priority:
                # The alt-xrefs list repeats the same match several times,
                # so it is reduced to unique (namespace, id) pairs first
                alt_groundings = {
                    (alt['namespace'], alt['id'])
                    for alt in entity_term.get('alt-xrefs', [])
                }
                # A single prioritized UniProt ID or None comes back; on
                # None the ID from the primary xref is kept
                preferred = prioritize_organism_grounding(uniprot_id,
                                                          alt_groundings,
                                                          organism_priority)
                if preferred:
                    uniprot_id = preferred
                    groundings['UP'] = preferred
            # A "#" marks a protein chain grounding: the UP entry is removed
            # and the part after the "#" is stored under UPPRO instead
            if '#' in uniprot_id:
                del groundings['UP']
                groundings['UPPRO'] = uniprot_id.split('#')[1]

        return standardize_db_refs(groundings)
Exemplo n.º 5
0
 def _get_db_refs(entity_term):
     agent_name = entity_term['text']
     db_refs = {}
     for xr in entity_term['xrefs']:
         ns = xr['namespace']
         if ns == 'uniprot':
             up_id = xr['id']
             db_refs['UP'] = up_id
             # Look up official names in UniProt
             gene_name = up_client.get_gene_name(up_id)
             if gene_name is not None:
                 agent_name = gene_name
                 # If the gene name corresponds to an HGNC ID, add it to the
                 # db_refs
                 if up_client.is_human(up_id):
                     hgnc_id = hgnc_client.get_hgnc_id(gene_name)
                     if hgnc_id:
                         db_refs['HGNC'] = hgnc_id
         elif ns == 'hgnc':
             hgnc_id = xr['id']
             db_refs['HGNC'] = hgnc_id
             # Look up the standard gene symbol and set as name
             hgnc_name = hgnc_client.get_hgnc_name(hgnc_id)
             if hgnc_name:
                 agent_name = hgnc_name
             # Look up the corresponding uniprot id
             up_id = hgnc_client.get_uniprot_id(hgnc_id)
             if up_id:
                 db_refs['UP'] = up_id
         elif ns == 'pfam':
             be_id = famplex_map.get(('PF', xr['id']))
             if be_id:
                 db_refs['FPLX'] = be_id
                 agent_name = be_id
             db_refs['PF'] = xr['id']
         elif ns == 'interpro':
             be_id = famplex_map.get(('IP', xr['id']))
             if be_id:
                 db_refs['FPLX'] = be_id
                 agent_name = be_id
             db_refs['IP'] = xr['id']
         elif ns == 'chebi':
             db_refs['CHEBI'] = xr['id']
         elif ns == 'pubchem':
             db_refs['PUBCHEM'] = xr['id']
         elif ns == 'go':
             go_id = xr['id']
             # Handle secondary to primary mapping if necessary
             pri = go_client.get_primary_id(go_id)
             if pri:
                 go_id = pri
             db_refs['GO'] = go_id
         elif ns == 'mesh':
             db_refs['MESH'] = xr['id']
         elif ns == 'hmdb':
             db_refs['HMDB'] = xr['id']
         elif ns == 'simple_chemical':
             if xr['id'].startswith('HMDB'):
                 db_refs['HMDB'] = xr['id']
         elif ns == 'be':
             db_refs['FPLX'] = xr['id']
             agent_name = db_refs['FPLX']
         # These name spaces are ignored
         elif ns in ['uaz']:
             pass
         else:
             logger.warning('Unhandled xref namespace: %s' % ns)
     db_refs['TEXT'] = entity_term['text']
     return agent_name, db_refs