def get_chemical_agent(name, mesh_id, cas_id):
    """Return an Agent for a chemical grounded to MESH and, optionally, CAS.

    Parameters
    ----------
    name : str
        The name given to the returned Agent.
    mesh_id : str
        The identifier stored under the 'MESH' key of the db_refs.
    cas_id : str or None
        The identifier stored under the 'CAS' key; left out when falsy.

    Returns
    -------
    Agent
        An Agent whose db_refs have been standardized and validated.
    """
    if cas_id:
        raw_refs = {'MESH': mesh_id, 'CAS': cas_id}
    else:
        raw_refs = {'MESH': mesh_id}
    standard_refs = standardize_db_refs(raw_refs)
    # Validate the standardized groundings before building the Agent
    assert_valid_db_refs(standard_refs)
    return Agent(name, db_refs=standard_refs)
def get_standard_agent(name, db_refs, ontology=None, ns_order=None, **kwargs):
    """Get a standard agent based on the name, db_refs, and any other kwargs.

    Parameters
    ----------
    name : str
        The name of the agent that may not be standardized.
    db_refs : dict
        A dict of db refs that may not be standardized, i.e., may be
        missing an available UP ID corresponding to an existing HGNC ID.
    ontology : Optional[indra.ontology.IndraOntology]
        An IndraOntology object, if not provided, the default BioOntology
        is used.
    ns_order : Optional[list]
        A list of namespaces which are in order of priority with higher
        priority namespaces appearing earlier in the list.
    kwargs :
        Keyword arguments to pass to :func:`Agent.__init__`.

    Returns
    -------
    Agent
        A standard agent
    """
    standardized = standardize_name_db_refs(db_refs, ontology=ontology,
                                            ns_order=ns_order)
    standard_name, standard_refs = standardized
    # Fall back to the caller-provided name if no standard name was found
    agent_name = standard_name if standard_name else name
    assert_valid_db_refs(standard_refs)
    return Agent(agent_name, db_refs=standard_refs, **kwargs)
def _get_drug_agent(drug_element):
    """Return a standard Agent built from a DrugBank drug element.

    Parameters
    ----------
    drug_element :
        The element that is searched (via db_find / db_findall) for the
        drug's name, DrugBank IDs, CAS number and external identifiers.

    Returns
    -------
    Agent
        The result of get_standard_agent for the extracted name and
        db_refs.
    """
    name = db_find(drug_element, 'db:name').text

    # Sometimes there is more than one DrugBank ID; after sorting, the
    # "smaller" one comes first and that is the one we keep
    drugbank_ids = [id_tag.text
                    for id_tag in db_findall(drug_element, 'db:drugbank-id')
                    if id_tag.text.startswith('DB')]
    drugbank_ids.sort()
    db_refs = {'DRUGBANK': drugbank_ids[0]}

    # The CAS number is only added if the tag exists and has text
    cas_tag = db_find(drug_element, 'db:cas-number')
    cas_number = None if cas_tag is None else cas_tag.text
    if cas_number is not None:
        db_refs['CAS'] = cas_number

    # Pick up the external identifiers for the resources we handle
    xref_path = ('db:external-identifiers/'
                 'db:external-identifier')
    for xref_tag in db_findall(drug_element, xref_path):
        resource = db_find(xref_tag, 'db:resource').text
        identifier = db_find(xref_tag, 'db:identifier').text
        if resource == 'ChEMBL':
            db_refs['CHEMBL'] = ensure_chembl_prefix(identifier)
        elif resource == 'PubChem Compound':
            db_refs['PUBCHEM'] = identifier
        elif resource == 'ChEBI':
            db_refs['CHEBI'] = ensure_chebi_prefix(identifier)

    assert_valid_db_refs(db_refs)
    return get_standard_agent(name, db_refs)
def assert_valid_node(self, label):
    """Check the namespace and ID of a node label, warning on failure.

    Labels whose namespace is INDRA_ACTIVITIES or INDRA_MODS are not
    checked. For any other namespace, a validation error is logged as a
    warning rather than being raised.

    Parameters
    ----------
    label :
        The node label that is split into a namespace and an ID using
        self.get_ns_id.
    """
    namespace, identifier = self.get_ns_id(label)
    if namespace not in {'INDRA_ACTIVITIES', 'INDRA_MODS'}:
        try:
            assert_valid_db_refs({namespace: identifier})
        except Exception as err:
            logger.warning(err)
def get_context(organism_name, organism_tax_id):
    """Return a BioContext whose species is the given organism.

    Parameters
    ----------
    organism_name : str
        The name passed to the species RefContext.
    organism_tax_id :
        The taxonomy identifier of the organism. It is converted with
        int() and then str() before being stored under 'TAXONOMY'.

    Returns
    -------
    BioContext or None
        None if organism_tax_id is falsy, otherwise a BioContext with the
        species set.
    """
    if organism_tax_id:
        refs = {'TAXONOMY': str(int(organism_tax_id))}
        assert_valid_db_refs(refs)
        organism = RefContext(organism_name, db_refs=refs)
        return BioContext(species=organism)
    return None
def _extract_protein(self, name, gene_id):
    """Return an Agent for a protein given its name and Entrez gene ID.

    Parameters
    ----------
    name : str
        The name used for the Agent unless standardization yields one.
    gene_id : str
        The identifier stored under 'EGID' and used to look up an HGNC ID.

    Returns
    -------
    Agent
        An Agent with standardized and validated db_refs.
    """
    hgnc_id = hgnc_client.get_hgnc_from_entrez(gene_id)
    raw_refs = {'EGID': gene_id}
    if hgnc_id is not None:
        raw_refs['HGNC'] = hgnc_id
    standard_name, standard_refs = standardize_name_db_refs(raw_refs)
    assert_valid_db_refs(standard_refs)
    # Prefer the standard name when one is available
    agent_name = standard_name if standard_name else name
    return Agent(agent_name, db_refs=standard_refs)
def get_gene_agent(name, gene_entrez_id):
    """Return an Agent for a gene given its name and Entrez gene ID.

    Parameters
    ----------
    name : str
        The gene name; it is used to look up an HGNC ID and serves as the
        Agent name unless standardization yields one.
    gene_entrez_id : str
        The identifier stored under the 'EGID' key of the db_refs.

    Returns
    -------
    Agent
        An Agent with standardized and validated db_refs.
    """
    hgnc_id = hgnc_client.get_hgnc_id(name)
    raw_refs = {'EGID': gene_entrez_id}
    if hgnc_id:
        raw_refs['HGNC'] = hgnc_id
    standard_name, standard_refs = standardize_name_db_refs(raw_refs)
    assert_valid_db_refs(standard_refs)
    agent_name = standard_name if standard_name else name
    return Agent(agent_name, db_refs=standard_refs)
def get_disease_agent(name, disease_id):
    """Return an Agent for a disease given its name and grounding string.

    Parameters
    ----------
    name : str
        The name used for the Agent unless standardization yields one.
    disease_id : str
        One or more groundings of the form ``NAMESPACE:ID`` separated by
        '|' characters.

    Returns
    -------
    Agent
        An Agent with standardized and validated db_refs.

    Raises
    ------
    ValueError
        If one of the groundings does not contain a ':' separator.
    """
    db_refs = {}
    for grounding in disease_id.split('|'):
        # Split on the first colon only so that identifiers which
        # themselves contain a colon are kept whole instead of raising
        # "too many values to unpack"
        db_ns, db_id = grounding.split(':', 1)
        db_refs[db_ns] = db_id
    standard_name, db_refs = standardize_name_db_refs(db_refs)
    assert_valid_db_refs(db_refs)
    if standard_name:
        name = standard_name
    return Agent(name, db_refs=db_refs)
def process_selventa_xref(xref):
    """Convert a Selventa xref string into a sorted db_refs string.

    Parameters
    ----------
    xref : str or missing value
        Entries of the form ``prefix:id`` separated by '|' characters, or
        a value for which pandas.isna is True.

    Returns
    -------
    str
        An empty string if xref is missing; otherwise ``namespace:id``
        entries sorted and joined with '|'. Entries whose prefix has no
        mapping in xref_mappings are logged and left out.
    """
    if pandas.isna(xref):
        return ''
    refs = {}
    for part in xref.split('|'):
        prefix, raw_id = part.split(':', maxsplit=1)
        namespace = xref_mappings.get(prefix)
        if namespace:
            refs[namespace] = ensure_prefix_if_needed(namespace, raw_id)
        else:
            logger.info('Unknown namespace: %s' % prefix)
    assert_valid_db_refs(refs)
    entries = ('%s:%s' % (namespace, db_id)
               for namespace, db_id in sorted(refs.items()))
    return '|'.join(entries)
def _extract_drugs(self, compound_ids, lspci_id):
    """Return Agents for the compounds listed in a '|'-separated string.

    Parameters
    ----------
    compound_ids : str
        Compound identifiers separated by '|' characters. Identifiers
        starting with 'CHEMBL' or 'HMSL' are used as groundings; any
        other identifier is logged as unhandled.
    lspci_id :
        The identifier stored under the 'LSPCI' key of every Agent.

    Returns
    -------
    list of Agent
        One Agent per distinct matches_key. Drugs are skipped according
        to self.standardized_only and self.named_only.
    """
    agents = []
    for compound_id in compound_ids.split('|'):
        refs = {'LSPCI': lspci_id}
        if compound_id.startswith('CHEMBL'):
            refs['CHEMBL'] = compound_id
        elif compound_id.startswith('HMSL'):
            refs['HMS-LINCS'] = compound_id.split('HMSL')[1]
        else:
            logger.warning('Unhandled ID type: %s' % compound_id)

        # Name standardization finds correct names but because ChEMBL is
        # incomplete as a local resource, its names are not universally
        # standardized; they are looked up explicitly when necessary.
        drug_name, refs = standardize_name_db_refs(refs)
        if drug_name is None:
            # A missing standard name is one way to detect that the drug
            # could not be standardized, so it is skipped when only
            # standardized drugs are wanted
            if self.standardized_only:
                continue
            if 'HMS-LINCS' in refs:
                drug_name = lincs_client_obj.get_small_molecule_name(
                    refs['HMS-LINCS'])
            elif 'CHEMBL' in refs:
                drug_name = chembl_client.get_chembl_name(refs['CHEMBL'])

        # If there is still no name, the ID itself is used as the name,
        # unless drugs without a proper name are to be skipped
        if drug_name is None:
            if self.named_only:
                continue
            drug_name = compound_id

        assert_valid_db_refs(refs)
        agents.append(Agent(drug_name, db_refs=refs))

    # Keep a single Agent per matches_key; for duplicates, the last one
    # seen is kept
    unique_agents = {}
    for agent in agents:
        unique_agents[agent.matches_key()] = agent
    return list(unique_agents.values())