def _make_famplex_lookup():
    """Build a dictionary for looking up FamPlex IDs by member gene names.

    The bio ontology is initialized, and for every ontology node in the
    ``FPLX`` namespace the names of its children in the ``HGNC`` namespace
    are collected.

    Returns
    -------
    dict
        Maps a sorted tuple of HGNC gene names to the FamPlex ID whose
        children carry those names. If several FamPlex entries yield the
        same tuple (including the empty tuple), the entry visited last wins.
    """
    bio_ontology.initialize()
    lookup = {}
    for node in bio_ontology.nodes:
        namespace, identifier = bio_ontology.get_ns_id(node)
        if namespace != 'FPLX':
            continue
        gene_names = sorted(
            bio_ontology.get_name(*child)
            for child in bio_ontology.get_children(namespace, identifier)
            if child[0] == 'HGNC'
        )
        lookup[tuple(gene_names)] = identifier
    return lookup
def _add_node(self, agent, uuid=None):
    """Return the node ID for an agent, creating the node if necessary.

    Nodes are keyed by ``agent.name``. When a node for that name already
    exists it is not added again; only ``uuid`` is recorded in the node's
    ``uuid_list`` (if not already present) and the existing ID is returned.

    Parameters
    ----------
    agent
        Object providing ``name`` and, for new nodes, ``get_grounding()``.
    uuid
        Identifier to associate with the node. The default ``None`` is
        stored in ``uuid_list`` like any other value.

    Returns
    -------
    The ID of the existing or newly created node.
    """
    key = agent.name
    existing_id = self._existing_nodes.get(key)
    if existing_id is not None:
        # Known node: locate its record and just remember the new uuid
        matches = [nd for nd in self._nodes
                   if nd['data']['id'] == existing_id]
        known_uuids = matches[0]['data']['uuid_list']
        if uuid not in known_uuids:
            known_uuids.append(uuid)
        return existing_id

    db_refs = _get_db_refs(agent)
    new_id = self._get_new_id()
    self._existing_nodes[key] = new_id
    display_name = agent.name.replace('_', ' ')

    # Families (FPLX groundings) are expanded into their HGNC children
    family_members = []
    if 'FPLX' in db_refs:
        family_members = bio_ontology.get_children(
            *agent.get_grounding(), ns_filter={'HGNC'})

    members = {}
    for member in family_members:
        member_refs = standardize_db_refs({member[0]: member[1]})
        gene_name = bio_ontology.get_name(*member)
        # Keep only the references for which a URL could be produced
        member_urls = {}
        for dbns, dbid in member_refs.items():
            url = get_identifiers_url(dbns, dbid)
            if url:
                member_urls[dbns] = url
        members[gene_name] = {'db_refs': member_urls}

    self._nodes.append({
        'data': {
            'id': new_id,
            'name': display_name,
            'db_refs': db_refs,
            'parent': '',
            'members': members,
            'uuid_list': [uuid]
        }
    })
    return new_id
def get_pain_mol():
    """Collect the names of the ontology children of two ChEBI parent terms.

    Returns
    -------
    dict
        Maps each compound label to the list of names that
        ``bio_ontology.get_name`` returns for the ChEBI children of the
        label's parent ID.
    """
    # NOTE(review): "Brandykinin" looks like a misspelling of "Bradykinin";
    # it is left as is because the label is a key of the returned dict.
    parent_ids = {
        "Prostaglandins": "CHEBI:26333",
        "Brandykinin": "CHEBI:3165"
    }
    names_by_compound = {}
    for compound, parent_id in parent_ids.items():
        child_ids = [
            child[1]
            for child in bio_ontology.get_children('CHEBI', parent_id)
        ]
        names_by_compound[compound] = [
            bio_ontology.get_name('CHEBI', child_id)
            for child_id in child_ids
        ]
    return names_by_compound
def get_genes_for_family(family_agent):
    """Return agents for the individual genes of a given family agent.

    Parameters
    ----------
    family_agent
        Agent whose ``db_refs`` may contain an ``FPLX`` entry.

    Returns
    -------
    list
        One Agent per ``HGNC`` child of the family, sorted by agent name.
        Empty if the agent has no ``FPLX`` grounding.
    """
    from indra.ontology.bio import bio_ontology
    from indra.ontology.standardize import standardize_agent_name

    fplx_id = family_agent.db_refs.get('FPLX')
    if not fplx_id:
        return []

    gene_agents = []
    for child_ns, child_id in bio_ontology.get_children('FPLX', fplx_id):
        if child_ns != 'HGNC':
            continue
        # Created without a name; the name is filled in from the grounding
        gene_agent = Agent(None, db_refs={
            'HGNC': child_id,
            'TYPE': 'ONT::GENE-PROTEIN'
        })
        standardize_agent_name(gene_agent, standardize_refs=True)
        gene_agents.append(gene_agent)
    gene_agents.sort(key=lambda ag: ag.name)
    return gene_agents
def set_style_expression_mutation(self, model, cell_line='A375_SKIN'):
    """Style nodes by expression level (fill) and mutation status (stroke).

    Each glyph label is matched to an agent in the model, a mean expression
    level is computed per agent, the levels are rescaled to scores between
    0 and 1, and every scored agent gets a fill color derived from its score
    and a stroke color derived from its mutation status.

    Parameters
    ----------
    model: list<indra.statements.Statement>
        A list of INDRA statements
    cell_line: str
        The cell line whose protein expression levels are looked up
    """
    # Match each label to an agent with that normalized name; if several
    # agents match, the one encountered last is kept.
    label_to_agent = {}
    for label in self.label_to_glyph_ids.keys():
        for stmt in model:
            for agent in stmt.agent_list():
                if agent is None:
                    continue
                if _n(agent.name) == label:
                    label_to_agent[label] = agent

    # Mean expression level per agent (None when nothing was measured)
    expression_by_agent = {}
    for agent in label_to_agent.values():
        refs = agent.db_refs
        is_family = 'FPLX' in refs
        if not is_family and 'HGNC' not in refs:
            # Not a gene: fixed level of 0
            expression_by_agent[agent] = 0
            continue
        if is_family:
            children = bio_ontology.get_children('FPLX', refs['FPLX'])
            gene_names = [bio_ontology.get_name(*child)
                          for child in children]
        else:
            gene_names = [agent.name]
        logger.info('Getting expression status of proteins: %s' %
                    str(gene_names))
        expression = self.get_expression(gene_names, cell_line)
        measured = []
        for outer_key in expression:
            inner = expression[outer_key]
            for inner_key in inner:
                value = inner[inner_key]
                if value is not None:
                    measured.append(value)
        if measured:
            expression_by_agent[agent] = sum(measured) / len(measured)
        else:
            expression_by_agent[agent] = None

    # Range of the known levels, used to normalize scores
    known_levels = [lvl for lvl in expression_by_agent.values()
                    if lvl is not None]
    min_level = min(known_levels) if known_levels else None
    max_level = max(known_levels) if known_levels else None

    # Scores are only assigned when at least one level is known
    agent_to_score = {}
    if max_level is not None:
        level_span = max_level - min_level
        for agent, level in expression_by_agent.items():
            if level is not None and level_span != 0:
                agent_to_score[agent] = (level - min_level) / level_span
            else:
                agent_to_score[agent] = 0

    # Turn scores into fill colors and apply the styles
    for agent, score in agent_to_score.items():
        refs = agent.db_refs
        if 'HGNC' in refs or 'FPLX' in refs:
            color = cm.Greens(0.6*score + 0.2)
        else:
            color = cm.Blues(0.3)
        color_str = colors.to_hex(color[:3])
        assert(len(color_str) == 7)
        stroke_color = \
            self._choose_stroke_color_from_mutation_status(agent.name,
                                                           cell_line)
        self.set_style(agent.name, stroke_color, color_str)
def test_mtorc_children():
    """Check that mTORC1 and mTORC2 do not list each other's subunit.

    RICTOR must not be among the children of FPLX:mTORC1 and RPTOR must not
    be among the children of FPLX:mTORC2.
    """
    mtorc1_children = bio_ontology.get_children('FPLX', 'mTORC1')
    mtorc2_children = bio_ontology.get_children('FPLX', 'mTORC2')
    rictor = ('HGNC', hgnc_client.get_hgnc_id('RICTOR'))
    assert rictor not in mtorc1_children
    rptor = ('HGNC', hgnc_client.get_hgnc_id('RPTOR'))
    assert rptor not in mtorc2_children
import pickle
from collections import Counter
from emmaa.model_tests import StatementCheckingTest
from indra.ontology.bio import bio_ontology
from indra.statements.validate import print_validation_report
from indra.ontology.standardize import standardize_agent_name

CTD_CHEMICAL_DISEASE = '/Users/ben/data/ctd/ctd_chemical_disease.pkl'

# Root MeSH grounding plus everything the ontology returns as its children;
# note that the ontology lookup runs at import time.
pain = ('MESH', 'D010146')
pain_and_children = [pain] + bio_ontology.get_children(*pain)


def filter_objects(stmts, object_groundings):
    """Keep the statements whose object has one of the given groundings.

    Parameters
    ----------
    stmts : list
        Statements exposing ``obj.db_refs`` (a dict of namespace -> ID).
    object_groundings : iterable of tuple
        ``(namespace, id)`` pairs to match against the items of each
        statement object's ``db_refs``.

    Returns
    -------
    list
        The statements, in input order, whose object's ``db_refs`` share at
        least one ``(namespace, id)`` pair with ``object_groundings``.
    """
    print('Filtering %d statements' % len(stmts))
    # Build the lookup set once: rebuilding it per statement is wasted work
    # and would exhaust a one-shot iterable after the first statement.
    grounding_set = set(object_groundings)
    filtered_stmts = [
        stmt for stmt in stmts
        if set(stmt.obj.db_refs.items()) & grounding_set
    ]
    print('Filtered to %d statements' % len(filtered_stmts))
    return filtered_stmts


def get_mappings():
    # NOTE(review): `stmts` is neither a parameter nor defined in the visible
    # module scope, and `mappings`/`matches` are not used in the visible
    # code; confirm against the rest of this function.
    import gilda
    mappings = set()
    for stmt in stmts:
        subj = stmt.subj
        if 'CHEBI' in subj.db_refs:
            continue
        matches = gilda.ground(subj.name)
def expand_with_child_go_terms(terms):
    """Return the given GO terms together with their ontology children.

    Parameters
    ----------
    terms : iterable
        GO identifiers; each one is passed as the ID argument to
        ``bio_ontology.get_children('GO', ...)``. Any iterable is accepted,
        including one-shot iterators such as generators.

    Returns
    -------
    set
        A new set containing every input term plus the ID (second element)
        of every child entry returned by the ontology for those terms.
    """
    # Materialize the input exactly once: it is needed both as the seed of
    # the result and to drive the lookups, and a generator would already be
    # exhausted by the time of the second pass. Iterating the set also
    # avoids repeated lookups for duplicate terms.
    seed_terms = set(terms)
    all_terms = set(seed_terms)
    for term in seed_terms:
        all_terms.update(
            child[1] for child in bio_ontology.get_children('GO', term)
        )
    return all_terms
for compound, names in PAIN_MOL_NAMES.items() if rows[2] in names] df = pd.DataFrame(celltype_pain_interaction) return df if __name__ == "__main__": df = pd.read_csv(PC_SIF_URL, sep='\t', header=None) df = df[df[1] == 'controls-production-of'] pain_signal_mol = { "Prostaglandins": "CHEBI:26333", "Brandykinin": "CHEBI:3165" } chebi_list = {} for compounds, chebi_id in pain_signal_mol.items(): chebi_list[compounds] = [ children[1] for children in bio_ontology.get_children('CHEBI', chebi_id) ] df = df[df[2].isin(chebi_list)] chebi_stmts = [{ 'Enzyme': row[0], 'Statement': row[1], 'CHEBI_ID': row[2], 'CHEBI_Name': bio_ontology.get_name('CHEBI', row[2]) } for _, row in df.iterrows()] df = pd.DataFrame(chebi_stmts) df.to_csv("enzyme_interactions.tsv", sep="\t", header=True, index=False)