def preassemble(self, filters=None, grounding_map=None):
    """Preassemble the Statements collected in the model.

    Use INDRA's GroundingMapper, Preassembler and BeliefEngine on the
    IncrementalModel and save the unique statements and the top level
    statements in class attributes.

    Currently the following filter options are implemented:
    - grounding: require that all Agents in statements are grounded
    - human_only: require that all proteins are human proteins
    - prior_one: require that at least one Agent is in the prior model
    - prior_all: require that all Agents are in the prior model

    Parameters
    ----------
    filters : Optional[list[str]]
        A list of filter options to apply when choosing the statements.
        See description above for more details. Default: None
    grounding_map : Optional[dict]
        A user supplied grounding map which maps a string to a
        dictionary of database IDs (in the format used by Agents'
        db_refs).
    """
    assembled = self.get_statements()
    # Hypothesis statements are excluded up front
    assembled = ac.filter_no_hypothesis(assembled)
    # Normalize grounding, optionally with a user-supplied map
    gmap_kwargs = {} if grounding_map is None \
        else {'grounding_map': grounding_map}
    assembled = ac.map_grounding(assembled, **gmap_kwargs)
    if filters and 'grounding' in filters:
        assembled = ac.filter_grounded_only(assembled)
    # Normalize modification sites
    assembled = ac.map_sequence(assembled)
    if filters and 'human_only' in filters:
        assembled = ac.filter_human_only(assembled)
    # Deduplicate/organize statements, keeping non-toplevel ones too
    assembled = ac.run_preassembly(assembled, return_toplevel=False)
    # Apply the prior_one/prior_all relevance filters
    assembled = self._relevance_filter(assembled, filters)
    self.assembled_stmts = assembled
def test_map_grounding():
    """Grounding is added to both agents; names change only with do_rename."""
    mek = Agent('MEK', db_refs={'TEXT': 'MEK'})
    erk = Agent('X', db_refs={'TEXT': 'ERK'})
    stmt = Activation(mek, erk)
    # Without renaming: groundings appear, but the object keeps its name
    mapped = ac.map_grounding([stmt], do_rename=False)
    assert len(mapped) == 1
    assert mapped[0].subj.db_refs.get('BE')
    assert mapped[0].obj.db_refs.get('BE')
    assert mapped[0].obj.name == 'X'
    # With renaming: the object's name is standardized from its grounding
    mapped = ac.map_grounding([stmt], do_rename=True)
    assert len(mapped) == 1
    assert mapped[0].subj.db_refs.get('BE')
    assert mapped[0].obj.db_refs.get('BE')
    assert mapped[0].obj.name == 'ERK'
def test_map_grounding():
    """FPLX grounding is added; agent names are renamed only on request."""
    mek = Agent('MEK', db_refs={'TEXT': 'MEK'})
    erk = Agent('X', db_refs={'TEXT': 'ERK'})
    stmt = Activation(mek, erk)
    # do_rename=False: groundings are mapped but the name stays 'X'
    mapped = ac.map_grounding([stmt], do_rename=False)
    assert len(mapped) == 1
    assert mapped[0].subj.db_refs.get('FPLX')
    assert mapped[0].obj.db_refs.get('FPLX')
    assert mapped[0].obj.name == 'X'
    # do_rename=True: the name is standardized to the grounded entity
    mapped = ac.map_grounding([stmt], do_rename=True)
    assert len(mapped) == 1
    assert mapped[0].subj.db_refs.get('FPLX')
    assert mapped[0].obj.db_refs.get('FPLX')
    assert mapped[0].obj.name == 'ERK'
def respond_get_paper_model(self, content):
    """Get and display the model from a paper, indicated by pmid.

    Parses a 'PMID-<digits>' identifier from the KQML content, fetches
    statements for that paper from the INDRA DB, assembles them, sends
    display diagrams, and returns a KQML SUCCESS performative with the
    number of unique relations found.
    """
    pmid_raw = content.gets('pmid')
    prefix = 'PMID-'
    # Only accept identifiers of the exact form 'PMID-<digits>'
    if pmid_raw.startswith(prefix) and pmid_raw[len(prefix):].isdigit():
        pmid = pmid_raw[len(prefix):]
    else:
        return self.make_failure('BAD_INPUT')
    try:
        stmts = get_statements_for_paper([('pmid', pmid)],
                                         simple_response=True)
    except IndraDBRestAPIError as e:
        # A 404 with this reason means the pmid is unknown to the DB;
        # any other error is unexpected and propagated
        if e.status_code == 404 and 'Invalid or unavailable' in e.reason:
            logger.error("Could not find pmid: %s" % e.reason)
            return self.make_failure('MISSING_MECHANISM')
        else:
            raise e
    if not stmts:
        # No statements at all: report success with zero relations
        resp = KQMLPerformative('SUCCESS')
        resp.set('relations-found', 0)
        return resp
    stmts = ac.map_grounding(stmts)
    stmts = ac.map_sequence(stmts)
    unique_stmts = ac.run_preassembly(stmts, return_toplevel=True)
    # NOTE(review): diagrams are built from the pre-assembly statement
    # list (stmts), not unique_stmts -- confirm this is intentional
    diagrams = _make_diagrams(stmts)
    self.send_display_model(diagrams)
    resp = KQMLPerformative('SUCCESS')
    resp.set('relations-found', len(unique_stmts))
    resp.set('dump-limit', str(DUMP_LIMIT))
    return resp
def get_text_grounding_counts(stmts):
    """Return counts of entity texts and evidence texts for those entity
    texts.

    Parameters
    ----------
    stmts : list of indra.statements.Statement
        Statements whose agents' TEXT entries are tallied.

    Returns
    -------
    tuple
        A Counter over (agent_text, grounding, name, url, gilda_grounding)
        tuples, and a dict mapping each agent text to one (pmid, text)
        evidence example (the last one seen wins).
    """
    texts = []
    ev_text_for_agent_text = {}
    # Iterate over each statement and its agents
    stmts = ac.map_grounding(stmts)
    for stmt in tqdm.tqdm(stmts):
        for idx, agent in enumerate(stmt.agent_list()):
            # Skip missing agents and agents with no raw text to count
            if agent is None or 'TEXT' not in agent.db_refs:
                continue
            # Get some properties of the assembled agent (grounding,
            # standard name, link-out URL)
            gr = agent.get_grounding()
            url = get_identifiers_url(*gr) if gr[0] is not None else ''
            agent_txt = agent.db_refs['TEXT']
            ev_text_for_agent_text[agent_txt] = (stmt.evidence[0].pmid,
                                                 stmt.evidence[0].text)
            # Ask Gilda for its (possibly different) top grounding, if any
            gilda_grounding = gilda.ground(agent_txt)
            gilda_grounding = '%s:%s' % (gilda_grounding[0].term.db,
                                         gilda_grounding[0].term.id) \
                if gilda_grounding else ''
            # We now add a new entry to the text-grounding list
            texts.append((agent_txt, ('%s:%s' % gr) if gr[0] else '',
                          agent.name, url, gilda_grounding))
    # Count the unique text-grounding entries
    cnt = Counter(texts)
    return cnt, ev_text_for_agent_text
def _do_old_fashioned_preassembly(stmts):
    """Ground (with Adeft/local Gilda), site-map, and preassemble stmts.

    Returns all statements from preassembly, not just the top level.
    """
    grounded = ac.map_grounding(stmts, use_adeft=True, gilda_mode='local')
    site_mapped = ac.map_sequence(grounded, use_cache=True)
    return ac.run_preassembly(site_mapped, return_toplevel=False)
def _make_unique_statement_set(self, stmt_tpls):
    """Perform grounding, sequence mapping, and find unique set from stmts.

    This method returns a list of statement objects, as well as a set of
    tuples of the form (uuid, matches_key) which represent the links between
    raw (evidence) statements and their unique/preassembled counterparts.
    """
    stmts = []
    # Remember which DB statement id produced each statement uuid so
    # evidence can be linked back after mapping/deduplication
    uuid_sid_dict = {}
    for sid, stmt in stmt_tpls:
        uuid_sid_dict[stmt.uuid] = sid
        stmts.append(stmt)
    stmts = ac.map_grounding(stmts)
    stmts = ac.map_sequence(stmts)
    # Group statements that share a matches key (i.e. duplicates)
    stmt_groups = self.pa._get_stmt_matching_groups(stmts)
    unique_stmts = []
    evidence_links = defaultdict(lambda: set())
    for _, duplicates in stmt_groups:
        # Get the first statement and add the evidence of all subsequent
        # Statements to it
        for stmt_ix, stmt in enumerate(duplicates):
            if stmt_ix == 0:
                # The generic copy of the first duplicate stands in for
                # the whole group; its shallow hash keys the evidence
                first_stmt = stmt.make_generic_copy()
                stmt_hash = first_stmt.get_hash(shallow=True)
            evidence_links[stmt_hash].add(uuid_sid_dict[stmt.uuid])
        # This should never be None or anything else
        assert isinstance(first_stmt, type(stmt))
        unique_stmts.append(first_stmt)
    return unique_stmts, flatten_evidence_dict(evidence_links)
def main(args):
    """Load raw statements, assemble and filter them, and write a .sif
    network of binary statements to args.outfile.
    """
    # This file takes about 32 GB to load
    if not args.infile:
        args.infile = './Data/indra_raw/bioexp_all_raw.pkl'
    if not args.outfile:
        args.outfile = './filtered_indra_network.sif'
    # Load statements from file
    stmts_raw = assemble_corpus.load_statements(args.infile)
    # Expand families, fix grounding errors and run preassembly
    stmts_fixed = assemble_corpus.run_preassembly(
        assemble_corpus.map_grounding(
            assemble_corpus.expand_families(stmts_raw)))
    # Default filtering: specific (unique) genes that are grounded.
    stmts_filtered = assemble_corpus.filter_grounded_only(
        assemble_corpus.filter_genes_only(stmts_fixed, specific_only=True))
    # Custom filters
    if args.human_only:
        stmts_filtered = assemble_corpus.filter_human_only(stmts_filtered)
    if args.filter_direct:
        stmts_filtered = assemble_corpus.filter_direct(stmts_filtered)
    # Keep only two-agent statements with a non-None first agent
    binary_stmts = [s for s in stmts_filtered if len(s.agent_list()) == 2
                    and s.agent_list()[0] is not None]
    rows = []
    for s in binary_stmts:
        rows.append([ag.name for ag in s.agent_list()])
    # Write rows to .sif file
    with open(args.outfile, 'w', newline='') as csvfile:
        wrtr = csv.writer(csvfile, delimiter='\t')
        for row in rows:
            wrtr.writerow(row)
def respond_get_paper_model(self, content):
    """Get and display the model from a paper, indicated by pmid.

    Parses a 'PMID-<digits>' identifier from the KQML content, fetches
    statements for that paper from the INDRA DB, assembles them, sends
    display diagrams, and returns a KQML SUCCESS performative with the
    number of unique relations found.
    """
    pmid_raw = content.gets('pmid')
    prefix = 'PMID-'
    # Only accept identifiers of the exact form 'PMID-<digits>'
    if pmid_raw.startswith(prefix) and pmid_raw[len(prefix):].isdigit():
        pmid = pmid_raw[len(prefix):]
    else:
        return self.make_failure('BAD_INPUT')
    try:
        stmts = get_statements_for_paper([('pmid', pmid)])
    except IndraDBRestAPIError as e:
        # A 404 with this reason means the pmid is unknown to the DB;
        # any other error is unexpected and propagated
        if e.status_code == 404 and 'Invalid or unavailable' in e.reason:
            logger.error("Could not find pmid: %s" % e.reason)
            return self.make_failure('MISSING_MECHANISM')
        else:
            raise e
    if not stmts:
        # No statements at all: report success with zero relations
        resp = KQMLPerformative('SUCCESS')
        resp.set('relations-found', 0)
        return resp
    stmts = ac.map_grounding(stmts)
    stmts = ac.map_sequence(stmts)
    unique_stmts = ac.run_preassembly(stmts, return_toplevel=True)
    # NOTE(review): diagrams are built from the pre-assembly statement
    # list (stmts), not unique_stmts -- confirm this is intentional
    diagrams = _make_diagrams(stmts)
    self.send_display_model(diagrams)
    resp = KQMLPerformative('SUCCESS')
    resp.set('relations-found', len(unique_stmts))
    resp.set('dump-limit', str(DUMP_LIMIT))
    return resp
def run_assembly(self):
    """Run INDRA's assembly pipeline on the Statements.

    Each stage is switched on/off by entries in self.assembly_config;
    'skip_*' keys disable otherwise-default stages. The assembled
    statements are stored in self.assembled_stmts.
    """
    self.eliminate_copies()
    stmts = self.get_indra_stmts()
    stmts = self.filter_event_association(stmts)
    stmts = ac.filter_no_hypothesis(stmts)
    if not self.assembly_config.get('skip_map_grounding'):
        stmts = ac.map_grounding(stmts)
    if self.assembly_config.get('standardize_names'):
        # Mutates statements in place; no reassignment needed
        ac.standardize_names_groundings(stmts)
    if self.assembly_config.get('filter_ungrounded'):
        score_threshold = self.assembly_config.get('score_threshold')
        stmts = ac.filter_grounded_only(stmts,
                                        score_threshold=score_threshold)
    if self.assembly_config.get('merge_groundings'):
        stmts = ac.merge_groundings(stmts)
    if self.assembly_config.get('merge_deltas'):
        stmts = ac.merge_deltas(stmts)
    relevance_policy = self.assembly_config.get('filter_relevance')
    if relevance_policy:
        stmts = self.filter_relevance(stmts, relevance_policy)
    if not self.assembly_config.get('skip_filter_human'):
        stmts = ac.filter_human_only(stmts)
    if not self.assembly_config.get('skip_map_sequence'):
        stmts = ac.map_sequence(stmts)
    # Use WM hierarchies and belief scorer for WM preassembly
    preassembly_mode = self.assembly_config.get('preassembly_mode')
    if preassembly_mode == 'wm':
        hierarchies = get_wm_hierarchies()
        belief_scorer = get_eidos_scorer()
        stmts = ac.run_preassembly(stmts, return_toplevel=False,
                                   belief_scorer=belief_scorer,
                                   hierarchies=hierarchies)
    else:
        stmts = ac.run_preassembly(stmts, return_toplevel=False)
    belief_cutoff = self.assembly_config.get('belief_cutoff')
    if belief_cutoff is not None:
        stmts = ac.filter_belief(stmts, belief_cutoff)
    stmts = ac.filter_top_level(stmts)
    # NOTE(review): the grouping of the three mechanism filters under
    # 'filter_direct' is assumed from context -- confirm against the
    # original layout
    if self.assembly_config.get('filter_direct'):
        stmts = ac.filter_direct(stmts)
        stmts = ac.filter_enzyme_kinase(stmts)
        stmts = ac.filter_mod_nokinase(stmts)
        stmts = ac.filter_transcription_factor(stmts)
    if self.assembly_config.get('mechanism_linking'):
        # Iteratively reduce activities/modifications, then rewrite
        # activations using explicit active forms
        ml = MechLinker(stmts)
        ml.gather_explicit_activities()
        ml.reduce_activities()
        ml.gather_modifications()
        ml.reduce_modifications()
        ml.gather_explicit_activities()
        ml.replace_activations()
        ml.require_active_forms()
        stmts = ml.statements
    self.assembled_stmts = stmts
def process_statements(stmts, **generate_id_map_kwargs):
    """Ground and site-map stmts, then derive the unique statement set
    and the match-key maps linking raw statements to unique ones.
    """
    grounded = ac.map_grounding(stmts)
    site_mapped = ac.map_sequence(grounded)
    pa = Preassembler(hierarchies)
    unique_stmts = make_unique_statement_set(pa, site_mapped)
    match_key_maps = get_match_key_maps(pa, unique_stmts,
                                        **generate_id_map_kwargs)
    return unique_stmts, match_key_maps
def test_map_grounding_user_map():
    """A user-supplied grounding map overrides the default grounding."""
    user_map = {'MEK': {'XXX': 'YYY'}, 'ERK': {'FPLX': 'ERK'}}
    subj = Agent('MEK', db_refs={'TEXT': 'MEK'})
    obj = Agent('X', db_refs={'TEXT': 'ERK'})
    stmt = Activation(subj, obj)
    mapped = ac.map_grounding([stmt], grounding_map=user_map,
                              do_rename=True)
    assert len(mapped) == 1
    # The custom MEK entry wins over the default map
    assert mapped[0].subj.db_refs.get('XXX') == 'YYY'
    assert mapped[0].obj.db_refs.get('FPLX') == 'ERK'
    assert mapped[0].obj.name == 'ERK'
def run_assembly(stmts, filename):
    """Run the assembly pipeline over stmts and pickle the result.

    The assembled (non-toplevel included) statements are dumped to
    `filename` and also returned.
    """
    pipeline = [
        ac.map_grounding,
        ac.filter_grounded_only,
        ac.filter_human_only,
        # ac.expand_families,  # disabled in this pipeline
        lambda s: ac.filter_gene_list(s, gene_names, 'one',
                                      allow_families=True),
        ac.map_sequence,
        lambda s: ac.run_preassembly(s, return_toplevel=False, poolsize=4),
    ]
    for step in pipeline:
        stmts = step(stmts)
    ac.dump_statements(stmts, filename)
    return stmts
def map_grounding():
    """Map grounding on a list of INDRA Statements."""
    # CORS preflight: respond with an empty body
    if request.method == 'OPTIONS':
        return {}
    payload = json.loads(request.body.read().decode('utf-8'))
    stmts = stmts_from_json(payload.get('statements'))
    mapped = ac.map_grounding(stmts)
    return _return_stmts(mapped)
def test_readme_pipeline():
    """Run the README's assembly pipeline end to end on gn_stmts."""
    stmts = gn_stmts  # Added only here, not in docs
    from indra.tools import assemble_corpus as ac
    pipeline = [
        ac.filter_no_hypothesis,
        ac.map_grounding,
        ac.filter_grounded_only,
        ac.filter_human_only,
        ac.map_sequence,
        lambda s: ac.run_preassembly(s, return_toplevel=False),
        lambda s: ac.filter_belief(s, 0.8),
    ]
    for step in pipeline:
        stmts = step(stmts)
    assert stmts, 'Update example to yield statements list of non-zero length'
def _clean_statements(self, stmts):
    """Perform grounding and sequence (site) mapping on the statements.

    Returns
    -------
    list of indra.statements.Statement
        The statements after INDRA's grounding mapper and sequence
        mapper have been applied.
    """
    self._log("Map grounding...")
    stmts = ac.map_grounding(stmts)
    self._log("Map sequences...")
    stmts = ac.map_sequence(stmts, use_cache=True)
    return stmts
def run_assembly(stmts, save_file):
    """Assemble raw statements into a filtered corpus and save it.

    The final statements (belief >= 0.95, top-level, direct,
    kinase-consistent) are dumped to `save_file` and returned.
    """
    steps = [
        ac.map_grounding,
        ac.filter_grounded_only,
        ac.filter_human_only,
        ac.expand_families,
        lambda s: ac.filter_gene_list(s, gene_names, 'one'),
        ac.map_sequence,
        lambda s: ac.run_preassembly(s, return_toplevel=False),
        lambda s: ac.filter_belief(s, 0.95),
        ac.filter_top_level,
        ac.filter_direct,
        ac.filter_enzyme_kinase,
    ]
    for step in steps:
        stmts = step(stmts)
    ac.dump_statements(stmts, save_file)
    return stmts
def map_grounding():
    """Map grounding on a list of INDRA Statements."""
    payload = json.loads(request.body.read().decode('utf-8'))
    stmts = stmts_from_json(payload.get('statements'))
    stmts_out = ac.map_grounding(stmts)
    # An empty mapping result is reported as an empty statements list
    stmts_json = stmts_to_json(stmts_out) if stmts_out else []
    return {'statements': stmts_json}
def test_map_grounding_user_map():
    """A user grounding map can replace or extend the default map, and
    the 'extend' policy must not permanently modify the default map."""
    gm = {'MEK': {'XXX': 'YYY'}, 'ERK': {'FPLX': 'ERK'}}
    a = Agent('MEK', db_refs={'TEXT': 'MEK'})
    b = Agent('X', db_refs={'TEXT': 'ERK'})
    st = Activation(a, b)
    # Replacement map: the user entries are the only mapping applied
    st_out = ac.map_grounding([st], grounding_map=gm, do_rename=True)
    assert len(st_out) == 1
    assert st_out[0].subj.db_refs.get('XXX') == 'YYY'
    assert st_out[0].obj.db_refs.get('FPLX') == 'ERK'
    assert st_out[0].obj.name == 'ERK'
    # 'extend' policy: user entries are layered on top of the default map
    gm = {'ERK': {'FPLX': 'ERK_TEST'}}
    st_out = ac.map_grounding([st], grounding_map=gm,
                              grounding_map_policy='extend')
    assert len(st_out) == 1
    assert st_out[0].subj.db_refs.get('FPLX') == 'MEK'
    assert st_out[0].obj.db_refs.get('FPLX') == 'ERK_TEST'
    st_out = ac.map_grounding([st])
    # Make sure the extension to the default grounding map doesn't persist
    assert len(st_out) == 1
    assert st_out[0].subj.db_refs.get('FPLX') == 'MEK'
    assert st_out[0].obj.db_refs.get('FPLX') == 'ERK'
    assert st_out[0].obj.name == 'ERK'
def run_assembly(self):
    """Run INDRA's assembly pipeline on the Statements.

    Returns
    -------
    stmts : list[indra.statements.Statement]
        The list of assembled INDRA Statements.
    """
    # NOTE(review): 'get_indra_smts' looks like a typo for
    # 'get_indra_stmts' -- confirm the method name on this class
    stmts = self.get_indra_smts()
    stmts = ac.filter_no_hypothesis(stmts)
    stmts = ac.map_grounding(stmts)
    stmts = ac.map_sequence(stmts)
    stmts = ac.filter_human_only(stmts)
    stmts = ac.run_preassembly(stmts, return_toplevel=False)
    return stmts
def get_indra_phos_stmts():
    """Build (de)phosphorylation statements from the INDRA DB.

    Queries the DB for Phosphorylation and Dephosphorylation statements,
    grounds, family-expands, filters and site-maps them, preassembles,
    and caches intermediate and final results as pickles under sources/.

    Returns
    -------
    list of indra.statements.Statement
        Preassembled, human-only, gene-specific phosphorylation
        statements.
    """
    stmts = by_gene_role_type(stmt_type='Phosphorylation')
    stmts += by_gene_role_type(stmt_type='Dephosphorylation')
    stmts = ac.map_grounding(stmts)
    # Expand families before site mapping
    stmts = ac.expand_families(stmts)
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.map_sequence(stmts)
    # Cache the site-mapped statements before preassembly
    ac.dump_statements(stmts, 'sources/indra_phos_sitemap.pkl')
    stmts = ac.run_preassembly(stmts, poolsize=4,
                               save='sources/indra_phos_stmts_pre.pkl')
    stmts = ac.filter_human_only(stmts)
    stmts = ac.filter_genes_only(stmts, specific_only=True)
    ac.dump_statements(stmts, 'sources/indra_phos_stmts.pkl')
    return stmts
def test_uppro_assembly():
    """Agents grounded to distinct UPPRO protein chains must not be
    merged by preassembly, before or after grounding mapping."""
    ag1 = Agent('x', db_refs={'UP': 'P01019', 'UPPRO': 'PRO_0000032457'})
    ag2 = Agent('y', db_refs={'UP': 'P01019', 'UPPRO': 'PRO_0000032458'})
    # UPPRO takes precedence over UP in the grounding hierarchy
    assert ag1.get_grounding() == ('UPPRO', ag1.db_refs['UPPRO'])
    assert ag2.get_grounding() == ('UPPRO', ag2.db_refs['UPPRO'])
    stmt1 = Phosphorylation(None, ag1)
    stmt2 = Phosphorylation(None, ag2)
    # Different chains -> different matches keys -> no deduplication
    assert stmt1.matches_key() != stmt2.matches_key()
    pa = Preassembler(bio_ontology, [stmt1, stmt2])
    unique_stmts = pa.combine_duplicates()
    assert len(unique_stmts) == 2, unique_stmts
    # The distinction must survive grounding mapping as well
    from indra.tools import assemble_corpus as ac
    stmts = ac.map_grounding([stmt1, stmt2])
    pa = Preassembler(bio_ontology, stmts)
    unique_stmts = pa.combine_duplicates()
    assert len(unique_stmts) == 2
def _clean_statements(self, stmts):
    """Perform grounding and sequence mapping on the given statements.

    Returns
    -------
    tuple
        The cleaned statements, and a dict mapping each cleaning stage
        name ('grounding', 'sequence mapping') to the set of UUIDs of
        statements that stage eliminated.
    """
    eliminated_uuids = {}
    all_uuids = {s.uuid for s in stmts}
    self._log("Map grounding...")
    stmts = ac.map_grounding(stmts, use_adeft=True, gilda_mode='local')
    # Statements dropped by grounding are those whose uuids disappeared
    grounded_uuids = {s.uuid for s in stmts}
    eliminated_uuids['grounding'] = all_uuids - grounded_uuids
    self._log("Map sequences...")
    stmts = ac.map_sequence(stmts, use_cache=True)
    seqmapped_and_grounded_uuids = {s.uuid for s in stmts}
    eliminated_uuids['sequence mapping'] = \
        grounded_uuids - seqmapped_and_grounded_uuids
    return stmts, eliminated_uuids
def get_indra_reg_act_stmts():
    """Load or build regulation/activity statements from the INDRA DB.

    First tries to load cached, preassembled statements from a pickle;
    if that fails for any reason, queries the INDRA DB for Activation,
    Inhibition and ActiveForm statements, grounds, filters and
    preassembles them, and writes the cache for next time.

    Returns
    -------
    list of indra.statements.Statement
        Grounded, preassembled, human-only, gene-specific statements.
    """
    # Best-effort cache load; a bare `except:` here previously swallowed
    # SystemExit/KeyboardInterrupt too -- narrow it to Exception
    try:
        stmts = ac.load_statements('sources/indra_reg_act_stmts.pkl')
        return stmts
    except Exception:
        # No usable cache; fall through and rebuild from the database
        pass
    stmts = []
    for stmt_type in ('Activation', 'Inhibition', 'ActiveForm'):
        print("Getting %s statements from INDRA DB" % stmt_type)
        stmts += by_gene_role_type(stmt_type=stmt_type)
    stmts = ac.map_grounding(stmts, save='sources/indra_reg_act_gmap.pkl')
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.run_preassembly(stmts, poolsize=4,
                               save='sources/indra_reg_act_pre.pkl')
    stmts = ac.filter_human_only(stmts)
    stmts = ac.filter_genes_only(stmts, specific_only=True)
    ac.dump_statements(stmts, 'sources/indra_reg_act_stmts.pkl')
    return stmts
def preprocess_db_stmts(stmts, output_file, filter_stmt_site):
    """Ground-map the database statements, drop exact duplicates, and
    optionally keep only statements with a residue and position.

    Parameters
    ----------
    stmts : list of indra.statements.Statement
        Raw statements from the database.
    output_file : str
        Path of the pickle file the result is dumped to.
    filter_stmt_site : bool
        If True, keep only statements with both residue and position.

    Returns
    -------
    list of indra.statements.Statement
        The de-duplicated (and possibly site-filtered) statements.
    """
    print("Mapping grounding")
    gmap_stmts = ac.map_grounding(stmts)
    #ac.dump_statements(gmap_stmts, prefix + '_gmap.pkl')
    print("Sorting and filtering")
    # Next, eliminate exact duplicates: sort by deep hash and keep the
    # first statement of each equal-hash group
    stmts_by_deep_hash = [(s.get_hash(shallow=False), s)
                          for s in gmap_stmts]
    stmts_by_deep_hash.sort(key=lambda x: x[0])
    uniq_stmts = []
    for k, group in itertools.groupby(stmts_by_deep_hash,
                                      key=lambda x: x[0]):
        uniq_stmts.append(list(group)[0][1])
    if filter_stmt_site:
        # Filter to statements with residue and position
        site_stmts = [s for s in uniq_stmts if s.residue and s.position]
    else:
        site_stmts = uniq_stmts
    # Save the resulting statements to the output file
    ac.dump_statements(site_stmts, output_file)
    return site_stmts
def run_preassembly(self, stmts, print_summary=True):
    """Run complete preassembly procedure on the given statements.

    Results are returned as a dict and stored in the attribute
    :py:attr:`results`. They are also saved in the pickle file
    `<basename>_results.pkl`.

    Parameters
    ----------
    stmts : list of :py:class:`indra.statements.Statement`
        Statements to preassemble.
    print_summary : bool
        If True (default), prints a summary of the preassembly process to
        the console.

    Returns
    -------
    dict
        A dict containing the following entries:

        - `raw`: the starting set of statements before preassembly.
        - `duplicates1`: statements after initial de-duplication.
        - `valid`: statements found to have valid modification sites.
        - `mapped`: mapped statements (list of
          :py:class:`indra.preassembler.sitemapper.MappedStatement`).
        - `mapped_stmts`: combined list of valid statements and statements
          after mapping.
        - `duplicates2`: statements resulting from de-duplication of the
          statements in `mapped_stmts`.
        - `related2`: top-level statements after combining the statements
          in `duplicates2`.
    """
    grounded = ac.map_grounding(stmts)
    site_mapped = ac.map_sequence(grounded)
    self.results = ac.run_preassembly(site_mapped)
    # Persist the results alongside other outputs when caching is on
    if self.basename is not None:
        results_filename = '%s_results.pkl' % self.basename
        with open(results_filename, 'wb') as f:
            pickle.dump(self.results, f)
    return self.results
def pa_filter_unique_evidence(stmts): """Wrapper function for chaining preassembly statements meant to reduce the number of statements. stmts : list[:py:class:`indra.statements.Statement`] Returns ------- stmts : list[:py:class:`indra.statements.Statement`] List of preassembled indra statements """ # Ground statemtens: grounded_stmts = ac.map_grounding(stmts) # Use curated site information to standardize modification sites in stmts ms_stmts = ac.map_sequence(grounded_stmts) # Compiles together raw statements to one statement per type opa_stmts = ac.run_preassembly(ms_stmts, return_toplevel=False) return opa_stmts
if __name__ == '__main__': POLYPHENOLS_LIST = 'input/list_polyphenols.xlsx' # Load the list of polyphenols df = pd.read_excel(POLYPHENOLS_LIST) results_dict = {} for name, pubchem_id in df[['polyphenols', 'pubchem_id']].values: # Query the INDRA DB web service using the INDRA Python API idrp = idr.get_statements(agents=[f'{pubchem_id}@PUBCHEM'], ev_limit=100000) # Run preassembly # 1. Fix common named entity normalization ("grounding") errors stmts = ac.map_grounding(idrp.statements) # 2. Fix inconsistent sites of post-translational modifications stmts = ac.map_sequence(stmts) # 3. Identify duplicate/overlapping statements, calculate belief stmts = ac.run_preassembly(stmts) # Convert statements to JSON stmts_json = stmts_to_json(stmts) # Store results in dict indexed by Pubchem ID results_dict[str(pubchem_id)] = { 'name': name, 'statements': stmts_json } # Save to file with open('output/polyphenol_stmts.json', 'wt') as f:
def test_gene_network():
    """Exercise the documentation's gene-network walkthrough end to end,
    chunk by chunk (chunks mirror the numbered examples in the docs)."""
    # Chunk 1: this is tested in _get_gene_network_stmts
    # from indra.tools.gene_network import GeneNetwork
    # gn = GeneNetwork(['H2AX'])
    # biopax_stmts = gn.get_biopax_stmts()
    # bel_stmts = gn.get_bel_stmts()

    # Chunk 2
    from indra import literature
    pmids = literature.pubmed_client.get_ids_for_gene('H2AX')

    # Chunk 3
    from indra import literature
    paper_contents = {}
    for pmid in pmids:
        content, content_type = literature.get_full_text(pmid, 'pmid')
        if content_type == 'abstract':
            paper_contents[pmid] = content
        if len(paper_contents) == 5:  # Is 10 in actual code
            break

    # Chunk 4
    from indra.sources import reach
    literature_stmts = []
    for pmid, content in paper_contents.items():
        rp = reach.process_text(content, url=reach.local_text_url)
        literature_stmts += rp.statements
    print('Got %d statements' % len(literature_stmts))
    assert literature_stmts  # replaces a print statements

    # Chunk 6
    from indra.tools import assemble_corpus as ac
    # stmts = biopax_stmts + bel_stmts + literature_stmts  # tested elsewhere
    stmts = gn_stmts + literature_stmts  # Added instead of above line
    stmts = ac.map_grounding(stmts)
    stmts = ac.map_sequence(stmts)
    stmts = ac.run_preassembly(stmts)
    assert stmts

    # Chunk 7
    from indra.assemblers.cx import CxAssembler
    from indra.databases import ndex_client
    cxa = CxAssembler(stmts)
    cx_str = cxa.make_model()
    assert cx_str

    # Chunk 8
    # ndex_cred = {'user': '******', 'password': '******'}
    # network_id = ndex_client.create_network(cx_str, ndex_cred)
    # print(network_id)

    # Chunk 9
    from indra.assemblers.indranet import IndraNetAssembler
    indranet_assembler = IndraNetAssembler(statements=stmts)
    indranet = indranet_assembler.make_model()
    assert len(indranet.nodes) > 0, 'indranet conatins no nodes'
    assert len(indranet.edges) > 0, 'indranet conatins no edges'

    # Chunk 10
    import networkx as nx
    paths = nx.single_source_shortest_path(G=indranet, source='H2AX',
                                           cutoff=1)
    assert paths

    # Chunk 11
    from indra.assemblers.pysb import PysbAssembler
    pysb = PysbAssembler(statements=stmts)
    pysb_model = pysb.make_model()
    assert pysb_model
    # NOTE(review): this chunk begins mid-function -- the enclosing def
    # (which reads a gene list from `fname`) is not visible here.
    with open(fname, 'rt') as fh:
        genes = fh.read().strip().split('\n')
    return genes


# Script entry point: either load previously preassembled statements or
# rebuild them from the prior and reading pickles.
if __name__ == '__main__':
    outf = 'output/'
    data = process_data.read_data(process_data.data_file)
    data_genes = process_data.get_all_gene_names(data)
    reassemble = False
    if not reassemble:
        # Fast path: reuse the cached preassembled statements
        stmts = ac.load_statements(pjoin(outf, 'preassembled.pkl'))
        #stmts = ac.load_statements(pjoin(outf, 'prior.pkl'))
    else:
        #prior_stmts = build_prior(data_genes, pjoin(outf, 'prior.pkl'))
        prior_stmts = ac.load_statements(pjoin(outf, 'prior.pkl'))
        prior_stmts = ac.map_grounding(prior_stmts,
                                       save=pjoin(outf, 'gmapped_prior.pkl'))
        reading_stmts = ac.load_statements(pjoin(outf, 'phase3_stmts.pkl'))
        reading_stmts = ac.map_grounding(reading_stmts,
                                         save=pjoin(outf,
                                                    'gmapped_reading.pkl'))
        # Combine, filter and preassemble the full statement set
        stmts = prior_stmts + reading_stmts
        stmts = ac.filter_grounded_only(stmts)
        stmts = ac.filter_genes_only(stmts, specific_only=False)
        stmts = ac.filter_human_only(stmts)
        stmts = ac.expand_families(stmts)
        stmts = ac.filter_gene_list(stmts, data_genes, 'one')
        stmts = ac.map_sequence(stmts, save=pjoin(outf, 'smapped.pkl'))
        stmts = ac.run_preassembly(stmts, return_toplevel=False,
                                   save=pjoin(outf, 'preassembled.pkl'))
    assemble_models = []
    # NOTE(review): this chunk is a fragment of a script main block
    # (indentation level assumed) and is truncated at the end.
    assemble_models = sys.argv[1:]
    print('Assembling the following model types: %s' % \
          ', '.join(assemble_models))
    print('##############')
    outf = 'output/'
    data = process_data.read_data(process_data.data_file)
    data_genes = process_data.get_all_gene_names(data)
    reassemble = False
    if not reassemble:
        # Fast path: reuse the cached preassembled statements
        stmts = ac.load_statements(pjoin(outf, 'preassembled.pkl'))
    else:
        #prior_stmts = build_prior(data_genes, pjoin(outf, 'prior.pkl'))
        prior_stmts = ac.load_statements(pjoin(outf, 'prior.pkl'))
        prior_stmts = ac.map_grounding(prior_stmts,
                                       save=pjoin(outf, 'gmapped_prior.pkl'))
        reach_stmts = ac.load_statements(pjoin(outf, 'phase3_stmts.pkl'))
        reach_stmts = ac.filter_no_hypothesis(reach_stmts)
        #extra_stmts = ac.load_statements(pjoin(outf, 'extra_stmts.pkl'))
        extra_stmts = read_extra_sources(pjoin(outf, 'extra_stmts.pkl'))
        reading_stmts = reach_stmts + extra_stmts
        reading_stmts = ac.map_grounding(reading_stmts,
                                         save=pjoin(outf,
                                                    'gmapped_reading.pkl'))
        # Combine prior, reading and extra statements, then filter
        stmts = prior_stmts + reading_stmts + extra_stmts
        stmts = ac.filter_grounded_only(stmts)
        stmts = ac.filter_genes_only(stmts, specific_only=False)
        stmts = ac.filter_human_only(stmts)
        stmts = ac.expand_families(stmts)
        stmts = ac.filter_gene_list(stmts, data_genes, 'one')
def _do_old_fashioned_preassembly(stmts):
    """Ground, site-map, and preassemble stmts; include non-toplevel
    statements in the result."""
    return ac.run_preassembly(
        ac.map_sequence(ac.map_grounding(stmts)),
        return_toplevel=False)
# Script fragment: poll S3 until the reading results for `key` appear,
# then load the statements, assemble them, and (truncated below) dump
# per-evidence rows. NOTE(review): this chunk is truncated at the end.
print("Looking for %s on S3" % key)
while True:
    try:
        stmts_resp = client.get_object(Bucket='bigmech', Key=key)
        break
    except botocore.exceptions.ClientError as e:
        if e.response['Error']['Code'] == 'NoSuchKey':
            print('Still processing...')
        # If there was some other kind of problem, re-raise the exception
        # NOTE(review): despite the comment above, no re-raise is visible
        # here -- non-NoSuchKey errors are silently retried; confirm
        time.sleep(30)
stmts_bytes = stmts_resp['Body'].read()
stmts_by_paper = pickle.loads(stmts_bytes)
# Flatten the per-paper statement lists into one list
stmts = [s for stmt_list in stmts_by_paper.values()
         for s in stmt_list]
print("Grounding entities...")
ground_stmts = ac.map_grounding(stmts)
print("Detecting duplicate and overlapping statements...")
stmts = ac.run_preassembly(ground_stmts)


def get(agent_name, stmts):
    """Return statements whose first agent is named agent_name."""
    return [
        s for s in stmts
        if s.agent_list()[0] is not None
        and s.agent_list()[0].name == agent_name
    ]


lines = []
for stmt in stmts:
    for ev in stmt.evidence:
        ag1 = ag2 = None
        if len(stmt.agent_list()) >= 1 and stmt.agent_list()[0]:
            ag1 = stmt.agent_list()[0].name