def test_flatten_evidence_hierarchy():
    """Flattening pulls supporting evidence up onto the top-level statement.

    The flattened copies are independent of the supporting statement's own
    evidence objects and carry a 'support_type' annotation distinguishing
    direct from supported-by evidence.
    """
    raf = Agent('BRAF')
    mek1 = Agent('MAP2K1')
    # A generic phosphorylation and a more specific, site-annotated one.
    generic = Phosphorylation(raf, mek1, evidence=[Evidence(text='foo')])
    specific = Phosphorylation(raf, mek1, 'S', '218',
                               evidence=[Evidence(text='bar')])
    assembler = Preassembler(bio_ontology, stmts=[generic, specific])
    assembler.combine_related()
    assert len(assembler.related_stmts) == 1

    flat = flatten_evidence(assembler.related_stmts)
    assert len(flat) == 1
    parent = flat[0]
    assert len(parent.evidence) == 2
    parent_texts = [ev.text for ev in parent.evidence]
    assert 'bar' in parent_texts
    assert 'foo' in parent_texts

    assert len(parent.supported_by) == 1
    child = parent.supported_by[0]
    assert len(child.evidence) == 1
    assert child.evidence[0].text == 'foo'

    # Mutating the child's evidence must not leak into the parent's copies.
    child.evidence[0].text = 'changed_foo'
    assert child.evidence[0].text == 'changed_foo'
    parent_texts = [ev.text for ev in parent.evidence]
    assert 'changed_foo' not in parent_texts
    assert 'foo' in parent_texts

    support_types = {ev.annotations.get('support_type')
                     for ev in parent.evidence}
    assert support_types == {'direct', 'supported_by'}
def test_flatten_evidence_hierarchy():
    """Flattening collects supporting evidence onto the top statement.

    Uses the `hierarchies` resource; verifies the flattened evidence is a
    copy (not a reference) and is annotated with its support type.
    """
    braf_agent = Agent('BRAF')
    mek_agent = Agent('MAP2K1')
    coarse = Phosphorylation(braf_agent, mek_agent,
                             evidence=[Evidence(text='foo')])
    refined = Phosphorylation(braf_agent, mek_agent, 'S', '218',
                              evidence=[Evidence(text='bar')])
    preassembler = Preassembler(hierarchies, stmts=[coarse, refined])
    preassembler.combine_related()
    assert len(preassembler.related_stmts) == 1

    flat_stmts = flatten_evidence(preassembler.related_stmts)
    assert len(flat_stmts) == 1
    top = flat_stmts[0]
    assert len(top.evidence) == 2
    texts = [ev.text for ev in top.evidence]
    assert 'bar' in texts
    assert 'foo' in texts

    assert len(top.supported_by) == 1
    supporter = top.supported_by[0]
    assert len(supporter.evidence) == 1
    assert supporter.evidence[0].text == 'foo'

    # Changing the supporting statement's evidence must not affect the
    # flattened copies on the top-level statement.
    supporter.evidence[0].text = 'changed_foo'
    assert supporter.evidence[0].text == 'changed_foo'
    texts = [ev.text for ev in top.evidence]
    assert 'changed_foo' not in texts
    assert 'foo' in texts

    assert {ev.annotations.get('support_type') for ev in top.evidence} == \
        {'direct', 'supported_by'}
def test_flatten_evidence_hierarchy_supports():
    """With collect_from='supports', evidence flows down onto the
    supporting (more generic) statement rather than up."""
    braf_agent = Agent('BRAF')
    mek_agent = Agent('MAP2K1')
    coarse = Phosphorylation(braf_agent, mek_agent,
                             evidence=[Evidence(text='foo')])
    refined = Phosphorylation(braf_agent, mek_agent, 'S', '218',
                              evidence=[Evidence(text='bar')])
    preassembler = Preassembler(hierarchies, stmts=[coarse, refined])
    # Keep both the top-level and the supporting statement.
    combined = preassembler.combine_related(return_toplevel=False)
    assert len(combined) == 2

    flat_stmts = flatten_evidence(combined, collect_from='supports')
    assert len(flat_stmts) == 2
    top = flat_stmts[1]
    # The top-level statement keeps only its own evidence.
    assert len(top.evidence) == 1
    assert 'bar' in [ev.text for ev in top.evidence]
    assert len(top.supported_by) == 1
    supporter = top.supported_by[0]
    # The supporting statement gains the evidence of what it supports.
    assert len(supporter.evidence) == 2
    assert {ev.text for ev in supporter.evidence} == {'foo', 'bar'}
def test_flatten_evidence_multilevel():
    """Flattening traverses a multi-level support hierarchy, collecting
    evidence from every level with correct support-type annotations."""
    braf_agent = Agent('BRAF')
    mek_agent = Agent('MAP2K1')
    # Three statements of increasing specificity: no site, residue only,
    # then residue plus position.
    level0 = Phosphorylation(braf_agent, mek_agent,
                             evidence=[Evidence(text='foo')])
    level1 = Phosphorylation(braf_agent, mek_agent, 'S',
                             evidence=[Evidence(text='bar')])
    level2 = Phosphorylation(braf_agent, mek_agent, 'S', '218',
                             evidence=[Evidence(text='baz')])
    preassembler = Preassembler(hierarchies, stmts=[level0, level1, level2])
    preassembler.combine_related()
    assert len(preassembler.related_stmts) == 1

    flat_stmts = flatten_evidence(preassembler.related_stmts)
    assert len(flat_stmts) == 1
    top = flat_stmts[0]
    assert len(top.evidence) == 3, len(top.evidence)
    support_types = [ev.annotations['support_type'] for ev in top.evidence]
    # One piece of direct evidence; two collected from supporting levels.
    assert support_types.count('direct') == 1
    assert support_types.count('supported_by') == 2
def test_flatten_evidence_multilevel():
    """Multi-level flattening (bio_ontology): evidence from every level of
    the support hierarchy ends up on the single top-level statement."""
    raf = Agent('BRAF')
    mek1 = Agent('MAP2K1')
    # Increasingly specific phosphorylations forming a 3-level hierarchy.
    stmt_plain = Phosphorylation(raf, mek1, evidence=[Evidence(text='foo')])
    stmt_residue = Phosphorylation(raf, mek1, 'S',
                                   evidence=[Evidence(text='bar')])
    stmt_site = Phosphorylation(raf, mek1, 'S', '218',
                                evidence=[Evidence(text='baz')])
    assembler = Preassembler(bio_ontology,
                             stmts=[stmt_plain, stmt_residue, stmt_site])
    assembler.combine_related()
    assert len(assembler.related_stmts) == 1

    flat = flatten_evidence(assembler.related_stmts)
    assert len(flat) == 1
    parent = flat[0]
    assert len(parent.evidence) == 3, len(parent.evidence)
    kinds = [ev.annotations['support_type'] for ev in parent.evidence]
    assert kinds.count('direct') == 1
    assert kinds.count('supported_by') == 2
def test_flatten_evidence_hierarchy_supports():
    """collect_from='supports' pushes evidence down to the supporting
    statement (bio_ontology variant)."""
    raf = Agent('BRAF')
    mek1 = Agent('MAP2K1')
    generic = Phosphorylation(raf, mek1, evidence=[Evidence(text='foo')])
    site_specific = Phosphorylation(raf, mek1, 'S', '218',
                                    evidence=[Evidence(text='bar')])
    assembler = Preassembler(bio_ontology, stmts=[generic, site_specific])
    # return_toplevel=False keeps the supporting statement in the result.
    all_stmts = assembler.combine_related(return_toplevel=False)
    assert len(all_stmts) == 2

    flat = flatten_evidence(all_stmts, collect_from='supports')
    assert len(flat) == 2
    parent = flat[1]
    # The top-level statement retains only its own direct evidence.
    assert len(parent.evidence) == 1
    assert 'bar' in [ev.text for ev in parent.evidence]
    assert len(parent.supported_by) == 1
    child = parent.supported_by[0]
    # The supporting statement collects evidence from what it supports.
    assert len(child.evidence) == 2
    assert {ev.text for ev in child.evidence} == {'foo', 'bar'}
def test_flatten_evidence_hierarchy():
    """Flattening a two-level hierarchy puts both pieces of evidence on the
    top statement while the supporting statement keeps only its own."""
    raf = Agent('BRAF')
    mek1 = Agent('MAP2K1')
    generic = Phosphorylation(raf, mek1, evidence=[Evidence(text='foo')])
    site_specific = Phosphorylation(raf, mek1, 'S', '218',
                                    evidence=[Evidence(text='bar')])
    assembler = Preassembler(hierarchies, stmts=[generic, site_specific])
    assembler.combine_related()
    assert len(assembler.related_stmts) == 1

    flat = flatten_evidence(assembler.related_stmts)
    assert len(flat) == 1
    parent = flat[0]
    assert len(parent.evidence) == 2
    texts = [ev.text for ev in parent.evidence]
    assert 'bar' in texts
    assert 'foo' in texts

    assert len(parent.supported_by) == 1
    child = parent.supported_by[0]
    assert len(child.evidence) == 1
    assert child.evidence[0].text == 'foo'
def run_assembly(stmts, folder, pmcid, background_assertions=None):
    '''Run assembly on a list of statements, for a given PMCID.

    Pipeline: ground/rename agents, filter to grounded statements,
    preassemble (dedupe + relate), score with the BeliefEngine, link
    mechanisms, re-preassemble with inferred statements, then emit
    index cards, a pickle, a graph PDF, and TSV diagnostics.

    Parameters
    ----------
    stmts : list
        Statements extracted for this paper.
    folder : str
        Output root; '<folder>/index_cards/' and '<folder>/other_outputs/'
        are assumed to exist.
    pmcid : str
        Paper identifier used to name all output files.
    background_assertions : list or None
        Statements to exclude from output as prior/background knowledge.
    '''
    # Folder for index card output (scored submission)
    indexcard_prefix = folder + '/index_cards/' + pmcid
    # Folder for other outputs (for analysis, debugging)
    otherout_prefix = folder + '/other_outputs/' + pmcid

    # Do grounding mapping here
    # Load the TRIPS-specific grounding map and add to the default
    # (REACH-oriented) grounding map:
    trips_gm = load_grounding_map('trips_grounding_map.csv')
    default_grounding_map.update(trips_gm)
    gm = GroundingMapper(default_grounding_map)

    mapped_agent_stmts = gm.map_agents(stmts)
    renamed_agent_stmts = gm.rename_agents(mapped_agent_stmts)

    # Filter for grounding: keep only statements whose agents are all
    # recognized proteins or chemicals.
    grounded_stmts = []
    for st in renamed_agent_stmts:
        if all([is_protein_or_chemical(a) for a in st.agent_list()]):
            grounded_stmts.append(st)

    # Instantiate the Preassembler
    pa = Preassembler(hierarchies)
    pa.add_statements(grounded_stmts)
    print('== %s ====================' % pmcid)
    print('%d statements collected in total.' % len(pa.stmts))

    # Combine duplicates
    unique_stmts = pa.combine_duplicates()
    print('%d statements after combining duplicates.' % len(unique_stmts))

    # Run BeliefEngine on unique statements
    epe = BeliefEngine()
    epe.set_prior_probs(pa.unique_stmts)

    # Build statement hierarchy
    related_stmts = pa.combine_related()
    # Run BeliefEngine on hierarchy
    epe.set_hierarchy_probs(related_stmts)
    print('%d statements after combining related.'
          % len(related_stmts))

    # Instantiate the mechanism linker
    ml = MechLinker(related_stmts)
    # Link statements
    linked_stmts = ml.link_statements()
    # Run BeliefEngine on linked statements
    epe.set_linked_probs(linked_stmts)
    # Print linked statements for debugging purposes
    print('Linked\n=====')
    for ls in linked_stmts:
        print(ls.inferred_stmt.belief, ls.inferred_stmt)
    print('=============')

    # Combine all statements including linked ones
    all_statements = ml.statements + [ls.inferred_stmt for ls in linked_stmts]

    # Instantiate a new preassembler
    pa = Preassembler(hierarchies, all_statements)
    # Build hierarchy again
    pa.combine_duplicates()
    # Choose the top-level statements
    related_stmts = pa.combine_related()

    # Remove top-level statements that came only from the prior
    if background_assertions is not None:
        nonbg_stmts = [
            stmt for stmt in related_stmts
            if stmt not in background_assertions
        ]
    else:
        nonbg_stmts = related_stmts

    # Dump top-level statements in a pickle (protocol 2 for Python 2
    # compatibility of the output file)
    with open(otherout_prefix + '.pkl', 'wb') as fh:
        pickle.dump(nonbg_stmts, fh, protocol=2)

    # Flatten evidence for statements
    flattened_evidence_stmts = flatten_evidence(nonbg_stmts)

    # Start a card counter
    card_counter = 1
    # We don't limit the number of cards reported in this round
    card_lim = float('inf')
    top_stmts = []
    ###############################################
    # The belief cutoff for statements
    belief_cutoff = 0.3
    ###############################################
    # Iterate statements in decreasing order of belief
    for st in sorted(flattened_evidence_stmts,
                     key=lambda x: x.belief, reverse=True):
        if st.belief >= belief_cutoff:
            print(st.belief, st)
        if st.belief < belief_cutoff:
            print('SKIP', st.belief, st)
        # NOTE(review): there is no `continue` after the SKIP print, so
        # low-belief statements still fall through to index card assembly
        # below — confirm whether a `continue` was intended here.

        # If it's background knowledge, we skip the statement
        if is_background_knowledge(st):
            print('This statement is background knowledge - skipping.')
            continue

        # Assemble IndexCards
        ia = IndexCardAssembler([st], pmc_override=pmcid)
        ia.make_model()
        # If the index card was actually made
        # (not all statements can be assembled into index cards so
        # this is often not the case)
        if ia.cards:
            # Save the index card json
            ia.save_model(indexcard_prefix + '-%d.json' % card_counter)
            card_counter += 1
            top_stmts.append(st)
            if card_counter > card_lim:
                break

    # Print the English-assembled model for debugging purposes
    ea = EnglishAssembler(top_stmts)
    print('=======================')
    print(ea.make_model())
    print('=======================')

    # Print the statement graph
    graph = render_stmt_graph(nonbg_stmts)
    graph.draw(otherout_prefix + '_graph.pdf', prog='dot')
    # Print statement diagnostics
    print_stmts(pa.stmts, otherout_prefix + '_statements.tsv')
    print_stmts(related_stmts, otherout_prefix + '_related_statements.tsv')
def run_assembly(stmts, folder, pmcid, background_assertions=None):
    '''Run assembly on a list of statements, for a given PMCID.

    Variant that uses MechLinker's static inference methods
    (infer_active_forms / infer_modifications / infer_activations)
    instead of an instance-level link_statements call.

    Parameters
    ----------
    stmts : list
        Statements extracted for this paper.
    folder : str
        Output root; '<folder>/index_cards/' and '<folder>/other_outputs/'
        are assumed to exist.
    pmcid : str
        Paper identifier used to name all output files.
    background_assertions : list or None
        Statements to exclude from output as prior/background knowledge.
    '''
    # Folder for index card output (scored submission)
    indexcard_prefix = folder + '/index_cards/' + pmcid
    # Folder for other outputs (for analysis, debugging)
    otherout_prefix = folder + '/other_outputs/' + pmcid

    # Do grounding mapping here
    # Load the TRIPS-specific grounding map and add to the default
    # (REACH-oriented) grounding map:
    trips_gm = load_grounding_map('trips_grounding_map.csv')
    default_grounding_map.update(trips_gm)
    gm = GroundingMapper(default_grounding_map)

    mapped_agent_stmts = gm.map_agents(stmts)
    renamed_agent_stmts = gm.rename_agents(mapped_agent_stmts)

    # Filter for grounding: keep only statements whose agents are all
    # recognized proteins or chemicals.
    grounded_stmts = []
    for st in renamed_agent_stmts:
        if all([is_protein_or_chemical(a) for a in st.agent_list()]):
            grounded_stmts.append(st)

    # Instantiate the Preassembler
    pa = Preassembler(hierarchies)
    pa.add_statements(grounded_stmts)
    print('== %s ====================' % pmcid)
    print('%d statements collected in total.' % len(pa.stmts))

    # Combine duplicates
    unique_stmts = pa.combine_duplicates()
    print('%d statements after combining duplicates.' % len(unique_stmts))

    # Run BeliefEngine on unique statements
    epe = BeliefEngine()
    epe.set_prior_probs(pa.unique_stmts)

    # Build statement hierarchy
    related_stmts = pa.combine_related()
    # Run BeliefEngine on hierarchy
    epe.set_hierarchy_probs(related_stmts)
    print('%d statements after combining related.'
          % len(related_stmts))

    # Instantiate the mechanism linker
    # Link statements via the static inference methods
    linked_stmts = MechLinker.infer_active_forms(related_stmts)
    linked_stmts += MechLinker.infer_modifications(related_stmts)
    linked_stmts += MechLinker.infer_activations(related_stmts)
    # Run BeliefEngine on linked statements
    epe.set_linked_probs(linked_stmts)
    # Print linked statements for debugging purposes
    print('Linked\n=====')
    for ls in linked_stmts:
        print(ls.inferred_stmt.belief, ls.inferred_stmt)
    print('=============')

    # Combine all statements including linked ones
    all_statements = related_stmts + [ls.inferred_stmt for ls in linked_stmts]

    # Instantiate a new preassembler
    pa = Preassembler(hierarchies, all_statements)
    # Build hierarchy again
    pa.combine_duplicates()
    # Choose the top-level statements
    related_stmts = pa.combine_related()

    # Remove top-level statements that came only from the prior
    if background_assertions is not None:
        nonbg_stmts = [stmt for stmt in related_stmts
                       if stmt not in background_assertions]
    else:
        nonbg_stmts = related_stmts

    # Dump top-level statements in a pickle
    with open(otherout_prefix + '.pkl', 'wb') as fh:
        pickle.dump(nonbg_stmts, fh)

    # Flatten evidence for statements
    flattened_evidence_stmts = flatten_evidence(nonbg_stmts)

    # Start a card counter
    card_counter = 1
    # We don't limit the number of cards reported in this round
    card_lim = float('inf')
    top_stmts = []
    ###############################################
    # The belief cutoff for statements
    belief_cutoff = 0.3
    ###############################################
    # Iterate statements in decreasing order of belief
    for st in sorted(flattened_evidence_stmts,
                     key=lambda x: x.belief, reverse=True):
        if st.belief >= belief_cutoff:
            print(st.belief, st)
        if st.belief < belief_cutoff:
            print('SKIP', st.belief, st)
        # NOTE(review): there is no `continue` after the SKIP print, so
        # low-belief statements still fall through to index card assembly
        # below — confirm whether a `continue` was intended here.

        # If it's background knowledge, we skip the statement
        if is_background_knowledge(st):
            print('This statement is background knowledge - skipping.')
            continue

        # Assemble IndexCards
        ia = IndexCardAssembler([st], pmc_override=pmcid)
        ia.make_model()
        # If the index card was actually made
        # (not all statements can be assembled into index cards so
        # this is often not the case)
        if ia.cards:
            # Save the index card json
            ia.save_model(indexcard_prefix + '-%d.json' % card_counter)
            card_counter += 1
            top_stmts.append(st)
            if card_counter > card_lim:
                break

    # Print the English-assembled model for debugging purposes
    ea = EnglishAssembler(top_stmts)
    print('=======================')
    print(ea.make_model().encode('utf-8'))
    print('=======================')

    # Print the statement graph
    graph = render_stmt_graph(nonbg_stmts)
    graph.draw(otherout_prefix + '_graph.pdf', prog='dot')
    # Print statement diagnostics
    print_stmts(pa.stmts, otherout_prefix + '_statements.tsv')
    print_stmts(related_stmts, otherout_prefix + '_related_statements.tsv')
def run_assembly(stmts, folder, pmcid):
    # NOTE(review): this is Python 2 code (statement-form `print`); it will
    # not run under Python 3 without conversion.
    # Run assembly on a list of statements for a given PMCID: filter to
    # grounded statements, preassemble (dedupe + mechanism-link + relate),
    # then emit index cards, a pickle, a graph PDF, TSV diagnostics, and a
    # PySB model summary.
    indexcard_prefix = folder + '/index_cards/' + pmcid
    otherout_prefix = folder + '/other_outputs/' + pmcid

    # Filter for grounding: keep only statements whose agents are all
    # recognized proteins or chemicals.
    grounded_stmts = []
    for st in stmts:
        if all([is_protein_or_chemical(a) for a in st.agent_list()]):
            grounded_stmts.append(st)

    # Instantiate the Preassembler
    # (eh/mh are module-level entity/modification hierarchies —
    # presumably defined elsewhere in this file; verify.)
    pa = Preassembler(eh, mh)
    pa.add_statements(grounded_stmts)
    print '%d statements collected in total.' % len(pa.stmts)
    unique_stmts = pa.combine_duplicates()
    print '%d statements after combining duplicates.' % len(unique_stmts)

    # Link related mechanisms, then preassemble again over the linked set
    ml = MechLinker(unique_stmts)
    ml.link_statements()
    pa = Preassembler(eh, mh, ml.statements)
    pa.combine_duplicates()
    related_stmts = pa.combine_related()
    print '%d statements after combining related.' % len(related_stmts)

    # Dump top-level statements in a pickle
    with open(otherout_prefix + '.pkl', 'wb') as fh:
        pickle.dump(related_stmts, fh)

    # Collect supporting evidence onto top-level statements
    flattened_evidence_stmts = flatten_evidence(related_stmts)

    card_counter = 1
    # No limit on the number of cards reported
    card_lim = float('inf')
    top_stmts = []
    # Iterate statements in decreasing order of evidence count
    for st in sorted(flattened_evidence_stmts,
                     key=lambda x: len(x.evidence), reverse=True):
        print len(st.evidence), st
        if is_background_knowledge(st):
            print 'This statement is background knowledge - skipping.'
            continue
        # Assemble IndexCards
        ia = IndexCardAssembler([st])
        ia.make_model()
        # Only save a card if one could actually be assembled
        if ia.cards:
            ia.save_model(indexcard_prefix + '-%d.json' % card_counter)
            card_counter += 1
            top_stmts.append(st)
            if card_counter > card_lim:
                break

    # Print the English-assembled model for debugging purposes
    ea = EnglishAssembler(top_stmts)
    print '======================='
    print ea.make_model()
    print '======================='

    # Print the statement graph
    graph = render_stmt_graph(related_stmts)
    graph.draw(otherout_prefix + '_graph.pdf', prog='dot')
    # Print statement diagnostics
    print_stmts(pa.stmts, otherout_prefix + '_statements.tsv')
    print_stmts(related_stmts, otherout_prefix + '_related_statements.tsv')

    # Assemble a PySB model from the top-level statements and report size
    pya = PysbAssembler()
    pya.add_statements(related_stmts)
    model = pya.make_model()
    print 'PySB model has %d monomers and %d rules' %\
        (len(model.monomers), len(model.rules))