def test_site_map_hgnc():
    """Make sure site mapping is done even if only HGNC ID is given."""
    mapk1_agent, _mapk3_agent = get_invalid_mapks()
    # Overwrite the references so that HGNC is the only entry present.
    mapk1_agent.db_refs = {'HGNC': '6871'}
    active_form = ActiveForm(mapk1_agent, 'kinase', True)
    valid_stmts, mapped_stmts = sm.map_sites([active_form])
    # Nothing is reported as valid; the single statement is reported as mapped.
    assert len(valid_stmts) == 0
    assert len(mapped_stmts) == 1
def test_invalid_position():
    """Check that a statement with position '2.59' is neither valid nor mapped."""
    stmt_json = {
        'enz': {'name': 'CFD'},
        'sub': {'name': 'HP'},
        'residue': 'F',
        'position': '2.59',
    }
    phos_stmt = Phosphorylation._from_json(stmt_json)
    valid_stmts, mapped_stmts = sm.map_sites(stmts=[phos_stmt])
    assert not valid_stmts
    assert not mapped_stmts
def test_site_map_within_bound_condition():
    """Check that an agent inside a bound condition gets site-mapped."""
    mapk1_agent, mapk3_agent = get_invalid_mapks()
    # Put MAPK1 into a bound condition on the object of the statement
    mapk3_agent.bound_conditions = [BoundCondition(mapk1_agent)]
    activation = Activation(mapk1_agent, mapk3_agent, 'kinase')
    # The second element of the result holds the mapped statements
    mapped_statements = sm.map_sites([activation])[1]
    assert len(mapped_statements) == 1
    new_stmt = mapped_statements[0].mapped_stmt
    # The agent bound to the object must carry the corrected sites
    bound_agent = new_stmt.obj.bound_conditions[0].agent
    validate_mapk1(bound_agent)
def test_site_map_within_bound_condition():
    """Verify site mapping reaches agents nested in a bound condition."""
    subj_agent, obj_agent = get_invalid_mapks()
    # The object of the statement is bound to the (invalid) subject agent
    obj_agent.bound_conditions = [BoundCondition(subj_agent)]
    stmt = Activation(subj_agent, obj_agent, 'kinase')
    mapping_result = sm.map_sites([stmt])
    # Index 1 of the result is the list of mapped statements
    mapped_list = mapping_result[1]
    assert len(mapped_list) == 1
    corrected_stmt = mapped_list[0].mapped_stmt
    # Validate the agent found in the object's first bound condition
    validate_mapk1(corrected_stmt.obj.bound_conditions[0].agent)
def test_site_map_selfmodification():
    """Check site mapping of an Autophosphorylation and of its enzyme's mods."""
    enzyme = Agent('MAPK1',
                   mods=[ModCondition('phosphorylation', 'T', '183')],
                   db_refs={'UP': 'P28482'})
    autophos = Autophosphorylation(enzyme, 'Y', '185')
    valid_stmts, mapped_stmts = sm.map_sites([autophos])
    assert len(valid_stmts) == 0
    assert len(mapped_stmts) == 1

    mapped_entry = mapped_stmts[0]
    site_maps = mapped_entry.mapped_mods
    # (gene name, original residue, original position,
    #  mapped residue, mapped position) for each mapped site, in order
    expected_sites = [
        ('MAPK1', 'T', '183', 'T', '185'),
        ('MAPK1', 'Y', '185', 'Y', '187'),
    ]
    for idx, expected in enumerate(expected_sites):
        site = site_maps[idx]
        assert (site.gene_name, site.orig_res, site.orig_pos,
                site.mapped_res, site.mapped_pos) == expected

    # The rewritten statement carries the corrected enzyme mod and site
    new_stmt = mapped_entry.mapped_stmt
    new_enzyme = new_stmt.enz
    assert new_enzyme.mods[0].matches(
        ModCondition('phosphorylation', 'T', '185'))
    assert new_stmt.residue == 'Y'
    assert new_stmt.position == '187'
    assert unicode_strs((enzyme, autophos, valid_stmts, mapped_stmts))
def test_site_map_activity_modification():
    """Check site mapping of the agent mods in an ActiveForm statement."""
    mod_conds = [ModCondition('phosphorylation', 'T', '183'),
                 ModCondition('phosphorylation', 'Y', '185')]
    agent = Agent('MAPK1', mods=mod_conds, db_refs={'UP': 'P28482'})
    active_form = ActiveForm(agent, 'kinase', True)
    valid_stmts, mapped_stmts = sm.map_sites([active_form])
    assert len(valid_stmts) == 0
    assert len(mapped_stmts) == 1

    mapped_entry = mapped_stmts[0]
    site_maps = mapped_entry.mapped_mods
    # (gene name, original residue, original position,
    #  mapped residue, mapped position) for each mapped site, in order
    expected_sites = [
        ('MAPK1', 'T', '183', 'T', '185'),
        ('MAPK1', 'Y', '185', 'Y', '187'),
    ]
    for idx, expected in enumerate(expected_sites):
        site = site_maps[idx]
        assert (site.gene_name, site.orig_res, site.orig_pos,
                site.mapped_res, site.mapped_pos) == expected

    assert mapped_entry.original_stmt == active_form
    # The rewritten statement's agent carries both corrected mods
    new_mods = mapped_entry.mapped_stmt.agent.mods
    assert new_mods[0].matches(ModCondition('phosphorylation', 'T', '185'))
    assert new_mods[1].matches(ModCondition('phosphorylation', 'Y', '187'))
    assert unicode_strs((mod_conds, agent, active_form, valid_stmts,
                         mapped_stmts))
def test_site_map_selfmodification():
    """Check site mapping of a SelfModification and of its enzyme's mods."""
    enzyme = Agent('MAPK1',
                   mods=[ModCondition('phosphorylation', 'T', '183')],
                   db_refs={'UP': 'P28482'})
    self_mod = SelfModification(enzyme, 'Y', '185')
    valid_stmts, mapped_stmts = sm.map_sites([self_mod])
    assert len(valid_stmts) == 0
    assert len(mapped_stmts) == 1

    mapped_entry = mapped_stmts[0]
    # Each mapped_mods entry is indexed as
    # [0] -> original site tuple, [1][0] -> new residue, [1][1] -> new position
    expected_sites = [
        (('MAPK1', 'T', '183'), 'T', '185'),
        (('MAPK1', 'Y', '185'), 'Y', '187'),
    ]
    for idx, (orig_site, new_res, new_pos) in enumerate(expected_sites):
        assert mapped_entry.mapped_mods[idx][0] == orig_site
        assert mapped_entry.mapped_mods[idx][1][0] == new_res
        assert mapped_entry.mapped_mods[idx][1][1] == new_pos
    assert mapped_entry.original_stmt == self_mod

    # The rewritten statement carries the corrected enzyme mod and site
    new_stmt = mapped_entry.mapped_stmt
    new_enzyme = new_stmt.enz
    assert new_enzyme.mods[0].matches(
        ModCondition('phosphorylation', 'T', '185'))
    assert new_stmt.residue == 'Y'
    assert new_stmt.position == '187'
    assert unicode_strs((enzyme, self_mod, valid_stmts, mapped_stmts))
def test_site_map_modification():
    """Map two Phosphorylation statements and check the mapped results.

    The first statement is MAPK1 -> MAPK3, the second MAP2K1 -> MAPK1.
    Both are expected to come back as mapped (none as valid), with the
    agent mod conditions rewritten to the corrected positions.
    """
    mapk1_invalid = Agent('MAPK1',
                          mods=[
                              ModCondition('phosphorylation', 'T', '183'),
                              ModCondition('phosphorylation', 'Y', '185')
                          ],
                          db_refs={'UP': 'P28482'})
    mapk3_invalid = Agent('MAPK3',
                          mods=[ModCondition('phosphorylation', 'T', '201')],
                          db_refs={'UP': 'P27361'})
    map2k1_invalid = Agent('MAP2K1',
                           mods=[
                               ModCondition('phosphorylation', 'S', '217'),
                               ModCondition('phosphorylation', 'S', '221')
                           ],
                           db_refs={'UP': 'Q02750'})

    st1 = Phosphorylation(mapk1_invalid, mapk3_invalid, 'Y', '203')
    st2 = Phosphorylation(map2k1_invalid, mapk1_invalid, 'Y', '218')
    res = sm.map_sites([st1, st2])

    assert len(res) == 2
    valid_stmts = res[0]
    mapped_stmts = res[1]
    assert isinstance(valid_stmts, list)
    assert isinstance(mapped_stmts, list)
    assert len(valid_stmts) == 0
    assert len(mapped_stmts) == 2

    # MAPK1 -> MAPK3
    mapped_stmt1 = mapped_stmts[0]
    assert isinstance(mapped_stmt1, MappedStatement)
    assert mapped_stmt1.original_stmt == st1
    assert isinstance(mapped_stmt1.mapped_mods, list)
    # The message must format the *count*: applying %d to the list itself
    # would raise TypeError and hide the real assertion failure.
    assert len(mapped_stmt1.mapped_mods) == 4, \
        "Got %d mapped mods." % len(mapped_stmt1.mapped_mods)  # FIXME
    ms = mapped_stmt1.mapped_stmt
    assert isinstance(ms, Statement)
    agent1 = ms.enz
    agent2 = ms.sub
    assert agent1.name == 'MAPK1'
    assert len(agent1.mods) == 2
    assert agent1.mods[0].matches(ModCondition('phosphorylation', 'T', '185'))
    assert agent1.mods[1].matches(ModCondition('phosphorylation', 'Y', '187'))
    assert agent2.mods[0].matches(ModCondition('phosphorylation', 'T', '202'))
    assert ms.residue == 'Y'
    assert ms.position == '204'

    # MAP2K1 -> MAPK1
    mapped_stmt2 = mapped_stmts[1]
    assert isinstance(mapped_stmt2, MappedStatement)
    assert mapped_stmt2.original_stmt == st2
    assert isinstance(mapped_stmt2.mapped_mods, list)
    # Report the count for the second mapped statement, not the first.
    assert len(mapped_stmt2.mapped_mods) == 5, \
        "Got %d mapped mods." % len(mapped_stmt2.mapped_mods)  # FIXME
    ms = mapped_stmt2.mapped_stmt
    assert isinstance(ms, Statement)
    agent1 = ms.enz
    agent2 = ms.sub
    assert agent1.name == 'MAP2K1'
    assert len(agent1.mods) == 2
    assert agent1.mods[0].matches(ModCondition('phosphorylation', 'S', '218'))
    assert agent1.mods[1].matches(ModCondition('phosphorylation', 'S', '222'))
    assert len(agent2.mods) == 2
    assert agent2.mods[0].matches(ModCondition('phosphorylation', 'T', '185'))
    assert agent2.mods[1].matches(ModCondition('phosphorylation', 'Y', '187'))
    # The incorrect phosphorylation residue is passed through to the new
    # statement unchanged
    assert ms.residue == 'Y'
    assert ms.position == '218'

    # Check for unicode
    assert unicode_strs((mapk1_invalid, mapk3_invalid, map2k1_invalid, st1,
                         st2, res, valid_stmts, mapped_stmts))
def test_site_map_complex():
    """Check site mapping of both agents in a RasGef statement.

    NOTE(review): the test name says "complex" but the statement built
    here is a RasGef -- confirm the intended name.
    """
    mapk1, mapk3 = get_invalid_mapks()
    stmt = RasGef(mapk1, mapk3)
    mapping_result = sm.map_sites([stmt])
    check_validated_mapks(mapping_result, stmt)
def test_site_map_activation():
    """Check site mapping of both agents in an Activation statement."""
    mapk1, mapk3 = get_invalid_mapks()
    stmt = Activation(mapk1, mapk3, 'kinase')
    mapping_result = sm.map_sites([stmt])
    check_validated_mapks(mapping_result, stmt)
def test_site_map_gap():
    """Check site mapping of both agents in a Gap statement."""
    mapk1, mapk3 = get_invalid_mapks()
    stmt = Gap(mapk1, mapk3)
    mapping_result = sm.map_sites([stmt])
    check_validated_mapks(mapping_result, stmt)
def test_site_map_complex():
    """Check site mapping of both member agents of a Complex statement."""
    mapk1, mapk3 = get_invalid_mapks()
    members = [mapk1, mapk3]
    stmt = Complex(members)
    mapping_result = sm.map_sites([stmt])
    check_validated_mapks(mapping_result, stmt)
def run_preassembly(self, stmts, print_summary=True):
    """Run complete preassembly procedure on the given statements.

    Results are returned as a dict and stored in the attribute
    :py:attr:`results`. If :py:attr:`basename` is not None, they are also
    saved in the pickle file `<basename>_results.pkl`.

    Parameters
    ----------
    stmts : list of :py:class:`indra.statements.Statement`
        Statements to preassemble.
    print_summary : bool
        If True (default), logs a summary of the preassembly process
        through ``logger.info``.

    Returns
    -------
    dict
        A dict containing the following entries:

        - `raw`: the starting set of statements before preassembly.
        - `duplicates1`: statements after initial de-duplication.
        - `valid`: statements found to have valid modification sites.
        - `mapped`: mapped statements (list of
          :py:class:`indra.preassembler.sitemapper.MappedStatement`).
        - `mapped_stmts`: combined list of valid statements and statements
          after mapping.
        - `duplicates2`: statements resulting from de-duplication of the
          statements in `mapped_stmts`.
        - `related2`: top-level statements after combining the statements
          in `duplicates2`.
    """
    # First round of preassembly: remove duplicates before sitemapping
    pa1 = Preassembler(hierarchies, stmts)
    logger.info("Combining duplicates")
    pa1.combine_duplicates()

    # Map sites
    logger.info("Mapping sites")
    (valid, mapped) = sm.map_sites(pa1.unique_stmts)

    # Keep a mapped statement only if the second element of every one of
    # its mapped_mods entries is not None, then combine with the valid ones
    correctly_mapped_stmts = [
        ms.mapped_stmt for ms in mapped
        if all(mm[1] is not None for mm in ms.mapped_mods)
    ]
    mapped_stmts = valid + correctly_mapped_stmts

    # Second round of preassembly: de-duplicate and combine related
    pa2 = Preassembler(hierarchies, mapped_stmts)
    logger.info("Combining duplicates again")
    pa2.combine_duplicates()
    pa2.combine_related()

    # Fill out the results dict
    self.results = {
        'raw': stmts,
        'duplicates1': pa1.unique_stmts,
        'valid': valid,
        'mapped': mapped,
        'mapped_stmts': mapped_stmts,
        'duplicates2': pa2.unique_stmts,
        'related2': pa2.related_stmts,
    }

    # Log summary
    if print_summary:
        logger.info("\nStarting number of statements: %d" % len(stmts))
        logger.info("After duplicate removal: %d" % len(pa1.unique_stmts))
        logger.info("Unique statements with valid sites: %d" % len(valid))
        logger.info("Unique statements with invalid sites: %d" %
                    len(mapped))
        logger.info("After post-mapping duplicate removal: %d" %
                    len(pa2.unique_stmts))
        logger.info("After combining related statements: %d" %
                    len(pa2.related_stmts))

    # Save the results if we're caching
    if self.basename is not None:
        results_filename = '%s_results.pkl' % self.basename
        with open(results_filename, 'wb') as f:
            pickle.dump(self.results, f, protocol=2)
    return self.results
def run_preassembly(self, stmts, print_summary=True):
    """Run complete preassembly procedure on the given statements.

    Results are returned as a dict and stored in the attribute
    :py:attr:`results`. If :py:attr:`basename` is not None, they are also
    saved in the pickle file `<basename>_results.pkl`.

    Parameters
    ----------
    stmts : list of :py:class:`indra.statements.Statement`
        Statements to preassemble.
    print_summary : bool
        If True (default), logs a summary of the preassembly process
        through ``logger.info``.

    Returns
    -------
    dict
        A dict containing the following entries:

        - `raw`: the starting set of statements before preassembly.
        - `duplicates1`: statements after initial de-duplication.
        - `valid`: statements found to have valid modification sites.
        - `mapped`: mapped statements (list of
          :py:class:`indra.preassembler.sitemapper.MappedStatement`).
        - `mapped_stmts`: combined list of valid statements and statements
          after mapping.
        - `duplicates2`: statements resulting from de-duplication of the
          statements in `mapped_stmts`.
        - `related2`: top-level statements after combining the statements
          in `duplicates2`.
    """
    # First round of preassembly: remove duplicates before sitemapping
    pa1 = Preassembler(hierarchies, stmts)
    logger.info("Combining duplicates")
    pa1.combine_duplicates()

    # Map sites
    logger.info("Mapping sites")
    (valid, mapped) = sm.map_sites(pa1.unique_stmts)

    # Keep a mapped statement only if the second element of every one of
    # its mapped_mods entries is not None, then combine with the valid ones
    correctly_mapped_stmts = [
        ms.mapped_stmt for ms in mapped
        if all(mm[1] is not None for mm in ms.mapped_mods)
    ]
    mapped_stmts = valid + correctly_mapped_stmts

    # Second round of preassembly: de-duplicate and combine related
    pa2 = Preassembler(hierarchies, mapped_stmts)
    logger.info("Combining duplicates again")
    pa2.combine_duplicates()
    pa2.combine_related()

    # Fill out the results dict
    self.results = {
        'raw': stmts,
        'duplicates1': pa1.unique_stmts,
        'valid': valid,
        'mapped': mapped,
        'mapped_stmts': mapped_stmts,
        'duplicates2': pa2.unique_stmts,
        'related2': pa2.related_stmts,
    }

    # Log summary
    if print_summary:
        logger.info("\nStarting number of statements: %d" % len(stmts))
        logger.info("After duplicate removal: %d" % len(pa1.unique_stmts))
        logger.info("Unique statements with valid sites: %d" % len(valid))
        logger.info("Unique statements with invalid sites: %d" %
                    len(mapped))
        logger.info("After post-mapping duplicate removal: %d" %
                    len(pa2.unique_stmts))
        logger.info("After combining related statements: %d" %
                    len(pa2.related_stmts))

    # Save the results if we're caching
    if self.basename is not None:
        results_filename = '%s_results.pkl' % self.basename
        with open(results_filename, 'wb') as f:
            pickle.dump(self.results, f, protocol=2)
    return self.results
def test_site_map_rasgef():
    """Check site mapping of both agents in a RasGef statement."""
    mapk1, mapk3 = get_invalid_mapks()
    stmt = RasGef(mapk1, mapk3)
    mapping_result = sm.map_sites([stmt])
    check_validated_mapks(mapping_result, stmt)
def test_site_map_rasgap():
    """Check site mapping of both agents in a RasGap statement."""
    mapk1, mapk3 = get_invalid_mapks()
    stmt = RasGap(mapk1, mapk3)
    mapping_result = sm.map_sites([stmt])
    check_validated_mapks(mapping_result, stmt)
def test_site_map_modification():
    """Map two Phosphorylation statements and check the mapped results.

    The first statement is MAPK1 -> MAPK3, the second MAP2K1 -> MAPK1.
    """

    def phos(residue, position):
        # Shorthand for a phosphorylation condition at the given site
        return ModCondition('phosphorylation', residue, position)

    mapk1 = Agent('MAPK1', mods=[phos('T', '183'), phos('Y', '185')],
                  db_refs={'UP': 'P28482'})
    mapk3 = Agent('MAPK3', mods=[phos('T', '201')],
                  db_refs={'UP': 'P27361'})
    map2k1 = Agent('MAP2K1', mods=[phos('S', '217'), phos('S', '221')],
                   db_refs={'UP': 'Q02750'})

    first_stmt = Phosphorylation(mapk1, mapk3, 'Y', '203')
    second_stmt = Phosphorylation(map2k1, mapk1, 'Y', '218')
    result = sm.map_sites([first_stmt, second_stmt])

    assert len(result) == 2
    valid_list = result[0]
    mapped_list = result[1]
    assert isinstance(valid_list, list)
    assert isinstance(mapped_list, list)
    assert len(valid_list) == 0
    assert len(mapped_list) == 2

    # MAPK1 -> MAPK3
    first_mapped = mapped_list[0]
    assert isinstance(first_mapped, MappedStatement)
    assert first_mapped.original_stmt == first_stmt
    assert isinstance(first_mapped.mapped_mods, list)
    assert len(first_mapped.mapped_mods) == 4  # FIXME
    new_stmt = first_mapped.mapped_stmt
    assert isinstance(new_stmt, Statement)
    enzyme = new_stmt.enz
    substrate = new_stmt.sub
    assert enzyme.name == 'MAPK1'
    assert len(enzyme.mods) == 2
    assert enzyme.mods[0].matches(phos('T', '185'))
    assert enzyme.mods[1].matches(phos('Y', '187'))
    assert substrate.mods[0].matches(phos('T', '202'))
    assert new_stmt.residue == 'Y'
    assert new_stmt.position == '204'

    # MAP2K1 -> MAPK1
    second_mapped = mapped_list[1]
    assert isinstance(second_mapped, MappedStatement)
    assert second_mapped.original_stmt == second_stmt
    assert isinstance(second_mapped.mapped_mods, list)
    assert len(second_mapped.mapped_mods) == 5  # FIXME
    new_stmt = second_mapped.mapped_stmt
    assert isinstance(new_stmt, Statement)
    enzyme = new_stmt.enz
    substrate = new_stmt.sub
    assert enzyme.name == 'MAP2K1'
    assert len(enzyme.mods) == 2
    assert enzyme.mods[0].matches(phos('S', '218'))
    assert enzyme.mods[1].matches(phos('S', '222'))
    assert len(substrate.mods) == 2
    assert substrate.mods[0].matches(phos('T', '185'))
    assert substrate.mods[1].matches(phos('Y', '187'))
    # The incorrect phosphorylation residue is passed through to the new
    # statement unchanged
    assert new_stmt.residue == 'Y'
    assert new_stmt.position == '218'

    # Check for unicode
    assert unicode_strs((mapk1, mapk3, map2k1, first_stmt, second_stmt,
                         result, valid_list, mapped_list))