def process(self, lookup: str, compound: ChemblCompound, data: NestedDotDict) -> Sequence[H]:
    """
    Turn one mechanism record into hits for every target reached from its target.

    Args:
        lookup: The compound as it was looked up; used in the debug message and
            forwarded to ``should_include`` and ``to_hit``
        compound: Forwarded unchanged to ``should_include`` and ``to_hit``
        data: The mechanism record; its ``target_chembl_id`` entry names the target

    Returns:
        The concatenation, in traversal order, of ``to_hit`` results for each target
        yielded by ``traversal_strategy``. Empty if ``target_chembl_id`` is missing
        or if ``should_include`` rejects the record.
    """
    if data.get("target_chembl_id") is None:
        logger.debug(f"target_chembl_id missing from mechanism '{data}' for compound {lookup}")
        return []
    source = TargetFactory.find(data["target_chembl_id"], self.api)
    if not self.should_include(lookup, compound, data, source):
        return []
    # Non-traversable target types (like DNA) need no special handling here:
    # traverse() hands back the source target for those,
    # and the subclass has already decided whether to filter them out.
    return [
        hit
        for reached in self.traversal_strategy(source)
        for hit in self.to_hit(lookup, compound, data, reached)
    ]
def test_parents(self):
    """
    Check ``links`` on a mocked target: both SUBSET OF parents come back, ordered by ChEMBL ID.
    """
    dat = {
        "target_chembl_id": "CHEMBL4444",
        "pref_name": "dopamine transporter",
        "target_type": "SINGLE_PROTEIN",
    }
    monoamine = {
        "target_chembl_id": "CHEMBL1111",
        "pref_name": "monoamine transporter",
        "target_type": "SINGLE_PROTEIN",
    }
    receptor = {
        "target_chembl_id": "CHEMBL0000",
        "pref_name": "receptor",
        "target_type": "PROTEIN_COMPLEX",
    }
    # Deliberately listed with CHEMBL1111 first, so the ordering check below is meaningful
    relations = [
        {"relationship": "SUBSET OF", "related_target_chembl_id": "CHEMBL1111"},
        {"relationship": "SUBSET OF", "related_target_chembl_id": "CHEMBL0000"},
    ]
    records_by_id = {
        "CHEMBL4444": dat,
        "CHEMBL1111": monoamine,
        "CHEMBL0000": receptor,
    }
    get_target = {"DAT": dat, **records_by_id}

    def filter_targets(kwargs):
        # One-element list for a known ID; None for anything else
        record = records_by_id.get(kwargs["target_chembl_id"])
        return None if record is None else [record]

    def filter_relations(kwargs):
        # Every query sees the same two relations
        return relations

    api = ChemblApi.mock(
        {
            "target": ChemblEntrypoint.mock(get_target, filter_targets),
            "target_relation": ChemblEntrypoint.mock({}, filter_relations),
        }
    )
    target = TargetFactory.find("CHEMBL4444", api)

    def subset_links():
        # Query afresh on every use
        return target.links({TargetRelationshipType.subset_of})

    assert len(subset_links()) == 2
    # should sort by CHEMBL ID first, so 0000 will be first
    expected_parents = [
        ("receptor", "CHEMBL0000"),
        ("monoamine transporter", "CHEMBL1111"),
    ]
    for position, (name, chembl) in enumerate(expected_parents):
        parent, _ = subset_links()[position]
        assert parent.name == name
        assert parent.chembl == chembl
def test_traverse_gabaa_up(self):
    """
    Traverse from CHEMBL2109243 along SUBSET OF links between protein types.

    Expects exactly the starting target plus CHEMBL2093872 to be accepted.
    """
    start = TargetFactory.find("CHEMBL2109243", Chembl)
    assert start.chembl == "CHEMBL2109243"
    allowed_links = DagTargetLinkType.cross(
        TargetType.protein_types(),
        {TargetRelationshipType.subset_of},
        TargetType.protein_types(),
    )
    accepted_ids = {hit.target.chembl for hit in start.traverse(allowed_links)}
    assert accepted_ids == {"CHEMBL2109243", "CHEMBL2093872"}
def test_find(self):
    """
    Check that ``TargetFactory.find`` builds a ``Target`` from a mocked record.

    The record is registered under the key "DAT"; its type, name, and ChEMBL ID
    must all be carried over onto the resulting object.
    """
    record = {
        "target_chembl_id": "CHEMBL4444",
        "pref_name": "dopamine transporter",
        "target_type": "SINGLE_PROTEIN",
    }
    target_entrypoint = ChemblEntrypoint.mock({"DAT": record})
    mocked_api = ChemblApi.mock({"target": target_entrypoint})
    found = TargetFactory.find("DAT", mocked_api)
    assert isinstance(found, Target)
    assert found.type == TargetType.single_protein
    assert found.name == "dopamine transporter"
    assert found.chembl == "CHEMBL4444"
def test_traverse_gabaa_up_and_down(self):
    """
    Traverse from CHEMBL2109243 along both SUBSET OF and SUPERSET OF links.

    Expects between 41 and 59 accepted targets, every one of which has
    "GABA" (case-insensitive) in its name.
    """
    target = TargetFactory.find("CHEMBL2109243", Chembl)
    link_types = DagTargetLinkType.cross(
        TargetType.protein_types(),
        {
            TargetRelationshipType.subset_of,
            TargetRelationshipType.superset_of,
        },
        TargetType.protein_types(),
    )
    accepted = target.traverse(link_types)
    # based on the docs I wrote, originally by looking thru the search results
    assert len(accepted) > 40
    assert len(accepted) < 60
    # Check every name explicitly. Asserting a set comprehension of booleans would
    # pass for any non-empty result, because a non-empty set is always truthy.
    non_gaba = [t.target.name for t in accepted if "GABA" not in t.target.name.upper()]
    assert not non_gaba, f"targets without 'GABA' in their name: {non_gaba}"
def test_traverse_gabaa_up_mouse_2(self):
    """
    Repeat the CHEMBL3139 upward traversal with one extra permitted link type.

    In addition to SUBSET OF links between protein types, an OVERLAPS WITH link
    from a protein complex to a protein complex group is allowed. The accepted
    set is expected to be the same four targets as in
    ``test_traverse_gabaa_up_mouse``; the difference is that CHEMBL4296059
    is no longer expected to be flagged as an end.
    """
    target = TargetFactory.find("CHEMBL3139", Chembl)
    assert target.chembl == "CHEMBL3139"
    link_types = DagTargetLinkType.cross(
        TargetType.protein_types(),
        {TargetRelationshipType.subset_of},
        TargetType.protein_types(),
    )
    link_types.add(
        DagTargetLinkType(
            TargetType.protein_complex,
            TargetRelationshipType.overlaps_with,
            TargetType.protein_complex_group,
        )
    )
    accepted = target.traverse(link_types)
    vals: Mapping[str, DagTarget] = {a.target.chembl: a for a in accepted}
    assert {t.target.chembl for t in accepted} == {
        "CHEMBL2094133",
        "CHEMBL3139",
        "CHEMBL4296058",
        "CHEMBL4296059",
    }
    assert not vals["CHEMBL3139"].is_end
    assert vals["CHEMBL2094133"].is_end
    assert not vals["CHEMBL4296058"].is_end
    # here's the difference:
    # by adding the OVERLAPS WITH rel, it now knows it's not at the end
    assert not vals["CHEMBL4296059"].is_end
    assert vals["CHEMBL3139"].depth == 0
    # breadth-first! the group is recorded at the depth of its direct link from the start
    assert vals["CHEMBL2094133"].depth == 1
    assert vals["CHEMBL4296058"].depth == 1
    # Both complexes are linked straight from the single-protein start (see the
    # link_type assertions below), so this one is a single hop away as well.
    assert vals["CHEMBL4296059"].depth == 1
    assert vals["CHEMBL3139"].link_type is None
    assert vals["CHEMBL2094133"].link_type == DagTargetLinkType(
        TargetType.single_protein,
        TargetRelationshipType.subset_of,
        TargetType.protein_complex_group,
    )
    assert vals["CHEMBL4296058"].link_type == DagTargetLinkType(
        TargetType.single_protein,
        TargetRelationshipType.subset_of,
        TargetType.protein_complex,
    )
    assert vals["CHEMBL4296059"].link_type == DagTargetLinkType(
        TargetType.single_protein,
        TargetRelationshipType.subset_of,
        TargetType.protein_complex,
    )
def test_traverse_gabaa_up_mouse(self):
    """
    Traverse upward from CHEMBL3139 along SUBSET OF links between protein types.

    Checks the accepted set, and each accepted target's ``is_end`` flag,
    ``depth``, and the ``link_type`` it was reached by.
    """
    # a single protein
    # branches to GABA A channel complex group CHEMBL2094133
    # but also to complexes CHEMBL4296058 and CHEMBL4296059
    # weirdly, CHEMBL4296058 then joins up with CHEMBL2094133
    # but CHEMBL4296059 does not (it only joins through an OVERLAPS WITH rel)
    # so that one SHOULD be an "end" (which wouldn't be true in a real traversal strategy, hopefully)
    target = TargetFactory.find("CHEMBL3139", Chembl)
    assert target.chembl == "CHEMBL3139"
    link_types = DagTargetLinkType.cross(
        TargetType.protein_types(),
        {TargetRelationshipType.subset_of},
        TargetType.protein_types(),
    )
    accepted = target.traverse(link_types)
    vals: Mapping[str, DagTarget] = {a.target.chembl: a for a in accepted}
    assert {t.target.chembl for t in accepted} == {
        "CHEMBL2094133",
        "CHEMBL3139",
        "CHEMBL4296058",
        "CHEMBL4296059",
    }
    assert not vals["CHEMBL3139"].is_end
    assert vals["CHEMBL2094133"].is_end
    assert not vals["CHEMBL4296058"].is_end
    assert vals["CHEMBL4296059"].is_end
    assert vals["CHEMBL3139"].depth == 0
    # breadth-first! the group is also reachable via CHEMBL4296058,
    # but it is recorded at the depth of its direct link from the start
    assert vals["CHEMBL2094133"].depth == 1
    assert vals["CHEMBL4296058"].depth == 1
    # Both complexes branch directly off the starting protein, so this one is a single hop away too
    assert vals["CHEMBL4296059"].depth == 1
    assert vals["CHEMBL3139"].link_type is None
    assert vals["CHEMBL2094133"].link_type == DagTargetLinkType(
        TargetType.single_protein,
        TargetRelationshipType.subset_of,
        TargetType.protein_complex_group,
    )
    assert vals["CHEMBL4296058"].link_type == DagTargetLinkType(
        TargetType.single_protein,
        TargetRelationshipType.subset_of,
        TargetType.protein_complex,
    )
    assert vals["CHEMBL4296059"].link_type == DagTargetLinkType(
        TargetType.single_protein,
        TargetRelationshipType.subset_of,
        TargetType.protein_complex,
    )
def test_mu_or(self):
    """Check that ``strategy1`` traversal from CHEMBL233 yields exactly CHEMBL2095181."""
    start = TargetFactory.find("CHEMBL233", Chembl)
    strategy = TargetTraversalStrategies.strategy1(Chembl)
    reached_ids = [hit.chembl for hit in strategy.traverse(start)]
    assert reached_ids == ["CHEMBL2095181"]
def test_5ht2bc_sel_group(self):
    """Check that ``strategy1`` traversal from CHEMBL2111466 yields exactly CHEMBL2096904."""
    start = TargetFactory.find("CHEMBL2111466", Chembl)
    strategy = TargetTraversalStrategies.strategy1(Chembl)
    reached_ids = [hit.chembl for hit in strategy.traverse(start)]
    assert reached_ids == ["CHEMBL2096904"]