Пример #1
0
    def get_compound_dot_dict(self, inchikey: str) -> NestedDotDict:
        """
        Fetch a molecule record and resolve it to its parent compound, if it has one.

        Args:
            inchikey: The identifier to look up. It is first classified with
                ``self.get_query_type``: a SMILES string goes through a flexmatch
                filter query, anything else is handed directly to ``molecule.get``.

        Returns:
            The record wrapped in a ``NestedDotDict``. When the record carries a
            ``molecule_hierarchy`` whose ``parent_chembl_id`` differs from the record's
            own ``molecule_chembl_id``, the parent's record is returned instead.
            **Only** ``molecule_chembl_id``, ``pref_name``, and ``molecule_structures``
            are guaranteed to exist.

        Raises:
            AssertionError: If a SMILES lookup does not match exactly one molecule.
            ValueError: If the lookup result is ``None``.
        """
        if self.get_query_type(inchikey) == QueryType.smiles:
            # SMILES lookups are restricted to the fields promised in the docstring
            wanted_fields = ["molecule_chembl_id", "pref_name", "molecule_structures"]
            matching = self.api.molecule.filter(
                molecule_structures__canonical_smiles__flexmatch=inchikey
            )
            results = list(matching.only(wanted_fields))
            assert len(results) == 1, f"{len(results)} matches for {inchikey}"
            result = results[0]
        else:
            result = self.api.molecule.get(inchikey)
        if result is None:
            raise ValueError(f"Result for compound {inchikey} is null!")
        record = NestedDotDict(result)
        # the key molecule_hierarchy may be present with the literal value None
        if record.get("molecule_hierarchy") is None:
            return record
        parent_id = record["molecule_hierarchy"]["parent_chembl_id"]
        if parent_id != record["molecule_chembl_id"]:
            # the record is a child form: swap it for its parent
            return NestedDotDict(self.api.molecule.get(parent_id))
        return record
Пример #2
0
 def should_include(self, lookup: str, compound: ChemblCompound,
                    data: NestedDotDict, target: Target) -> bool:
     """
     Decide whether an activity record should be kept.

     The checks run in a fixed order and stop at the first failure, so the
     assay lookup through ``self.api`` only happens for records that passed
     every check on ``data`` itself.

     Args:
         lookup: The string the compound was searched by; used only in log messages.
         compound: The compound the activity belongs to (not consulted here).
         data: The activity record.
         target: The target the activity was measured against.

     Returns:
         ``True`` if the record passes every filter, ``False`` otherwise.
     """
     bad_flags = {
         "potential missing data",
         "potential transcription error",
         "outside typical range",
     }
     # Reject records whose validity comment is one of the known-bad flags
     if data.get_as("data_validity_comment", lambda s: s.lower()) in bad_flags:
         return False
     if data.req_as("standard_relation", str) not in ["=", "<", "<="]:
         return False
     # Only assays of type "B" are accepted
     if data.req_as("assay_type", str) != "B":
         return False
     # The target taxon must be present and contained in self.tax
     if data.get("target_tax_id") is None:
         return False
     if data.get_as("target_tax_id", int) not in self.tax:
         return False
     # A pchembl value must be present and reach the configured minimum
     if data.get("pchembl_value") is None:
         return False
     if data.req_as("pchembl_value", float) < self.config.min_pchembl:
         return False
     validity_flag = data.get("data_validity_comment")
     if validity_flag is not None:
         # A flag that is not in bad_flags is tolerated, but worth a log line
         logger.warning(
             f"Activity annotation for {lookup} has flag '{validity_flag}' (ok)"
         )
     # The `target_organism` doesn't always match the `assay_organism`
     # Ex: see assay CHEMBL823141 / document CHEMBL1135642 for Homo sapiens in Xenopus laevis
     # However, it's often something like yeast expressing a human / mouse / etc receptor
     # So there's no need to filter by it
     assay = self.api.assay.get(data.req_as("assay_chembl_id", str))
     confidence_score = assay.get("confidence_score")
     if confidence_score is None or confidence_score < self.config.min_confidence_score:
         return False
     # `and` binds tighter than `or`; the parentheses spell out the existing grouping:
     # trash targets are always excluded, strange ones only when the minimum
     # confidence score is above 3
     if target.type.is_trash or (
             target.type.is_strange and self.config.min_confidence_score > 3):
         logger.warning(f"Excluding {target} with type {target.type}")
         return False
     return True
Пример #3
0
    def process(self, lookup: str, compound: ChemblCompound, data: NestedDotDict) -> Sequence[H]:
        """
        Convert one raw record into hits, one batch per traversed target.

        Args:
            lookup: The string the compound was searched by.
            compound: The compound the record belongs to.
            data: The raw record; it must carry ``target_chembl_id`` to yield anything.

        Returns:
            The concatenated results of ``self.to_hit`` for every target produced by
            ``self.traversal_strategy``. Empty if the record has no target ID or if
            ``self.should_include`` rejects it.
        """
        if data.get("target_chembl_id") is None:
            logger.debug(f"target_chembl_id missing from mechanism '{data}' for compound {lookup}")
            return []
        source_target = TargetFactory.find(data["target_chembl_id"], self.api)
        if not self.should_include(lookup, compound, data, source_target):
            return []
        # Per the original author: traversal hands back the source target itself for
        # non-traversable types (like DNA), and filtering those is the subclass's job,
        # so nothing special is done about them here.
        return [
            hit
            for ancestor in self.traversal_strategy(source_target)
            for hit in self.to_hit(lookup, compound, data, ancestor)
        ]
Пример #4
0
    def find(cls, chembl: str) -> Target:
        """
        Look up exactly one target by ID and build an instance of ``cls`` from it.

        Args:
            chembl: The value matched against ``target_chembl_id``.

        Returns:
            An instance built from the record's ``target_chembl_id``, its optional
            ``pref_name``, and its ``target_type`` converted with ``TargetType.of``.

        Raises:
            AssertionError: If the query does not return exactly one target.
        """
        matches = cls.api().target.filter(target_chembl_id=chembl)
        n_matches = len(matches)
        assert n_matches == 1, f"Found {n_matches} targets for {chembl}"
        record = NestedDotDict(matches[0])
        fields = dict(
            chembl=record["target_chembl_id"],
            # pref_name is read with get(), so a missing name becomes None
            name=record.get("pref_name"),
            type=TargetType.of(record["target_type"]),
        )
        return cls(**fields)
Пример #5
0
 def test_settings(self):
     """Check that every value in ``settings.toml`` ends up on the loaded ``Settings``."""
     raw = NestedDotDict.read_toml(get_test_resource("settings.toml"))
     settings = Settings.load(raw)
     assert settings.taxon == 1111
     assert settings.min_pchembl == 15
     assert settings.min_confidence_score == 2
     assert settings.min_phase == 0
     assert str(settings.cache_path) == "~"
     assert settings.n_retries == 100
     assert not settings.fast_save
     assert settings.timeout_sec == 0
Пример #6
0
    def process(self, lookup: str, compound: ChemblCompound,
                indication: NestedDotDict) -> IndicationHit:
        """
        Build an ``IndicationHit`` from one indication record.

        Args:
            lookup: The string the compound was searched by.
            compound: The compound the indication belongs to.
            indication: The raw record; ``drugind_id``, ``mesh_id``, ``mesh_heading``
                and ``max_phase_for_ind`` are all required.

        Returns:
            The hit, with the MeSH ID and heading as its object.
        """
        # Values are bound in the same order the constructor arguments are evaluated
        record_id = indication.req_as("drugind_id", str)
        compound_id = compound.chid
        inchikey = compound.inchikey
        compound_name = compound.name
        mesh_id = indication.req_as("mesh_id", str)
        # strip("\n") removes newlines at either end of the heading
        mesh_heading = indication.req_as("mesh_heading", str).strip("\n")
        phase = indication.req_as("max_phase_for_ind", int)
        return IndicationHit(
            record_id,
            compound_id,
            inchikey,
            lookup,
            compound_name,
            object_id=mesh_id,
            object_name=mesh_heading,
            max_phase=phase,
        )
Пример #7
0
    def get_target(self, chembl: str) -> NestedDotDict:
        """
        Queries for the target.

        Args:
            chembl: The value matched against ``target_chembl_id``.

        Returns:
            The single matching target record, wrapped in a ``NestedDotDict``.

        Raises:
            AssertionError: If the query does not return exactly one target.
        """
        targets = self.api.target.filter(target_chembl_id=chembl)
        # Say how many matches came back and for which ID, so a failure is diagnosable
        assert len(targets) == 1, f"Found {len(targets)} targets for {chembl}"
        return NestedDotDict(targets[0])
Пример #8
0
    def search_for(
        what: What,
        compounds: Union[Sequence[str], PurePath],
        config: Union[None, Mapping[str, Any], Path],
    ) -> Tup[pd.DataFrame, Sequence[Triple]]:
        """
        Run one kind of search over a list of compounds.

        Args:
            what: Selects the search; ``what.clazz`` is instantiated with the API,
                the settings and the taxonomy, and supplies ``hit_fields()``.
            compounds: Either the compounds themselves, or a path (a ``PurePath`` or
                a plain ``str``) to a UTF-8 text file listing one compound per line.
                Blank entries are dropped and surrounding whitespace is stripped.
            config: ``None`` for default settings, a path to a TOML file, or a
                mapping (including a ``NestedDotDict``) of settings.

        Returns:
            A data frame with one row per hit and one column per hit field, and the
            de-duplicated, sorted triples of those hits.
        """
        if isinstance(compounds, (PurePath, str)):
            # A lone string is treated as a file path, not as a single compound
            compounds = Path(compounds).read_text(encoding="utf8").splitlines()
        compounds = [c.strip() for c in compounds if c.strip()]
        if config is None:
            settings = Settings.load(NestedDotDict({}))
        elif isinstance(config, PurePath):
            settings = Settings.load(NestedDotDict.read_toml(config))
        elif isinstance(config, NestedDotDict):
            # Already the right container: load it instead of using the raw dict as
            # the settings object, which is what every later line expects
            settings = Settings.load(config)
        else:
            settings = Settings.load(NestedDotDict(config))
        settings.set()
        api = ChemblApi.wrap(Chembl)
        taxonomy = TaxonomyCaches.load(settings.taxon)
        hits = what.clazz(api, settings, taxonomy).find_all(compounds)
        # collapse over and sort the triples
        triples = sorted({hit.to_triple() for hit in hits})
        df = pd.DataFrame([
            pd.Series({f: getattr(h, f)
                       for f in what.clazz.hit_fields()}) for h in hits
        ])
        return df, triples
Пример #9
0
 def test_mocked(self):
     """Exercise a mocked API: keyed lookup, a missing key, and filter queries."""
     target_entrypoint = ChemblEntrypoint.mock({"DAT": {"x": ""}})
     mock_api = ChemblApi.mock({"target": target_entrypoint})
     expected = NestedDotDict({"x": ""})
     # keyed lookup returns the stored record as a NestedDotDict
     assert mock_api.target is not None
     assert mock_api.target.get("DAT") is not None
     assert isinstance(mock_api.target.get("DAT"), NestedDotDict)
     assert mock_api.target.get("DAT") == expected
     # an unknown key raises rather than returning None
     with pytest.raises(KeyError):
         assert mock_api.target.get("fasw")
     # filter() and only() both hand back a query object that iterates the records
     assert isinstance(mock_api.target.filter(), ChemblFilterQuery)
     assert isinstance(mock_api.target.filter().only([]), ChemblFilterQuery)
     listed = list(mock_api.target.filter().only([]))
     assert listed == [expected]
Пример #10
0
    def find(self, lookup: str) -> Sequence[H]:
        """
        Find every hit for a single compound.

        Args:
            lookup: The string to resolve with ``self.get_compound``.

        Returns:
            The concatenated output of ``self.process`` over every record that
            ``self.query`` returns for the resolved compound.
        """
        compound = self.get_compound(lookup)
        # each raw record is wrapped before it is handed to process()
        return [
            hit
            for raw in self.query(compound)
            for hit in self.process(lookup, compound, NestedDotDict(raw))
        ]
Пример #11
0
 def load(cls, data: NestedDotDict) -> Settings:
     """
     Build a ``Settings`` from a nested config, filling in defaults for missing keys.

     Args:
         data: The config; keys are read with dotted paths such as ``mandos.taxon``.

     Returns:
         A new ``Settings`` built positionally from the values below.
     """
     # The original carried a bare "117571" comment here; presumably an alternative
     # taxon ID to the 7742 default below -- TODO confirm
     # NOTE(review): the IN_CLI branch points at the test resources directory, which
     # looks inverted -- confirm against where IN_CLI is set
     if IN_CLI:
         resources = Path(__file__).parent.parent.parent / "tests" / "resources"
         default_cache = resources / ".mandos-cache"
     else:
         default_cache = Path.home() / ".mandos" / "chembl"
     # (key, converter, default); the row order is the positional order Settings takes
     spec = (
         ("is_testing", bool, False),
         ("mandos.taxon", int, 7742),
         ("mandos.min_pchembl", float, 6.0),
         ("mandos.min_confidence_score", int, 4),
         ("mandos.min_phase", int, 3),
         ("chembl.cache_path", Path, default_cache),
         ("chembl.n_retries", int, 1),
         ("chembl.fast_save", bool, True),
         ("chembl.timeout_sec", int, 1),
     )
     return Settings(*[data.get_as(key, kind, fallback) for key, kind, fallback in spec])
Пример #12
0
 def to_hit(self, lookup: str, compound: ChemblCompound,
            data: NestedDotDict, target: Target) -> Sequence[MechanismHit]:
     """
     Build the mechanism hit for one record and one target.

     Args:
         lookup: The string the compound was searched by.
         compound: The compound the mechanism belongs to.
         data: The raw mechanism record.
         target: The target that becomes the hit's object.

     Returns:
         A one-element list holding the hit.
     """
     # The keys must match the constructor of the hit. object_id and object_name
     # are the exception: they come from the target, which per the original
     # author is the result of traversal.
     fields = NestedDotDict({
         "record_id": data["mec_id"],
         "compound_id": compound.chid,
         "inchikey": compound.inchikey,
         "compound_name": compound.name,
         "compound_lookup": lookup,
         "action_type": data["action_type"],
         "direct_interaction": data["direct_interaction"],
         "description": data["mechanism_of_action"],
         "exact_target_id": data["target_chembl_id"],
     })
     hit = MechanismHit(**fields, object_id=target.chembl, object_name=target.name)
     return [hit]
Пример #13
0
 def _extract(self, lookup: str, compound: ChemblCompound,
              data: NestedDotDict) -> NestedDotDict:
     """
     Pull the fields shared by activity hits out of one activity record.

     Args:
         lookup: The string the compound was searched by.
         compound: The compound the activity belongs to.
         data: The raw activity record.

     Returns:
         A ``NestedDotDict`` of the extracted fields. The taxon ID and name come
         from the entry that ``self.tax.req`` returns for the record's
         ``target_tax_id``, not from the record itself.
     """
     # per the original author, the query guarantees these two fields exist
     organism_name = data.req_as("target_organism", str)
     taxon = self.tax.req(data.req_as("target_tax_id", int))
     if taxon.name != organism_name:
         # only logged: the taxonomy's name is the one that gets stored
         logger.warning(f"Target organism {organism_name} is not {taxon.name}")
     extracted = {
         "record_id": data.req_as("activity_id", str),
         "compound_id": compound.chid,
         "inchikey": compound.inchikey,
         "compound_name": compound.name,
         "compound_lookup": lookup,
         "taxon_id": taxon.id,
         "taxon_name": taxon.name,
         "pchembl": data.req_as("pchembl_value", float),
         "std_type": data.req_as("standard_type", str),
         "src_id": data.req_as("src_id", str),
         "exact_target_id": data.req_as("target_chembl_id", str),
     }
     return NestedDotDict(extracted)
Пример #14
0
 def _process(self, match: ProteinHit,
              target: NestedDotDict) -> Sequence[GoHit]:
     """
     Build one GO hit per distinct GO term cross-referenced by a target.

     Args:
         match: The protein hit the GO hits are derived from; its compound
             fields are copied onto every GO hit.
         target: The target record. Its ``target_components`` and their
             ``target_component_xrefs`` are scanned; either may be missing or None.

     Returns:
         One hit per distinct ``(xref_id, xref_name)`` pair whose ``xref_src_db``
         is ``"Go"`` followed by the capitalized name of ``self.go_type``, in the
         order the pairs first appear in the record.
     """
     # the label is the same for every xref, so build it once
     wanted_db = f"Go{self.go_type.name.capitalize()}"
     # A dict de-duplicates like a set but keeps insertion order, so the hits come
     # out in the same order on every run regardless of string hashing
     terms = {}
     if target.get("target_components") is not None:
         for comp in target["target_components"]:
             if comp.get("target_component_xrefs") is None:
                 continue
             for xref in comp["target_component_xrefs"]:
                 if xref["xref_src_db"] == wanted_db:
                     terms[(xref["xref_id"], xref["xref_name"])] = None
     return [
         GoHit(
             None,
             compound_id=match.compound_id,
             inchikey=match.inchikey,
             compound_lookup=match.compound_lookup,
             compound_name=match.compound_name,
             object_id=xref_id,
             object_name=xref_name,
             go_type=self.go_type.name,
             protein_hit=match,
         )
         for xref_id, xref_name in terms
     ]
Пример #15
0
 def from_toml_file(
         cls,
         path: PathLike,
         *,
         warn: Union[bool, Callable[[Response], Any]] = True) -> Notifier:
     """
     Build an instance from a TOML file.

     Args:
         path: The TOML file to read.
         warn: Passed through unchanged to ``from_dict``.

     Returns:
         Whatever ``cls.from_dict`` builds from the parsed file.
     """
     parsed = NestedDotDict.read_toml(path)
     return cls.from_dict(parsed, warn=warn)
Пример #16
0
 def __init__(self):
     """
     Load the TOML config and open a Valar handle; no model is set yet.

     The config path comes from the ``VALARDAGGER_CONFIG`` environment variable
     and falls back to ``/etc/valardagger.toml`` when it is not set.
     """
     fallback_path = Path("/etc", "valardagger.toml")
     # a str when taken from the environment, a Path when the fallback is used
     config_path = os.environ.get("VALARDAGGER_CONFIG", fallback_path)
     self.config = NestedDotDict.read_toml(config_path)
     self.valar = valarpy.Valar()
     self.model = None
Пример #17
0
 def test_empty(self):
     """Check that ``min_phase`` is 3 when loaded from ``settings-empty.toml``."""
     resource = get_test_resource("settings-empty.toml")
     settings = Settings.load(NestedDotDict.read_toml(resource))
     assert settings.min_phase == 3
Пример #18
0
 def get(self, arg: str) -> Optional[NestedDotDict]:
     """
     Return the item stored under ``arg``, wrapped in a ``NestedDotDict``.

     The lookup indexes the enclosing scope's ``get_items`` directly, so a missing
     key propagates whatever error that indexing raises.
     """
     raw = get_items[arg]
     return NestedDotDict(raw)
Пример #19
0
 def get(self, arg: str) -> Optional[NestedDotDict]:
     """
     Delegate to ``get`` on the enclosing scope's ``obj`` and wrap the result.

     NOTE(review): the annotation says Optional, but the result is always passed
     to ``NestedDotDict`` -- confirm what happens when ``obj.get`` returns None.
     """
     fetched = obj.get(arg)
     return NestedDotDict(fetched)
Пример #20
0
 def __getitem__(self, item: int) -> NestedDotDict:
     """Return element ``item`` of the enclosing scope's ``query``, wrapped in a ``NestedDotDict``."""
     raw = query[item]
     return NestedDotDict(raw)
Пример #21
0
 def __iter__(self) -> Iterator[NestedDotDict]:
     """
     Iterate over the enclosing scope's ``query``, each element wrapped in a ``NestedDotDict``.

     Every element is wrapped up front, before the iterator is returned.
     """
     wrapped = list(map(NestedDotDict, query))
     return iter(wrapped)