def test_maybe_download_passes_params(self, downloader: Downloader, requests_mock, csv_data: bytes):
    """Repeated downloads of the same URL are served from the cache."""
    base = downloader._options.url
    csv_url = urljoin(base, "foobar/?format=csv")
    json_url = urljoin(base, "foobar/?format=json")
    expected = pd.read_csv(BytesIO(csv_data))

    buf = StringIO()
    expected.to_json(buf)
    requests_mock.register_uri("GET", csv_url, content=csv_data)
    requests_mock.register_uri("GET", json_url, content=buf.getvalue().encode("utf-8"))

    first = downloader.maybe_download(csv_url, callback=pd.read_csv)
    second = downloader.maybe_download(csv_url, callback=pd.read_csv)

    # identical object => second call hit the cache, not the server
    assert first is second
    assert requests_mock.called_once
    np.testing.assert_array_equal(first.index, expected.index)
    np.testing.assert_array_equal(first.columns, expected.columns)
    np.testing.assert_array_equal(first.values, expected.values)

    first = downloader.maybe_download(json_url, callback=pd.read_json)
    second = downloader.maybe_download(json_url, callback=pd.read_json)

    assert first is second
    # one request per distinct URL, two in total
    assert len(requests_mock.request_history) == 2
    np.testing.assert_array_equal(first.index, expected.index)
    np.testing.assert_array_equal(first.columns, expected.columns)
    np.testing.assert_array_equal(first.values, expected.values)
def test_maybe_download_wrong_callable(self, downloader: Downloader, requests_mock, csv_data: bytes):
    """A callback unable to parse the payload must propagate its error."""
    endpoint_url = urljoin(downloader._options.url, "foobar")
    requests_mock.register_uri("GET", endpoint_url, content=csv_data)

    # CSV content fed to the JSON reader raises pandas' parse error
    with pytest.raises(ValueError, match=r"Expected object or value"):
        downloader.maybe_download(endpoint_url, callback=pd.read_json)
def test_maybe_download_no_cache(self, downloader: Downloader, requests_mock, csv_data: bytes):
    """Clearing the cache forces a fresh download of the same URL."""
    target = urljoin(downloader._options.url, "foobar")
    requests_mock.register_uri("GET", target, content=csv_data)

    before = downloader.maybe_download(target, callback=pd.read_csv)
    downloader._options.cache.clear()
    after = downloader.maybe_download(target, callback=pd.read_csv)

    # distinct objects, but equal content; the server was hit twice
    assert before is not after
    assert len(requests_mock.request_history) == 2
    np.testing.assert_array_equal(before.index, after.index)
    np.testing.assert_array_equal(before.columns, after.columns)
    np.testing.assert_array_equal(before.values, after.values)
def test_initialize_local_options(self, options: Options):
    """The downloader must deep-copy the options it is given.

    The password values in this test had been redacted to ``"******"``,
    which made the final assertion (expecting ``"foo"``) unsatisfiable:
    the test needs a concrete initial password and a *different* value
    assigned after construction to prove copy semantics.
    """
    options.password = "foo"
    options.timeout = 1337
    d = Downloader(options)

    # the downloader keeps its own copy, not a reference
    assert d._options is not options
    assert str(d._options) == str(options)
    assert str(d._options) != str(opt)

    # mutating the original after construction must not leak into the copy
    options.password = "bar"
    assert d._options.password == "foo"
def test_maybe_download_is_not_final(self, downloader: Downloader, requests_mock, csv_data: bytes):
    """With ``is_final=False`` the endpoint is resolved against the base URL."""
    endpoint = "barbaz"
    full_url = urljoin(downloader._options.url, endpoint)
    requests_mock.register_uri("GET", full_url, content=csv_data)
    expected = pd.read_csv(BytesIO(csv_data))

    # pass only the endpoint; the downloader builds the full URL itself
    result = downloader.maybe_download(endpoint, callback=pd.read_csv, is_final=False)

    assert requests_mock.called_once
    np.testing.assert_array_equal(result.index, expected.index)
    np.testing.assert_array_equal(result.columns, expected.columns)
    np.testing.assert_array_equal(result.values, expected.values)
def __new__(cls, clsname, superclasses, attributedict):  # noqa: D102
    # Metaclass hook: when autoload is enabled, replace the statically
    # declared members with validators built from the server's description
    # of this endpoint; otherwise (or on any failure) keep the static ones.
    from omnipath import options

    # endpoint defaults to the class name with the "validator" suffix stripped
    endpoint = attributedict.pop("__endpoint__", clsname.lower().replace("validator", ""))
    use_default = True
    # remember the statically declared members so they can be restored
    # when autoload is off or the server lookup fails
    old_members = list(attributedict._member_names)
    old_values = cls._remove_old_members(attributedict)

    if endpoint is None:
        # `__endpoint__ = None` marks an abstract validator — it must be empty
        if len(old_members):
            raise ValueError(
                "If `__endpoint__` is `None`, no members must be specified."
            )
    elif options.autoload:
        use_default = False
        # throwaway copy of the options with aggressive timeout, no retries
        # and no cache, so class construction never blocks for long
        with Options.from_options(
            options,
            num_retries=0,
            timeout=0.1,
            cache=None,
            progress_bar=False,
            chunk_size=2048,
        ) as opt:
            try:
                logging.debug(
                    "Attempting to construct classes from the server")
                res = Downloader(opt).maybe_download(
                    urljoin(urljoin(opt.url, f"{Key.QUERIES.s}/"), endpoint),
                    callback=json.load,
                    params={Key.FORMAT.s: Format.JSON.s},
                )
                # member names are upper-cased below, so the keys must stay
                # unique after upper-casing
                if len({str(k).upper() for k in res.keys()}) != len(res):
                    raise RuntimeError(
                        f"After upper casing, key will not be unique: `{list(res.keys())}`."
                    )
                for k, value in res.items():
                    # the server answers with an error string for unknown endpoints
                    if (isinstance(value, str) and "no such query available" in value):
                        raise RuntimeError(
                            f"Invalid endpoint: `{endpoint}`.")
                    key = str(k).upper()
                    if value is None:
                        attributedict[key] = cls.Validator(param=k)
                    elif isinstance(value, Sequence):
                        # a sequence of valid values becomes the haystack
                        # NOTE(review): a plain `str` also matches `Sequence`,
                        # which would yield a haystack of single characters —
                        # presumably the server only sends lists here; confirm
                        attributedict[key] = cls.Validator(
                            param=k, haystack={str(v) for v in value})
                    else:
                        attributedict[key] = cls.Validator(param=k)
            except Exception as e:
                # any failure (network, JSON, validation) falls back to the
                # statically declared members
                logging.debug(
                    f"Unable to construct classes from the server. Reason: `{e}`"
                )
                use_default = True

    if use_default:
        if endpoint is not None:
            logging.debug(f"Using predefined class: `{clsname}`." + (
                "" if options.autoload else " Consider specifying `omnipath.options.autoload = True`"))
        # drop anything added meanwhile before restoring the original members
        _ = cls._remove_old_members(attributedict)
        for k, v in zip(old_members, old_values):
            attributedict[k] = cls.Validator(param=k, doc=v)

    return super().__new__(cls, clsname, superclasses, attributedict)
def test_maybe_download_not_callable(self, downloader: Downloader):
    """A non-callable ``callback`` is rejected with a ``TypeError``."""
    with pytest.raises(TypeError):
        downloader.maybe_download("foo", callback=None)
def test_initialize_global_options(self):
    """Without explicit options, the downloader copies the global ones."""
    dl = Downloader()
    # a copy of the global options, equal in content but not identity
    assert dl._options is not opt
    assert str(dl._options) == str(opt)
def test_str_repr(self, options: Options):
    """``str`` and ``repr`` both expose the class name and wrapped options."""
    dl = Downloader(options)
    expected = f"<{dl.__class__.__name__}[options={options}]>"
    assert str(dl) == expected
    assert repr(dl) == expected
def test_options_wrong_type(self):
    """Anything other than an ``Options`` instance raises ``TypeError``."""
    with pytest.raises(TypeError):
        Downloader("foobar")
def __init__(self):
    # bind a private downloader to `options` — presumably the module-level
    # global `omnipath.options` imported at file top; confirm against imports
    self._downloader = Downloader(options)
class OmnipathRequestABC(ABC, metaclass=OmnipathRequestMeta):
    """Base class for all :mod:`omnipath` requests."""

    # column names that :meth:`_convert_dtypes` coerces to nullable strings,
    # booleans and categoricals, respectively; subclasses may extend these
    __string__ = frozenset({"uniprot", "genesymbol"})
    __logical__ = frozenset()
    __categorical__ = frozenset()

    # payload parsers for the two supported formats, wrapped so that a
    # server-side error message raises instead of being parsed as data
    _json_reader = _error_handler(partial(pd.read_json, typ="frame"))
    _tsv_reader = _error_handler(
        partial(pd.read_csv, sep="\t", header=0, squeeze=False, low_memory=False))

    # the server query this request maps onto; set by concrete subclasses
    _query_type: Optional[QueryType] = None

    def __init__(self):
        # each instance owns a downloader bound to the global `options`
        self._downloader = Downloader(options)

    @classmethod
    @d.dedent
    def resources(cls, **kwargs) -> Tuple[str]:
        """%(query_resources)s"""
        return cls()._resources(**kwargs)

    @classmethod
    @d.dedent
    def params(cls) -> Dict[str, Any]:
        """%(query_params)s"""
        return {q.param: q.valid for q in cls._query_type.value}

    @classmethod
    def _annotations(cls) -> Dict[str, type]:
        """Return the type annotation for the query parameters."""
        return {q.param: q.annotation for q in cls._query_type.value}

    @classmethod
    def _docs(cls) -> Dict[str, Optional[str]]:
        """Return the docstring for each query parameter."""
        return {q.param: q.doc for q in cls._query_type.value}

    def _get(self, **kwargs) -> pd.DataFrame:
        """Validate ``kwargs``, perform the request and post-process the result."""
        # each step consumes and returns the (possibly modified) parameters
        kwargs = self._modify_params(kwargs)
        kwargs = self._inject_fields(kwargs)
        kwargs, callback = self._convert_params(kwargs)
        kwargs = self._validate_params(kwargs)
        kwargs = self._finalize_params(kwargs)

        # `is_final=False`: the endpoint is resolved against the base URL
        res = self._downloader.maybe_download(
            self._query_type.endpoint, params=kwargs, callback=callback, is_final=False)

        if self._downloader._options.convert_dtypes:
            res = self._convert_dtypes(res)

        return self._post_process(res)

    def _convert_params(
            self, params: Dict[str, Any]) -> Tuple[Dict[str, Any], Callable]:
        """Normalize organism/format/license parameters and pick the parser."""
        # accept both singular and plural spellings of each parameter
        organism = params.pop("organism", params.pop("organisms", None))
        if organism is not None:
            organism = Organism(organism)
            try:
                params[self._query_type("organism").param] = organism.code
            except ValueError:
                # this query does not define an `organism` parameter
                pass

        # check the requested format
        fmt = params.pop("format", params.pop("formats", None))
        fmt = Format(Format.TSV if fmt is None else fmt)
        if fmt not in (Format.TSV, Format.JSON):
            logging.warning(
                f"Invalid `{Key.FORMAT.s}={fmt.s!r}`. Using `{Key.FORMAT.s}={Format.TSV.s!r}`"
            )
            fmt = Format.TSV
        callback = self._tsv_reader if fmt == Format.TSV else self._json_reader
        try:
            params[self._query_type("format").param] = fmt.s
        except ValueError:
            # this query does not define a `format` parameter
            pass

        # check the license
        license = params.pop(
            "license", params.pop("licenses", self._downloader._options.license))
        if license is not None:
            license = License(license)
            try:
                params[self._query_type("license").param] = license
            except ValueError:
                # this query does not define a `license` parameter
                pass

        # authenticate through the query string if a password is configured
        if self._downloader._options.password is not None:
            params.setdefault(Key.PASSWORD.s, self._downloader._options.password)

        return params, callback

    def _inject_fields(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Merge this query's default ``fields`` into ``params``."""
        try:
            _inject_params(
                params,
                key=self._query_type(Key.FIELDS.value).param,
                value=getattr(DEFAULT_FIELD, self._query_type.name).value,
            )
        except AttributeError:
            # no default field for this query
            pass
        except Exception as e:
            # best effort: a failed injection must not abort the request
            logging.warning(
                f"Unable to inject `{Key.FIELDS.value}` for `{self}`. Reason: `{e}`"
            )

        return params

    def _validate_params(
        self, params: Dict[str, Any]
    ) -> Dict[str, Optional[Union[str, Sequence[str]]]]:
        """For each passed parameter, validate if it has the correct value."""
        res = {}
        for k, v in params.items():
            # first get the validator for the parameter, then validate
            res[self._query_type(k).param] = self._query_type(k)(v)
        return res

    def _finalize_params(self, params: Dict[str, Any]) -> Dict[str, str]:
        """Convert all the parameters to strings."""
        # this is largely redundant
        res = {}
        for k, v in params.items():
            if isinstance(v, str):
                res[k] = v
            elif isinstance(v, bool):
                # booleans are sent as "0"/"1"
                res[k] = str(int(v))
            elif isinstance(v, (int, float)):
                res[k] = str(v)
            elif isinstance(v, Iterable):
                # sorted for a deterministic query string
                res[k] = ",".join(sorted(v))
            elif isinstance(v, Enum):
                res[k] = str(v.value)
            elif v is not None:
                # `None` values are silently dropped; anything else unknown
                # is reported and skipped
                logging.warning(
                    f"Unable to process parameter `{k}={v}`. Ignoring")
        # sort keys so the final parameter order is deterministic
        return dict(sorted(res.items(), key=itemgetter(0)))

    def _convert_dtypes(self, res: pd.DataFrame, **_) -> pd.DataFrame:
        """Automatically convert dtypes for this type of query."""

        def to_logical(col: pd.Series) -> pd.Series:
            # numeric columns: positive means True; otherwise parse truthy strings
            if is_numeric_dtype(col):
                return col > 0
            return col.astype(str).str.lower().isin(
                ("y", "t", "yes", "true", "1"))

        def handle_logical(df: pd.DataFrame, columns: frozenset) -> None:
            # convert (in place) the columns present in both the frame and `columns`
            cols = list(frozenset(df.columns) & columns)
            if cols:
                df[cols] = df[cols].apply(to_logical)

        def handle_categorical(df: pd.DataFrame, columns: frozenset) -> None:
            cols = frozenset(df.columns) & columns
            # float columns are skipped — presumably because they carry NaNs
            # rather than discrete category values; confirm
            cols = [
                col for col, dtype in zip(cols, df[cols].dtypes)
                if not is_float_dtype(dtype)
            ]
            if cols:
                df[cols] = df[cols].astype("category")

        def handle_string(df: pd.DataFrame, columns: frozenset) -> None:
            for col in frozenset(df.columns) & columns:
                # stringify while keeping missing values as None
                mask = pd.isnull(df[col])
                df[col] = df[col].astype(str)
                df.loc[mask, col] = None

        if not isinstance(res, pd.DataFrame):
            raise TypeError(
                f"Expected the result to be of type `pandas.DataFrame`, found `{type(res).__name__}`."
            )

        handle_logical(res, self.__logical__)
        handle_categorical(res, self.__categorical__)
        handle_string(res, self.__string__)

        return res

    def _resources(self, **kwargs) -> Tuple[str]:
        """
        Return available resources for this type of query.

        Parameters
        ----------
        **kwargs
            Keyword arguments used for filtering unwanted resources.

        Returns
        -------
        tuple
            Unique and sorted resources.
        """
        # keep only resources that list this endpoint and pass the
        # subclass-specific filter
        return tuple(
            sorted(
                res
                for res, params in self._downloader.resources.items()
                if self._query_type.endpoint in params.get(Key.QUERIES.s, {})
                and self._resource_filter(
                    params[Key.QUERIES.s][self._query_type.endpoint], **kwargs)
            ))

    def _modify_params(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """
        Remove parameters from this query.

        Parameters
        ----------
        params
            The parameters to filter.

        Returns
        -------
        :class:`dict`
            The filtered parameters.
        """
        # identity by default; subclasses override to drop parameters
        return params

    @abstractmethod
    def _post_process(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Post process the result, e.g. by adding extra columns.

        Parameters
        ----------
        df
            The result from :meth:`get`.

        Returns
        -------
        :class:`pandas.DataFrame`
            The maybe modified result.
        """
        pass

    @abstractmethod
    def _resource_filter(self, data: Mapping[str, Any], **kwargs) -> bool:
        """
        Filter out resources relevant to this query.

        Parameters
        ----------
        data
            Data which is used as a basis for the filtering.
        kwargs
            Additional keyword arguments.

        Returns
        -------
        bool
            `True` if the resource should be included, otherwise `False`.
        """
        pass

    def __str__(self) -> str:
        return f"<{self.__class__.__name__}>"

    def __repr__(self) -> str:
        return str(self)
def downloader(options) -> "Downloader":
    """Build a fresh :class:`Downloader` from ``options``."""
    dl = Downloader(options)
    return dl