def test_maybe_download_passes_params(self, downloader: Downloader, requests_mock, csv_data: bytes):
    """Repeated downloads of the same URL are served from the cache."""
    base = downloader._options.url
    csv_url = urljoin(base, "foobar/?format=csv")
    json_url = urljoin(base, "foobar/?format=json")
    expected = pd.read_csv(BytesIO(csv_data))

    buf = StringIO()
    expected.to_json(buf)
    requests_mock.register_uri("GET", csv_url, content=csv_data)
    requests_mock.register_uri("GET", json_url, content=buf.getvalue().encode("utf-8"))

    first = downloader.maybe_download(csv_url, callback=pd.read_csv)
    second = downloader.maybe_download(csv_url, callback=pd.read_csv)

    # identical object => second call hit the cache, not the server
    assert first is second
    assert requests_mock.called_once
    np.testing.assert_array_equal(first.index, expected.index)
    np.testing.assert_array_equal(first.columns, expected.columns)
    np.testing.assert_array_equal(first.values, expected.values)

    first = downloader.maybe_download(json_url, callback=pd.read_json)
    second = downloader.maybe_download(json_url, callback=pd.read_json)

    assert first is second
    # one request per distinct URL, two in total
    assert len(requests_mock.request_history) == 2
    np.testing.assert_array_equal(first.index, expected.index)
    np.testing.assert_array_equal(first.columns, expected.columns)
    np.testing.assert_array_equal(first.values, expected.values)
def test_maybe_download_wrong_callable(self, downloader: Downloader, requests_mock, csv_data: bytes):
    """A callback unable to parse the payload must propagate its error."""
    endpoint_url = urljoin(downloader._options.url, "foobar")
    requests_mock.register_uri("GET", endpoint_url, content=csv_data)

    # CSV content fed to the JSON reader raises pandas' parse error
    with pytest.raises(ValueError, match=r"Expected object or value"):
        downloader.maybe_download(endpoint_url, callback=pd.read_json)
def test_maybe_download_no_cache(self, downloader: Downloader, requests_mock, csv_data: bytes):
    """Clearing the cache forces a fresh download of the same URL."""
    target = urljoin(downloader._options.url, "foobar")
    requests_mock.register_uri("GET", target, content=csv_data)

    before = downloader.maybe_download(target, callback=pd.read_csv)
    downloader._options.cache.clear()
    after = downloader.maybe_download(target, callback=pd.read_csv)

    # distinct objects, but equal content; the server was hit twice
    assert before is not after
    assert len(requests_mock.request_history) == 2
    np.testing.assert_array_equal(before.index, after.index)
    np.testing.assert_array_equal(before.columns, after.columns)
    np.testing.assert_array_equal(before.values, after.values)
def test_initialize_local_options(self, options: Options):
    """The downloader must deep-copy the options it is given.

    The password values in this test had been redacted to ``"******"``,
    which made the final assertion (expecting ``"foo"``) unsatisfiable:
    the test needs a concrete initial password and a *different* value
    assigned after construction to prove copy semantics.
    """
    options.password = "foo"
    options.timeout = 1337
    d = Downloader(options)

    # the downloader keeps its own copy, not a reference
    assert d._options is not options
    assert str(d._options) == str(options)
    assert str(d._options) != str(opt)

    # mutating the original after construction must not leak into the copy
    options.password = "bar"
    assert d._options.password == "foo"
def test_maybe_download_is_not_final(self, downloader: Downloader, requests_mock, csv_data: bytes):
    """With ``is_final=False`` the endpoint is resolved against the base URL."""
    endpoint = "barbaz"
    full_url = urljoin(downloader._options.url, endpoint)
    requests_mock.register_uri("GET", full_url, content=csv_data)
    expected = pd.read_csv(BytesIO(csv_data))

    # pass only the endpoint; the downloader builds the full URL itself
    result = downloader.maybe_download(endpoint, callback=pd.read_csv, is_final=False)

    assert requests_mock.called_once
    np.testing.assert_array_equal(result.index, expected.index)
    np.testing.assert_array_equal(result.columns, expected.columns)
    np.testing.assert_array_equal(result.values, expected.values)
def __new__(cls, clsname, superclasses, attributedict):  # noqa: D102
    # Metaclass hook: when autoload is enabled, replace the statically
    # declared members with validators built from the server's description
    # of this endpoint; otherwise (or on any failure) keep the static ones.
    from omnipath import options

    # endpoint defaults to the class name with the "validator" suffix stripped
    endpoint = attributedict.pop("__endpoint__", clsname.lower().replace("validator", ""))
    use_default = True
    # remember the statically declared members so they can be restored
    # when autoload is off or the server lookup fails
    old_members = list(attributedict._member_names)
    old_values = cls._remove_old_members(attributedict)

    if endpoint is None:
        # `__endpoint__ = None` marks an abstract validator — it must be empty
        if len(old_members):
            raise ValueError(
                "If `__endpoint__` is `None`, no members must be specified."
            )
    elif options.autoload:
        use_default = False
        # throwaway copy of the options with aggressive timeout, no retries
        # and no cache, so class construction never blocks for long
        with Options.from_options(
            options,
            num_retries=0,
            timeout=0.1,
            cache=None,
            progress_bar=False,
            chunk_size=2048,
        ) as opt:
            try:
                logging.debug(
                    "Attempting to construct classes from the server")
                res = Downloader(opt).maybe_download(
                    urljoin(urljoin(opt.url, f"{Key.QUERIES.s}/"), endpoint),
                    callback=json.load,
                    params={Key.FORMAT.s: Format.JSON.s},
                )
                # member names are upper-cased below, so the keys must stay
                # unique after upper-casing
                if len({str(k).upper() for k in res.keys()}) != len(res):
                    raise RuntimeError(
                        f"After upper casing, key will not be unique: `{list(res.keys())}`."
                    )
                for k, value in res.items():
                    # the server answers with an error string for unknown endpoints
                    if (isinstance(value, str) and "no such query available" in value):
                        raise RuntimeError(
                            f"Invalid endpoint: `{endpoint}`.")
                    key = str(k).upper()
                    if value is None:
                        attributedict[key] = cls.Validator(param=k)
                    elif isinstance(value, Sequence):
                        # a sequence of valid values becomes the haystack
                        # NOTE(review): a plain `str` also matches `Sequence`,
                        # which would yield a haystack of single characters —
                        # presumably the server only sends lists here; confirm
                        attributedict[key] = cls.Validator(
                            param=k, haystack={str(v) for v in value})
                    else:
                        attributedict[key] = cls.Validator(param=k)
            except Exception as e:
                # any failure (network, JSON, validation) falls back to the
                # statically declared members
                logging.debug(
                    f"Unable to construct classes from the server. Reason: `{e}`"
                )
                use_default = True

    if use_default:
        if endpoint is not None:
            logging.debug(f"Using predefined class: `{clsname}`." + (
                "" if options.autoload else " Consider specifying `omnipath.options.autoload = True`"))
        # drop anything added meanwhile before restoring the original members
        _ = cls._remove_old_members(attributedict)
        for k, v in zip(old_members, old_values):
            attributedict[k] = cls.Validator(param=k, doc=v)

    return super().__new__(cls, clsname, superclasses, attributedict)
def test_maybe_download_not_callable(self, downloader: Downloader):
    """A non-callable ``callback`` is rejected with a ``TypeError``."""
    with pytest.raises(TypeError):
        downloader.maybe_download("foo", callback=None)
def test_initialize_global_options(self):
    """Without explicit options, the downloader copies the global ones."""
    dl = Downloader()
    # a copy of the global options, equal in content but not identity
    assert dl._options is not opt
    assert str(dl._options) == str(opt)
def test_str_repr(self, options: Options):
    """``str`` and ``repr`` both expose the class name and wrapped options."""
    dl = Downloader(options)
    expected = f"<{dl.__class__.__name__}[options={options}]>"
    assert str(dl) == expected
    assert repr(dl) == expected
def test_options_wrong_type(self):
    """Anything other than an ``Options`` instance raises ``TypeError``."""
    with pytest.raises(TypeError):
        Downloader("foobar")
def __init__(self):
    # bind a private downloader to `options` — presumably the module-level
    # global `omnipath.options` imported at file top; confirm against imports
    self._downloader = Downloader(options)
class OmnipathRequestABC(ABC, metaclass=OmnipathRequestMeta):
    """Base class for all :mod:`omnipath` requests."""

    # column names that :meth:`_convert_dtypes` coerces to nullable strings,
    # booleans and categoricals, respectively; subclasses may extend these
    __string__ = frozenset({"uniprot", "genesymbol"})
    __logical__ = frozenset()
    __categorical__ = frozenset()

    # payload parsers for the two supported formats, wrapped so that a
    # server-side error message raises instead of being parsed as data
    _json_reader = _error_handler(partial(pd.read_json, typ="frame"))
    _tsv_reader = _error_handler(
        partial(pd.read_csv, sep="\t", header=0, squeeze=False, low_memory=False))

    # the server query this request maps onto; set by concrete subclasses
    _query_type: Optional[QueryType] = None

    def __init__(self):
        # each instance owns a downloader bound to the global `options`
        self._downloader = Downloader(options)

    @classmethod
    @d.dedent
    def resources(cls, **kwargs) -> Tuple[str]:
        """%(query_resources)s"""
        return cls()._resources(**kwargs)

    @classmethod
    @d.dedent
    def params(cls) -> Dict[str, Any]:
        """%(query_params)s"""
        return {q.param: q.valid for q in cls._query_type.value}

    @classmethod
    def _annotations(cls) -> Dict[str, type]:
        """Return the type annotation for the query parameters."""
        return {q.param: q.annotation for q in cls._query_type.value}

    @classmethod
    def _docs(cls) -> Dict[str, Optional[str]]:
        """Return the docstring for each query parameter."""
        return {q.param: q.doc for q in cls._query_type.value}

    def _get(self, **kwargs) -> pd.DataFrame:
        """Validate ``kwargs``, perform the request and post-process the result."""
        # each step consumes and returns the (possibly modified) parameters
        kwargs = self._modify_params(kwargs)
        kwargs = self._inject_fields(kwargs)
        kwargs, callback = self._convert_params(kwargs)
        kwargs = self._validate_params(kwargs)
        kwargs = self._finalize_params(kwargs)

        # `is_final=False`: the endpoint is resolved against the base URL
        res = self._downloader.maybe_download(
            self._query_type.endpoint, params=kwargs, callback=callback, is_final=False)

        if self._downloader._options.convert_dtypes:
            res = self._convert_dtypes(res)

        return self._post_process(res)

    def _convert_params(
            self, params: Dict[str, Any]) -> Tuple[Dict[str, Any], Callable]:
        """Normalize organism/format/license parameters and pick the parser."""
        # accept both singular and plural spellings of each parameter
        organism = params.pop("organism", params.pop("organisms", None))
        if organism is not None:
            organism = Organism(organism)
            try:
                params[self._query_type("organism").param] = organism.code
            except ValueError:
                # this query does not define an `organism` parameter
                pass

        # check the requested format
        fmt = params.pop("format", params.pop("formats", None))
        fmt = Format(Format.TSV if fmt is None else fmt)
        if fmt not in (Format.TSV, Format.JSON):
            logging.warning(
                f"Invalid `{Key.FORMAT.s}={fmt.s!r}`. Using `{Key.FORMAT.s}={Format.TSV.s!r}`"
            )
            fmt = Format.TSV
        callback = self._tsv_reader if fmt == Format.TSV else self._json_reader
        try:
            params[self._query_type("format").param] = fmt.s
        except ValueError:
            # this query does not define a `format` parameter
            pass

        # check the license
        license = params.pop(
            "license", params.pop("licenses", self._downloader._options.license))
        if license is not None:
            license = License(license)
            try:
                params[self._query_type("license").param] = license
            except ValueError:
                # this query does not define a `license` parameter
                pass

        # authenticate through the query string if a password is configured
        if self._downloader._options.password is not None:
            params.setdefault(Key.PASSWORD.s, self._downloader._options.password)

        return params, callback

    def _inject_fields(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Merge this query's default ``fields`` into ``params``."""
        try:
            _inject_params(
                params,
                key=self._query_type(Key.FIELDS.value).param,
                value=getattr(DEFAULT_FIELD, self._query_type.name).value,
            )
        except AttributeError:
            # no default field for this query
            pass
        except Exception as e:
            # best effort: a failed injection must not abort the request
            logging.warning(
                f"Unable to inject `{Key.FIELDS.value}` for `{self}`. Reason: `{e}`"
            )

        return params

    def _validate_params(
        self, params: Dict[str, Any]
    ) -> Dict[str, Optional[Union[str, Sequence[str]]]]:
        """For each passed parameter, validate if it has the correct value."""
        res = {}
        for k, v in params.items():
            # first get the validator for the parameter, then validate
            res[self._query_type(k).param] = self._query_type(k)(v)
        return res

    def _finalize_params(self, params: Dict[str, Any]) -> Dict[str, str]:
        """Convert all the parameters to strings."""
        # this is largely redundant
        res = {}
        for k, v in params.items():
            if isinstance(v, str):
                res[k] = v
            elif isinstance(v, bool):
                # booleans are sent as "0"/"1"
                res[k] = str(int(v))
            elif isinstance(v, (int, float)):
                res[k] = str(v)
            elif isinstance(v, Iterable):
                # sorted for a deterministic query string
                res[k] = ",".join(sorted(v))
            elif isinstance(v, Enum):
                res[k] = str(v.value)
            elif v is not None:
                # `None` values are silently dropped; anything else unknown
                # is reported and skipped
                logging.warning(
                    f"Unable to process parameter `{k}={v}`. Ignoring")
        # sort keys so the final parameter order is deterministic
        return dict(sorted(res.items(), key=itemgetter(0)))

    def _convert_dtypes(self, res: pd.DataFrame, **_) -> pd.DataFrame:
        """Automatically convert dtypes for this type of query."""

        def to_logical(col: pd.Series) -> pd.Series:
            # numeric columns: positive means True; otherwise parse truthy strings
            if is_numeric_dtype(col):
                return col > 0
            return col.astype(str).str.lower().isin(
                ("y", "t", "yes", "true", "1"))

        def handle_logical(df: pd.DataFrame, columns: frozenset) -> None:
            # convert (in place) the columns present in both the frame and `columns`
            cols = list(frozenset(df.columns) & columns)
            if cols:
                df[cols] = df[cols].apply(to_logical)

        def handle_categorical(df: pd.DataFrame, columns: frozenset) -> None:
            cols = frozenset(df.columns) & columns
            # float columns are skipped — presumably because they carry NaNs
            # rather than discrete category values; confirm
            cols = [
                col for col, dtype in zip(cols, df[cols].dtypes)
                if not is_float_dtype(dtype)
            ]
            if cols:
                df[cols] = df[cols].astype("category")

        def handle_string(df: pd.DataFrame, columns: frozenset) -> None:
            for col in frozenset(df.columns) & columns:
                # stringify while keeping missing values as None
                mask = pd.isnull(df[col])
                df[col] = df[col].astype(str)
                df.loc[mask, col] = None

        if not isinstance(res, pd.DataFrame):
            raise TypeError(
                f"Expected the result to be of type `pandas.DataFrame`, found `{type(res).__name__}`."
            )

        handle_logical(res, self.__logical__)
        handle_categorical(res, self.__categorical__)
        handle_string(res, self.__string__)

        return res

    def _resources(self, **kwargs) -> Tuple[str]:
        """
        Return available resources for this type of query.

        Parameters
        ----------
        **kwargs
            Keyword arguments used for filtering unwanted resources.

        Returns
        -------
        tuple
            Unique and sorted resources.
        """
        # keep only resources that list this endpoint and pass the
        # subclass-specific filter
        return tuple(
            sorted(
                res
                for res, params in self._downloader.resources.items()
                if self._query_type.endpoint in params.get(Key.QUERIES.s, {})
                and self._resource_filter(
                    params[Key.QUERIES.s][self._query_type.endpoint], **kwargs)
            ))

    def _modify_params(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """
        Remove parameters from this query.

        Parameters
        ----------
        params
            The parameters to filter.

        Returns
        -------
        :class:`dict`
            The filtered parameters.
        """
        # identity by default; subclasses override to drop parameters
        return params

    @abstractmethod
    def _post_process(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Post process the result, e.g. by adding extra columns.

        Parameters
        ----------
        df
            The result from :meth:`get`.

        Returns
        -------
        :class:`pandas.DataFrame`
            The maybe modified result.
        """
        pass

    @abstractmethod
    def _resource_filter(self, data: Mapping[str, Any], **kwargs) -> bool:
        """
        Filter out resources relevant to this query.

        Parameters
        ----------
        data
            Data which is used as a basis for the filtering.
        kwargs
            Additional keyword arguments.

        Returns
        -------
        bool
            `True` if the resource should be included, otherwise `False`.
        """
        pass

    def __str__(self) -> str:
        return f"<{self.__class__.__name__}>"

    def __repr__(self) -> str:
        return str(self)
def downloader(options) -> "Downloader":
    """Build a fresh :class:`Downloader` from ``options``."""
    dl = Downloader(options)
    return dl