    def _query(self, link: str = None) -> dict:
        """Query helper function"""
        # If a complete link is provided, use it straight up
        if link is not None:
            try:
                link = ordered_query_url(link)
                response = SESSION.get(link, timeout=TIMEOUT_SECONDS)
                if response.from_cache:
                    LOGGER.debug("Request to %s was taken from cache !", link)
                response = response.json()
            except (
                    requests.exceptions.ConnectTimeout,
                    requests.exceptions.ConnectionError,
                    requests.exceptions.ReadTimeout,
            ) as exc:
                response = {
                    "errors": {
                        "msg": "CLIENT: Connection error or timeout.",
                        "url": link,
                        "Exception": repr(exc),
                    }
                }
            except JSONDecodeError as exc:
                response = {
                    "errors": {
                        "msg": "CLIENT: Could not decode response to JSON.",
                        "url": link,
                        "Exception": repr(exc),
                    }
                }
            return response

        # Avoid structures with null positions and with assemblies.
        add_to_filter = 'NOT structure_features HAS ANY "assemblies"'
        if not self._uses_new_structure_features():
            add_to_filter += ',"unknown_positions"'

        optimade_filter = self.filters.collect_value()
        optimade_filter = ("( {} ) AND ( {} )".format(optimade_filter,
                                                      add_to_filter)
                           if optimade_filter and add_to_filter else
                           optimade_filter or add_to_filter or None)
        LOGGER.debug("Querying with filter: %s", optimade_filter)

        # OPTIMADE queries
        queries = {
            "base_url": self.database[1].base_url,
            "filter": optimade_filter,
            "page_limit": self.page_limit,
            "page_offset": self.offset,
            "page_number": self.number,
            "sort": self.sorting,
        }
        LOGGER.debug(
            "Parameters (excluding filter) sent to query util func: %s",
            {key: value
             for key, value in queries.items() if key != "filter"},
        )

        return perform_optimade_query(**queries)
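# The filter-combination step above collapses to a small piece of logic.
# A minimal standalone sketch of it (the helper name `combine_filters` is
# hypothetical and not part of optimade_client; it only illustrates how the
# user filter and the hard-coded exclusion filter are joined):
def combine_filters(user_filter, extra_filter):
    """Join two optional OPTIMADE filter fragments with AND."""
    if user_filter and extra_filter:
        return "( {} ) AND ( {} )".format(user_filter, extra_filter)
    return user_filter or extra_filter or None


assert (combine_filters("nelements>=2",
                        'NOT structure_features HAS ANY "assemblies"') ==
        '( nelements>=2 ) AND ( NOT structure_features HAS ANY "assemblies" )')
assert combine_filters("", "") is None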
Example #2
def test_ordered_query_url():
    """Check ordered_query_url().

    Test already-sorted URLs, making sure they come out exactly the same as they went in.
    """
    from optimade_client.utils import ordered_query_url

    normal_url = (
        "https://optimade.materialsproject.org/v1/structures?filter=%28+nelements%3E%3D1+AND+"
        "nelements%3C%3D9+AND+nsites%3E%3D1+AND+nsites%3C%3D444+%29+AND+%28+NOT+structure_features"
        "+HAS+ANY+%22assemblies%22+%29&page_limit=25&page_number=1&page_offset=30&response_format"
        "=json")
    multi_query_param_url = (
        "https://optimade.materialsproject.org/v1/structures?filter=%28+nelements%3E%3D1+AND+"
        "nelements%3C%3D9+AND+nsites%3E%3D1+AND+nsites%3C%3D444+%29+AND+%28+NOT+structure_features"
        "+HAS+ANY+%22assemblies%22+%29&page_limit=25&page_number=1&page_offset=30&response_format"
        "=json&response_format=xml")

    ordered_url = ordered_query_url(normal_url)
    assert ordered_url == normal_url

    ordered_url = ordered_query_url(multi_query_param_url)
    assert ordered_url == multi_query_param_url
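# The test above only checks that already-ordered URLs pass through unchanged.
# A minimal sketch of a compatible implementation, assuming the purpose of
# ordered_query_url() is to sort the query parameters so that logically
# identical URLs map to the same cached request; this is NOT the actual
# optimade_client.utils implementation, just the behaviour the test exercises:
import urllib.parse


def ordered_query_url_sketch(url: str) -> str:
    """Return `url` with its query parameters sorted alphabetically by key."""
    parsed = urllib.parse.urlsplit(url)
    # parse_qsl keeps repeated keys (e.g. two response_format values).
    params = urllib.parse.parse_qsl(parsed.query, keep_blank_values=True)
    ordered_query = urllib.parse.urlencode(sorted(params))
    return urllib.parse.urlunsplit(parsed._replace(query=ordered_query))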
Example #3
    def _query(  # pylint: disable=too-many-locals,too-many-branches,too-many-statements
            self,
            link: str = None,
            exclude_ids: List[str] = None) -> Tuple[List[dict], dict, int,
                                                    int]:
        """Query helper function"""
        # If a complete link is provided, use it straight up
        if link is not None:
            try:
                if exclude_ids:
                    filter_value = " AND ".join(
                        [f'NOT id="{id_}"' for id_ in exclude_ids])

                    parsed_url = urllib.parse.urlparse(link)
                    queries = urllib.parse.parse_qs(parsed_url.query)
                    # Since parse_qs wraps all values in a list,
                    # this extracts the values from the list(s).
                    queries = {key: value[0] for key, value in queries.items()}

                    if "filter" in queries:
                        queries[
                            "filter"] = f"( {queries['filter']} ) AND ( {filter_value} )"
                    else:
                        queries["filter"] = filter_value

                    parsed_query = urllib.parse.urlencode(queries)

                    link = (
                        f"{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path}"
                        f"?{parsed_query}")

                link = ordered_query_url(link)
                response = SESSION.get(link, timeout=TIMEOUT_SECONDS)
                if response.from_cache:
                    LOGGER.debug("Request to %s was taken from cache !", link)
                response = response.json()
            except (
                    requests.exceptions.ConnectTimeout,
                    requests.exceptions.ConnectionError,
                    requests.exceptions.ReadTimeout,
            ) as exc:
                response = {
                    "errors": {
                        "msg": "CLIENT: Connection error or timeout.",
                        "url": link,
                        "Exception": repr(exc),
                    }
                }
            except json.JSONDecodeError as exc:
                response = {
                    "errors": {
                        "msg": "CLIENT: Could not decode response to JSON.",
                        "url": link,
                        "Exception": repr(exc),
                    }
                }
        else:
            filter_ = '( link_type="child" OR type="child" )'
            if exclude_ids:
                filter_ += (
                    " AND ( " +
                    " AND ".join([f'NOT id="{id_}"'
                                  for id_ in exclude_ids]) + " )")

            response = perform_optimade_query(
                filter=filter_,
                base_url=self.provider.base_url,
                endpoint="/links",
                page_limit=self.child_db_limit,
                page_offset=self.offset,
                page_number=self.number,
            )
        msg, http_errors = handle_errors(response)
        if msg:
            if 404 in http_errors:
                # If /links is not found, move on
                pass
            else:
                self.error_or_status_messages.value = msg
                raise QueryError(msg=msg, remove_target=True)

        # Check implementation API version
        msg = validate_api_version(
            response.get("meta", {}).get("api_version", ""),
            raise_on_fail=False)
        if msg:
            self.error_or_status_messages.value = (
                f"{msg}<br>The provider has been removed.")
            raise QueryError(msg=msg, remove_target=True)

        LOGGER.debug(
            "Manually remove `exclude_ids` if filters are not supported")
        child_db_data = {
            impl.get("id", "N/A"): impl
            for impl in response.get("data", [])
        }
        if exclude_ids:
            for links_id in exclude_ids:
                if links_id in child_db_data:
                    child_db_data.pop(links_id)
            LOGGER.debug("child_db_data after popping: %r", child_db_data)
            response["data"] = list(child_db_data.values())
            if "meta" in response:
                if "data_available" in response["meta"]:
                    old_data_available = response["meta"].get(
                        "data_available", 0)
                    if len(response["data"]) > old_data_available:
                        LOGGER.debug("raising OptimadeClientError")
                        raise OptimadeClientError(
                            f"Reported data_available ({old_data_available}) is smaller than "
                            f"curated list of responses ({len(response['data'])}).",
                        )
                response["meta"]["data_available"] = len(response["data"])
            else:
                raise OptimadeClientError(
                    "Bad response: 'meta' not found in the response.")

        LOGGER.debug(
            "Attempt for %r (in /links): Found implementations (names+base_url only):\n%s",
            self.provider.name,
            [
                f"(id: {name}; base_url: {base_url}) " for name, base_url in [(
                    impl.get("id", "N/A"),
                    impl.get("attributes", {}).get("base_url", "N/A"),
                ) for impl in response.get("data", [])]
            ],
        )
        # Return all implementations of link_type "child"
        implementations = [
            implementation for implementation in response.get("data", [])
            if (implementation.get("attributes", {}).get("link_type", "") ==
                "child" or implementation.get("type", "") == "child")
        ]
        LOGGER.debug(
            "After curating for implementations which are of 'link_type' = 'child' or 'type' == "
            "'child' (old style):\n%s",
            [
                f"(id: {name}; base_url: {base_url}) " for name, base_url in [(
                    impl.get("id", "N/A"),
                    impl.get("attributes", {}).get("base_url", "N/A"),
                ) for impl in implementations]
            ],
        )

        # Get links, data_returned, and data_available
        links = response.get("links", {})
        data_returned = response.get("meta", {}).get("data_returned",
                                                     len(implementations))
        if data_returned > 0 and not implementations:
            # Most probably dealing with pre-v1.0.0-rc.2 implementations
            data_returned = 0
        data_available = response.get("meta", {}).get("data_available", 0)

        return implementations, links, data_returned, data_available
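# The exclude_ids handling at the top of _query() rewrites a pagination link
# before following it. A standalone sketch of that step (the function name and
# the example URL are made up for illustration; the logic mirrors the code above):
import urllib.parse
from typing import List


def add_exclusion_filter(link: str, exclude_ids: List[str]) -> str:
    """Merge `NOT id="..."` clauses into the link's `filter` query parameter."""
    filter_value = " AND ".join(f'NOT id="{id_}"' for id_ in exclude_ids)

    parsed_url = urllib.parse.urlparse(link)
    # parse_qs wraps every value in a list; keep only the first entry.
    queries = {
        key: value[0]
        for key, value in urllib.parse.parse_qs(parsed_url.query).items()
    }
    if "filter" in queries:
        queries["filter"] = f"( {queries['filter']} ) AND ( {filter_value} )"
    else:
        queries["filter"] = filter_value

    return (f"{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path}"
            f"?{urllib.parse.urlencode(queries)}")


# Example: exclude two child databases from a /links request.
# -> https://example.org/v1/links?page_limit=10&filter=NOT+id%3D%22exmpl%22+AND+NOT+id%3D%22mcloud%22
print(add_exclusion_filter("https://example.org/v1/links?page_limit=10",
                           ["exmpl", "mcloud"]))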