def _query(self, link: str = None) -> dict:
    """Query helper function"""
    # If a complete link is provided, use it straight up
    if link is not None:
        try:
            link = ordered_query_url(link)
            response = SESSION.get(link, timeout=TIMEOUT_SECONDS)
            if response.from_cache:
                LOGGER.debug("Request to %s was taken from cache!", link)
            response = response.json()
        except (
                requests.exceptions.ConnectTimeout,
                requests.exceptions.ConnectionError,
                requests.exceptions.ReadTimeout,
        ) as exc:
            response = {
                "errors": {
                    "msg": "CLIENT: Connection error or timeout.",
                    "url": link,
                    "Exception": repr(exc),
                }
            }
        except JSONDecodeError as exc:
            response = {
                "errors": {
                    "msg": "CLIENT: Could not decode response to JSON.",
                    "url": link,
                    "Exception": repr(exc),
                }
            }
        return response

    # Avoid structures with null positions and with assemblies.
    add_to_filter = 'NOT structure_features HAS ANY "assemblies"'
    if not self._uses_new_structure_features():
        add_to_filter += ',"unknown_positions"'

    optimade_filter = self.filters.collect_value()
    optimade_filter = (
        "( {} ) AND ( {} )".format(optimade_filter, add_to_filter)
        if optimade_filter and add_to_filter
        else optimade_filter or add_to_filter or None)
    LOGGER.debug("Querying with filter: %s", optimade_filter)

    # OPTIMADE queries
    queries = {
        "base_url": self.database[1].base_url,
        "filter": optimade_filter,
        "page_limit": self.page_limit,
        "page_offset": self.offset,
        "page_number": self.number,
        "sort": self.sorting,
    }
    LOGGER.debug(
        "Parameters (excluding filter) sent to query util func: %s",
        {key: value for key, value in queries.items() if key != "filter"},
    )

    return perform_optimade_query(**queries)
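# NOTE: The filter composition above is plain string templating over the
# OPTIMADE filter grammar. A minimal standalone sketch of the same logic,
# for reference; the `combine_filters` name is hypothetical, not part of
# the client:
from typing import Optional


def combine_filters(user_filter: str, extra_filter: str) -> Optional[str]:
    """AND two OPTIMADE filter strings together, falling back to whichever
    one is non-empty (mirrors the composition in ``_query`` above)."""
    if user_filter and extra_filter:
        return f"( {user_filter} ) AND ( {extra_filter} )"
    return user_filter or extra_filter or None


# Example: restrict a user filter to structures without assemblies.
print(combine_filters(
    "nelements>=1 AND nelements<=9",
    'NOT structure_features HAS ANY "assemblies"',
))
# -> ( nelements>=1 AND nelements<=9 ) AND ( NOT structure_features HAS ANY "assemblies" )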
def test_ordered_query_url():
    """Check ordered_query_url().

    Testing already sorted URLs, making sure they come out exactly the same
    as when they came in.
    """
    from optimade_client.utils import ordered_query_url

    normal_url = (
        "https://optimade.materialsproject.org/v1/structures?filter=%28+nelements%3E%3D1+AND+"
        "nelements%3C%3D9+AND+nsites%3E%3D1+AND+nsites%3C%3D444+%29+AND+%28+NOT+structure_features"
        "+HAS+ANY+%22assemblies%22+%29&page_limit=25&page_number=1&page_offset=30&response_format"
        "=json")
    multi_query_param_url = (
        "https://optimade.materialsproject.org/v1/structures?filter=%28+nelements%3E%3D1+AND+"
        "nelements%3C%3D9+AND+nsites%3E%3D1+AND+nsites%3C%3D444+%29+AND+%28+NOT+structure_features"
        "+HAS+ANY+%22assemblies%22+%29&page_limit=25&page_number=1&page_offset=30&response_format"
        "=json&response_format=xml")

    ordered_url = ordered_query_url(normal_url)
    assert ordered_url == normal_url

    ordered_url = ordered_query_url(multi_query_param_url)
    assert ordered_url == multi_query_param_url
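# NOTE: The test above exercises ordered_query_url() without showing it.
# Based on the behavior it asserts (query parameters sorted alphabetically,
# duplicate keys such as the repeated response_format preserved), a helper
# like this would satisfy it. This is an illustrative sketch, not the
# actual implementation in optimade_client.utils:
import urllib.parse


def ordered_query_url_sketch(url: str) -> str:
    """Return ``url`` with its query parameters sorted alphabetically,
    so that logically identical URLs map to the same cache key."""
    parsed = urllib.parse.urlparse(url)
    # parse_qsl (unlike parse_qs) keeps duplicate keys as separate
    # (key, value) pairs, so multi-valued parameters survive the round trip.
    pairs = sorted(urllib.parse.parse_qsl(parsed.query))
    return parsed._replace(query=urllib.parse.urlencode(pairs)).geturl()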
def _query(  # pylint: disable=too-many-locals,too-many-branches,too-many-statements
        self,
        link: str = None,
        exclude_ids: List[str] = None) -> Tuple[List[dict], dict, int, int]:
    """Query helper function"""
    # If a complete link is provided, use it straight up
    if link is not None:
        try:
            if exclude_ids:
                filter_value = " AND ".join(
                    [f'NOT id="{id_}"' for id_ in exclude_ids])

                parsed_url = urllib.parse.urlparse(link)
                queries = urllib.parse.parse_qs(parsed_url.query)
                # Since parse_qs wraps all values in a list,
                # this extracts the values from the list(s).
                queries = {key: value[0] for key, value in queries.items()}

                if "filter" in queries:
                    queries["filter"] = (
                        f"( {queries['filter']} ) AND ( {filter_value} )")
                else:
                    queries["filter"] = filter_value

                parsed_query = urllib.parse.urlencode(queries)

                link = (
                    f"{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path}"
                    f"?{parsed_query}")

            link = ordered_query_url(link)
            response = SESSION.get(link, timeout=TIMEOUT_SECONDS)
            if response.from_cache:
                LOGGER.debug("Request to %s was taken from cache!", link)
            response = response.json()
        except (
                requests.exceptions.ConnectTimeout,
                requests.exceptions.ConnectionError,
                requests.exceptions.ReadTimeout,
        ) as exc:
            response = {
                "errors": {
                    "msg": "CLIENT: Connection error or timeout.",
                    "url": link,
                    "Exception": repr(exc),
                }
            }
        except json.JSONDecodeError as exc:
            response = {
                "errors": {
                    "msg": "CLIENT: Could not decode response to JSON.",
                    "url": link,
                    "Exception": repr(exc),
                }
            }
    else:
        filter_ = '( link_type="child" OR type="child" )'
        if exclude_ids:
            filter_ += (
                " AND ( "
                + " AND ".join([f'NOT id="{id_}"' for id_ in exclude_ids])
                + " )")

        response = perform_optimade_query(
            filter=filter_,
            base_url=self.provider.base_url,
            endpoint="/links",
            page_limit=self.child_db_limit,
            page_offset=self.offset,
            page_number=self.number,
        )

    msg, http_errors = handle_errors(response)
    if msg:
        if 404 in http_errors:
            # If /links is not found, move on
            pass
        else:
            self.error_or_status_messages.value = msg
            raise QueryError(msg=msg, remove_target=True)

    # Check implementation API version
    msg = validate_api_version(
        response.get("meta", {}).get("api_version", ""), raise_on_fail=False)
    if msg:
        self.error_or_status_messages.value = (
            f"{msg}<br>The provider has been removed.")
        raise QueryError(msg=msg, remove_target=True)

    LOGGER.debug(
        "Manually removing `exclude_ids` in case filters are not supported")
    child_db_data = {
        impl.get("id", "N/A"): impl
        for impl in response.get("data", [])
    }
    if exclude_ids:
        for links_id in exclude_ids:
            # pop() with a default avoids a KeyError for absent IDs.
            child_db_data.pop(links_id, None)
        LOGGER.debug("child_db_data after popping: %r", child_db_data)
        response["data"] = list(child_db_data.values())

        if "meta" in response:
            if "data_available" in response["meta"]:
                old_data_available = response["meta"].get("data_available", 0)
                if len(response["data"]) > old_data_available:
                    LOGGER.debug("raising OptimadeClientError")
                    raise OptimadeClientError(
                        f"Reported data_available ({old_data_available}) is smaller than "
                        f"curated list of responses ({len(response['data'])}).",
                    )
            response["meta"]["data_available"] = len(response["data"])
        else:
            raise OptimadeClientError(
                "'meta' not found in response. Bad response")

    LOGGER.debug(
        "Attempt for %r (in /links): Found implementations (names+base_url only):\n%s",
        self.provider.name,
        [
            f"(id: {name}; base_url: {base_url}) "
            for name, base_url in [(
                impl.get("id", "N/A"),
                impl.get("attributes", {}).get("base_url", "N/A"),
            ) for impl in response.get("data", [])]
        ],
    )

    # Return all implementations of link_type "child"
    implementations = [
        implementation for implementation in response.get("data", [])
        if (implementation.get("attributes", {}).get("link_type", "") == "child"
            or implementation.get("type", "") == "child")
    ]
    LOGGER.debug(
        "After curating for implementations which are of 'link_type' = 'child' or 'type' == "
        "'child' (old style):\n%s",
        [
            f"(id: {name}; base_url: {base_url}) "
            for name, base_url in [(
                impl.get("id", "N/A"),
                impl.get("attributes", {}).get("base_url", "N/A"),
            ) for impl in implementations]
        ],
    )

    # Get links, data_returned, and data_available
    links = response.get("links", {})
    data_returned = response.get("meta", {}).get(
        "data_returned", len(implementations))
    if data_returned > 0 and not implementations:
        # Most probably dealing with pre-v1.0.0-rc.2 implementations
        data_returned = 0
    data_available = response.get("meta", {}).get("data_available", 0)

    return implementations, links, data_returned, data_available