def query_url(
        self,
        url: str,
        params: Optional[Dict[str, Any]] = None,
        request_type: RequestType = RequestType.GET,
        post_data: Optional[Dict[str, str]] = None,
        auth: Optional[HTTPBasicAuth] = None) -> Tuple[Dict[str, Any], float]:
    """Sends a request to this endpoint. Repeats if a timeout error occurred.

    On a read timeout the page size is reduced (and persisted on this instance
    for further requests) before the request is repeated.
    A non-empty `params` dict passed by the caller is modified in place
    (the `pageSize` entry is set, changed or removed).

    Arguments:
        url {str} -- URL to be queried. Must contain the server-uri and Endpoint.
            Does not allow encoded parameters

    Keyword Arguments:
        params {Dict[str, Any]} -- URL-Parameters sent with the request.
            A missing `pageSize` is set to the current page size of this instance,
            a `pageSize` of None is removed from the parameters. (default: {None})
        request_type {RequestType} -- What kind of Request should be made (default: {RequestType.GET})
        post_data {Dict[str, str]} -- additional data with filters/parameters.
            Only to be send with a POST-Request (default: {None})
        auth {HTTPBasicAuth} -- Basic auth to be used to login into SPP via POST-Request (default: {None})

    Raises:
        ValueError: No URL specified
        ValueError: Unsupported request type
        ValueError: No post-data/auth is allowed in a GET-Request
        ValueError: Timeout after the maximum amount of retries or when already using the minimum page size
        ValueError: Error when requesting endpoint
        ValueError: failed to parse result
        Exception: the error created by `ConnectionUtils.rest_response_error` on a non-OK status code
            (presumably a ValueError -- confirm against that helper)

    Returns:
        Tuple[Dict[str, Any], float] -- Result of the request with the required send time
    """
    if not url:
        raise ValueError("no url specified")
    # Without this check an unknown type would leave the response unset
    # and fail later with an unrelated AttributeError.
    if request_type not in (RequestType.GET, RequestType.POST):
        raise ValueError("unsupported request type", request_type)
    if (post_data or auth) and request_type == RequestType.GET:
        raise ValueError("No post-data/auth is allowed in a GET-Request")

    LOGGER.debug(
        f"query url: {url}, type: {request_type}, post_data: {post_data} auth: {bool(auth)}")

    if not params:
        params = {}

    failed_tries: int = 0
    response_query: Optional[Response] = None
    send_time: float = -1  # prevent unbound var

    # avoid unset pageSize to not get into SPP defaults
    if "pageSize" not in params:
        LOGGER.debug(f"setting pageSize to {self.__page_size} from unset value")
        params["pageSize"] = self.__page_size
    elif params["pageSize"] is None:
        # an explicit None requests that no pageSize is sent at all
        params.pop("pageSize")

    while response_query is None:

        # send the query
        # NOTE(review): verify=False disables TLS certificate validation -- confirm this is intended.
        try:
            if request_type == RequestType.GET:
                response_query = get(
                    url=url, headers=self.__headers, verify=False,
                    params=params,
                    timeout=(self.__initial_connection_timeout, self.__timeout))
            else:  # RequestType.POST, validated above
                response_query = post(
                    url=url, headers=self.__headers, verify=False,
                    params=params, json=post_data, auth=auth,
                    timeout=(self.__initial_connection_timeout, self.__timeout))
            send_time = response_query.elapsed.total_seconds()

        except ReadTimeout as timeout_error:

            # timeout occurred, increasing failed tries
            failed_tries += 1

            # #### Aborting cases ######
            abort_reason: Optional[str] = None
            if failed_tries > self.__max_send_retries:
                abort_reason = "timeout after repeating a maximum ammount of times."
            elif self.__page_size == self.__min_page_size:
                abort_reason = "timeout after using minumum pagesize. repeating the request is of no use."

            if abort_reason is not None:
                ExceptionUtils.exception_info(error=timeout_error)
                # Read paging information for debugging. The values are sent via `params`,
                # the url is only consulted as a fallback.
                url_params = ConnectionUtils.get_url_params(url)
                start_index = params.get("pageStartIndex", url_params.get("pageStartIndex", None))
                page_size = params.get("pageSize", url_params.get("pageSize", None))
                # report timeout with full information
                raise ValueError(
                    abort_reason, timeout_error, failed_tries, page_size, start_index
                ) from timeout_error

            # #### continuing cases ######
            last_try = failed_tries == self.__max_send_retries
            if last_try:
                message = (
                    f"Timeout error when requesting, now last try of total {self.__max_send_retries}. "
                    f"Reducing pagesize to minimum for url: {url}")
            else:
                # more than 1 try left
                message = (
                    f"Timeout error when requesting, now on try {failed_tries} of {self.__max_send_retries}. "
                    f"Reducing pagesize for url: {url}")
            LOGGER.debug(message)
            if self.__verbose:
                LOGGER.info(message)

            # persist reduced size for further requests
            if last_try:
                self.__page_size = self.__min_page_size
            else:
                # pageSize is absent from params if the caller passed None for it,
                # use the persisted page size as the base in that case.
                self.__page_size = ConnectionUtils.adjust_page_size(
                    page_size=params.get("pageSize", self.__page_size),
                    min_page_size=self.__min_page_size,
                    timeout=True)

            # repeat with the reduced page size
            LOGGER.debug(f"setting pageSize from {params.get('pageSize', None)} to {self.__page_size}")
            params["pageSize"] = self.__page_size

        except RequestException as error:
            ExceptionUtils.exception_info(error=error)
            raise ValueError("error when requesting endpoint", error) from error

    if not response_query.ok:
        raise ConnectionUtils.rest_response_error(
            response_query,
            "Wrong Status code when requesting endpoint data",
            url)

    try:
        response_json: Dict[str, Any] = response_query.json()
    except (json.decoder.JSONDecodeError, ValueError) as error:
        raise ValueError("failed to parse query in restAPI request", response_query) from error

    return (response_json, send_time)
def get_objects(self,
                endpoint: str = None, uri: str = None,
                params: Dict[str, Any] = None,
                post_data: Dict[str, Any] = None,
                request_type: RequestType = RequestType.GET,
                array_name: str = None,
                allow_list: List[str] = None,
                ignore_list: List[str] = None,
                add_time_stamp: bool = False) -> List[Dict[str, Any]]:
    """Collects the results of a REST-API endpoint or URI over all of its pages.

    Follows the `links.nextPage.href` entry of each response until none is present.
    Specify `array_name` if the response holds a list of results under that key,
    otherwise the whole response is treated as a single result.
    The results of each page are passed through `ConnectionUtils.filter_values_dict`
    together with `allow_list` and `ignore_list`.
    The page size of this instance is re-adjusted after slow or completely filled pages,
    therefore a `pageSize` within `params` is only guaranteed to be used for the first page.

    Note: If both endpoint and uri are specified, the endpoint is used and the uri is ignored.

    Keyword Arguments:
        endpoint {str} -- endpoint to be queried. Either use this or uri (default: {None})
        uri {str} -- uri to be queried. Either use this or endpoint (default: {None})
        params {Dict[str, Any]} -- Dictionary with all URL-Parameters.
            Replaced by the parameters of the next-page link after the first page (default: {None})
        post_data {Dict[str, Any]} -- Dictionary with Body-Data. Only use on POST-Requests (default: {None})
        request_type {RequestType} -- Either GET or POST (default: {RequestType.GET})
        array_name {str} -- key of the result list within the response (default: {None})
        allow_list {list} -- list of items to query (default: {None})
        ignore_list {list} -- query all but these items(-groups) (default: {None})
        add_time_stamp {bool} -- whether to add the capture timestamp to every result (default: {False})

    Raises:
        ValueError: Neither an endpoint nor an uri is specified
        ValueError: array_name is specified but missing in the response

    Returns:
        {List[Dict[str, Any]]} -- List of dictionaries as the results
    """
    if not (endpoint or uri):
        raise ValueError("neiter endpoint nor uri specified")
    if endpoint and uri:
        LOGGER.debug("added both endpoint and uri. This is unneccessary, uri is ignored")

    # without any filter list everything is collected
    if not (allow_list or ignore_list):
        ignore_list = []
    if params is None:
        params = {}

    # the endpoint takes precedence, it is expanded to a full url
    page_url = self.get_url(endpoint) if endpoint else uri

    collected: List[Dict[str, Any]] = []

    # loop ends once a response carries no next-page link
    while page_url:
        progress = f"Collected {len(collected)} items until now. Next page: {page_url}"
        LOGGER.debug(progress)
        if self.__verbose:
            LOGGER.info(progress)

        # request the current page
        (response, send_time) = self.query_url(page_url, params, request_type, post_data)

        # determine the follow-up page, if there is any
        (_, follow_link) = SppUtils.get_nested_kv(
            key_name="links.nextPage.href", nested_dict=response)
        page_url = follow_link
        if page_url:
            # the next link carries its own parameters: use them, then strip them from the url
            params = ConnectionUtils.get_url_params(page_url)
            page_url = ConnectionUtils.url_set_params(page_url, None)

        # a single object is wrapped into a list, otherwise the named array is used
        if not array_name:
            page_items = [response]
        else:
            page_items: Optional[List[Dict[str, Any]]] = response.get(array_name)
            if page_items is None:
                raise ValueError("array_name does not exist, this is probably a single object")

        kept_items = ConnectionUtils.filter_values_dict(
            result_list=page_items,
            allow_list=allow_list,
            ignore_list=ignore_list)

        if add_time_stamp:
            # stamp each item directly so the timestamps represent the real capture time
            for item in kept_items:
                stamp_key, stamp_value = SppUtils.get_capture_timestamp_sec()
                item[stamp_key] = stamp_value
        collected.extend(kept_items)

        # Only adjust the page size if the request took too long or the page was full.
        # A page which is not full is expected to be quick and says nothing about the size.
        page_len = len(page_items)
        too_slow = send_time > self.__preferred_time
        if not (too_slow or page_len == self.__page_size):
            continue

        LOGGER.debug(f"send_time: {send_time}, len: {page_len}, pageSize = {self.__page_size} ")
        self.__page_size = ConnectionUtils.adjust_page_size(
            page_size=page_len,
            min_page_size=self.__min_page_size,
            preferred_time=self.__preferred_time,
            send_time=send_time)
        LOGGER.debug(f"Changed pageSize from {page_len} to {self.__page_size} ")
        params["pageSize"] = self.__page_size

    LOGGER.debug("objectList size %d", len(collected))
    return collected