def __prepare_request(self, request: Request) -> requests.PreparedRequest:
    """Build a requests.PreparedRequest out of UserAgent's Request.

    Raises McRequestException when the method, URL or headers are None, when only one of the two HTTP
    authentication credentials is None (or only one of them is empty), or when the request cannot be prepared
    by the session.
    """
    # NOTE(review): a second method with this same name appears to be defined right after this one in the
    # file; if both live in the same class, the later definition is the one that takes effect -- confirm.

    http_method = request.method()
    if http_method is None:
        raise McRequestException("Request's method is None.")

    target_url = request.url()
    if target_url is None:
        raise McRequestException("Request's URL is None.")

    http_headers = request.headers()
    if http_headers is None:
        raise McRequestException("Request's headers is None.")

    username = request.auth_username()
    password = request.auth_password()

    # Exactly one of the credentials being None is an error (both or neither is fine)
    if (username is None) != (password is None):
        raise McRequestException("Either both or none of HTTP authentication credentials must be not None.")

    basic_auth = None
    if username is not None:
        # The check above guarantees that the password is not None here either

        # Exactly one of the credentials being empty is an error too
        if (len(username) == 0) != (len(password) == 0):
            raise McRequestException("Either both or none of HTTP authentication credentials must be not Empty.")

        basic_auth = HTTPBasicAuth(username, password)

    body = request.content()

    try:
        prepared = self.__session.prepare_request(
            requests.Request(
                method=http_method,
                url=target_url,
                data=body,
                headers=http_headers,
                auth=basic_auth,
            )
        )
    except Exception as ex:
        raise McRequestException("Unable to prepare request %s: %s" % (str(request), str(ex),))

    return prepared
def __prepare_request(self, request: Request) -> requests.PreparedRequest:
    """Convert UserAgent's Request into a PreparedRequest of "requests".

    Raises McRequestException if one or more of the request's parameters are invalid, or if the session fails to
    prepare the request.
    """
    # NOTE(review): this looks like a repeat of an identically named method defined just before it in the
    # file -- confirm whether both definitions are meant to exist.

    method = request.method()
    if method is None:
        raise McRequestException("Request's method is None.")

    url = request.url()
    if url is None:
        raise McRequestException("Request's URL is None.")

    headers = request.headers()
    if headers is None:
        raise McRequestException("Request's headers is None.")

    user, secret = request.auth_username(), request.auth_password()
    have_user = user is not None
    have_secret = secret is not None

    # Credentials come as a pair: it's either both of them or none
    if have_user is not have_secret:
        raise McRequestException("Either both or none of HTTP authentication credentials must be not None.")

    auth = None
    if have_user and have_secret:
        user_is_blank = len(user) == 0
        secret_is_blank = len(secret) == 0

        # Same pairing rule applies to emptiness
        if user_is_blank is not secret_is_blank:
            raise McRequestException("Either both or none of HTTP authentication credentials must be not Empty.")

        auth = HTTPBasicAuth(user, secret)

    payload = request.content()

    try:
        unprepared = requests.Request(method=method, url=url, data=payload, headers=headers, auth=auth)
        requests_prepared_request = self.__session.prepare_request(unprepared)
    except Exception as ex:
        message = "Unable to prepare request %s: %s" % (str(request), str(ex),)
        raise McRequestException(message)

    return requests_prepared_request
def request(self, request: Request) -> Response:
    """Execute a request, return a response.

    All other helpers are supposed to use request() internally as it implements max. size, callbacks, blacklisted
    URLs etc.

    Failures while sending the request or reading the response body do not raise; they are reported through a
    stand-in response instead:

    * too many redirects -- the response attached to the exception is returned;
    * timeout, or an error while reading the body -- a 408 response flagged as a client-side error;
    * any other error while sending -- a 400 response with a "Client-Warning" header, flagged as a client-side
      error.

    In all of these cases the response data is the stringified exception.

    :param request: Request to execute.
    :return: Response with its request set to the request of the final (possibly redirected-to) page, and with
        the redirect history attached as a chain of previous responses.
    :raises McRequestException: if the request is None or invalid, or if no response / response data was
        produced.
    """

    if request is None:
        raise McRequestException("Request is None.")

    request = self.__blacklist_request_if_needed(request=request)

    self.__log_request(request=request)

    # Validation and preparation live in a single helper so that the error messages stay consistent
    requests_prepared_request = self.__prepare_request(request)

    url = request.url()

    def stand_in_response(status_code: int, reason: str) -> requests.Response:
        """Build a response object for a request that didn't yield a usable HTTP response."""
        stand_in = requests.Response()
        stand_in.status_code = status_code
        stand_in.reason = reason
        stand_in.request = requests_prepared_request
        # Previous request / response chain is not built for errored requests
        stand_in.history = []
        return stand_in

    error_is_client_side = False

    try:
        requests_response = self.__session.send(
            request=requests_prepared_request,
            timeout=self.timeout(),

            # To be able to enforce max_size
            stream=True,
        )

    except requests.TooManyRedirects as ex:

        # On too many redirects, return the last fetched page (just like LWP::UserAgent does)
        log.warning("Exceeded max. redirects for URL %s" % request.url())
        requests_response = ex.response
        response_data = str(ex)

    except requests.Timeout as ex:

        log.warning("Timeout for URL %s" % request.url())

        # We treat timeouts as client-side errors too because we can retry on them
        error_is_client_side = True

        requests_response = stand_in_response(
            status_code=HTTPStatus.REQUEST_TIMEOUT.value,
            reason=HTTPStatus.REQUEST_TIMEOUT.phrase,
        )
        response_data = str(ex)

    except Exception as ex:

        # Client-side error
        log.warning("Client-side error while processing request %s: %s" % (str(request), str(ex),))

        error_is_client_side = True

        requests_response = stand_in_response(
            status_code=HTTPStatus.BAD_REQUEST.value,
            reason="Client-side error",
        )
        requests_response.headers = {
            # LWP::UserAgent compatibility
            'Client-Warning': 'Client-side error',
        }
        response_data = str(ex)

    else:

        try:

            max_size = self.max_size()

            response_data = ""
            read_response_data = True

            if max_size is not None:
                content_length = requests_response.headers.get('Content-Length', None)

                if content_length is not None:
                    try:
                        content_length = int(content_length)
                    except ValueError:
                        # A malformed header must not abort the request; the size is still enforced while
                        # the data is being read below
                        log.warning("Invalid Content-Length %s for URL %s" % (content_length, url,))
                        content_length = None

                if content_length is not None and content_length > max_size:
                    log.warning("Content-Length exceeds %d for URL %s" % (max_size, url,))

                    # Release the response to return connection back to the pool
                    # (http://docs.python-requests.org/en/master/user/advanced/#body-content-workflow)
                    requests_response.close()

                    read_response_data = False

            if read_response_data:

                declared_encoding = requests_response.encoding

                # The encoding guesser gets consulted in two cases:
                #
                # * Encoding is not in HTTP headers at all, so we test the guesser's opinion, just like
                #   browsers do.
                # * If "Content-Type" HTTP header contains a string "text" and doesn't have "charset"
                #   property, "requests" falls back to setting the encoding to ISO-8859-1, which is probably
                #   not right (encoding might have been defined in the HTML content itself via <meta> tag),
                #   so we use the "apparent encoding" instead.
                #
                # NOTE(review): "apparent_encoding" is derived from the response body, so presumably reading
                # it fetches the body before the max_size check in the loop below gets a say -- confirm.
                if declared_encoding is None or declared_encoding.lower() == 'iso-8859-1':
                    guessed_encoding = requests_response.apparent_encoding
                    if guessed_encoding is not None:
                        requests_response.encoding = guessed_encoding
                    elif declared_encoding is None:
                        # If encoding is not in HTTP headers nor can be determined from content itself,
                        # assume that it's UTF-8
                        requests_response.encoding = 'UTF-8'

                # Some pages report some funky encoding; in that case, fallback to UTF-8
                try:
                    codecs.lookup(requests_response.encoding)
                except LookupError:
                    log.warning("Invalid encoding %s for URL %s" % (
                        requests_response.encoding, requests_response.url,
                    ))
                    requests_response.encoding = 'UTF-8'

                # Chunks are collected and joined once to avoid repeatedly reallocating a growing string
                chunks = []
                response_data_size = 0
                for chunk in requests_response.iter_content(chunk_size=None, decode_unicode=True):
                    chunks.append(chunk)
                    response_data_size += len(chunk)

                    # Content-Length might be missing / lying, so we measure size while fetching the data too
                    if max_size is not None and response_data_size > max_size:
                        log.warning("Data size exceeds %d for URL %s" % (max_size, url,))

                        # Release the response to return connection back to the pool
                        # (http://docs.python-requests.org/en/master/user/advanced/#body-content-workflow)
                        requests_response.close()

                        break

                response_data = "".join(chunks)

        except requests.RequestException as ex:

            log.warning("Error reading data for URL %s" % request.url())

            # Errors while reading the data are reported the same way as timeouts, i.e. as client-side
            # errors, because we can retry on them
            error_is_client_side = True

            requests_response = stand_in_response(
                status_code=HTTPStatus.REQUEST_TIMEOUT.value,
                reason=HTTPStatus.REQUEST_TIMEOUT.phrase,
            )
            response_data = str(ex)

    if requests_response is None:
        raise McRequestException("Response from 'requests' is None.")

    if response_data is None:
        # Probably a programming error
        raise McRequestException("Response data is None.")

    response = Response.from_requests_response(
        requests_response=requests_response,
        data=response_data,
    )

    if error_is_client_side:
        response.set_error_is_client_side(error_is_client_side=error_is_client_side)

    # Build the previous request / response chain from the redirects
    current_response = response
    for previous_rq_response in reversed(requests_response.history):
        previous_rq_request = previous_rq_response.request
        previous_response_request = Request.from_requests_prepared_request(
            requests_prepared_request=previous_rq_request
        )

        previous_response = Response.from_requests_response(requests_response=previous_rq_response)
        previous_response.set_request(request=previous_response_request)

        current_response.set_previous(previous=previous_response)
        current_response = previous_response

    # Redirects might have happened, so we have to recreate the request object from the latest page that was
    # redirected to
    response_request = Request.from_requests_prepared_request(
        requests_prepared_request=requests_response.request
    )
    response.set_request(response_request)

    return response