def from_useragent_response(cls, url: str, response: Response):
    """Build a `cls` instance from a URL and the response received for it.

    Arguments:
    url - URL stored on the new object as `url`
    response - response object whose accessors supply the remaining fields

    Returns: whatever `cls(...)` returns when called with the keyword
    arguments `url`, `is_success`, `code`, `message`, `content` and
    `last_requested_url`.
    """
    succeeded = response.is_success()
    status_code = response.code()
    status_message = response.message()
    body = response.decoded_content()

    # A response may carry no request; in that case there is no last
    # requested URL to record.
    if response.request():
        final_url = response.request().url()
    else:
        final_url = None

    return cls(
        url=url,
        is_success=succeeded,
        code=status_code,
        message=status_message,
        content=body,
        last_requested_url=final_url,
    )
def fetch_url(
        db: DatabaseHandler,
        url: str,
        network_down_host: str = DEFAULT_NETWORK_DOWN_HOST,
        network_down_port: str = DEFAULT_NETWORK_DOWN_PORT,
        network_down_timeout: int = DEFAULT_NETWORK_DOWN_TIMEOUT,
        domain_timeout: typing.Optional[int] = None) -> Response:
    """Fetch a URL, retrying for as long as the network appears to be down.

    If the fetch ends with a 400 response, check whether network_down_host is
    reachable via _network_is_down(). If that check reports the network as
    down, wait network_down_timeout seconds and try again; otherwise return
    the failed response as is. Successful responses and failures with any
    other code are returned immediately.

    McGetFollowHTTPHTMLRedirectsException raised by the user agent is caught
    and replaced with a stand-in 400 Response ('bad url'), which then goes
    through the same network-down check as any other 400 response.

    The loop has no retry limit: it keeps going until a response is returned.

    Arguments:
    db - db handle
    url - url to fetch
    network_down_host - host to check if network is down on error
    network_down_port - port to check if network is down on error
    network_down_timeout - seconds to wait if the network is down
    domain_timeout - value to pass to ThrottledUserAgent()

    Returns: Response object (never None)
    """
    while True:
        # A fresh throttled user agent is created for every attempt.
        ua = ThrottledUserAgent(db, domain_timeout=domain_timeout)

        try:
            response = ua.get_follow_http_html_redirects(url)
        except mediawords.util.web.user_agent.McGetFollowHTTPHTMLRedirectsException:
            response = Response(400, 'bad url', {}, 'not a http url')

        if response.is_success():
            return response

        # Only a 400 triggers the connectivity probe; the probe is skipped
        # (short-circuited) for every other status code.
        network_down = response.code() == 400 and _network_is_down(network_down_host, network_down_port)
        if not network_down:
            return response

        log.warning("Response failed with %s and network is down.  Waiting to retry ..." % (url, ))
        time.sleep(network_down_timeout)