def replace(self, *args, **kwargs):
    """Delegate to ``Response.replace``, defaulting ``encoding`` to this object's.

    Positional and keyword arguments are forwarded untouched, except that
    an ``encoding`` keyword is filled in from ``self.encoding`` when the
    caller did not supply one.

    Returns:
        Whatever ``Response.replace`` returns for the forwarded arguments.
    """
    # Read the attribute up front so it is evaluated on every call,
    # whether or not the caller passed an explicit encoding.
    own_encoding = self.encoding
    if "encoding" not in kwargs:
        kwargs["encoding"] = own_encoding
    return Response.replace(self, *args, **kwargs)
def process_response(self, request: Request, response: Response, spider: Spider) -> Response:
    """Turn a Fetch API reply into a response bound to the original request.

    The response is returned untouched when the request's ``META_KEY`` meta
    entry is absent, has a truthy ``"skip"`` value, or has no
    ``"original_request"``. Otherwise the stored request is rebuilt with
    ``request_from_dict``, counters are updated on ``self.stats``, and the
    reply is inspected for three kinds of failure, in this order:

    1. a non-empty ``X-Crawlera-Error`` response header,
    2. a body that is not valid JSON,
    3. a JSON payload with a truthy ``"crawlera_error"`` or ``"error_code"``.

    On any failure the incoming response is returned after a warning is
    logged, unless ``self.raise_on_error`` is truthy.

    On success the upstream status, headers and decoded JSON payload are
    stored under ``"upstream_response"`` in the meta entry, and a replaced
    response is returned built from the payload's ``"headers"``, ``"url"``,
    ``"body"`` and ``"original_status"`` (200 when that status is falsy).

    Raises:
        CrawleraFetchException: if a failure is detected while
            ``self.raise_on_error`` is truthy.
    """
    crawlera_meta = request.meta.get(META_KEY, {})

    # Nothing to unwrap: explicitly skipped, or no stored original request.
    if crawlera_meta.get("skip"):
        return response
    serialized_request = crawlera_meta.get("original_request")
    if not serialized_request:
        return response

    original_request = request_from_dict(serialized_request, spider=spider)

    self.stats.inc_value("crawlera_fetch/response_count")
    self._calculate_latency(request)

    api_status_key = "crawlera_fetch/api_status_count/{}".format(response.status)
    self.stats.inc_value(api_status_key)

    # Failure 1: the error is reported through a response header.
    header_error = response.headers.get("X-Crawlera-Error")
    if header_error:
        message = header_error.decode("utf8")
        self.stats.inc_value("crawlera_fetch/response_error")
        self.stats.inc_value("crawlera_fetch/response_error/{}".format(message))
        log_msg = "Error downloading <{} {}> (status: {}, X-Crawlera-Error header: {})".format(
            original_request.method,
            original_request.url,
            response.status,
            message,
        )
        if self.raise_on_error:
            raise CrawleraFetchException(log_msg)
        logger.warning(log_msg)
        return response

    # Failure 2: the body cannot be parsed as JSON.
    try:
        json_response = json.loads(response.text)
    except json.JSONDecodeError as exc:
        self.stats.inc_value("crawlera_fetch/response_error")
        self.stats.inc_value("crawlera_fetch/response_error/JSONDecodeError")
        log_msg = "Error decoding <{} {}> (status: {}, message: {}, lineno: {}, colno: {})".format(
            original_request.method,
            original_request.url,
            response.status,
            exc.msg,
            exc.lineno,
            exc.colno,
        )
        if self.raise_on_error:
            raise CrawleraFetchException(log_msg) from exc
        logger.warning(log_msg)
        return response

    # Failure 3: the JSON payload itself carries an error marker.
    server_error = json_response.get("crawlera_error") or json_response.get("error_code")
    original_status = json_response.get("original_status")
    request_id = json_response.get("id") or json_response.get("uncork_id")
    if server_error:
        message = json_response.get("body") or json_response.get("message")
        self.stats.inc_value("crawlera_fetch/response_error")
        self.stats.inc_value("crawlera_fetch/response_error/{}".format(server_error))
        log_msg = (
            "Error downloading <{} {}> (Original status: {}, "
            "Fetch API error message: {}, Request ID: {})"
        ).format(
            original_request.method,
            original_request.url,
            original_status or "unknown",
            message,
            request_id or "unknown",
        )
        if self.raise_on_error:
            raise CrawleraFetchException(log_msg)
        logger.warning(log_msg)
        return response

    self.stats.inc_value("crawlera_fetch/response_status_count/{}".format(original_status))

    # Keep the raw upstream reply reachable from the request meta.
    crawlera_meta["upstream_response"] = {
        "status": response.status,
        "headers": response.headers,
        "body": json_response,
    }

    # Use the body base64-decoded when it validates as base64; otherwise
    # pass it through exactly as received.
    raw_body = json_response["body"]
    try:
        resp_body = base64.b64decode(raw_body, validate=True)
    except (binascii.Error, ValueError):
        resp_body = raw_body

    upstream_headers = json_response["headers"]
    upstream_url = json_response["url"]
    respcls = responsetypes.from_args(
        headers=upstream_headers,
        url=upstream_url,
        body=resp_body,
    )
    return response.replace(
        cls=respcls,
        request=original_request,
        headers=upstream_headers,
        url=upstream_url,
        body=resp_body,
        status=original_status or 200,
    )
def process_response(self, request: Request, response: Response, spider: Spider) -> Response:
    """Turn a Fetch API reply into a response bound to the original request.

    The response is returned untouched when the request's ``META_KEY`` meta
    entry is absent, has a truthy ``"skip"`` value, or has no
    ``"original_request"``. Otherwise the stored request is rebuilt with
    ``request_from_dict``, counters are updated on ``self.stats``, and the
    reply is inspected for three kinds of failure, in this order:

    1. a non-empty ``X-Crawlera-Error`` response header,
    2. a body that is not valid JSON,
    3. a JSON payload with a truthy ``"crawlera_error"`` value.

    On any failure the incoming response is returned after an error is
    logged, unless ``self.raise_on_error`` is truthy.

    On success the upstream status, headers and decoded JSON payload are
    stored under ``"upstream_response"`` in the meta entry, and a replaced
    response is returned built from the payload's ``"headers"``, ``"url"``,
    ``"body"`` and ``"original_status"``.

    Raises:
        CrawleraFetchException: if a failure is detected while
            ``self.raise_on_error`` is truthy.
    """
    crawlera_meta = request.meta.get(META_KEY, {})

    # Nothing to unwrap: explicitly skipped, or no stored original request.
    if crawlera_meta.get("skip") or not crawlera_meta.get("original_request"):
        return response

    original_request = request_from_dict(crawlera_meta["original_request"])

    self.stats.inc_value("crawlera_fetch/response_count")
    self._calculate_latency(request)

    self.stats.inc_value("crawlera_fetch/api_status_count/{}".format(response.status))

    # Failure 1: the error is reported through a response header.
    if response.headers.get("X-Crawlera-Error"):
        message = response.headers["X-Crawlera-Error"].decode("utf8")
        self.stats.inc_value("crawlera_fetch/response_error")
        self.stats.inc_value("crawlera_fetch/response_error/{}".format(message))
        log_msg = "Error downloading <{} {}> (status: {}, X-Crawlera-Error header: {})"
        log_msg = log_msg.format(
            original_request.method,
            original_request.url,
            response.status,
            message,
        )
        if self.raise_on_error:
            raise CrawleraFetchException(log_msg)
        logger.error(log_msg)
        return response

    # Failure 2: the body cannot be parsed as JSON.
    try:
        json_response = json.loads(response.text)
    except json.JSONDecodeError as exc:
        self.stats.inc_value("crawlera_fetch/response_error")
        self.stats.inc_value("crawlera_fetch/response_error/JSONDecodeError")
        log_msg = "Error decoding <{} {}> (status: {}, message: {}, lineno: {}, colno: {})"
        log_msg = log_msg.format(
            original_request.method,
            original_request.url,
            response.status,
            exc.msg,
            exc.lineno,
            exc.colno,
        )
        if self.raise_on_error:
            raise CrawleraFetchException(log_msg) from exc
        logger.error(log_msg)
        return response

    # Failure 3: the JSON payload itself carries an error marker.
    error = json_response.get("crawlera_error")
    if error:
        # Read the optional details with .get(): an error payload lacking
        # "body" or "original_status" must still be reported as a Fetch API
        # error, not surface as an unrelated KeyError raised while the
        # message is being built.
        message = json_response.get("body")
        self.stats.inc_value("crawlera_fetch/response_error")
        self.stats.inc_value("crawlera_fetch/response_error/{}".format(error))
        log_msg = (
            "Error downloading <{} {}> (Original status: {}, Fetch API error message: {})"
        )
        log_msg = log_msg.format(
            original_request.method,
            original_request.url,
            json_response.get("original_status", "unknown"),
            message,
        )
        if self.raise_on_error:
            raise CrawleraFetchException(log_msg)
        logger.error(log_msg)
        return response

    self.stats.inc_value(
        "crawlera_fetch/response_status_count/{}".format(json_response["original_status"])
    )

    # Keep the raw upstream reply reachable from the request meta.
    crawlera_meta["upstream_response"] = {
        "status": response.status,
        "headers": response.headers,
        "body": json_response,
    }

    respcls = responsetypes.from_args(
        headers=json_response["headers"],
        url=json_response["url"],
        body=json_response["body"],
    )
    return response.replace(
        cls=respcls,
        request=original_request,
        headers=json_response["headers"],
        url=json_response["url"],
        body=json_response["body"],
        status=json_response["original_status"],
    )
def replace(self, *args, **kwargs):
    """Delegate to ``Response.replace``, defaulting ``encoding`` when omitted.

    When the caller passes no ``encoding`` keyword, it is filled in from
    this object's ``_encoding`` attribute, or ``None`` if the attribute
    does not exist. All other arguments are forwarded unchanged.

    Returns:
        Whatever ``Response.replace`` returns for the forwarded arguments.
    """
    stored_encoding = getattr(self, "_encoding", None)
    if "encoding" not in kwargs:
        kwargs["encoding"] = stored_encoding
    return Response.replace(self, *args, **kwargs)
def replace(self, *args, **kwargs):
    """Delegate to ``Response.replace``, defaulting ``encoding`` to this object's.

    An ``encoding`` keyword is filled in from ``self.encoding`` when the
    caller did not provide one; every other argument is forwarded as-is.

    Returns:
        Whatever ``Response.replace`` returns for the forwarded arguments.
    """
    # Evaluate the attribute on every call, even when the caller supplies
    # an explicit encoding.
    fallback_encoding = self.encoding
    if "encoding" not in kwargs:
        kwargs["encoding"] = fallback_encoding
    # The method is looked up on the Response class and this object is
    # passed explicitly as the instance.
    return Response.replace(self, *args, **kwargs)