import logging
import random

from scrapy.utils.python import global_object_name

# `Proxy` comes from the project's own proxy-pool module (not included in this excerpt).

logger = logging.getLogger(__name__)


def process_request(self, request, spider):
    # Fetch one fresh proxy from the pool and route this request through it.
    _proxy = Proxy()
    proxy = _proxy.get()
    try:
        request.meta["proxy"] = "http://" + proxy
    except Exception as e:
        logger.error(e)
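# A minimal stand-in sketch of the Proxy helper, purely an assumption: the
# original source does not show this class. It only mirrors the two calls the
# middleware makes (get() returning a "host:port" string, and
# get_ip(server_id=...) returning a bare IP as bytes), backed by a
# hypothetical static pool instead of a real proxy service.
class Proxy:
    _POOL = ["10.0.0.1:9990", "10.0.0.2:9990"]  # placeholder addresses

    def get(self):
        # Return one "host:port" string chosen at random from the pool.
        return random.choice(self._POOL)

    def get_ip(self, server_id):
        # Return a bare IP as bytes, matching the .decode() call in _retry();
        # server_id only selects an entry in this sketch.
        return self._POOL[server_id % len(self._POOL)].split(":")[0].encode()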
def _retry(self, request, reason, spider):
    retries = request.meta.get('retry_times', 0) + 1

    retry_times = self.max_retry_times
    if 'max_retry_times' in request.meta:
        retry_times = request.meta['max_retry_times']

    stats = spider.crawler.stats
    if retries <= retry_times:
        logger.debug(
            "Retrying %(request)s (failed %(retries)d times): %(reason)s",
            {'request': request, 'retries': retries, 'reason': reason},
            extra={'spider': spider},
        )
        retryreq = request.copy()
        retryreq.meta['retry_times'] = retries
        retryreq.dont_filter = True
        retryreq.priority = request.priority + self.priority_adjust

        # Pick a random proxy server and route the retried request through it.
        _proxy = Proxy()
        number = random.randint(20, 50)
        proxy_id = _proxy.get_ip(server_id=number).decode()
        retryreq.meta["proxy"] = "http://" + proxy_id + ":9990"

        if isinstance(reason, Exception):
            reason = global_object_name(reason.__class__)

        stats.inc_value('retry/count')
        stats.inc_value('retry/reason_count/%s' % reason)
        return retryreq
    else:
        # Past the retry limit: swap in one more fresh proxy and reschedule the
        # original request instead of giving up on it.
        _proxy = Proxy()
        proxy = _proxy.get()
        request.meta["proxy"] = "http://" + proxy
        request.dont_filter = True
        request.priority = request.priority + self.priority_adjust
        # return self.process_request(request, spider)
        return request
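# Usage sketch (an assumption, not shown in the original): middlewares like
# these are enabled in settings.py through DOWNLOADER_MIDDLEWARES. The module
# path and class names below are hypothetical placeholders; Scrapy's built-in
# RetryMiddleware is disabled so the customised retry logic takes over.
DOWNLOADER_MIDDLEWARES = {
    "scrapy.downloadermiddlewares.retry.RetryMiddleware": None,
    "myproject.middlewares.RandomProxyMiddleware": 543,
    "myproject.middlewares.ProxyRetryMiddleware": 550,
}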