def __init__(self, *args, **kwargs):
    """Open the local Redis connection and load the first pooled cookie.

    The head entry of the ``ip_cookie_key`` list is decoded as UTF-8,
    evaluated into a sequence, and the sequence's second element is parsed
    as JSON into ``self.cookie``.  If any step of that fails, the error is
    printed, ``IPCookie().get_cookies()`` is invoked and the load is tried
    exactly once more; a failure on the second attempt propagates.

    Args:
        *args: Forwarded unchanged to the parent initializer.
        **kwargs: Forwarded unchanged to the parent initializer.
    """
    super(TmViewSpider, self).__init__(*args, **kwargs)
    self.connect = redis.Redis(host='127.0.0.1', port=6379, db=15)

    def read_head_cookie():
        # lindex() returns None for an empty list, so .decode() raises and
        # the caller's fallback path takes over.
        raw_entry = self.connect.lindex(ip_cookie_key, 0).decode('utf-8')
        # NOTE(review): eval() executes whatever text is stored in Redis.
        # If the entries are plain Python literals, ast.literal_eval would
        # be a safer drop-in -- confirm the stored format before switching.
        entry_fields = list(eval(raw_entry))
        return json.loads(entry_fields[1])

    try:
        self.cookie = read_head_cookie()
    except Exception as e:
        print(e)
        # Imported lazily, as in the original code.
        # presumably get_cookies() repopulates the Redis list -- verify.
        from TEST.get_tm_cookies import IPCookie
        IPCookie().get_cookies()
        self.cookie = read_head_cookie()
def errback_twisted(self, failure):
    """Rebuild the cookie pool after a failed request.

    Timeout / DNS failures and ``HttpError`` failures both cause the
    ``ip_cookie_key`` list to be emptied and fresh cookies to be requested.
    ``HttpError`` failures are additionally logged with the URL of the
    offending response.  Any other failure type is ignored.

    Args:
        failure: The failure object handed to the errback; must support
            ``check(*exception_types)`` and, for ``HttpError``, expose
            ``response``.
    """
    if failure.check(TimeoutError, TCPTimedOutError, DNSLookupError):
        self._renew_cookie_pool()
    if failure.check(HttpError):
        # these exceptions come from HttpError spider middleware
        # you can get the non-200 response
        self._renew_cookie_pool()
        response = failure.response
        logger.error('HttpError on %s', response.url)

def _renew_cookie_pool(self):
    """Empty the ``ip_cookie_key`` list and request a fresh set of cookies."""
    # A single DEL leaves the list empty exactly like the former
    # "BLPOP with 1 s timeout until LLEN == 0" loop did, but it is atomic,
    # needs one round trip, and never blocks for a second when the list is
    # already empty.
    self.connect.delete(ip_cookie_key)
    # Imported lazily, as in the original code.
    # presumably get_cookies() repopulates the Redis list -- verify.
    from TEST.get_tm_cookies import IPCookie
    IPCookie().get_cookies()
def process_response(self, request, response, spider):
    """Reset the cookie pool when a response has a 4xx/5xx status.

    Args:
        request: The request that produced ``response`` (unused).
        response: The response to inspect; only ``response.status`` is read.
        spider: The spider the response is intended for (unused).

    Returns:
        ``response`` unchanged when its status does not start with 4 or 5;
        ``None`` after the cookie pool has been reset otherwise.
    """
    # Catch responses whose status code is 4xx/5xx.
    if str(response.status).startswith(('4', '5')):
        # A single DEL leaves the list empty exactly like the former
        # "BLPOP with 1 s timeout until LLEN == 0" loop did, but it is
        # atomic, needs one round trip, and never blocks for a second when
        # the list is already empty.
        self.connect.delete(ip_cookie_key)
        # Imported lazily, as in the original code.
        # presumably get_cookies() repopulates the Redis list -- verify.
        from TEST.get_tm_cookies import IPCookie
        IPCookie().get_cookies()
        # NOTE(review): if this is a Scrapy downloader middleware,
        # process_response is documented to return a Response or Request
        # (or raise IgnoreRequest); returning None is rejected by the
        # framework.  Left unchanged pending a decision on whether the
        # request should be retried or dropped.
        return None
    # Other status codes are passed through untouched.
    return response
def process_exception(self, request, exception, spider):
    """Reset the cookie pool when a known download exception occurs.

    Exceptions that are instances of ``self.ALL_EXCEPTIONS`` are printed,
    the ``ip_cookie_key`` list is emptied and fresh cookies are requested.
    Any other exception is only printed.

    Args:
        request: The request being processed when the exception occurred
            (unused).
        exception: The exception that was raised.
        spider: The spider the request belongs to (unused).

    Returns:
        Always ``None``.
    """
    # Catch almost all exceptions.
    if isinstance(exception, self.ALL_EXCEPTIONS):
        # Print the exception that was caught.
        print('Got exception: %s' % (exception,))
        # A single DEL leaves the list empty exactly like the former
        # "BLPOP with 1 s timeout until LLEN == 0" loop did, but it is
        # atomic, needs one round trip, and never blocks for a second when
        # the list is already empty.
        self.connect.delete(ip_cookie_key)
        # Imported lazily, as in the original code.
        # presumably get_cookies() repopulates the Redis list -- verify.
        from TEST.get_tm_cookies import IPCookie
        IPCookie().get_cookies()
        # No replacement response is built; None is returned.
        return None
    # Print exceptions that are not covered by ALL_EXCEPTIONS.
    print('not contained exception: %s' % exception)