Пример #1
0
 def __init__(self, *args, **kwargs):
     """Initialise the spider, connect to Redis and load ``self.cookie``.

     The cookie is taken from the entry at index 0 of the Redis list named
     by ``ip_cookie_key``: the entry is decoded as UTF-8, evaluated,
     converted to a list, and its element at index 1 is parsed as JSON.

     If anything in that first read raises, the error is printed,
     ``IPCookie().get_cookies()`` is called, and the same read is done
     once more. A failure on the second read is not caught here.
     """
     super(TmViewSpider, self).__init__(*args, **kwargs)
     self.connect = redis.Redis(host='127.0.0.1', port=6379, db=15)
     try:
         # NOTE(review): eval() executes whatever text is stored in Redis;
         # if the stored value is a plain literal, ast.literal_eval would
         # be the safer choice -- confirm the stored format first.
         head = self.connect.lindex(ip_cookie_key, 0)
         fields = list(eval(head.decode('utf-8')))
         self.cookie = json.loads(fields[1])
     except Exception as err:
         print(err)
         # Imported lazily, exactly where the refresh is needed.
         from TEST.get_tm_cookies import IPCookie
         IPCookie().get_cookies()
         # Second and final attempt; errors propagate to the caller.
         head = self.connect.lindex(ip_cookie_key, 0)
         fields = list(eval(head.decode('utf-8')))
         self.cookie = json.loads(fields[1])
Пример #2
0
    def errback_twisted(self, failure):
        """Errback that resets the Redis cookie list after a failed request.

        For a timeout / TCP timeout / DNS lookup failure, and separately for
        an ``HttpError``, the list named by ``ip_cookie_key`` is emptied and
        ``IPCookie().get_cookies()`` is called. For ``HttpError`` the URL of
        the failing response is also logged at error level.

        Args:
            failure: The failure object passed to the errback; it must
                support ``check(...)`` and, for ``HttpError``, expose
                ``response``.
        """
        def _reset_cookie_list():
            # Keep popping from the head (blpop is given a timeout of 1)
            # until llen reports the list is empty, then request new cookies.
            emptied = False
            while not emptied:
                self.connect.blpop(ip_cookie_key, 1)
                emptied = self.connect.llen(ip_cookie_key) == 0
            # Imported lazily, exactly where the refresh is needed.
            from TEST.get_tm_cookies import IPCookie
            IPCookie().get_cookies()

        if failure.check(TimeoutError, TCPTimedOutError, DNSLookupError):
            _reset_cookie_list()

        if failure.check(HttpError):
            # These exceptions come from the HttpError spider middleware,
            # so the non-200 response is available on the failure.
            _reset_cookie_list()
            logger.error('HttpError on %s', failure.response.url)
Пример #3
0
    def process_response(self, request, response, spider):
        """Reset the Redis cookie list when a 4xx/5xx response comes back.

        When the decimal form of ``response.status`` starts with ``4`` or
        ``5``, the list named by ``ip_cookie_key`` is emptied and
        ``IPCookie().get_cookies()`` is called. The response itself is
        always handed back unchanged.

        Args:
            request: The request that produced ``response`` (unused here).
            response: The response being processed; only ``status`` is read.
            spider: The running spider (unused here).

        Returns:
            The ``response`` object that was passed in.
        """
        # Catch responses whose status code is 4xx/5xx.
        if str(response.status).startswith(('4', '5')):
            # Keep popping from the head (blpop is given a timeout of 1)
            # until llen reports the list is empty.
            while True:
                self.connect.blpop(ip_cookie_key, 1)
                if self.connect.llen(ip_cookie_key) == 0:
                    break
            # Imported lazily, exactly where the refresh is needed.
            from TEST.get_tm_cookies import IPCookie
            IPCookie().get_cookies()
            # Fall through to the shared return below: Scrapy requires a
            # downloader middleware's process_response to return a Response
            # or Request (or raise IgnoreRequest). Returning None here is
            # rejected by Scrapy as invalid middleware output, so the error
            # response is passed on for the spider-side error handling.

        # Every status code ends up returning the response unchanged.
        return response
Пример #4
0
    def process_exception(self, request, exception, spider):
        """Reset the Redis cookie list when a known exception is raised.

        If ``exception`` is an instance of ``self.ALL_EXCEPTIONS`` it is
        printed, the list named by ``ip_cookie_key`` is emptied and
        ``IPCookie().get_cookies()`` is called. Any other exception is only
        printed.

        Args:
            request: The request being processed (unused here).
            exception: The exception that was raised.
            spider: The running spider (unused here).

        Returns:
            ``None`` in every case; no replacement response is built.
        """
        # Exceptions outside the handled set are only reported.
        if not isinstance(exception, self.ALL_EXCEPTIONS):
            print('not contained exception: %s' % exception)
            return None

        # Report which exception was caught.
        print('Got exception: %s' % (exception))

        # Keep popping from the head (blpop is given a timeout of 1) until
        # llen reports the list is empty, then request new cookies.
        emptied = False
        while not emptied:
            self.connect.blpop(ip_cookie_key, 1)
            emptied = self.connect.llen(ip_cookie_key) == 0
        # Imported lazily, exactly where the refresh is needed.
        from TEST.get_tm_cookies import IPCookie
        IPCookie().get_cookies()

        return None