示例#1
0
class ProxySelectorMiddleware(object):
    """Middleware that disables redirects and optionally assigns a proxy.

    Every request processed gets ``meta['dont_redirect'] = True``.  When the
    proxy evaluator is not disabled and ``self.use_proxy(request)`` is true,
    the request is given a ``meta['proxy']`` URL; otherwise any ``proxy``
    entry already present in ``request.meta`` is removed.

    NOTE(review): ``use_proxy`` is called below but is not defined in the
    visible part of this class -- presumably it lives elsewhere; confirm.
    """

    # IOError is raised by the HttpCompression middleware when trying to
    # decompress an empty response
    EXCEPTIONS_TO_RETRY = (
        ServerTimeoutError, UserTimeoutError, DNSLookupError,
        ConnectionRefusedError, ConnectionDone, ConnectError,
        ConnectionLost, TCPTimedOutError,
        IOError)

    def __init__(self, settings):
        """Read the retry and proxy options from ``settings``.

        :param settings: object exposing ``getbool``, ``getint`` and
            ``getlist`` accessors keyed by setting name.
        :raises NotConfigured: if the ``RETRY_ENABLED`` setting is false.
        """
        if not settings.getbool('RETRY_ENABLED'):
            raise NotConfigured
        self.max_retry_times = settings.getint('RETRY_TIMES')
        self.retry_http_codes = set(
            int(x) for x in settings.getlist('RETRY_HTTP_CODES'))
        self.priority_adjust = settings.getint('RETRY_PRIORITY_ADJUST')
        self.proxy_ev = ProxyEvaluator()
        self.proxy_chance = settings.getint('PROXY_CHANCE')
        self.min_level = settings.getint('MIN_LEVEL_FOR_PROXY')

    def process_request(self, request, spider):
        """Mark the request as non-redirecting and set or clear its proxy.

        :param request: request whose ``meta`` mapping is updated in place.
        :param spider: unused by this method.
        :returns: ``None`` (implicitly).
        """
        request.meta['dont_redirect'] = True
        if not self.proxy_ev.is_disabled() and self.use_proxy(request):
            proxy = self.proxy_ev.valid_proxy()
            # NOTE(review): if valid_proxy() can return None this would
            # produce "http://None" -- confirm against ProxyEvaluator.
            log.msg("Using proxy = %s on %s" % (proxy, request.url))
            try:
                request.meta['proxy'] = "http://%s" % proxy
            except Exception as e:
                # Best effort: a failure to attach the proxy is logged and
                # the request goes on without a newly assigned proxy.
                log.msg("Exception %s" % e, _level=log.CRITICAL)
        else:
            # Just in case we have received a request with that flag set:
            # drop it so the request is not proxied.
            request.meta.pop('proxy', None)
示例#2
0
 def __init__(self, settings):
     """Load the retry and proxy configuration from ``settings``.

     Raises ``NotConfigured`` when the ``RETRY_ENABLED`` setting is false.
     """
     retry_enabled = settings.getbool('RETRY_ENABLED')
     if not retry_enabled:
         raise NotConfigured

     # Retry-related options.
     self.max_retry_times = settings.getint('RETRY_TIMES')
     raw_codes = settings.getlist('RETRY_HTTP_CODES')
     self.retry_http_codes = set(map(int, raw_codes))
     self.priority_adjust = settings.getint('RETRY_PRIORITY_ADJUST')

     # Proxy-related state and options.
     self.proxy_ev = ProxyEvaluator()
     self.proxy_chance = settings.getint('PROXY_CHANCE')
     self.min_level = settings.getint('MIN_LEVEL_FOR_PROXY')