def test_with_metakey_lesser(self):
    # The per-request meta limit (4) is below the RETRY_TIMES setting (5):
    # each request must be retried up to its own effective limit.
    meta_retries = 4
    setting_retries = 5
    spider, middleware = self.get_spider_and_middleware(
        {'RETRY_TIMES': setting_retries})
    request_with_meta = Request(
        self.invalid_url, meta={'max_retry_times': meta_retries})
    request_plain = Request(self.invalid_url)
    for request, expected_retries in (
            (request_with_meta, meta_retries),
            (request_plain, setting_retries)):
        self._test_retry(
            request,
            DNSLookupError('foo'),
            expected_retries,
            spider=spider,
            middleware=middleware,
        )
def test_with_metakey_lesser(self):
    # SETTINGS: RETRY_TIMES is greater than meta(max_retry_times);
    # the smaller per-request value must win for that request only.
    meta_retries = 4
    self.mw.max_retry_times = 5
    req_with_meta = Request(
        self.invalid_url, meta={'max_retry_times': meta_retries})
    req_plain = Request(self.invalid_url)
    self._test_retry(req_with_meta, DNSLookupError('foo'), meta_retries)
    self._test_retry(req_plain, DNSLookupError('foo'),
                     self.mw.max_retry_times)
def test_with_metakey_zero(self):
    # A per-request max_retry_times of zero disables retrying entirely.
    retries = 0
    req = Request(self.invalid_url, meta={'max_retry_times': retries})
    self._test_retry(req, DNSLookupError('foo'), retries)
def test_with_settings_zero(self):
    # RETRY_TIMES = 0 at the middleware level means no retries at all.
    self.mw.max_retry_times = 0
    self._test_retry(
        Request(self.invalid_url),
        DNSLookupError('foo'),
        self.mw.max_retry_times,
    )
def test_without_metakey(self):
    # With no meta override, the middleware-wide RETRY_TIMES applies.
    self.mw.max_retry_times = 5
    self._test_retry(
        Request(self.invalid_url),
        DNSLookupError('foo'),
        self.mw.max_retry_times,
    )
def _ebMX(self, failure, domain):
    """Errback for a failed MX lookup of *domain*.

    If ``self.fallbackToDomain`` is set and the failure is a
    ``DNSNameError`` (no such name), fall back to resolving the bare
    hostname and wrap the resulting address in a synthetic MX record.
    Otherwise, a name error becomes a ``DNSLookupError`` and any other
    failure is passed through unchanged.
    """
    self.log.error("DNS Error for domain %s: %s", domain, failure)
    # Imported here rather than at module level — presumably to avoid an
    # import cycle or an optional dependency at import time; TODO confirm.
    from twisted.names import error as dns_error, dns
    if self.fallbackToDomain:
        # trap() re-raises anything that is not a DNSNameError, so only
        # "no such name" failures reach the fallback path below.
        failure.trap(dns_error.DNSNameError)
        self.log.error("MX lookup failed; attempting to use hostname (%s) directly" % (domain, ))
        # Alright, I admit, this is a bit icky.
        d = self.resolver.getHostByName(domain)

        def cbResolved(addr):
            # Wrap the resolved address in a fake MX record so callers
            # can treat the fallback uniformly with a real MX answer.
            return [dns.Record_MX(name=addr)]

        def ebResolved(err):
            # The fallback host lookup also failed with a name error:
            # normalize to DNSLookupError for the caller.
            err.trap(dns_error.DNSNameError)
            raise DNSLookupError()

        d.addCallbacks(cbResolved, ebResolved)
        return d
    elif failure.check(dns_error.DNSNameError):
        self.log.error("No MX records for %r", domain)
        raise DNSLookupError("No MX found for %r" % (domain, ))
    # Any other DNS failure (timeout, server error, ...) propagates as-is.
    self.log.error("Error during MX query for domain '%s': %s", domain, failure)
    return failure
def test_dont_retry_exc(self):
    """A request flagged with ``dont_retry`` in its meta must not be
    retried on a DNSLookupError: process_exception() returns None so the
    failure propagates unchanged."""
    req = Request('http://www.scrapytest.org/503',
                  meta={'dont_retry': True})
    r = self.mw.process_exception(req, DNSLookupError(), self.spider)
    assert r is None
def resolutionComplete(self):
    """
    See L{IResolutionReceiver.resolutionComplete}
    """
    # Only an unresolved name reaching completion is an error; a
    # successful resolution already fired the deferred elsewhere.
    if not self._resolved:
        self._deferred.errback(DNSLookupError(self._resolution.name))
def resolve(self, hostname):
    """
    Resolve a hostname by looking it up in the C{names} dictionary.
    """
    if hostname in self.names:
        return defer.succeed(self.names[hostname])
    return defer.fail(
        DNSLookupError("FakeResolverReactor couldn't find " + hostname))
def resolutionComplete(self):
    """
    See L{IResolutionReceiver.resolutionComplete}
    """
    results = self._results
    if not results:
        # Nothing resolved: report the failure as a DNS lookup error.
        self._deferred.errback(DNSLookupError(self._resolution.name))
        return
    # Pick one address at random by shuffling in place and taking the head.
    random.shuffle(results)
    self._deferred.callback(results[0])
def test_with_dont_retry(self):
    # Even with max_retry_times = 4 in meta, dont_retry wins: zero retries.
    meta = {'max_retry_times': 4, 'dont_retry': True}
    req = Request(self.invalid_url, meta=meta)
    self._test_retry(req, DNSLookupError('foo'), 0)
def test_without_metakey(self):
    # Only the RETRY_TIMES setting applies when no meta key is present.
    retries = 5
    spider, middleware = self.get_spider_and_middleware(
        {'RETRY_TIMES': retries})
    self._test_retry(
        Request(self.invalid_url),
        DNSLookupError('foo'),
        retries,
        spider=spider,
        middleware=middleware,
    )
def test_with_metakey_zero(self):
    # max_retry_times = 0 in meta disables retrying for that request.
    retries = 0
    spider, middleware = self.get_spider_and_middleware()
    req = Request(self.invalid_url, meta={'max_retry_times': retries})
    self._test_retry(
        req,
        DNSLookupError('foo'),
        retries,
        spider=spider,
        middleware=middleware,
    )
async def _getHostByName(self, name, timeout):
    """Resolve *name* to a single IPv4 address via aiodns.

    Args:
        name: hostname to resolve.
        timeout: timeout in seconds; falsy or non-positive values
            disable the timeout.

    Returns:
        The first resolved address; the result is also cached in
        ``dnscache`` under *name*.

    Raises:
        DNSLookupError: when the lookup fails or times out.
    """
    started = time.time()
    try:
        # NOTE(review): the attribute is spelled "reslover" — presumably
        # a typo for "resolver", but it must match the assignment site,
        # so it is kept as-is here; confirm before renaming.
        if timeout and timeout > 0:
            with async_timeout.timeout(timeout):
                r = await self.reslover.gethostbyname(name, socket.AF_INET)
        else:
            r = await self.reslover.gethostbyname(name, socket.AF_INET)
    except aiodns.error.DNSError as e:
        logger.error(f"resolve {name} {e} {time.time()-started:.5f}")
        # Chain the original DNS error for debuggability.
        raise DNSLookupError() from e
    except asyncio.TimeoutError as e:
        logger.error(
            f"resolve {name} timeout({timeout}) {time.time()-started:.5f}")
        raise DNSLookupError() from e
    result = r.addresses[0]
    logger.debug(f"resolve {name} {result} {time.time()-started:.5f}")
    dnscache[name] = result
    return result
def test_with_dont_retry(self):
    # dont_retry short-circuits retrying regardless of max_retry_times.
    spider, middleware = self.get_spider_and_middleware()
    req = Request(
        self.invalid_url,
        meta={'max_retry_times': 4, 'dont_retry': True},
    )
    self._test_retry(
        req,
        DNSLookupError('foo'),
        0,
        spider=spider,
        middleware=middleware,
    )
def test_failure(self):
    """
    L{SimpleResolverComplexifier} translates a known error result from
    L{IResolverSimple.resolveHostName} into an empty result.
    """
    underlying = SillyResolverSimple()
    wrapper = SimpleResolverComplexifier(underlying)
    receiver = ResultHolder(self)
    self.assertEqual(receiver._started, False)
    wrapper.resolveHostName(receiver, u"example.com")
    # Resolution has begun but not finished, and no addresses arrived yet.
    self.assertEqual(receiver._started, True)
    self.assertEqual(receiver._ended, False)
    self.assertEqual(receiver._addresses, [])
    # Failing the underlying lookup must end the resolution with no
    # addresses rather than propagating the error.
    underlying._requests[0].errback(DNSLookupError("nope"))
    self.assertEqual(receiver._ended, True)
    self.assertEqual(receiver._addresses, [])
def resolveHostName(
    _self,
    resolutionReceiver,
    hostName,
    portNumber=0,
    addressTypes=None,
    transportSemantics="TCP",
):
    # ``self`` here is captured from the enclosing scope; ``_self`` is
    # the resolver object this function is attached to.
    resolutionReceiver.resolutionBegan(HostResolution(hostName))
    if hostName not in self.lookups:
        raise DNSLookupError("OH NO")
    for entry in self.lookups[hostName]:
        resolutionReceiver.addressResolved(entry[0]("TCP", entry[1], portNumber))
    resolutionReceiver.resolutionComplete()
    return resolutionReceiver
def resolveHostName(
    _self,
    resolutionReceiver: IResolutionReceiver,
    hostName: str,
    portNumber: int = 0,
    addressTypes: Optional[Sequence[Type[IAddress]]] = None,
    transportSemantics: str = "TCP",
) -> IResolutionReceiver:
    # ``self`` here is captured from the enclosing scope; ``_self`` is
    # the resolver object this function is attached to.
    resolutionReceiver.resolutionBegan(HostResolution(hostName))
    if hostName not in self.lookups:
        raise DNSLookupError("OH NO")
    for entry in self.lookups[hostName]:
        resolutionReceiver.addressResolved(entry[0]("TCP", entry[1], portNumber))
    resolutionReceiver.resolutionComplete()
    return resolutionReceiver
def getHostByName(self, host):
    # Serve answers from the static host table; unknown hosts surface
    # as DNS failures, mirroring a real resolver.
    if host in self.hosts:
        return self.hosts[host]
    raise DNSLookupError()
def ebResolved(err):
    # Errback: trap() re-raises anything other than DNSNameError; a
    # name error is normalized to DNSLookupError so callers see one
    # uniform failure type.
    err.trap(error.DNSNameError)
    raise DNSLookupError()
    Executes process_exception() method of the middleware.
    """
    # NOTE(review): this fixture's ``def`` line is outside the visible
    # chunk; the body below builds a crawler/spider pair, feeds the
    # parametrized exception through the middleware, and yields the result.
    settings, exception = request.param
    crawler = get_crawler(Spider, settings_dict=settings)
    spider = crawler._create_spider('foo')
    mw = RetryUserAgentMiddleware.from_crawler(crawler)
    req = Request('http://www.scrapytest.org/')
    yield mw.process_exception(req, exception, spider)


@pytest.mark.parametrize('retry_middleware_response', (({
    'FAKEUSERAGENT_FALLBACK': 'firefox'
}, 503), ), indirect=True)
def test_random_ua_set_on_response(retry_middleware_response):
    # A retried 503 response must carry a freshly set User-Agent header.
    assert 'User-Agent' in retry_middleware_response.headers


@pytest.mark.parametrize('retry_middleware_exception', (({
    'FAKEUSERAGENT_FALLBACK': 'firefox'
}, DNSLookupError('Test exception')), ), indirect=True)
def test_random_ua_set_on_exception(retry_middleware_exception):
    # A retry triggered by an exception must also get a User-Agent header.
    assert 'User-Agent' in retry_middleware_exception.headers
def getHostByName(self, name, timeout=None):
    # ``lookups`` is the table of known names defined outside this
    # method; anything else fails the way a real DNS miss would.
    if name in lookups:
        return succeed(lookups[name])
    return fail(DNSLookupError("OH NO: unknown %s" % (name, )))
def getHostByName(self, name, timeout):
    # Answer from the fake ``names`` table; unknown hosts become
    # DNS lookup failures, like a real resolver.
    if name in self.names:
        return succeed(self.names[name])
    return fail(DNSLookupError("FakeResolver couldn't find " + name))
def getHostByName(*args, **kwargs) -> Deferred:
    # Stub resolver: every lookup fails immediately with DNSLookupError,
    # regardless of arguments.
    return fail(DNSLookupError())