Пример #1
0
 def _get_slot(self, request):
     """Look up the downloader slot associated with ``request``.

     The key is the hostname of the request URL (``''`` when there is
     none). When the downloader's ``ip_concurrency`` is truthy, the key is
     swapped for the ``dnscache`` entry stored under that hostname, if any.

     Returns a ``(key, slot)`` tuple where ``slot`` is taken from the
     downloader's ``slots`` mapping or, when that yields nothing truthy,
     from its ``inactive_slots`` mapping.
     """
     downloader = self.crawler.engine.downloader
     hostname = urlparse_cached(request).hostname
     key = hostname if hostname else ''
     if downloader.ip_concurrency:
         # Keep the hostname itself when nothing is cached for it.
         key = dnscache.get(key, key)
     slot = downloader.slots.get(key)
     if not slot:
         slot = downloader.inactive_slots.get(key)
     return key, slot
Пример #2
0
    def _get_slot_key(self, request, spider):
        """Return the download-slot key for ``request``.

        An explicit ``"download_slot"`` entry in ``request.meta`` wins.
        Otherwise the key is the hostname of the request URL (``""`` when
        there is none), replaced by the ``dnscache`` entry for that hostname
        when ``self.ip_concurrency`` is truthy and such an entry exists.

        ``spider`` is accepted but not used here.
        """
        meta = request.meta
        if "download_slot" in meta:
            return meta["download_slot"]

        hostname = urlparse_cached(request).hostname or ""
        if not self.ip_concurrency:
            return hostname
        # Fall back to the hostname when the cache has no entry for it.
        return dnscache.get(hostname, hostname)
Пример #3
0
    def _get_slot_key(self, request, spider):
        """Compute the key of the download slot that ``request`` belongs to.

        Returns ``request.meta['download_slot']`` when that entry exists.
        Otherwise returns the URL hostname (or ``''``), looked up through
        ``dnscache`` when ``self.ip_concurrency`` is truthy; the hostname is
        the fallback for a cache miss. ``spider`` is unused.
        """
        meta = request.meta
        if 'download_slot' not in meta:
            host = urlparse_cached(request).hostname or ''
            return dnscache.get(host, host) if self.ip_concurrency else host
        return meta['download_slot']
Пример #4
0
    def _get_slot_key(self, request, spider):
        """Return the download-slot key for ``request``.

        If ``request.meta`` contains the key named by ``self.DOWNLOAD_SLOT``,
        its value is returned as-is. Otherwise the key is derived from the
        request URL's hostname (``''`` when absent) and, when
        ``self.ip_concurrency`` is truthy, mapped through ``dnscache`` with
        the hostname as the default. ``spider`` is not consulted.
        """
        slot_meta_key = self.DOWNLOAD_SLOT
        meta = request.meta
        if slot_meta_key in meta:
            return meta[slot_meta_key]

        hostname = urlparse_cached(request).hostname
        slot_key = hostname if hostname else ''
        if self.ip_concurrency:
            slot_key = dnscache.get(slot_key, slot_key)
        return slot_key
Пример #5
0
    def _get_slot_key(self, request, spider):
        """Pick the slot key under which ``request`` is scheduled.

        Precedence:
          1. the value stored in ``request.meta`` under ``self.DOWNLOAD_SLOT``;
          2. the ``dnscache`` entry for the URL hostname, when
             ``self.ip_concurrency`` is truthy and the cache has one;
          3. the URL hostname itself, or ``''`` if the URL has none.

        ``spider`` is part of the signature but is not read.
        """
        if self.DOWNLOAD_SLOT not in request.meta:
            host = urlparse_cached(request).hostname or ''
            if not self.ip_concurrency:
                return host
            return dnscache.get(host, host)

        return request.meta[self.DOWNLOAD_SLOT]
Пример #6
0
    def _get_slot_key(self, request, spider):
        """Return the download-slot key for ``request``.

        Uses the value stored in ``request.meta`` under ``self.DOWNLOAD_SLOT``
        when present; otherwise falls back to the URL hostname (``''`` when
        missing), resolved through ``dnscache`` if ``self.ip_concurrency`` is
        truthy. ``spider`` is unused.
        """
        meta = request.meta
        # If request.meta already carries slot information, use it directly.
        if self.DOWNLOAD_SLOT in meta:
            return meta[self.DOWNLOAD_SLOT]

        # Otherwise the key is the hostname, or '' when there is none.
        host = urlparse_cached(request).hostname or ''
        if not self.ip_concurrency:
            return host
        # Take the key for this host from the DNS cache, defaulting to the
        # hostname itself when nothing is cached.
        return dnscache.get(host, host)
Пример #7
0
 def _get_slot(self, request, spider):
     """Return ``(key, slot)`` for ``request``, creating the slot on demand.

     The key is the URL hostname (``''`` when absent), mapped through
     ``dnscache`` when ``self.ip_concurrency`` is truthy. If ``self.slots``
     has no entry for the key, a new ``Slot`` is built from the concurrency
     and delay returned by ``_get_concurrency_delay`` and stored there.
     """
     host = urlparse_cached(request).hostname or ''
     key = dnscache.get(host, host) if self.ip_concurrency else host
     if key in self.slots:
         return key, self.slots[key]

     # Use ip_concurrency when it is truthy, domain_concurrency otherwise.
     base_concurrency = self.ip_concurrency or self.domain_concurrency
     concurrency, delay = _get_concurrency_delay(
         base_concurrency, spider, self.settings)
     self.slots[key] = Slot(concurrency, delay, self.settings)
     return key, self.slots[key]
Пример #8
0
 def _get_slot(self, request, spider):
     """Fetch the slot for ``request``, registering a new one if needed.

     Returns a ``(key, slot)`` tuple. ``key`` is the request URL's hostname
     (or ``''``); with a truthy ``self.ip_concurrency`` it is replaced by
     the matching ``dnscache`` entry when one exists. Missing slots are
     created with the concurrency/delay pair computed by
     ``_get_concurrency_delay`` and cached in ``self.slots``.
     """
     use_ip = self.ip_concurrency
     key = urlparse_cached(request).hostname or ''
     if use_ip:
         key = dnscache.get(key, key)

     slots = self.slots
     if key not in slots:
         # The limit comes from the per-IP setting when it is enabled,
         # from the per-domain setting otherwise.
         limit = use_ip if use_ip else self.domain_concurrency
         limit, delay = _get_concurrency_delay(limit, spider, self.settings)
         slots[key] = Slot(limit, delay, self.settings)
     return key, slots[key]
Пример #9
0
    def resolveHostName(
        self,
        resolutionReceiver,
        hostName,
        portNumber=0,
        addressTypes=None,
        transportSemantics="TCP",
    ):
        """Resolve ``hostName``, answering from ``dnscache`` when possible.

        When ``dnscache`` holds a non-empty entry for ``hostName``, the
        receiver is driven directly: ``resolutionBegan``, one
        ``addressResolved`` per cached address, then ``resolutionComplete``;
        ``resolutionReceiver`` itself is returned.

        Otherwise the call is delegated to ``self.original_resolver`` with a
        wrapping receiver that forwards every callback to
        ``resolutionReceiver`` and, on completion, stores the collected
        addresses (as a tuple) in ``dnscache`` if at least one was resolved.
        The delegate's return value is returned unchanged.
        """
        known_addresses = dnscache.get(hostName)
        if not known_addresses:

            # Receiver that relays to the caller's receiver while recording
            # the addresses so they can be cached once resolution finishes.
            @provider(IResolutionReceiver)
            class CachingResolutionReceiver:

                def __init__(self):
                    self.addresses = []

                def resolutionBegan(self, resolution):
                    resolutionReceiver.resolutionBegan(resolution)

                def addressResolved(self, address):
                    resolutionReceiver.addressResolved(address)
                    self.addresses.append(address)

                def resolutionComplete(self):
                    resolutionReceiver.resolutionComplete()
                    collected = self.addresses
                    # Only cache successful lookups; an empty result is
                    # never written to dnscache.
                    if collected:
                        dnscache[hostName] = tuple(collected)

            return self.original_resolver.resolveHostName(
                CachingResolutionReceiver(),
                hostName,
                portNumber,
                addressTypes,
                transportSemantics,
            )

        # Cache hit: replay the stored addresses to the receiver.
        resolutionReceiver.resolutionBegan(HostResolution(hostName))
        for cached_address in known_addresses:
            resolutionReceiver.addressResolved(cached_address)
        resolutionReceiver.resolutionComplete()
        return resolutionReceiver
Пример #10
0
 def _get_slot(self, request):
     """Return ``(key, slot)`` for ``request`` from the engine's downloader.

     ``key`` is the URL hostname (``''`` when missing), mapped through
     ``dnscache`` when the downloader's ``ip_concurrency`` is truthy.
     ``slot`` is read from ``downloader.slots`` first and from
     ``downloader.inactive_slots`` when the first lookup is falsy.
     """
     downloader = self.crawler.engine.downloader
     host = urlparse_cached(request).hostname or ''
     key = dnscache.get(host, host) if downloader.ip_concurrency else host

     active = downloader.slots.get(key)
     if active:
         return key, active
     return key, downloader.inactive_slots.get(key)
Пример #11
0
 def _get_key(self, request, type):
     """Return the grouping key for ``request``.

     The key is the hostname of the request URL, or ``''`` when it has
     none. When ``type`` equals ``'ip'`` the hostname is looked up in
     ``dnscache`` and the cached value is returned instead, with the
     hostname as the fallback for a cache miss.
     """
     hostname = urlparse_cached(request).hostname or ''
     return dnscache.get(hostname, hostname) if type == 'ip' else hostname
Пример #12
0
 def _get_key(self, request, type):
     """Derive a key for ``request`` from its URL hostname.

     Returns the hostname (``''`` if the URL has none). For ``type == 'ip'``
     the result is the ``dnscache`` entry stored under that hostname, or
     the hostname itself when the cache has no such entry.
     """
     parsed_host = urlparse_cached(request).hostname
     host = parsed_host if parsed_host else ''
     if type == 'ip':
         return dnscache.get(host, host)
     return host