Пример #1
0
    def _retry(self, request, reason, spider):
        """Penalise the proxy used by a failed request, swap in a new one, retry.

        If the request went through a proxy, that proxy is re-registered via
        ``add_or_update`` with its priority value increased by
        ``PRIORITY_RATE``, and a replacement proxy from ``get_proxy`` (if any)
        is written into ``request.meta['proxy']`` before delegating to the
        parent implementation.

        :param request: the request that is about to be retried.
        :param reason: failure reason, forwarded unchanged to the parent.
        :param spider: the running spider; used for logging and forwarded.
        :return: whatever the parent ``_retry`` returns.
        """
        used_proxy = request.meta.get('proxy')
        if used_proxy:
            priority = get_priority(used_proxy)
            if priority:
                add_or_update(used_proxy, priority + PRIORITY_RATE)
            new_proxy = get_proxy()
            if new_proxy:
                request.meta['proxy'] = new_proxy
                # Log the proxy that failed, not the value just written into
                # request.meta, otherwise both fields show the same proxy.
                spider.log('RetryMiddleware used proxy: %s, new proxy: %s' %
                           (used_proxy, new_proxy), logging.DEBUG)

        # Propagate the parent's result: the caller relies on the returned
        # retry request (presumably Scrapy's RetryMiddleware, whose
        # process_response/process_exception use it); dropping it would
        # silently cancel the retry.
        return super(ProxyRetryMiddleware, self)._retry(request, reason, spider)
Пример #2
0
    def _response_downloaded(self, response, request, spider):
        """Record the download latency of a response against its proxy.

        Reads ``download_latency`` and ``proxy`` from ``request.meta``. When
        either is missing, or no slot is found for the request, nothing is
        recorded. Otherwise the latency is converted to milliseconds
        (assuming the meta value is in seconds, as the ``* 1000`` and the
        "ms" log label imply) and stored via ``add_or_update``.

        :param response: the downloaded response; only its body size is used,
            and only for the debug log line.
        :param request: the request that produced ``response``.
        :param spider: the running spider; used for slot lookup and logging.
        :return: ``None``.
        """
        key, slot = self._get_slot(request, spider)
        latency = request.meta.get('download_latency')
        proxy = request.meta.get('proxy')

        # Guard before doing arithmetic: multiplying a missing (None) latency
        # would raise TypeError and make this check unreachable.
        if latency is None or slot is None or proxy is None:
            return

        latency *= 1000  # presumably seconds -> milliseconds

        if self.debug:
            size = len(response.body)
            conc = len(slot.transferring)
            msg = "slot: %s | conc: %2d | latency:%5d ms | size: %6d bytes | proxy:%s" \
                  % (key, conc, latency, size, proxy)
            spider.log(msg, level=logging.DEBUG)

        add_or_update(proxy, latency)
Пример #3
0
 def process_item(self, item, spider):
     """Pass the item's proxy and priority to ``add_or_update``; return the item.

     :param item: mapping that provides the ``"proxy"`` and ``"priority"`` keys.
     :param spider: the spider that produced the item (unused here).
     :return: ``item``, unchanged.
     """
     proxy_address, proxy_priority = item["proxy"], item["priority"]
     add_or_update(proxy_address, proxy_priority)
     return item