示例#1
0
    def build_check_request(self, item: Proxy):
        """
        Build the request used to check a single proxy.

        The check URL template and the response parser are both obtained
        from ``self.get_check_approach(scheme)``; the template is filled in
        with the proxy's scheme. The request's ``meta`` carries the proxy
        URL, the retry/timeout settings, the item itself and the parser.

        :param item: proxy item providing ``scheme`` and ``url``
        :return: Request whose callback is ``self.check_ip``
        """
        proxy_scheme, proxy_address = item.get('scheme'), item.get('url')
        self.logger.debug('Checking %s' % proxy_address)

        url_template, parser = self.get_check_approach(proxy_scheme)

        request = Request(
            url_template.format(scheme=proxy_scheme),
            callback=self.check_ip,
            meta=dict(
                proxy=proxy_address,
                max_retry_times=5,
                download_timeout=20,
                _item_obj=item,
                _response_parser=parser,
            ),
            dont_filter=True,
        )

        # Only the spider named 'checker' gets the failure handler attached.
        if self.name == 'checker':
            request.errback = self.check_ip_failed

        return request
示例#2
0
    def build_check_recipient(self, ip, port, scheme,
                              user=None, password=None):
        """
        Turn a proxy endpoint into an availability-check request.

        :param ip: ip of the proxy
        :param port: port of the proxy
        :param scheme: scheme of the proxy
        :param user: optional user name
        :param password: optional password
        :return: the request produced by ``build_check_request``, or an
                 empty dict when the proxy already exists (it is dropped)
        :raises exceptions.CloseSpider: when ``complete_condition()`` is true
        """
        if self.complete_condition():
            raise exceptions.CloseSpider('Enough items')

        identity = {'ip': ip, 'port': port, 'scheme': scheme}
        if self.already_exists(identity):
            self.logger.debug('Dropped duplicated: %s' % identity.values())
            return {}  # drop it

        address = utils.build_proxy_url(ip, port, scheme, user, password)
        # Authentication is flagged only when both credentials are given.
        authenticated = bool(user and password)

        proxy = Proxy(
            ip=ip,
            scheme=scheme,
            port=port,
            need_auth=authenticated,
            url=address,
        )
        if authenticated:
            proxy['user'], proxy['password'] = user, password

        return self.build_check_request(proxy)
示例#3
0
    def start_requests(self):
        """
        Yield a check request for every stored proxy that is due.

        Each key from ``self.srv.get_all_keys()`` is loaded with
        ``hgetall_dict``. Records that fail ``valid_format`` are deleted
        from the store; records for which ``exceed_check_period`` is true
        for their ``last_check`` value (0 when absent) are wrapped in a
        ``Proxy`` and passed to ``build_check_request``.
        """
        for stored_key in self.srv.get_all_keys():
            record = self.srv.hgetall_dict(stored_key)
            checked_at = record.get('last_check', 0)

            if valid_format(record):
                if exceed_check_period(checked_at):
                    yield self.build_check_request(Proxy(**record))
            else:
                # Malformed records are removed instead of being checked.
                self.srv.delete(stored_key, 'Error format %s' % record)
示例#4
0
 def _validate_type(self):
     """Ensure ``self._item`` is a ``Proxy``, wrapping it when it is not."""
     if isinstance(self._item, Proxy):
         return
     self._item = Proxy(self._item)