Пример #1
0
    def download(self, url):
        """Fetch ``url`` and return the decoded response body.

        A direct request is made first. If it returns a non-OK status or
        raises ``ConnectionError``, up to ``RETRY_TIME`` further attempts are
        made. Each retry randomly picks between a direct request (empty proxy
        mapping) and, when ``Client.select`` returns one, an HTTP proxy taken
        from the database.

        A request failure during a retry is swallowed so that the remaining
        attempts still run; exceptions other than ``ConnectionError`` raised
        by the first, direct attempt still propagate to the caller.

        :param url: address to fetch.
        :return: the response text, decoded using the encoding detected by
            ``chardet``; an empty ``unicode`` string when every attempt fails.
        """
        try:
            response = requests.get(url, headers=self.headers, timeout=TIMEOUT)
            response.encoding = chardet.detect(response.content)['encoding']
            if response.ok:
                return response.text
            # Treat a non-OK status like a connection failure so that it
            # falls through to the proxy-backed retries below.
            raise ConnectionError
        except ConnectionError:
            pass

        for _ in range(RETRY_TIME):
            # An empty mapping makes requests go out directly (local IP).
            proxies_list = [{}]

            # Offer a proxy stored in the database as the alternative route.
            condition_dict = {"is_valid": True, "protocol": HTTP_PROTOCOL}
            proxy_list = Client.select(1, condition_dict)
            if proxy_list:
                # Reuse the rows already fetched instead of querying again:
                # a second query could come back empty and break the [0].
                proxy = proxy_list[0]
                proxies_list.append(
                    {"http": "http://%s:%s" % (proxy.ip, proxy.port)})

            # Pick at random between the database proxy and the local IP.
            proxies = random.choice(proxies_list)
            try:
                response = requests.get(url,
                                        headers=self.headers,
                                        proxies=proxies,
                                        timeout=TIMEOUT)
            except requests.exceptions.RequestException:
                # One dead proxy or timeout must not abort the other retries.
                continue
            response.encoding = chardet.detect(response.content)['encoding']
            if response.ok:
                return response.text

        # Every attempt failed: signal it with an empty string.
        return unicode()
Пример #2
0
 def get(self):
     """Return the first HTTPS proxy that passes ``check_proxy``.

     Candidates come from ``Client.select(0, ...)`` filtered on
     ``is_valid=True`` and ``protocol=HTTPS_PROTOCOL``. Each candidate is
     checked in order and the outcome of every check is logged.

     :return: a dict with ``message``, ``ip``, ``port`` and ``protocol`` for
         the first candidate that passes, or a dict holding only a failure
         ``message`` when none does.
     """
     candidates = Client.select(
         0, {"is_valid": True, "protocol": HTTPS_PROTOCOL})

     for candidate in candidates:
         if not check_proxy(candidate):
             scheme = ("http" if candidate.protocol == HTTP_PROTOCOL
                       else "https")
             api_logger.info(
                 "ProxyCheck: %s://%s:%d validation fail" %
                 (scheme, candidate.ip, candidate.port))
             continue

         payload = {
             "message": "Get a https proxy successfully",
             "ip": candidate.ip,
             "port": candidate.port,
             "protocol": "https"
         }
         scheme = "http" if candidate.protocol == HTTP_PROTOCOL else "https"
         api_logger.info(
             "ProxyCheck: %s://%s:%d validation pass" %
             (scheme, candidate.ip, candidate.port))
         api_logger.info(
             "Return %s://%s:%d" % (scheme, candidate.ip, candidate.port))
         return payload

     # No candidate passed the check (or there were none at all).
     failure = {
         "message": "Get a https proxy unsuccessfully",
     }
     api_logger.info("Return None")
     return failure
Пример #3
0
 def put_queue(self):
     """Put every proxy selected with ``is_valid=False`` on the raw queue.

     The rows come from ``Client.select(count=0, ...)`` and are pushed onto
     ``self.raw_proxy_queue`` one by one, in the order returned.
     """
     unchecked = Client.select(count=0, condition_dict={"is_valid": False})
     for entry in unchecked:
         self.raw_proxy_queue.put(entry)
Пример #4
0
 def put_queue(self):
     """Put every proxy selected with ``is_valid=True`` on the valid queue.

     The rows come from ``Client.select(count=0, ...)`` and are pushed onto
     ``self.valid_proxy_queue`` one by one, in the order returned.
     """
     checked = Client.select(count=0, condition_dict={"is_valid": True})
     for entry in checked:
         self.valid_proxy_queue.put(entry)