def download(self, url):
    """Fetch ``url`` and return the response body as text, retrying on failure.

    The first attempt is a plain request. If it raises ``ConnectionError``
    or the response status is not OK, up to ``RETRY_TIME`` further attempts
    are made. Each retry goes either directly or through one valid HTTP
    proxy read from the database, chosen at random between the two.

    Args:
        url: Address of the page to download.

    Returns:
        The response text, decoded with the encoding chardet detects from
        the raw content, or an empty ``unicode`` string when every attempt
        fails.
    """
    try:
        response = requests.get(url, headers=self.headers, timeout=TIMEOUT)
        response.encoding = chardet.detect(response.content)['encoding']
        if response.ok:
            return response.text
        # A non-OK status is handled like a connection failure so that it
        # falls through to the retry logic below.
        raise ConnectionError
    except ConnectionError:
        for _ in range(RETRY_TIME):
            # An empty mapping means "no proxy": use the local machine's IP.
            proxies_list = [{}]
            # Offer one proxy IP from the database as the alternative route.
            condition_dict = {"is_valid": True, "protocol": HTTP_PROTOCOL}
            proxy_list = Client.select(1, condition_dict)
            if proxy_list:
                # Reuse the row already fetched: querying a second time is
                # wasted work and can raise IndexError if that second
                # result comes back empty.
                proxy = proxy_list[0]
                proxies_list.append(
                    {"http": "http://%s:%s" % (proxy.ip, proxy.port)})
            # Randomly pick between the database proxy and the local IP.
            proxies = random.choice(proxies_list)
            try:
                response = requests.get(url,
                                        headers=self.headers,
                                        proxies=proxies,
                                        timeout=TIMEOUT)
                response.encoding = chardet.detect(
                    response.content)['encoding']
            except ConnectionError:
                # A failed retry (e.g. a dead proxy) must not abort the
                # remaining attempts; move on to the next one.
                continue
            if response.ok:
                return response.text
        # Every retry failed or returned a non-OK status.
        return unicode()
def get(self):
    """Return the first stored HTTPS proxy that passes ``check_proxy``.

    Candidates are the records ``Client.select`` yields for
    ``is_valid=True`` and ``protocol=HTTPS_PROTOCOL``. They are checked in
    order and the outcome of every check is written to ``api_logger``.

    Returns:
        A dict with ``message``, ``ip``, ``port`` and ``protocol`` keys for
        the first candidate that passes, or a dict holding only a failure
        ``message`` when no candidate passes (or there are none).
    """
    https_filter = {"is_valid": True, "protocol": HTTPS_PROTOCOL}
    for candidate in Client.select(0, https_filter):
        usable = check_proxy(candidate)
        # Scheme shown in the log lines; derived from the stored protocol.
        scheme = "http" if candidate.protocol == HTTP_PROTOCOL else "https"

        if not usable:
            api_logger.info(
                "ProxyCheck: %s://%s:%d validation fail"
                % (scheme, candidate.ip, candidate.port))
            continue

        found = {
            "message": "Get a https proxy successfully",
            "ip": candidate.ip,
            "port": candidate.port,
            "protocol": "https",
        }
        api_logger.info(
            "ProxyCheck: %s://%s:%d validation pass"
            % (scheme, candidate.ip, candidate.port))
        api_logger.info(
            "Return %s://%s:%d" % (scheme, candidate.ip, candidate.port))
        return found

    # Reached only when the loop finished without returning a proxy.
    not_found = {
        "message": "Get a https proxy unsuccessfully",
    }
    api_logger.info("Return None")
    return not_found
def put_queue(self):
    """Push every proxy record with ``is_valid=False`` onto ``raw_proxy_queue``.

    Records come from ``Client.select`` called with ``count=0``
    (presumably "no limit" -- confirm against ``Client.select``).
    """
    unchecked_filter = {"is_valid": False}
    unchecked = Client.select(count=0, condition_dict=unchecked_filter)
    for record in unchecked:
        self.raw_proxy_queue.put(record)
def put_queue(self):
    """Push every proxy record with ``is_valid=True`` onto ``valid_proxy_queue``.

    Records come from ``Client.select`` called with ``count=0``
    (presumably "no limit" -- confirm against ``Client.select``).
    """
    checked_filter = {"is_valid": True}
    checked = Client.select(count=0, condition_dict=checked_filter)
    for record in checked:
        self.valid_proxy_queue.put(record)