class ValidityTester(object): """ 检测代理是否正常 """ def __init__(self): self.redis = RedisClient() async def test_single_proxy(self, proxy): """ 测试单个代理IP :return: """ conn = aiohttp.TCPConnector(verify_ssl=False) async with aiohttp.ClientSession(connector=conn) as session: try: if isinstance(proxy, bytes): proxy = proxy.decode('utf-8') # real_proxy = 'http://' + proxy real_proxy_data = { 'http:': 'http://{}'.format(proxy), 'https:': 'https://{}'.format(proxy), } if TEST_URL.startswith('http:'): real_proxy = real_proxy_data.get('http:') else: real_proxy = real_proxy_data.get('https:') # aiohttp不支持检测https的代理 # print("正在测试ip:{}".format(real_proxy)) async with session.get(url=TEST_URL, proxy=real_proxy, timeout=15, allow_redirects=False) as response: if response.status in VALID_STATUS_CODES: self.redis.max(proxy) # 检测正常,设置分数 # results = await response.text() # print('代理检测正常:', json.loads(results).get('origin')) else: self.redis.decrase(proxy) # 检测不正常,减分 # print("响应状态码不合法:{} - ip:{}".format(response.status, proxy)) except(ClientError, aiohttp.client_exceptions.ClientConnectorError, asyncio.TimeoutError, AttributeError): self.redis.decrase(proxy) # 抛异常减分 # print("请求不到测试地址,代理不能用:{}".format(proxy)) def run(self): """ 检测主函数 :return: """ # print('测试器开始运行') try: count = self.redis.count() print('当前剩余', count, '个代理') for i in range(0, count, BATCH_TEST_SIZE): start = i stop = min(i + BATCH_TEST_SIZE, count) print('正在测试第', start + 1, '-', stop, '个代理') test_proxies = self.redis.batch(start, stop) # 代理的列表 loop = asyncio.get_event_loop() tasks = [self.test_single_proxy(proxy=proxy) for proxy in test_proxies] loop.run_until_complete(asyncio.wait(tasks)) sys.stdout.flush() # 调用sys.stdout.flush()强制它“刷新”缓冲区,这意味着它会将缓冲区中的所有内容写入终端,即使通常它会在执行此操作之前等待 time.sleep(5) except Exception as e: print("测试器错误:{}".format(e.args))