class Tester(object):
    """Re-check the proxies held in Redis by requesting ``TEST_URL`` through them."""

    def __init__(self):
        # Storage handle; this class calls count/batch/max/delete on it.
        self.redis = RedisClient()

    @staticmethod
    def _event_loop():
        """Return a usable event loop, creating and installing one if needed.

        ``asyncio.get_event_loop()`` raises ``RuntimeError`` when the current
        thread has no loop, and a previously closed loop cannot run anything.
        """
        try:
            loop = asyncio.get_event_loop()
        except RuntimeError:
            loop = None
        if loop is None or loop.is_closed():
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
        return loop

    async def test_single_proxy(self, proxy):
        """Request ``TEST_URL`` through one proxy and record the outcome.

        A response whose status is in ``VALID_STATUS_CODES`` results in
        ``self.redis.max(proxy)``. Any other status, or one of the request
        errors caught below, results in ``self.redis.delete(proxy)``.

        :param proxy: proxy address without a scheme (``str`` or UTF-8
            ``bytes``); ``'http://'`` is prepended before it is used.
        :return: None
        """
        headers = {
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
            'accept-encoding': 'gzip, deflate, br',
            'accept-language': 'zh-CN,zh;q=0.9',
            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36',
        }
        try:
            if isinstance(proxy, bytes):
                proxy = proxy.decode('utf-8')
            real_proxy = 'http://' + proxy
            options = {
                "url": TEST_URL,
                "headers": headers,
                "proxy": real_proxy,
                "timeout": 5,
                "allow_redirects": False,
                "verify_ssl": False,
            }
            # NOTE(review): `requests` is awaited and the result exposes
            # `.status`, so it must be an async client rather than the
            # synchronous `requests` package -- confirm against the imports.
            response = await requests.get(**options)
            print(response.status)
            if response.status in VALID_STATUS_CODES:
                self.redis.max(proxy)
            else:
                print('请求响应码不合法 ', response.status, 'IP', proxy)
                self.redis.delete(proxy)
        except (ClientError, aiohttp.client_exceptions.ClientConnectorError,
                asyncio.TimeoutError, AttributeError):
            self.redis.delete(proxy)
            print('代理请求失败', proxy)

    def run(self):
        """Test every stored proxy in batches of ``BATCH_TEST_SIZE``.

        Each batch is fetched with ``self.redis.batch(start, stop)``, checked
        concurrently with :meth:`test_single_proxy`, and followed by a
        five-second pause. Any exception is reported with ``print`` instead of
        being propagated.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            for start in range(0, count, BATCH_TEST_SIZE):
                stop = min(start + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                test_proxies = self.redis.batch(start, stop)
                loop = self._event_loop()
                # asyncio.wait() rejects bare coroutine objects on
                # Python 3.11+, so wrap each check in a Task first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                # asyncio.wait() raises ValueError on an empty collection;
                # an empty batch must not abort the remaining batches.
                if tasks:
                    loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
class ValidityTester(object):
    """Validate candidate proxies and periodically re-check the stored ones.

    Call :meth:`set_raw_proxies` before :meth:`test`, and
    :meth:`set_timing_params` before :meth:`TimingCheck`; those setters create
    the attributes the checks read.
    """

    test_api = TEST_API

    def __init__(self):
        self._raw_proxies = None
        self._usable_proxies = []

    def set_raw_proxies(self, proxies):
        """Store the candidate proxies for :meth:`test` and open a Redis client."""
        self._raw_proxies = proxies
        self._conn = RedisClient()

    def set_timing_params(self):
        """Prepare :meth:`TimingCheck`: open a Redis client and load its targets."""
        self._conn = RedisClient()
        # Fetch every IP currently stored so each one can be re-checked.
        self._all_ips_item = self._conn.getAll()
        self._post_url = ALIE_API

    @staticmethod
    def _event_loop():
        """Return a usable event loop, creating and installing one if needed.

        ``asyncio.get_event_loop()`` raises ``RuntimeError`` when the current
        thread has no loop, and a previously closed loop cannot run anything.
        """
        try:
            loop = asyncio.get_event_loop()
        except RuntimeError:
            loop = None
        if loop is None or loop.is_closed():
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
        return loop

    def _run_all(self, check, proxies):
        """Run ``check(proxy)`` concurrently for every proxy and wait for all.

        :param check: coroutine function taking a single proxy.
        :param proxies: iterable of proxies to check.
        :raises ValueError: when ``proxies`` is empty (raised by
            ``asyncio.wait``); callers report it rather than crash.
        """
        loop = self._event_loop()
        # asyncio.wait() rejects bare coroutine objects on Python 3.11+,
        # so wrap each check in a Task first.
        tasks = [loop.create_task(check(proxy)) for proxy in proxies]
        loop.run_until_complete(asyncio.wait(tasks))

    async def test_single_proxy(self, proxy):
        """Test one proxy; if it is valid, store it via ``self._conn.put``.

        A proxy counts as valid when a GET of ``test_api`` through it returns
        HTTP 200. Failures are only printed.

        :param proxy: proxy address without a scheme (``str`` or UTF-8
            ``bytes``); ``'http://'`` is prepended before it is used.
        """
        try:
            async with aiohttp.ClientSession() as session:
                try:
                    if isinstance(proxy, bytes):
                        proxy = proxy.decode('utf-8')
                    real_proxy = 'http://' + proxy
                    print('Testing', proxy)
                    async with session.get(
                            self.test_api,
                            proxy=real_proxy,
                            timeout=get_proxy_timeout) as response:
                        if response.status == 200:
                            self._conn.put(proxy)
                            print('Valid proxy', proxy)
                except (ProxyConnectionError, TimeoutError, ValueError):
                    print('Invalid proxy', proxy)
        except (ServerDisconnectedError, ClientResponseError,
                ClientConnectorError) as s:
            print(s)

    def test(self):
        """Test all raw proxies concurrently.

        Prints ``'Async Error'`` when a ``ValueError`` is raised, which is
        what ``asyncio.wait`` does when there are no proxies to test.
        """
        print('ValidityTester is working')
        try:
            # test_single_proxy checks whether each IP is usable.
            self._run_all(self.test_single_proxy, self._raw_proxies)
        except ValueError:
            print('Async Error')

    async def TimingCheckFunction(self, proxy):
        """Re-check one stored proxy and delete it if it no longer works.

        The proxy is deleted when a GET of ``self._post_url`` through it
        returns a status other than 200, or raises one of
        ``ProxyConnectionError``, ``TimeoutError`` or ``ValueError``.

        :param proxy: proxy address without a scheme (``str`` or UTF-8
            ``bytes``); ``'http://'`` is prepended before it is used.
        """
        try:
            async with aiohttp.ClientSession() as session:
                try:
                    if isinstance(proxy, bytes):
                        proxy = proxy.decode('utf-8')
                    real_proxy = 'http://' + proxy
                    headers = {'User-Agent': choice(AGENTS)}
                    print('Timing Check Async Ip:' + str(proxy))
                    async with session.get(
                            self._post_url,
                            proxy=real_proxy,
                            timeout=get_proxy_timeout,
                            headers=headers) as response:
                        if response.status != 200:
                            self._conn.delete(proxy)
                            print('Delete Old Invalid Proxy', proxy)
                        else:
                            print('Keep Save IP', proxy)
                except (ProxyConnectionError, TimeoutError, ValueError):
                    print('Foreach Delete Invalid Proxy Error', proxy)
                    self._conn.delete(proxy)
        except (ServerDisconnectedError, ClientResponseError,
                ClientConnectorError) as s:
            # These errors are only reported; the proxy is left in storage.
            print('-------')
            print(s)

    def TimingCheck(self):
        """Re-check every proxy loaded by :meth:`set_timing_params`.

        Prints ``'Timing Check Error'`` when a ``ValueError`` is raised, which
        is what ``asyncio.wait`` does when there is nothing to check.
        """
        try:
            self._run_all(self.TimingCheckFunction, self._all_ips_item)
        except ValueError:
            print('Timing Check Error')
class Tester(object):
    """Re-check stored proxies against a configurable URL and body condition.

    ``data`` is a mapping read for these keys: ``'TEST_URL'``, ``'TEST_tage'``
    (``'in'`` or ``'not in'``), ``'TEST_if'`` (text searched for in the
    response body) and ``'BATCH_TEST_SIZE'``. It is also passed to
    ``RedisClient``.
    """

    def __init__(self, data):
        self.redis = RedisClient(data)
        self.data = data

    @staticmethod
    def _event_loop():
        """Return a usable event loop, creating and installing one if needed.

        ``asyncio.get_event_loop()`` raises ``RuntimeError`` when the current
        thread has no loop, and a previously closed loop cannot run anything.
        """
        try:
            loop = asyncio.get_event_loop()
        except RuntimeError:
            loop = None
        if loop is None or loop.is_closed():
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
        return loop

    def _content_verdict(self, text):
        """Decide whether a response body satisfies the configured condition.

        :param text: response body as ``bytes``.
        :return: ``True``/``False`` for the ``'in'`` and ``'not in'`` modes,
            or ``None`` when ``TEST_tage`` is neither, in which case the
            caller leaves the proxy untouched.
        """
        mode = self.data['TEST_tage']
        if mode not in ('in', 'not in'):
            return None
        found = bytes(self.data['TEST_if'], encoding="utf8") in text
        return found if mode == 'in' else not found

    async def test_single_proxy(self, proxy):
        """Fetch ``TEST_URL`` through one proxy and record the outcome.

        If the body satisfies the configured condition, ``self.redis.max``
        is called; if it does not, ``self.redis.decrease`` is called. On one
        of the request errors caught below, ``self.redis.delete`` is called.

        :param proxy: proxy address without a scheme (``str`` or UTF-8
            ``bytes``); ``'http://'`` is prepended before it is used.
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(self.data['TEST_URL'],
                                       proxy=real_proxy,
                                       timeout=15,
                                       allow_redirects=False) as response:
                    text = await response.read()
                    verdict = self._content_verdict(text)
                    if verdict is True:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    elif verdict is False:
                        self.redis.decrease(proxy)
                        print(
                            '不满足条件{}'.format(self.data['TEST_if'] +
                                             self.data['TEST_tage']),
                            response.status, 'IP', proxy)
            except (ClientError,
                    aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.delete(proxy)
                print('代理请求失败', proxy)

    async def _single_proxy(self, proxy):
        """Placeholder coroutine: it has no body and returns ``None``.

        :param proxy: unused.
        :return: None
        """

    def run(self):
        """Test every stored proxy in batches of ``data['BATCH_TEST_SIZE']``.

        Each batch is fetched with ``self.redis.batch(start, stop)`` and
        checked concurrently with :meth:`test_single_proxy`. Stdout is then
        flushed and the method pauses for five seconds. Any exception is
        reported with ``print`` instead of being propagated.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            batch_size = self.data['BATCH_TEST_SIZE']
            for start in range(0, count, batch_size):
                stop = min(start + batch_size, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                test_proxies = self.redis.batch(start, stop)
                loop = self._event_loop()
                # asyncio.wait() rejects bare coroutine objects on
                # Python 3.11+, so wrap each check in a Task first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                # asyncio.wait() raises ValueError on an empty collection;
                # an empty batch must not abort the remaining batches.
                if tasks:
                    loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)