class Tester(object):
    """Check stored proxies against TEST_URL and update their Redis scores."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request TEST_URL through one proxy and record the outcome.

        A status found in STATUS_CODE calls ``self.redis.set_max_score``;
        any other status, or any request error, calls
        ``self.redis.decrease``.

        :param proxy: proxy address string without the scheme prefix
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                real_proxy = 'http://' + proxy
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15) as res:
                    if res.status in STATUS_CODE:
                        self.redis.set_max_score(proxy)
                    else:
                        self.redis.decrease(proxy)
            except Exception:
                # A proxy that cannot complete the request is penalised
                # instead of being silently ignored; a bare ``except: pass``
                # would also have swallowed task cancellation.
                self.redis.decrease(proxy)

    def run(self):
        """Test all stored proxies in batches of BATCH_TEST_COUNT.

        Errors raised while batching are printed, not propagated.

        :return: None
        """
        try:
            count = self.redis.get_count()
            for i in range(0, count, BATCH_TEST_COUNT):
                start = i
                stop = min(count, i + BATCH_TEST_COUNT)
                # NOTE(review): ``stop - 1`` suggests get_batch() takes an
                # inclusive end index -- confirm against RedisClient.
                proxies = self.redis.get_batch(start, stop - 1)
                loop = asyncio.get_event_loop()
                # asyncio.wait() rejects bare coroutines on Python 3.11+ and
                # raises ValueError for an empty collection, so wrap each
                # coroutine in a Task and skip empty batches.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in proxies]
                if tasks:
                    loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
class Filter:
    """Validate the proxies held by RedisClient by requesting TEST_URL."""

    # Seconds to wait for TEST_URL before the proxy counts as failed.
    REQUEST_TIMEOUT = 15

    def __init__(self):
        self.db = RedisClient()

    async def check_one(self, proxy):
        """Request TEST_URL through one proxy and update the store.

        A ``requests.RequestException`` (including a timeout) removes the
        proxy from the store.

        :param proxy: proxy address string without the scheme prefix
        :return: None
        """
        proxies = {'http': 'http://' + proxy}
        try:
            print('正在测试: {}'.format(proxy))
            # requests is blocking: run it in the default executor so the
            # event loop can overlap the checks, and bound it with a timeout
            # so a dead proxy cannot hang the run forever.
            loop = asyncio.get_event_loop()
            r = await loop.run_in_executor(
                None,
                lambda: requests.get(TEST_URL, proxies=proxies,
                                     timeout=self.REQUEST_TIMEOUT),
            )
        except requests.RequestException:
            print('检测失败', proxy)
            self.db.remove(proxy)
            return
        if r.status_code == 200:
            print('代理可用', proxy)
            # NOTE(review): decrease() is called on success here -- confirm
            # the scoring direction used by RedisClient.
            self.db.decrease(proxy)

    def run(self):
        """Check every proxy returned by ``self.db.batch()``.

        Errors raised while scheduling are printed, not propagated.

        :return: None
        """
        print('===开始测试代理===')
        try:
            print('当前代理个数:{}'.format(self.db.count))
            loop = asyncio.get_event_loop()
            tasks = [loop.create_task(self.check_one(proxy.decode()))
                     for proxy in self.db.batch()]
            # asyncio.wait() raises ValueError on an empty collection.
            if tasks:
                loop.run_until_complete(asyncio.wait(tasks))
        except Exception as e:
            print('测试错误', e.args)
class Tester(object):
    """Probe the proxies stored under one Redis key and record the results."""

    # Request headers sent with every probe; built once instead of per call.
    _HEADERS = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'en;q=0.9,ja;q=0.8,fr;q=0.7',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0',
        # 'Upgrade-Insecure-Requests': 1,
        'Connection': 'close',
    }

    def __init__(self, redis_key):
        self.redis = RedisClient(redis_key)

    async def test_single_proxy(self, proxy):
        """Fetch TEST_URL through a single proxy and record the result.

        A status in VALID_STATUS_CODES calls ``self.redis.max``; any other
        status, or one of the caught request errors, calls
        ``self.redis.decrease``.

        :param proxy: proxy address (str or UTF-8 bytes) without a scheme
        :return: None
        """
        conn = aiohttp.TCPConnector(ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL, headers=self._HEADERS, proxy=real_proxy,
                                       timeout=TIMEOUT, allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法 ', response.status, 'IP', proxy)
            except (ClientError, ClientConnectorError, asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Test every stored proxy in batches of BATCH_TEST_SIZE.

        Errors raised while batching are printed, not propagated.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            for i in range(0, count, BATCH_TEST_SIZE):
                start = i
                stop = min(i + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                test_proxies = self.redis.batch(start, stop)
                loop = asyncio.get_event_loop()
                # asyncio.wait() rejects bare coroutines on Python 3.11+ and
                # raises ValueError for an empty collection, so wrap each
                # coroutine in a Task and skip empty batches.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                if tasks:
                    loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
class Tester:
    """Probe proxies held by RedisClient and record which ones still work."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Fetch TEST_URL through a single proxy and record the result.

        :param proxy: proxy address (str or UTF-8 bytes) without a scheme
        :return: None
        """
        conn = aiohttp.TCPConnector(ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                # The store may hand back bytes; normalise to str first.
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试')
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15,
                                       allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        # Accepted status: promote the proxy.
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        # Unexpected status: penalise the proxy.
                        self.redis.decrease(proxy)
                        print('请求响应码不合理', response.status, 'IP', proxy)
            except (ClientError, aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败______________', proxy)

    def run(self):
        """Test every stored proxy in batches of BATCH_TEST_SIZE.

        Errors raised while batching are printed, not propagated.

        :return: None
        """
        print('检测器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            for i in range(0, count, BATCH_TEST_SIZE):
                start = i
                stop = min(i + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                test_proxies = self.redis.batch(start, stop)
                loop = asyncio.get_event_loop()
                # asyncio.wait() rejects bare coroutines on Python 3.11+ and
                # raises ValueError for an empty collection, so wrap each
                # coroutine in a Task and skip empty batches.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                if tasks:
                    loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
class Tester(object):
    """Probe proxies held by RedisClient and record which ones still work."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Fetch TEST_URL through a single proxy and record the result.

        A status in VALID_STATUS_CODES calls ``self.redis.max``; any other
        status, or one of the caught request errors, calls
        ``self.redis.decrease``.

        :param proxy: proxy address (str or UTF-8 bytes) without a scheme
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                logger.debug(f'正在测试 {proxy}')
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15,
                                       allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        logger.debug(f'代理可用 {proxy}')
                    else:
                        self.redis.decrease(proxy)
                        logger.debug(f'请求响应码不合法 {response.status}, IP {proxy}')
            except (ClientError, aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                logger.debug(f'代理请求失败 {proxy}')

    def run(self):
        """Test every stored proxy in batches of BATCH_TEST_SIZE.

        Errors raised while batching are logged, not propagated.

        :return: None
        """
        logger.debug('测试器开始运行')
        try:
            count = self.redis.count()
            logger.debug(f'当前剩余 {count} 个代理')
            for i in range(0, count, BATCH_TEST_SIZE):
                start = i
                stop = min(i + BATCH_TEST_SIZE, count)
                # Log the 1-based range; the previous expression subtracted
                # ``stop`` and logged a meaningless negative number.
                logger.debug(f'正在测试第 {start + 1}-{stop} 个代理')
                test_proxies = self.redis.batch(start, stop)
                loop = asyncio.get_event_loop()
                # asyncio.wait() rejects bare coroutines on Python 3.11+ and
                # raises ValueError for an empty collection, so wrap each
                # coroutine in a Task and skip empty batches.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                if tasks:
                    loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            logger.debug(f'测试器发生错误 {e.args}')
class Tester(object):
    """Probe proxies held by RedisClient and record which ones still work."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Fetch TEST_URL through a single proxy and record the result.

        A status in VALID_STATUS_CODES calls ``self.redis.max``; any other
        status, or one of the caught request errors, calls
        ``self.redis.decrease``.

        :param proxy: proxy address (str or UTF-8 bytes) without a scheme
        :return: None
        """
        # NOTE(review): the Host header is pinned to www.sogou.com --
        # confirm that TEST_URL points at that host.
        headers = {
            "Connection": "keep-alive",
            "Host": "www.sogou.com",
            "Pragma": "no-cache",
            "User-Agent": 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36',
        }
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试' + proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15,
                                       allow_redirects=False, headers=headers) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用' + proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法 ' + str(response.status) + 'IP' + proxy)
            except (ClientError, aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败{}'.format(proxy))

    def run(self):
        """Test every stored proxy in batches of BATCH_TEST_SIZE.

        Errors raised while batching are printed, not propagated.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余{}个代理'.format(count))
            for i in range(0, count, BATCH_TEST_SIZE):
                start = i
                stop = min(i + BATCH_TEST_SIZE, count)
                print('正在测试第{}-{}个代理'.format(start + 1, stop))
                test_proxies = self.redis.batch(start, stop)
                loop = asyncio.get_event_loop()
                # asyncio.wait() rejects bare coroutines on Python 3.11+ and
                # raises ValueError for an empty collection, so wrap each
                # coroutine in a Task and skip empty batches.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                if tasks:
                    loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误{}'.format(e.args))
class Tester(object):
    """Probe proxies held by RedisClient and record which ones still work."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Fetch TEST_URL through a single proxy and record the result.

        A status in VALID_STATUS_CODES calls ``self.redis.max``; any other
        status, or one of the caught request errors, calls
        ``self.redis.decrease``.

        :param proxy: proxy address (str or UTF-8 bytes) without a scheme
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在验证 --> ', real_proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('验证成功 -->', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('验证失败 Code 不合法-->', proxy)
            except (ClientError, aiohttp.client_exceptions.ClientConnectionError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('验证失败 -->', proxy)

    def run(self):
        """Test everything ``self.redis.all()`` returns, BATCH_TEST_SIZE at a time.

        Errors raised while batching are printed, not propagated.

        :return: None
        """
        print("验证程序启动")
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutines on Python 3.11+,
                # so wrap each coroutine in a Task first.
                tasks = [loop.create_task(self.test_single_proxy(proxy=proxy))
                         for proxy in test_proxies]
                if tasks:
                    loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(3)
        except Exception as e:
            print('测试中发生了错误 --> ', e.args)
class Tester(object):
    """Probe stored proxies, sharing one aiohttp session per batch."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, session, proxy):
        """Fetch TEST_URL through one proxy and record the result.

        The stored entry is parsed as a dict literal and its ``'https'``
        value is used as the proxy URL. On a valid status the response body
        is parsed the same way and the proxy is promoted only when the
        echoed ``X-Forwarded-For`` header matches the proxy host.

        :param session: aiohttp client session shared by the batch
        :param proxy: stored proxy entry (str or bytes holding a dict literal)
        :return: None
        """
        import ast

        try:
            # SECURITY: this used to be eval() on stored data and on the body
            # returned by a remote server, which executes arbitrary code.
            # ast.literal_eval accepts the same literal dicts and nothing else.
            proxy_text = proxy.decode('utf-8') if isinstance(proxy, bytes) else proxy
            real_proxy = ast.literal_eval(proxy_text)['https']
            print('正在测试', proxy)
            async with session.get(TEST_URL, proxy=real_proxy, timeout=20,
                                   allow_redirects=False) as response:
                if response.status in VALID_STATUS_CODES:
                    rst = await response.text()
                    if rst:
                        forwarded_for = ast.literal_eval(rst).get('headers').get('X-Forwarded-For')
                        proxy_ip = real_proxy.split(':')
                        # 'scheme://host:port'.split(':')[1] is '//host'.
                        # A missing header no longer raises TypeError.
                        if forwarded_for is not None and '//' + forwarded_for == proxy_ip[1]:
                            self.redis.max(proxy)
                            print('代理可用', proxy)
                else:
                    self.redis.decrease(proxy)
                    print('请求响应码不合法 ', response.status, 'IP', proxy)
        except (ClientError, aiohttp.client_exceptions.ClientConnectorError,
                asyncio.TimeoutError, AttributeError, ValueError, SyntaxError):
            # ValueError / SyntaxError: the entry or the body was not a
            # literal that could be parsed; treat it as a failed check.
            self.redis.decrease(proxy)
            print('代理请求失败', proxy)

    async def set_test_tasks(self, loop):
        """Create and await the test tasks one batch at a time.

        :param loop: event loop passed through to aiohttp.ClientSession
        :return: None
        """
        # NOTE(review): ``count`` is read as an attribute, not called --
        # presumably a property on this RedisClient; confirm.
        count = self.redis.count
        print('当前剩余', count, '个代理')
        # One session per batch, so each batch's connections are closed
        # before the next batch starts.
        for start in range(0, count, BATCH_TEST_SIZE):
            stop = min(start + BATCH_TEST_SIZE, count)
            print('正在测试第', start + 1, '-', stop, '个代理')
            test_proxies = self.redis.batch(start, stop)
            conn = aiohttp.TCPConnector()
            async with aiohttp.ClientSession(connector=conn, loop=loop) as session:
                # asyncio.wait() rejects bare coroutines on Python 3.11+ and
                # raises ValueError for an empty collection.
                tasks = [asyncio.ensure_future(self.test_single_proxy(session, proxy))
                         for proxy in test_proxies]
                if tasks:
                    await asyncio.wait(tasks)

    def run(self):
        """Run one full test pass; errors are printed, not propagated.

        :return: None
        """
        print('测试器开始运行')
        try:
            loop = asyncio.get_event_loop()
            loop.run_until_complete(self.set_test_tasks(loop))
            sys.stdout.flush()  # emit buffered output right away
            time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
class Tester(object):
    """Probe proxies held by RedisClient and record which ones still work."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Fetch TEST_URL through a single proxy and record the result.

        A status in VALID_STATUS_CODES calls ``self.redis.max``; any other
        status, or one of the caught request errors, calls
        ``self.redis.decrease``.

        :param proxy: proxy address (str or UTF-8 bytes) without a scheme
        :return: None
        """
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"}
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL, proxy=real_proxy, headers=headers, timeout=15,
                                       allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法 ', response.status, 'IP', proxy)
            except (ClientError, aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Test every stored proxy in batches of BATCH_TEST_SIZE.

        Errors raised while batching are printed, not propagated.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            for i in range(0, count, BATCH_TEST_SIZE):
                start = i
                stop = min(i + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                test_proxies = self.redis.batch(start, stop)
                loop = asyncio.get_event_loop()
                # asyncio.wait() rejects bare coroutines on Python 3.11+ and
                # raises ValueError for an empty collection, so wrap each
                # coroutine in a Task and skip empty batches.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                if tasks:
                    loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
class Tester(object):
    """Probe proxies held by RedisClient and record which ones still work."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Fetch TEST_URL through a single proxy and record the result.

        A status in VALID_STATUS_CODES calls ``self.redis.max``; any other
        status, or one of the caught request errors, calls
        ``self.redis.decrease``.

        :param proxy: proxy address (str or UTF-8 bytes) without a scheme
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15,
                                       allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法 ', response.status, 'IP', proxy)
            except (ClientError, aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Test every stored proxy in batches of BATCH_TEST_SIZE.

        Errors raised while batching are printed, not propagated.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            for i in range(0, count, BATCH_TEST_SIZE):
                start = i
                stop = min(i + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                test_proxies = self.redis.batch(start, stop)
                loop = asyncio.get_event_loop()
                # asyncio.wait() rejects bare coroutines on Python 3.11+ and
                # raises ValueError for an empty collection, so wrap each
                # coroutine in a Task and skip empty batches.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                if tasks:
                    loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
class Tester(object):
    """Probe proxies held by RedisClient and record which ones still work."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Check whether a single proxy is usable.

        A status in VALID_STATUS_CODES calls ``self.redis.max``; any other
        status, or one of the caught request errors, calls
        ``self.redis.decrease``.

        :param proxy: the proxy under test (str or UTF-8 bytes), no scheme
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                # The colon after this condition was missing, which made the
                # whole module fail to compile.
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试 ', proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用 ', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求相应码不合法', proxy)
            except (ServerDisconnectedError, ClientResponseError, ClientConnectorError,
                    asyncio.TimeoutError):
                # A timeout is the usual failure of a dead proxy and must be
                # penalised like the other request errors.
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Test everything ``self.redis.all()`` returns, BATCH_TEST_SIZE at a time.

        Errors raised while batching are printed, not propagated.

        :return: None
        """
        print('测试器开始运行')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            # Test in batches.
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutines on Python 3.11+,
                # so wrap each coroutine in a Task first.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                if tasks:
                    loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('c测试器f发生c错误', e.args)
class Tester(object):
    """Probe proxies held by RedisClient and record which ones still work."""

    def __init__(self):
        self.client = RedisClient()

    async def test(self, proxy):
        """Fetch TEST_URL through a single proxy and record the result.

        A status in VALID_STATUS calls ``self.client.set_vaild``; any other
        status, or any exception, calls ``self.client.decrease``.

        :param proxy: proxy address (str or UTF-8 bytes) without a scheme
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                proxy_data = 'http://' + proxy
                async with session.get(TEST_URL, proxy=proxy_data, timeout=15) as response:
                    logger.info('测试:%s,结果:%s', proxy_data, response.status)
                    if response.status in VALID_STATUS:
                        logger.info('代理测试可用')
                        self.client.set_vaild(proxy)
                    else:
                        logger.info('代理测试不可用,分值减一')
                        self.client.decrease(proxy)
            except Exception:
                # Any failure counts against the proxy; the unused exception
                # binding was dropped.
                logger.info('%s 测试失败,分值减一', proxy)
                self.client.decrease(proxy)

    def run(self):
        """Test everything ``self.client.all()`` returns, TEST_BATCH_SIZE at a time.

        Errors raised while batching are logged with their traceback, not
        propagated.

        :return: None
        """
        logger.info('开始测试...')
        try:
            proxies = self.client.all()
            loop = asyncio.get_event_loop()
            for i in range(0, len(proxies), TEST_BATCH_SIZE):
                test_proxy = proxies[i:i + TEST_BATCH_SIZE]
                # asyncio.wait() rejects bare coroutines on Python 3.11+,
                # so wrap each coroutine in a Task first.
                tester_list = [loop.create_task(self.test(proxy)) for proxy in test_proxy]
                if tester_list:
                    loop.run_until_complete(asyncio.wait(tester_list))
                sleep(5)
        except Exception:
            # logger.exception records the traceback, which the plain info
            # message used to discard.
            logger.exception('测试发生错误')
class Tester:
    """Probe proxies held by RedisClient and record which ones still work."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Fetch TEST_API through a single proxy and record the result.

        A 200 status calls ``self.redis.max``; any other status, or one of
        the caught request errors, calls ``self.redis.decrease``.

        :param proxy: proxy address (str or UTF-8 bytes) without a scheme
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode("utf-8")
                format_proxy = "http://" + proxy
                async with session.get(TEST_API, proxy=format_proxy, timeout=PROXY_TIMEOUT,
                                       allow_redirects=False) as response:
                    if response.status == 200:
                        self.redis.max(proxy)
                    else:
                        self.redis.decrease(proxy)
            except (ClientError, aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)

    def run(self):
        """Test every stored proxy in batches of BATCH_TEST_SIZE.

        Errors raised while batching are printed, not propagated.

        :return: None
        """
        print("测试器开始运行")
        try:
            count = self.redis.count()
            print("当前剩余%s个代理" % count)
            for i in range(0, count, BATCH_TEST_SIZE):
                start = i
                stop = min(i + BATCH_TEST_SIZE, count)
                print("正在测试第%s个--第%s个代理" % (start + 1, stop))
                test_proxies = self.redis.batch(start, stop)
                loop = asyncio.get_event_loop()
                # asyncio.wait() rejects bare coroutines on Python 3.11+ and
                # raises ValueError for an empty collection, so wrap each
                # coroutine in a Task and skip empty batches.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                if tasks:
                    loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print("代理测试器发生错误%s" % e.args)
class Tester(object):
    """Probe proxies held by RedisClient and record which ones still work."""

    def __init__(self):
        """Create the Redis client and grab the event loop used by run()."""
        self.redis = RedisClient()
        self.loop = asyncio.get_event_loop()

    async def test(self, proxy: Proxy):
        """Fetch TEST_URL through a single proxy and record the result.

        A status in TEST_VALID_STATUS calls ``self.redis.max``; any other
        status, or an exception listed in EXCEPTIONS, calls
        ``self.redis.decrease``.

        :param proxy: proxy object; ``proxy.string()`` supplies the address
        :return: None
        """
        async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False)) as session:
            try:
                logger.debug(f'测试 {proxy.string()}')
                async with session.get(TEST_URL, proxy=f'http://{proxy.string()}',
                                       timeout=TEST_TIMEOUT, allow_redirects=False) as response:
                    if response.status in TEST_VALID_STATUS:
                        self.redis.max(proxy)
                        logger.debug(f'代理 {proxy.string()} 可用, 加分')
                    else:
                        self.redis.decrease(proxy)
                        logger.debug(f'代理 {proxy.string()} 无效, 减分')
            except EXCEPTIONS:
                self.redis.decrease(proxy)
                logger.debug(f'代理 {proxy.string()} 无效, 减分')

    def run(self):
        """Test every stored proxy in batches of TEST_BATCH.

        :return: None
        """
        logger.info('启动测试器......')
        count = self.redis.count()
        logger.debug(f'{count} 个代理等待测试')
        for i in range(0, count, TEST_BATCH):
            # Index window of the proxies covered by this batch.
            start, end = i, min(i + TEST_BATCH, count)
            logger.debug(f'测试索引值从 {start} 到 {end} 的代理')
            proxies = self.redis.batch(start, end)
            # asyncio.wait() rejects bare coroutines on Python 3.11+ and
            # raises ValueError for an empty collection. Nothing catches
            # that here, so wrap each coroutine in a Task and skip empty
            # batches.
            tasks = [self.loop.create_task(self.test(proxy)) for proxy in proxies]
            if tasks:
                self.loop.run_until_complete(asyncio.wait(tasks))
class Tester(object):
    """Probe proxies held by RedisClient and record which ones still work."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Fetch TEST_URL through a single proxy and record the result.

        A status in VALID_STATUS_CODES calls ``self.redis.max``; any other
        status, or one of the caught request errors, calls
        ``self.redis.decrease``.

        :param proxy: proxy address (str or UTF-8 bytes) without a scheme
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('Testing:', proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码非法', proxy)
            except (ClientError, aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Test everything ``self.redis.all()`` returns, BATCH_TEST_SIZE at a time.

        Errors raised while batching are printed, not propagated.

        :return: None
        """
        print('测试器开始运行')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            # Test in batches.
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutines on Python 3.11+,
                # so wrap each coroutine in a Task first.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                if tasks:
                    loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
class Tester(object):
    """Probe stored proxies using test settings taken from a ``data`` mapping.

    Keys read from ``data``: ``TEST_URL``, ``TEST_tage`` ('in' or 'not in'),
    ``TEST_if`` (text looked for in the response body) and
    ``BATCH_TEST_SIZE``.
    """

    def __init__(self, data):
        self.redis = RedisClient(data)
        self.data = data

    async def test_single_proxy(self, proxy):
        """Fetch the configured URL through one proxy and record the result.

        The body is checked for ``data['TEST_if']`` according to
        ``data['TEST_tage']``. A satisfied condition calls
        ``self.redis.max``; an unsatisfied one calls
        ``self.redis.decrease``. A caught request error calls
        ``self.redis.delete``.

        :param proxy: proxy address (str or UTF-8 bytes) without a scheme
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(self.data['TEST_URL'], proxy=real_proxy, timeout=15,
                                       allow_redirects=False) as response:
                    text = await response.read()
                    mode = self.data['TEST_tage']
                    # Any other mode leaves the proxy untouched, as before.
                    if mode in ('in', 'not in'):
                        found = bytes(self.data['TEST_if'], encoding="utf8") in text
                        satisfied = found if mode == 'in' else not found
                        if satisfied:
                            self.redis.max(proxy)
                            print('代理可用', proxy)
                        else:
                            self.redis.decrease(proxy)
                            print('不满足条件{}'.format(self.data['TEST_if'] + self.data['TEST_tage']),
                                  response.status, 'IP', proxy)
            except (ClientError, aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.delete(proxy)
                print('代理请求失败', proxy)

    async def _single_proxy(self, proxy):
        """Placeholder coroutine with no behaviour of its own.

        :param proxy: unused
        :return: None
        """

    def run(self):
        """Test every stored proxy in batches of ``data['BATCH_TEST_SIZE']``.

        Errors raised while batching are printed, not propagated.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            for i in range(0, count, self.data['BATCH_TEST_SIZE']):
                start = i
                stop = min(i + self.data['BATCH_TEST_SIZE'], count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                test_proxies = self.redis.batch(start, stop)
                loop = asyncio.get_event_loop()
                # asyncio.wait() rejects bare coroutines on Python 3.11+ and
                # raises ValueError for an empty collection, so wrap each
                # coroutine in a Task and skip empty batches.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                if tasks:
                    loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)

# Tester().run()
class Tester(object):
    """Probe proxies held by RedisClient, re-checking the 'useful' ones first."""

    def __init__(self):
        self.redis = RedisClient()
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
        }

    async def test_single_proxy(self, proxy):
        """Fetch TEST_URL through a single proxy and record the result.

        A status in VALID_STATUS_CODES calls ``self.redis.max``; any other
        status, or one of the caught request errors, calls
        ``self.redis.decrease``.

        :param proxy: proxy address (str or UTF-8 bytes) without a scheme
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(url=TEST_URL, proxy=real_proxy, headers=self.headers,
                                       timeout=15, allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法 ', response.status, 'IP', proxy)
            except (ClientError, aiohttp.ClientConnectorError, asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Re-test the proxies from ``all_useful()``, then the whole store.

        The useful proxies go first so that invalid ones are weeded out
        before the rest of the store is processed.

        :return: None
        """
        print('测试器开始运行')
        count = self.redis.count()
        print('当前剩余', count, '个代理')
        useful_ip = self.redis.all_useful()
        if useful_ip:
            count_usefully = len(useful_ip)
            print('第一个有用的代理: {}'.format(useful_ip[0]), '共{}个'.format(count_usefully))
            for i in range(0, count_usefully, BATCH_TEST_SIZE):
                start = i
                stop = min(i + BATCH_TEST_SIZE, count_usefully)
                print('正在测试第', start + 1, '-', stop, '个代理(usefully)')
                # Slices are end-exclusive: ``stop + 1`` pulled in the first
                # proxy of the next batch and tested it twice.
                self.batch_proxies(useful_ip[start:stop])
        else:
            print('当前无可用代理,请等待...')
        for i in range(0, count, BATCH_TEST_SIZE):
            start = i
            stop = min(i + BATCH_TEST_SIZE, count)
            print('正在测试第', start + 1, '-', stop, '个代理(normally)')
            test_proxies = self.redis.batch(start, stop)
            self.batch_proxies(test_proxies)

    def batch_proxies(self, test_proxies):
        """Test one batch of proxies concurrently, then pause five seconds.

        Errors are printed, not propagated.

        :param test_proxies: iterable of proxies to test
        :return: None
        """
        try:
            loop = asyncio.get_event_loop()
            # asyncio.wait() rejects bare coroutines on Python 3.11+ and
            # raises ValueError for an empty collection, so wrap each
            # coroutine in a Task and skip empty batches.
            tasks = [loop.create_task(self.test_single_proxy(proxy))
                     for proxy in test_proxies]
            if tasks:
                loop.run_until_complete(asyncio.wait(tasks))
            sys.stdout.flush()
            time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
class Tester(object):
    """Probe proxies held by RedisClient and record which ones still work."""

    def __init__(self):
        # Shared store client used by the other methods.
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Fetch TEST_URL through a single proxy and record the result.

        A status in VALID_STATUS_CODES calls ``self.redis.max``; any other
        status, or one of the caught request errors, calls
        ``self.redis.decrease``.

        :param proxy: proxy address (str or UTF-8 bytes) without a scheme
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        # ClientSession plays the role of a requests session: its get()
        # fetches the page.
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                # The proxy is passed to get() through the ``proxy`` argument.
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15,
                                       allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法', response.status, 'IP', proxy)
            except (ClientError, aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('请求代理失败', proxy)

    def run(self):
        """Test every stored proxy in batches of BATCH_TEST_SIZE.

        Errors raised while batching are printed, not propagated.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            for i in range(0, count, BATCH_TEST_SIZE):
                start = i
                stop = min(i + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                # Fetch the proxies of this batch.
                test_proxies = self.redis.batch(start, stop)
                loop = asyncio.get_event_loop()
                # asyncio.wait() rejects bare coroutines on Python 3.11+ and
                # raises ValueError for an empty collection, so wrap each
                # coroutine in a Task and skip empty batches.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                if tasks:
                    loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
class Tester(object):
    """Probe stored proxies, optionally per mode (REDIS_HTTP / REDIS_HTTPS)."""

    def __init__(self):
        self.redis = RedisClient()
        self.spider_log = logging.getLogger(TESTLOGGER)

    async def test_single_proxy(self, proxy, mode=None):
        """Fetch the mode's test URL through one proxy and record the result.

        ``mode`` of None or REDIS_HTTP requests TEST_URL; REDIS_HTTPS
        requests HTTPSTEST_URL. A status in VALID_STATUS_CODES calls
        ``self.redis.max``; any other status, or one of the caught request
        errors, calls ``self.redis.decrease``.

        :param proxy: proxy address (str or UTF-8 bytes) without a scheme
        :param mode: None, REDIS_HTTP or REDIS_HTTPS
        :return: None
        :raises ValueError: if ``mode`` is none of the supported values
        """
        if isinstance(proxy, bytes):
            proxy = proxy.decode('utf-8')
        if mode is None or mode == REDIS_HTTP:
            url = TEST_URL
            proxy_prefix = 'http'
        elif mode == REDIS_HTTPS:
            url = HTTPSTEST_URL
            proxy_prefix = 'https'
        else:
            # This used to fall through and die later with an
            # UnboundLocalError on ``url``.
            raise ValueError('unsupported mode: %r' % (mode,))
        # The proxy URL always uses the http scheme, in HTTPS mode too;
        # only the target URL differs.
        test_proxy = 'http://' + proxy
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                async with session.get(url, proxy=test_proxy, timeout=15,
                                       allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy, mode)
                    else:
                        self.redis.decrease(proxy, mode)
                        # Logger.warn is a deprecated alias of warning().
                        self.spider_log.warning('请求响应码不合法 ' + str(response.status) + 'IP' + proxy_prefix + ":" + proxy)
            except (ClientError, aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy, mode)

    def run(self, mode=None):
        """Test every proxy stored for ``mode`` in batches of BATCH_TEST_SIZE.

        Errors raised while batching are logged with their traceback, not
        propagated.

        :param mode: None (uses REDIS_KEY for log output) or a store key
        :return: None
        """
        self.spider_log.info('测试器定时开始')
        rediskey = REDIS_KEY if mode is None else mode
        try:
            count = self.redis.count(mode)
            self.spider_log.info('测试器开始运行' + rediskey + '当前剩余' + str(count) + '个代理')
            for i in range(0, count, BATCH_TEST_SIZE):
                start = i
                stop = min(i + BATCH_TEST_SIZE, count)
                self.spider_log.info('正在测试第' + str(start + 1) + '-' + str(stop) + '个' + rediskey + '代理')
                test_proxies = self.redis.batch(start, stop, mode=mode)
                loop = asyncio.get_event_loop()
                # asyncio.wait() rejects bare coroutines on Python 3.11+ and
                # raises ValueError for an empty collection, so wrap each
                # coroutine in a Task and skip empty batches.
                tasks = [loop.create_task(self.test_single_proxy(proxy, mode=mode))
                         for proxy in test_proxies]
                if tasks:
                    loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            self.spider_log.error('测试器发生错误' + str(e.args))
            self.spider_log.error('traceback:' + traceback.format_exc())
class Tester(object):
    """Probe proxies held by RedisClient and record which ones still work."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Fetch TEST_URL through a single proxy and record the result.

        A status in VALID_STATUS_CODES calls ``self.redis.max``; any other
        status, or one of the caught request errors, calls
        ``self.redis.decrease``.

        :param proxy: proxy address (str or UTF-8 bytes) without a scheme
        :return: None
        """
        # The connector is created with certificate verification disabled.
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                # The store may hand back bytes; normalise to str first.
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                logger.info('正在测试%s' % proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=5,
                                       allow_redirects=False) as response:
                    # NOTE(review): an earlier note gave VALID_STATUS_CODES
                    # as [200, 302]; it is defined elsewhere -- confirm.
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        logger.info('代理可用%s' % proxy)
                    else:
                        self.redis.decrease(proxy)
                        logger.info('请求响应码不合法%s IP%s' % (response.status, proxy))
            except (ClientError, aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                logger.info('代理请求失败%s' % proxy)

    def run(self):
        """Test every stored proxy in batches of BATCH_TEST_SIZE.

        Errors raised while batching are logged with their traceback, not
        propagated.

        :return: None
        """
        logger.info('测试器开始运行')
        try:
            count = self.redis.count()
            logger.info('当前剩余%d个代理' % count)
            for i in range(0, count, BATCH_TEST_SIZE):
                start = i
                stop = min(i + BATCH_TEST_SIZE, count)
                logger.info('正在测试第%d-%d个代理' % (start + 1, stop))
                test_proxies = self.redis.batch(start, stop)
                loop = asyncio.get_event_loop()
                # asyncio.wait() rejects bare coroutines on Python 3.11+ and
                # raises ValueError for an empty collection, so wrap each
                # coroutine in a Task and skip empty batches.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                if tasks:
                    loop.run_until_complete(asyncio.wait(tasks))
                # The loop is deliberately left open: closing it makes the
                # next batch fail with "RuntimeError: Event loop is closed".
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            logger.exception('测试器发生错误%s' % e.args)