Пример #1
0
class Tester(object):
    """Probe the proxies held by ``RedisClient`` and update their scores."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request ``TEST_URL`` through ``proxy`` and record the outcome.

        A response whose status is in ``STATUS_CODE`` calls
        ``self.redis.set_max_score``; any other status, or a request that
        fails or times out, calls ``self.redis.decrease``.

        :param proxy: proxy address without scheme; ``'http://'`` is prepended.
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                real_proxy = 'http://' + proxy
                async with session.get(TEST_URL, proxy=real_proxy,
                                       timeout=15) as res:
                    if res.status in STATUS_CODE:
                        self.redis.set_max_score(proxy)
                    else:
                        self.redis.decrease(proxy)
            except (aiohttp.ClientError, asyncio.TimeoutError):
                # A proxy that cannot serve the request is penalised instead of
                # being silently ignored; the previous bare ``except: pass``
                # also hid programming errors and task cancellation.
                self.redis.decrease(proxy)

    def run(self):
        """Test every stored proxy in batches of ``BATCH_TEST_COUNT``.

        Any exception raised while batching is printed, not propagated.

        :return: None
        """
        try:
            count = self.redis.get_count()
            for start in range(0, count, BATCH_TEST_COUNT):
                stop = min(count, start + BATCH_TEST_COUNT)
                # NOTE(review): ``stop - 1`` suggests get_batch takes an
                # inclusive end index -- confirm against RedisClient.
                proxies = self.redis.get_batch(start, stop - 1)
                loop = asyncio.get_event_loop()
                # asyncio.wait() needs Task/Future objects: bare coroutines
                # are rejected on Python 3.11+.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in proxies
                ]
                # asyncio.wait() raises ValueError on an empty collection.
                if tasks:
                    loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
Пример #2
0
class Filter:
    """Check every proxy returned by ``RedisClient.batch()`` against ``TEST_URL``."""

    # Seconds before a proxied request is abandoned; without a timeout a
    # single unresponsive proxy would block the check forever.
    REQUEST_TIMEOUT = 15

    def __init__(self):
        self.db = RedisClient()

    async def check_one(self, proxy):
        """Fetch ``TEST_URL`` through ``proxy`` and update the store.

        A ``requests.RequestException`` (including a timeout) removes the
        proxy from the store. A 200 response calls ``self.db.decrease``.

        :param proxy: proxy address as ``str`` without scheme.
        :return: None
        """
        proxies = {'http': 'http://' + proxy}
        loop = asyncio.get_event_loop()
        try:
            print('正在测试: {}'.format(proxy))
            # requests is blocking; run it in the default executor so the
            # other checks scheduled on the loop can progress concurrently.
            r = await loop.run_in_executor(
                None,
                lambda: requests.get(TEST_URL, proxies=proxies,
                                     timeout=self.REQUEST_TIMEOUT),
            )
        except requests.RequestException:
            print('检测失败', proxy)
            self.db.remove(proxy)
            return
        if r.status_code == 200:
            print('代理可用', proxy)
            # NOTE(review): a working proxy is *decreased* here, which looks
            # inverted -- confirm the intended RedisClient scoring call.
            self.db.decrease(proxy)

    def run(self):
        """Schedule ``check_one`` for every stored proxy and wait for all of them.

        Any exception raised while doing so is printed, not propagated.

        :return: None
        """
        print('===开始测试代理===')
        try:
            print('当前代理个数:{}'.format(self.db.count))
            tasks = [
                asyncio.ensure_future(self.check_one(proxy.decode()))
                for proxy in self.db.batch()
            ]
            # asyncio.wait() raises ValueError on an empty collection.
            if not tasks:
                return
            loop = asyncio.get_event_loop()
            loop.run_until_complete(asyncio.wait(tasks))

        except Exception as e:
            print('测试错误', e.args)
Пример #3
0
class Tester(object):
    """Probe proxies stored under a Redis key and update their scores."""

    def __init__(self, redis_key):
        self.redis = RedisClient(redis_key)

    async def test_single_proxy(self, proxy):
        """Request ``TEST_URL`` through a single proxy and score it.

        A status in ``VALID_STATUS_CODES`` calls ``self.redis.max``; any other
        status, or one of the caught request errors, calls
        ``self.redis.decrease``.

        :param proxy: proxy address (``str`` or UTF-8 ``bytes``) without scheme.
        :return: None
        """
        conn = aiohttp.TCPConnector(ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                headers = {
                    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
                    'Accept-Encoding': 'gzip, deflate',
                    'Accept-Language': 'en;q=0.9,ja;q=0.8,fr;q=0.7',
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0',
                    'Connection': 'close',
                }

                async with session.get(TEST_URL, headers=headers, proxy=real_proxy, timeout=TIMEOUT, allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法 ', response.status, 'IP', proxy)
            except (ClientError, ClientConnectorError, asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Test all stored proxies in batches of ``BATCH_TEST_SIZE``.

        Sleeps five seconds after each batch. Any exception raised while
        batching is printed, not propagated.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            for start in range(0, count, BATCH_TEST_SIZE):
                stop = min(start + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                test_proxies = self.redis.batch(start, stop)
                loop = asyncio.get_event_loop()
                # asyncio.wait() needs Task/Future objects: bare coroutines
                # are rejected on Python 3.11+.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                # asyncio.wait() raises ValueError on an empty collection.
                if tasks:
                    loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
Пример #4
0
class Tester:
    """Probe the proxies held by ``RedisClient`` and update their scores."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request ``TEST_URL`` through a single proxy and score it.

        A status in ``VALID_STATUS_CODES`` calls ``self.redis.max``; any other
        status, or one of the caught request errors, calls
        ``self.redis.decrease``.

        :param proxy: proxy address (``str`` or UTF-8 ``bytes``) without scheme.
        :return: None
        """
        conn = aiohttp.TCPConnector(ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                # Proxies may arrive as raw bytes; normalise to str.
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试')
                async with session.get(TEST_URL,
                                       proxy=real_proxy,
                                       timeout=15,
                                       allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合理', response.status, 'IP', proxy)
            except (ClientError,
                    aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败______________', proxy)

    def run(self):
        """Test all stored proxies in batches of ``BATCH_TEST_SIZE``.

        Sleeps five seconds after each batch. Any exception raised while
        batching is printed, not propagated.

        :return: None
        """
        print('检测器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            for start in range(0, count, BATCH_TEST_SIZE):
                stop = min(start + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                test_proxies = self.redis.batch(start, stop)
                loop = asyncio.get_event_loop()
                # asyncio.wait() needs Task/Future objects: bare coroutines
                # are rejected on Python 3.11+.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                # asyncio.wait() raises ValueError on an empty collection.
                if tasks:
                    loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
Пример #5
0
class Tester(object):
    """Probe the proxies held by ``RedisClient`` and update their scores."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request ``TEST_URL`` through a single proxy and score it.

        A status in ``VALID_STATUS_CODES`` calls ``self.redis.max``; any other
        status, or one of the caught request errors, calls
        ``self.redis.decrease``.

        :param proxy: proxy address (``str`` or UTF-8 ``bytes``) without scheme.
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                logger.debug(f'正在测试 {proxy}')
                async with session.get(TEST_URL,
                                       proxy=real_proxy,
                                       timeout=15,
                                       allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        logger.debug(f'代理可用 {proxy}')
                    else:
                        self.redis.decrease(proxy)
                        logger.debug(f'请求响应码不合法 {response.status}, IP {proxy}')
            except (ClientError,
                    aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                logger.debug(f'代理请求失败 {proxy}')

    def run(self):
        """Test all stored proxies in batches of ``BATCH_TEST_SIZE``.

        Sleeps five seconds after each batch. Any exception raised while
        batching is logged, not propagated.

        :return: None
        """
        logger.debug('测试器开始运行')
        try:
            count = self.redis.count()
            logger.debug(f'当前剩余 {count} 个代理')
            for start in range(0, count, BATCH_TEST_SIZE):
                stop = min(start + BATCH_TEST_SIZE, count)
                # Log the batch as a "first-last" range; the previous message
                # evaluated ``start + 1 - stop`` as arithmetic and logged a
                # meaningless (usually negative) number.
                logger.debug(f'正在测试第 {start + 1}-{stop} 个代理')
                test_proxies = self.redis.batch(start, stop)
                loop = asyncio.get_event_loop()
                # asyncio.wait() needs Task/Future objects: bare coroutines
                # are rejected on Python 3.11+.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                # asyncio.wait() raises ValueError on an empty collection.
                if tasks:
                    loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            logger.debug(f'测试器发生错误 {e.args}')
Пример #6
0
class Tester(object):
    """Probe the proxies held by ``RedisClient`` and update their scores."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request ``TEST_URL`` through a single proxy and score it.

        A status in ``VALID_STATUS_CODES`` calls ``self.redis.max``; any other
        status, or one of the caught request errors, calls
        ``self.redis.decrease``.

        :param proxy: proxy address (``str`` or UTF-8 ``bytes``) without scheme.
        :return: None
        """
        # NOTE(review): the Host header is fixed to www.sogou.com, so TEST_URL
        # presumably points at that host -- confirm before changing either.
        headers = {
            "Connection": "keep-alive",
            "Host": "www.sogou.com",
            "Pragma": "no-cache",
            "User-Agent": 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36',
        }
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试' + proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15, allow_redirects=False,
                                       headers=headers) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用' + proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法 ' + str(response.status) + 'IP' + proxy)
            except (ClientError, aiohttp.client_exceptions.ClientConnectorError, asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败{}'.format(proxy))

    def run(self):
        """Test all stored proxies in batches of ``BATCH_TEST_SIZE``.

        Sleeps five seconds after each batch. Any exception raised while
        batching is printed, not propagated.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余{}个代理'.format(count))
            for start in range(0, count, BATCH_TEST_SIZE):
                stop = min(start + BATCH_TEST_SIZE, count)
                print('正在测试第{}-{}个代理'.format(start + 1, stop))
                test_proxies = self.redis.batch(start, stop)
                loop = asyncio.get_event_loop()
                # asyncio.wait() needs Task/Future objects: bare coroutines
                # are rejected on Python 3.11+.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                # asyncio.wait() raises ValueError on an empty collection.
                if tasks:
                    loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误{}'.format(e.args))
Пример #7
0
class Tester(object):
    """Probe the proxies held by ``RedisClient`` and update their scores."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request ``TEST_URL`` through a single proxy and score it.

        A status in ``VALID_STATUS_CODES`` calls ``self.redis.max``; any other
        status, or one of the caught request errors, calls
        ``self.redis.decrease``.

        :param proxy: proxy address (``str`` or UTF-8 ``bytes``) without scheme.
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')

                real_proxy = 'http://' + proxy
                print('正在验证 --> ', real_proxy)
                async with session.get(TEST_URL, proxy=real_proxy,
                                       timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('验证成功 -->', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('验证失败 Code 不合法-->', proxy)

            except (ClientError,
                    aiohttp.client_exceptions.ClientConnectionError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('验证失败 -->', proxy)

    def run(self):
        """Test every proxy from ``self.redis.all()`` in slices of ``BATCH_TEST_SIZE``.

        Sleeps three seconds after each slice. Any exception raised while
        batching is printed, not propagated.

        :return: None
        """
        print("验证程序启动")
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            for offset in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[offset:offset + BATCH_TEST_SIZE]
                # asyncio.wait() needs Task/Future objects: bare coroutines
                # are rejected on Python 3.11+.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy=proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(3)
        except Exception as e:
            print('测试中发生了错误 --> ', e.args)
0
class Tester(object):
    """Probe stored proxies and promote those that echo their own address."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, session, proxy):
        """Request ``TEST_URL`` through one proxy using a shared session.

        The proxy is promoted with ``self.redis.max`` only when the status is
        in ``VALID_STATUS_CODES``, the body is non-empty, and the body's
        ``headers['X-Forwarded-For']`` value matches the host part of the
        proxy URL. An invalid status or a caught request error calls
        ``self.redis.decrease``. A valid status with an empty body or a
        non-matching address leaves the score untouched.

        :param session: open ``aiohttp.ClientSession`` shared by the batch.
        :param proxy: stored proxy record; evaluated to a mapping with an
            ``'https'`` key holding the proxy URL.
        :return: None
        """
        try:
            # SECURITY: eval() executes arbitrary code. ``proxy`` comes from
            # the Redis store; replace with ast.literal_eval once the stored
            # format is confirmed to be a plain literal.
            real_proxy = eval(proxy)['https']
            print('正在测试', proxy)
            async with session.get(TEST_URL, proxy=real_proxy, timeout=20, allow_redirects=False) as response:
                if response.status in VALID_STATUS_CODES:
                    rst = await response.text()
                    if rst:
                        # SECURITY: ``rst`` is a body received over the network
                        # through an untrusted proxy; eval() on it allows remote
                        # code execution. Replace with json.loads once the
                        # response format is confirmed to be JSON.
                        forwarded = eval(rst).get('headers').get('X-Forwarded-For')
                        # 'scheme://host:port'.split(':')[1] is '//host'.
                        proxy_ip = real_proxy.split(':')
                        # A missing header counts as a mismatch instead of
                        # raising TypeError on ``'//' + None``.
                        if forwarded is not None and '//' + forwarded == proxy_ip[1]:
                            self.redis.max(proxy)
                            print('代理可用', proxy)
                else:
                    self.redis.decrease(proxy)
                    print('请求响应码不合法 ', response.status, 'IP', proxy)
        except (ClientError, aiohttp.client_exceptions.ClientConnectorError, asyncio.TimeoutError, AttributeError):
            self.redis.decrease(proxy)
            print('代理请求失败', proxy)

    async def set_test_tasks(self, loop):
        """Test all stored proxies in batches of ``BATCH_TEST_SIZE``.

        One ``ClientSession`` is opened per batch and shared by its tasks.

        :param loop: the event loop running this coroutine.
        :return: None
        """
        # ``count`` is read as an attribute here, not called.
        count = self.redis.count
        print('当前剩余', count, '个代理')
        for start in range(0, count, BATCH_TEST_SIZE):
            stop = min(start + BATCH_TEST_SIZE, count)
            print('正在测试第', start + 1, '-', stop, '个代理')
            test_proxies = self.redis.batch(start, stop)
            conn = aiohttp.TCPConnector()
            async with aiohttp.ClientSession(connector=conn, loop=loop) as session:
                # asyncio.wait() needs Task/Future objects: bare coroutines
                # are rejected on Python 3.11+.
                tasks = [
                    loop.create_task(self.test_single_proxy(session, proxy))
                    for proxy in test_proxies
                ]
                # asyncio.wait() raises ValueError on an empty collection.
                if tasks:
                    await asyncio.wait(tasks)

    def run(self):
        """Run ``set_test_tasks`` to completion, then flush stdout and sleep 5 s.

        Any exception raised while doing so is printed, not propagated.

        :return: None
        """
        print('测试器开始运行')
        try:
            loop = asyncio.get_event_loop()
            loop.run_until_complete(self.set_test_tasks(loop))
            sys.stdout.flush()
            time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
Пример #9
0
class Tester(object):
    """Probe the proxies held by ``RedisClient`` and update their scores."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request ``TEST_URL`` through a single proxy and score it.

        A status in ``VALID_STATUS_CODES`` calls ``self.redis.max``; any other
        status, or one of the caught request errors, calls
        ``self.redis.decrease``.

        :param proxy: proxy address (``str`` or UTF-8 ``bytes``) without scheme.
        :return: None
        """
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"}
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL, proxy=real_proxy, headers=headers, timeout=15, allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法 ', response.status, 'IP', proxy)
            except (ClientError, aiohttp.client_exceptions.ClientConnectorError, asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Test all stored proxies in batches of ``BATCH_TEST_SIZE``.

        Sleeps five seconds after each batch. Any exception raised while
        batching is printed, not propagated.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            for start in range(0, count, BATCH_TEST_SIZE):
                stop = min(start + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                test_proxies = self.redis.batch(start, stop)
                loop = asyncio.get_event_loop()
                # asyncio.wait() needs Task/Future objects: bare coroutines
                # are rejected on Python 3.11+.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                # asyncio.wait() raises ValueError on an empty collection.
                if tasks:
                    loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
Пример #10
0
class Tester(object):
    """Probe the proxies held by ``RedisClient`` and update their scores."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request ``TEST_URL`` through a single proxy and score it.

        A status in ``VALID_STATUS_CODES`` calls ``self.redis.max``; any other
        status, or one of the caught request errors, calls
        ``self.redis.decrease``.

        :param proxy: proxy address (``str`` or UTF-8 ``bytes``) without scheme.
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15, allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法 ', response.status, 'IP', proxy)
            except (ClientError, aiohttp.client_exceptions.ClientConnectorError, asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Test all stored proxies in batches of ``BATCH_TEST_SIZE``.

        Sleeps five seconds after each batch. Any exception raised while
        batching is printed, not propagated.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            for start in range(0, count, BATCH_TEST_SIZE):
                stop = min(start + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                test_proxies = self.redis.batch(start, stop)
                loop = asyncio.get_event_loop()
                # asyncio.wait() needs Task/Future objects: bare coroutines
                # are rejected on Python 3.11+.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                # asyncio.wait() raises ValueError on an empty collection.
                if tasks:
                    loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
Пример #11
0
class Tester(object):
    """Probe the proxies held by ``RedisClient`` and update their scores."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Check whether a single proxy is usable.

        A status in ``VALID_STATUS_CODES`` calls ``self.redis.max``; any other
        status, a caught aiohttp error, or a timeout calls
        ``self.redis.decrease``.

        :param proxy: the proxy under test (``str`` or UTF-8 ``bytes``),
            without scheme.
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试 ', proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用 ', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求相应码不合法', proxy)
            # asyncio.TimeoutError is included so a proxy that exceeds the
            # 15 s timeout is penalised rather than left with its old score.
            except (ServerDisconnectedError, ClientResponseError,
                    ClientConnectorError, asyncio.TimeoutError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Test every proxy from ``self.redis.all()`` in slices of ``BATCH_TEST_SIZE``.

        Sleeps five seconds after each slice. Any exception raised while
        batching is printed, not propagated.

        :return: None
        """
        print('测试器开始运行')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            for offset in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[offset:offset + BATCH_TEST_SIZE]
                # asyncio.wait() needs Task/Future objects: bare coroutines
                # are rejected on Python 3.11+.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('c测试器f发生c错误', e.args)
Пример #12
0
class Tester(object):
    """Probe the proxies held by ``RedisClient`` and update their scores."""

    def __init__(self):
        self.client = RedisClient()

    async def test(self, proxy):
        """Request ``TEST_URL`` through a single proxy and score it.

        A status in ``VALID_STATUS`` calls ``self.client.set_vaild``; any
        other status, or any exception raised during the check, calls
        ``self.client.decrease``.

        :param proxy: proxy address (``str`` or UTF-8 ``bytes``) without scheme.
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                proxy_data = 'http://' + proxy
                async with session.get(TEST_URL, proxy=proxy_data,
                                       timeout=15) as response:
                    logger.info('测试:%s,结果:%s', proxy_data, response.status)
                    if response.status in VALID_STATUS:
                        logger.info('代理测试可用')
                        self.client.set_vaild(proxy)
                    else:
                        logger.info('代理测试不可用,分值减一')
                        self.client.decrease(proxy)
            except Exception:
                logger.info('%s 测试失败,分值减一', proxy)
                self.client.decrease(proxy)

    def run(self):
        """Test every proxy from ``self.client.all()`` in slices of ``TEST_BATCH_SIZE``.

        Sleeps five seconds after each slice. Any exception raised while
        batching is logged with its traceback, not propagated.

        :return: None
        """
        logger.info('开始测试...')
        try:
            proxies = self.client.all()
            loop = asyncio.get_event_loop()
            for offset in range(0, len(proxies), TEST_BATCH_SIZE):
                test_proxy = proxies[offset:offset + TEST_BATCH_SIZE]
                # asyncio.wait() needs Task/Future objects: bare coroutines
                # are rejected on Python 3.11+.
                tester_list = [
                    loop.create_task(self.test(proxy)) for proxy in test_proxy
                ]
                loop.run_until_complete(asyncio.wait(tester_list))
                sleep(5)
        except Exception:
            # logger.exception keeps the message but attaches the traceback;
            # the previous info() call discarded the cause entirely.
            logger.exception('测试发生错误')
Пример #13
0
class Tester:
    """Probe the proxies held by ``RedisClient`` and update their scores."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request ``TEST_API`` through a single proxy and score it.

        A 200 response calls ``self.redis.max``; any other status, or one of
        the caught request errors, calls ``self.redis.decrease``.

        :param proxy: proxy address (``str`` or UTF-8 ``bytes``) without scheme.
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode("utf-8")
                format_proxy = "http://" + proxy
                async with session.get(TEST_API,
                                       proxy=format_proxy,
                                       timeout=PROXY_TIMEOUT,
                                       allow_redirects=False) as response:
                    if response.status == 200:
                        self.redis.max(proxy)
                    else:
                        self.redis.decrease(proxy)
            except (ClientError,
                    aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)

    def run(self):
        """Test all stored proxies in batches of ``BATCH_TEST_SIZE``.

        Sleeps five seconds after each batch. Any exception raised while
        batching is printed, not propagated.

        :return: None
        """
        print("测试器开始运行")
        try:
            count = self.redis.count()
            print("当前剩余%s个代理" % count)
            for start in range(0, count, BATCH_TEST_SIZE):
                stop = min(start + BATCH_TEST_SIZE, count)
                print("正在测试第%s个--第%s个代理" % (start + 1, stop))
                test_proxies = self.redis.batch(start, stop)
                loop = asyncio.get_event_loop()
                # asyncio.wait() needs Task/Future objects: bare coroutines
                # are rejected on Python 3.11+.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                # asyncio.wait() raises ValueError on an empty collection.
                if tasks:
                    loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            # Wrap e.args in a 1-tuple: ``"%s" % e.args`` itself raises
            # TypeError whenever args holds zero or several items.
            print("代理测试器发生错误%s" % (e.args,))
Пример #14
0
class Tester(object):
    """Probe the proxies held by ``RedisClient`` and update their scores."""

    def __init__(self):
        """Create the Redis client and fetch the event loop used by ``run``."""
        self.redis = RedisClient()
        self.loop = asyncio.get_event_loop()

    async def test(self, proxy: Proxy):
        """Request ``TEST_URL`` through a single proxy and score it.

        A status in ``TEST_VALID_STATUS`` calls ``self.redis.max``; any other
        status, or an exception listed in ``EXCEPTIONS``, calls
        ``self.redis.decrease``.

        :param proxy: proxy object; ``proxy.string()`` supplies the address
            placed after ``http://``.
        :return: None
        """
        async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False)) as session:
            try:
                logger.debug(f'测试 {proxy.string()}')
                async with session.get(TEST_URL, proxy=f'http://{proxy.string()}', timeout=TEST_TIMEOUT,
                                       allow_redirects=False) as response:
                    if response.status in TEST_VALID_STATUS:
                        self.redis.max(proxy)
                        logger.debug(f'代理 {proxy.string()} 可用, 加分')
                    else:
                        self.redis.decrease(proxy)
                        logger.debug(f'代理 {proxy.string()} 无效, 减分')
            except EXCEPTIONS:
                self.redis.decrease(proxy)
                logger.debug(f'代理 {proxy.string()} 无效, 减分')

    def run(self):
        """Test all stored proxies in batches of ``TEST_BATCH``.

        Exceptions from the Redis calls or the event loop propagate to the
        caller.

        :return: None
        """
        logger.info('启动测试器......')
        count = self.redis.count()
        logger.debug(f'{count} 个代理等待测试')

        for start in range(0, count, TEST_BATCH):
            end = min(start + TEST_BATCH, count)
            logger.debug(f'测试索引值从 {start} 到 {end} 的代理')
            proxies = self.redis.batch(start, end)
            # asyncio.wait() needs Task/Future objects: bare coroutines are
            # rejected on Python 3.11+.
            tasks = [self.loop.create_task(self.test(proxy)) for proxy in proxies]
            # asyncio.wait() raises ValueError on an empty collection.
            if tasks:
                self.loop.run_until_complete(asyncio.wait(tasks))
Пример #15
0
class Tester(object):
    """Probe the proxies held by ``RedisClient`` and update their scores."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request ``TEST_URL`` through a single proxy and score it.

        A status in ``VALID_STATUS_CODES`` calls ``self.redis.max``; any other
        status, or one of the caught request errors, calls
        ``self.redis.decrease``.

        :param proxy: proxy address (``str`` or UTF-8 ``bytes``) without scheme.
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('Testing:', proxy)
                async with session.get(TEST_URL, proxy=real_proxy,
                                       timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码非法', proxy)
            except (ClientError,
                    aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Test every proxy from ``self.redis.all()`` in slices of ``BATCH_TEST_SIZE``.

        Sleeps five seconds after each slice. Any exception raised while
        batching is printed, not propagated.

        :return: None
        """
        print('测试器开始运行')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            for offset in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[offset:offset + BATCH_TEST_SIZE]
                # asyncio.wait() needs Task/Future objects: bare coroutines
                # are rejected on Python 3.11+.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
Пример #16
0
class Tester(object):
    """Probe stored proxies using a test described by a settings mapping.

    ``data`` is read for the keys ``'TEST_URL'``, ``'TEST_tage'``,
    ``'TEST_if'`` and ``'BATCH_TEST_SIZE'``.
    """

    def __init__(self, data):
        self.redis = RedisClient(data)
        self.data = data

    async def test_single_proxy(self, proxy):
        """Fetch ``data['TEST_URL']`` through one proxy and score it by body content.

        ``data['TEST_tage']`` selects the check: ``'in'`` requires the UTF-8
        bytes of ``data['TEST_if']`` to appear in the response body,
        ``'not in'`` requires them to be absent. A passing check calls
        ``self.redis.max``; a failing one calls ``self.redis.decrease``. Any
        other ``TEST_tage`` value leaves the score untouched. A caught request
        error calls ``self.redis.delete``.

        :param proxy: proxy address (``str`` or UTF-8 ``bytes``) without scheme.
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(self.data['TEST_URL'],
                                       proxy=real_proxy,
                                       timeout=15,
                                       allow_redirects=False) as response:
                    text = await response.read()
                    mode = self.data['TEST_tage']
                    if mode in ('in', 'not in'):
                        found = bytes(self.data['TEST_if'],
                                      encoding="utf8") in text
                        # 'in' passes when the marker is present,
                        # 'not in' passes when it is absent.
                        if found == (mode == 'in'):
                            self.redis.max(proxy)
                            print('代理可用', proxy)
                        else:
                            self.redis.decrease(proxy)
                            print(
                                '不满足条件{}'.format(self.data['TEST_if'] +
                                                 self.data['TEST_tage']),
                                response.status, 'IP', proxy)
            except (ClientError,
                    aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                # Unlike a failed content check, a failed request deletes the
                # proxy outright.
                self.redis.delete(proxy)
                print('代理请求失败', proxy)

    async def _single_proxy(self, proxy):
        """Placeholder coroutine: it has no body and returns None.

        :param proxy: unused.
        :return: None
        """

    def run(self):
        """Test all stored proxies in batches of ``data['BATCH_TEST_SIZE']``.

        Sleeps five seconds after each batch. Any exception raised while
        batching is printed, not propagated.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            for start in range(0, count, self.data['BATCH_TEST_SIZE']):
                stop = min(start + self.data['BATCH_TEST_SIZE'], count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                test_proxies = self.redis.batch(start, stop)
                loop = asyncio.get_event_loop()
                # asyncio.wait() needs Task/Future objects: bare coroutines
                # are rejected on Python 3.11+.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                # asyncio.wait() raises ValueError on an empty collection.
                if tasks:
                    loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)


# Tester().run()
Пример #17
0
class Tester(object):
    """
    Checks stored proxies against TEST_URL and records the outcome in Redis.

    Proxies that currently appear in ``self.redis.all_useful()`` are
    re-tested first, followed by the whole pool in batches of
    BATCH_TEST_SIZE.
    """

    def __init__(self):
        self.redis = RedisClient()
        # Browser-like User-Agent sent with every test request.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
        }

    async def test_single_proxy(self, proxy):
        """
        Test a single proxy by requesting TEST_URL through it.

        Calls ``self.redis.max(proxy)`` when the response status is in
        VALID_STATUS_CODES, otherwise ``self.redis.decrease(proxy)``.  A
        request failure (client error, timeout, AttributeError) also calls
        ``self.redis.decrease(proxy)``.

        :param proxy: proxy address as ``str`` or UTF-8 ``bytes``, without a
            scheme (``'http://'`` is prepended here).
        :return: None
        """
        # NOTE(review): verify_ssl is deprecated in aiohttp 3.x in favour of
        # ssl=False -- confirm the installed version before changing it.
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(url=TEST_URL,
                                       proxy=real_proxy,
                                       headers=self.headers,
                                       timeout=15,
                                       allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法 ', response.status, 'IP', proxy)
            except (ClientError, aiohttp.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """
        Main entry point of the tester.

        First re-tests everything returned by ``self.redis.all_useful()``,
        then tests the whole pool, both in batches of BATCH_TEST_SIZE.

        :return: None
        """
        print('测试器开始运行')
        count = self.redis.count()
        print('当前剩余', count, '个代理')
        # Per the original author's note: on every run, the proxies that
        # currently hold the full score are re-tested first so that invalid
        # ones are weeded out before the pool starts serving them.
        useful_ip = self.redis.all_useful()
        if useful_ip:
            count_usefully = len(useful_ip)
            print('第一个有用的代理: {}'.format(useful_ip[0]), '共{}个'.format(count_usefully))
            for start in range(0, count_usefully, BATCH_TEST_SIZE):
                stop = min(start + BATCH_TEST_SIZE, count_usefully)
                print('正在测试第', start + 1, '-', stop, '个代理(usefully)')
                # Slices are end-exclusive: [start:stop] is exactly one batch.
                # The previous [start:stop + 1] made consecutive batches
                # overlap by one proxy, which was then tested twice.
                self.batch_proxies(useful_ip[start:stop])
        else:
            print('当前无可用代理,请等待...')
        for start in range(0, count, BATCH_TEST_SIZE):
            stop = min(start + BATCH_TEST_SIZE, count)
            print('正在测试第', start + 1, '-', stop, '个代理(normally)')
            test_proxies = self.redis.batch(start, stop)
            self.batch_proxies(test_proxies)

    def batch_proxies(self, test_proxies):
        """
        Test one batch of proxies concurrently, then pause for 5 seconds.

        Exceptions raised while running the batch are printed, not re-raised.

        :param test_proxies: sequence of proxies for ``test_single_proxy``;
            an empty batch is skipped without sleeping.
        :return: None
        """
        if not test_proxies:
            # asyncio.wait() raises ValueError on an empty collection.
            return
        try:
            loop = asyncio.get_event_loop()
            # asyncio.wait() stopped accepting bare coroutines in Python 3.11
            # (deprecated since 3.8): wrap them in Tasks explicitly.
            tasks = [
                loop.create_task(self.test_single_proxy(proxy))
                for proxy in test_proxies
            ]
            loop.run_until_complete(asyncio.wait(tasks))
            sys.stdout.flush()
            time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
Пример #18
0
class Tester(object):
    """Checks stored proxies against TEST_URL and records the outcome in Redis."""

    def __init__(self):
        # Storage client shared by the other methods of this object.
        self.redis = RedisClient()

    # Coroutine, written against aiohttp.
    async def test_single_proxy(self, proxy):
        """
        Test a single proxy by requesting TEST_URL through it.

        :param proxy: proxy address as ``str`` or UTF-8 ``bytes``, without a
            scheme (``'http://'`` is prepended here).
        :return: None
        """
        # NOTE(review): verify_ssl is deprecated in aiohttp 3.x in favour of
        # ssl=False -- confirm the installed version before changing it.
        conn = aiohttp.TCPConnector(verify_ssl=False)
        # ClientSession plays the role of a requests session: its get()
        # method fetches a page directly.
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                # The proxy is handed to get() through its ``proxy`` argument;
                # TEST_URL is the page used for the check.
                async with session.get(TEST_URL,
                                       proxy=real_proxy,
                                       timeout=15,
                                       allow_redirects=False) as response:
                    # VALID_STATUS_CODES: collection of accepted status codes.
                    if response.status in VALID_STATUS_CODES:
                        # Per the original author's note, max() sets the
                        # proxy's score to 100.
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        # Per the original author's note, decrease() lowers
                        # the proxy's score by 1.
                        self.redis.decrease(proxy)
                        print('请求响应码不合法', response.status, 'IP', proxy)
            except (ClientError,
                    aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('请求代理失败', proxy)

    def run(self):
        """
        Main entry point of the tester: check every stored proxy in batches.

        Walks ``range(0, self.redis.count(), BATCH_TEST_SIZE)``, fetches each
        batch with ``self.redis.batch(start, stop)`` and runs
        ``test_single_proxy`` for all of its proxies concurrently, sleeping
        5 seconds after each batch.  Any exception is printed and ends the
        run; it is not re-raised.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            # BATCH_TEST_SIZE: maximum number of proxies tested per batch.
            for start in range(0, count, BATCH_TEST_SIZE):
                stop = min(start + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                # Fetch the proxies of this batch.
                test_proxies = self.redis.batch(start, stop)
                if not test_proxies:
                    # The stored set may have shrunk since count() was read.
                    # asyncio.wait() raises ValueError on an empty collection,
                    # which would abort the whole run, so skip the batch.
                    continue
                loop = asyncio.get_event_loop()
                # asyncio.wait() stopped accepting bare coroutines in
                # Python 3.11 (deprecated since 3.8): wrap them in Tasks.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
Пример #19
0
class Tester(object):
    """
    Checks stored proxies and records the outcome in Redis.

    ``mode`` selects which proxy set is tested: ``None`` or REDIS_HTTP use
    TEST_URL, REDIS_HTTPS uses HTTPSTEST_URL.  Progress and errors go to the
    logger named TESTLOGGER.
    """

    def __init__(self):
        self.redis = RedisClient()
        self.spider_log = logging.getLogger(TESTLOGGER)

    async def test_single_proxy(self, proxy, mode=None):
        """
        Test a single proxy by requesting the mode's test URL through it.

        Calls ``self.redis.max(proxy, mode)`` when the response status is in
        VALID_STATUS_CODES, otherwise ``self.redis.decrease(proxy, mode)``.
        A request failure also calls ``self.redis.decrease(proxy, mode)``.

        :param proxy: proxy address as ``str`` or UTF-8 ``bytes``, without a
            scheme.
        :param mode: ``None``, REDIS_HTTP or REDIS_HTTPS.
        :return: None
        :raises ValueError: if ``mode`` is none of the supported values
            (previously this surfaced as an UnboundLocalError).
        """
        if isinstance(proxy, bytes):
            proxy = proxy.decode('utf-8')

        if mode is None or mode == REDIS_HTTP:
            url = TEST_URL
            proxy_prefix = 'http'
        elif mode == REDIS_HTTPS:
            url = HTTPSTEST_URL
            proxy_prefix = 'https'
        else:
            raise ValueError(
                'unsupported proxy test mode: {!r}'.format(mode))
        # The proxy itself is always addressed over plain http://;
        # proxy_prefix only labels the log message below.
        test_proxy = 'http://' + proxy

        # NOTE(review): verify_ssl is deprecated in aiohttp 3.x in favour of
        # ssl=False -- confirm the installed version before changing it.
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                async with session.get(url,
                                       proxy=test_proxy,
                                       timeout=15,
                                       allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy, mode)
                    else:
                        self.redis.decrease(proxy, mode)
                        # Logger.warn is a deprecated alias of warning().
                        self.spider_log.warning('请求响应码不合法 ' +
                                                str(response.status) + 'IP' +
                                                proxy_prefix + ":" + proxy)
            except (ClientError,
                    aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy, mode)

    def run(self, mode=None):
        """
        Main entry point of the tester: check every stored proxy in batches.

        Walks ``range(0, self.redis.count(mode), BATCH_TEST_SIZE)``, fetches
        each batch with ``self.redis.batch(start, stop, mode=mode)`` and runs
        ``test_single_proxy`` for all of its proxies concurrently, sleeping
        5 seconds after each batch.  Any exception is logged together with
        its traceback and ends the run; it is not re-raised.

        :param mode: passed through to the Redis client and to
            ``test_single_proxy``; ``None`` is labelled REDIS_KEY in the log.
        :return: None
        """
        self.spider_log.info('测试器定时开始')

        # Label used in log messages only.
        rediskey = REDIS_KEY if mode is None else mode
        try:
            count = self.redis.count(mode)
            self.spider_log.info('测试器开始运行' + rediskey + '当前剩余' + str(count) +
                                 '个代理')

            for start in range(0, count, BATCH_TEST_SIZE):
                stop = min(start + BATCH_TEST_SIZE, count)
                self.spider_log.info('正在测试第' + str(start + 1) + '-' +
                                     str(stop) + '个' + rediskey + '代理')
                test_proxies = self.redis.batch(start, stop, mode=mode)
                if not test_proxies:
                    # The stored set may have shrunk since count() was read.
                    # asyncio.wait() raises ValueError on an empty collection,
                    # which would abort the whole run, so skip the batch.
                    continue
                loop = asyncio.get_event_loop()
                # asyncio.wait() stopped accepting bare coroutines in
                # Python 3.11 (deprecated since 3.8): wrap them in Tasks.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy, mode=mode))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            self.spider_log.error('测试器发生错误' + str(e.args))
            self.spider_log.error('traceback:' + traceback.format_exc())
Пример #20
0
class Tester(object):
    """Checks stored proxies against TEST_URL and records the outcome in Redis."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Test a single proxy by requesting TEST_URL through it.

        Calls ``self.redis.max(proxy)`` when the response status is in
        VALID_STATUS_CODES, otherwise ``self.redis.decrease(proxy)``.  A
        request failure (client error, timeout, AttributeError) also calls
        ``self.redis.decrease(proxy)``.

        :param proxy: proxy address as ``str`` or UTF-8 ``bytes``, without a
            scheme (``'http://'`` is prepended here).
        :return: None
        """
        # Client session whose connector skips certificate verification.
        # NOTE(review): verify_ssl is deprecated in aiohttp 3.x in favour of
        # ssl=False -- confirm the installed version before changing it.
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                # Normalise bytes coming from storage to str.
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                logger.info('正在测试%s', proxy)
                async with session.get(TEST_URL,
                                       proxy=real_proxy,
                                       timeout=5,
                                       allow_redirects=False) as response:
                    # Per the original author's note,
                    # VALID_STATUS_CODES = [200, 302].
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        logger.info('代理可用%s', proxy)
                    else:
                        self.redis.decrease(proxy)
                        logger.info('请求响应码不合法%s IP%s',
                                    response.status, proxy)
            except (ClientError,
                    aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                logger.info('代理请求失败%s', proxy)

    def run(self):
        """
        Main entry point of the tester: check every stored proxy in batches.

        Walks ``range(0, self.redis.count(), BATCH_TEST_SIZE)``, fetches each
        batch with ``self.redis.batch(start, stop)`` and runs
        ``test_single_proxy`` for all of its proxies concurrently, sleeping
        5 seconds after each batch.  Any exception is logged with its
        traceback and ends the run; it is not re-raised.

        :return: None
        """
        logger.info('测试器开始运行')
        try:
            count = self.redis.count()
            logger.info('当前剩余%d个代理', count)
            # Per the original author's note, BATCH_TEST_SIZE = 10, so
            # range(0, 100, 10) yields the batch starts 0, 10, ..., 90.
            for start in range(0, count, BATCH_TEST_SIZE):
                stop = min(start + BATCH_TEST_SIZE, count)
                logger.info('正在测试第%d-%d个代理', start + 1, stop)
                test_proxies = self.redis.batch(start, stop)
                if not test_proxies:
                    # The stored set may have shrunk since count() was read.
                    # asyncio.wait() raises ValueError on an empty collection,
                    # which would abort the whole run, so skip the batch.
                    continue
                loop = asyncio.get_event_loop()
                # asyncio.wait() stopped accepting bare coroutines in
                # Python 3.11 (deprecated since 3.8): wrap them in Tasks.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                # Do not close the loop here: it is reused for the next
                # batch, and closing it raises
                # "RuntimeError: Event loop is closed".
                # Flush pending output.
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            # Pass the exception as a lazy logging argument.  The previous
            # "'...%s' % e.args" raised TypeError inside this handler whenever
            # e.args did not hold exactly one element.
            logger.exception('测试器发生错误%s', e)