Пример #1
0
class Tester(object):
    """
    tester for testing proxies in queue
    """
    def __init__(self):
        """
        init redis
        """
        self.redis = RedisClient()
        self.loop = asyncio.get_event_loop()

    async def test(self, proxy: Proxy):
        """
        test single proxy
        :param proxy: Proxy object
        :return:
        """
        async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(
                ssl=False)) as session:
            try:
                logger.debug(f'testing {proxy.string()}')
                async with session.get(TEST_URL,
                                       proxy=f'http://{proxy.string()}',
                                       timeout=TEST_TIMEOUT,
                                       allow_redirects=False) as response:
                    if response.status in TEST_VALID_STATUS:
                        self.redis.max(proxy)
                        logger.debug(
                            f'proxy {proxy.string()} is valid, set max score')
                    else:
                        self.redis.decrease(proxy)
                        logger.debug(
                            f'proxy {proxy.string()} is invalid, decrease score'
                        )
            except EXCEPTIONS:
                self.redis.decrease(proxy)
                logger.debug(
                    f'proxy {proxy.string()} is invalid, decrease score')

    @logger.catch
    def run(self):
        """
        test main method
        :return:
        """
        # event loop of aiohttp
        logger.info('stating tester...')
        count = self.redis.count()
        logger.debug(f'{count} proxies to test')
        for i in range(0, count, TEST_BATCH):
            # start end end offset
            start, end = i, min(i + TEST_BATCH, count)
            logger.debug(f'testing proxies from {start} to {end} indices')
            proxies = self.redis.batch(start, end)
            tasks = [self.test(proxy) for proxy in proxies]
            # run tasks using event loop
            self.loop.run_until_complete(asyncio.wait(tasks))
Пример #2
0
def checking(proxy):
    ocj = RedisClient()
    proxies = {
        'http': 'http://{}'.format(proxy),
        'https': 'https://{}'.format(proxy)
    }
    try:
        res_proxy = requests.get(TEST_URL,
                                 headers=TEST_HEADERS,
                                 proxies=proxies,
                                 verify=False,
                                 timeout=100)
        if res_proxy.status_code == 200:
            return proxy
        else:
            ocj.decrease(proxy)
            return None
    except ConnectionError and Timeout and HTTPError:
        ocj.decrease(proxy)
        return None
Пример #3
0
class Tester(object):
    """
    tester for testing proxies in queue
    """
    def __init__(self):
        """
        init redis
        """
        self.redis = RedisClient()
        self.loop = asyncio.get_event_loop()

    async def test(self, proxy: Proxy):
        """
        test single proxy
        :param proxy: Proxy object
        :return:
        """
        async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(
                ssl=False)) as session:
            try:
                logger.debug(f'testing {proxy.string()}')
                # if TEST_ANONYMOUS is True, make sure that
                # the proxy has the effect of hiding the real IP
                if TEST_ANONYMOUS:
                    url = 'https://httpbin.org/ip'
                    async with session.get(url,
                                           timeout=TEST_TIMEOUT) as response:
                        resp_json = await response.json()
                        origin_ip = resp_json['origin']
                    async with session.get(url,
                                           proxy=f'http://{proxy.string()}',
                                           timeout=TEST_TIMEOUT) as response:
                        resp_json = await response.json()
                        anonymous_ip = resp_json['origin']
                    assert origin_ip != anonymous_ip
                    assert proxy.host == anonymous_ip
                async with session.get(TEST_URL,
                                       proxy=f'http://{proxy.string()}',
                                       timeout=TEST_TIMEOUT,
                                       allow_redirects=False) as response:
                    if response.status in TEST_VALID_STATUS:
                        self.redis.max(proxy)
                        logger.debug(
                            f'proxy {proxy.string()} is valid, set max score')
                    else:
                        self.redis.decrease(proxy)
                        logger.debug(
                            f'proxy {proxy.string()} is invalid, decrease score'
                        )
            except EXCEPTIONS:
                self.redis.decrease(proxy)
                logger.debug(
                    f'proxy {proxy.string()} is invalid, decrease score')

    @logger.catch
    def run(self):
        """
        test main method
        :return:
        """
        # event loop of aiohttp
        logger.info('stating tester...')
        count = self.redis.count()
        logger.debug(f'{count} proxies to test')
        cursor = 0
        while True:
            logger.debug(
                f'testing proxies use cursor {cursor}, count {TEST_BATCH}')
            cursor, proxies = self.redis.batch(cursor, count=TEST_BATCH)
            if proxies:
                tasks = [self.test(proxy) for proxy in proxies]
                self.loop.run_until_complete(asyncio.wait(tasks))
            if not cursor:
                break
Пример #4
0
class Tester(object):
    """
    测试队列中代理 的 测试器
    """
    def __init__(self):
        """
        初始化redis
        """
        self.redis = RedisClient()
        self.loop = asyncio.get_event_loop()

    # 测试匿名1
    async def test_anonymous1(self, proxy: Proxy, session):

        url = 'https://httpbin.org/ip'
        async with session.get(url, timeout=TEST_TIMEOUT) as response:
            resp_json = await response.json()
            origin_ip = resp_json['origin']
        async with session.get(url,
                               proxy=f'http://{proxy.string()}',
                               timeout=TEST_TIMEOUT) as response:
            resp_json = await response.json()
            anonymous_ip = resp_json['origin']

        logger.debug(
            f'只测试匿名代理: {origin_ip != anonymous_ip} -- 结果匿名ip:{anonymous_ip},代理ip:{proxy.string()}'
        )
        assert origin_ip != anonymous_ip
        assert proxy.host == anonymous_ip

    # 测试匿名2
    async def test_anonymous2(self, proxy: Proxy, session):
        url = 'http://km.chik.cn/ip'
        async with session.get(url, timeout=TEST_TIMEOUT) as response:
            resp_json = await response.json()
            origin_ip = resp_json['origin']
        async with session.get(url,
                               proxy=f'http://{proxy.string()}',
                               timeout=TEST_TIMEOUT) as response:
            resp_json = await response.json()
            anonymous_ip = resp_json['origin']

        logger.debug(
            f'只测试匿名代理2: {origin_ip != anonymous_ip} -- 结果匿名ip:{anonymous_ip},代理ip:{proxy.string()}'
        )
        assert origin_ip != anonymous_ip
        assert proxy.host == anonymous_ip

    async def test(self, proxy: Proxy):
        """
        测试单个代理
        :param proxy: 代理对象
        :return:
        """
        async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(
                ssl=False)) as session:
            try:
                # logger.debug(f'测试代理ip为: {proxy.string()}')

                # 如果 TEST_ANONYMOUS 设置为 True, 确保
                # 代理具有隐藏真实IP的作用
                # TEST_ANONYMOUS 测试匿名
                if TEST_ANONYMOUS:
                    await self.test_anonymous1(proxy, session)
                if TEST_ANONYMOUS_MYSELF:
                    await self.test_anonymous2(proxy, session)

                async with session.get(TEST_URL,
                                       proxy=f'http://{proxy.string()}',
                                       timeout=TEST_TIMEOUT,
                                       allow_redirects=False) as response:
                    if response.status in TEST_VALID_STATUS:
                        self.redis.max(proxy)
                        logger.debug(f'代理 {proxy.string()} 是有效的, 设置最大分数')
                    else:
                        self.redis.decrease(proxy)
                        logger.debug(f'代理 {proxy.string()} 不可使用, 降低分数')

            except EXCEPTIONS:
                self.redis.decrease(proxy, score=-10)
                # logger.error(f'代理 {proxy.string()} 异常, 降低分数')

    @logger.catch
    def run(self):
        """
        主测试程序
        :return:
        """
        # event loop of aiohttp 的事件循环
        logger.info('开始测试...')

        # proxies = convert_proxy_or_proxies("191.235.98.23:3128")
        # [self.test(proxy) for proxy in proxies]

        count = self.redis.count()
        logger.debug(f'共 {count} 个代理等待测试')
        cursor = 0
        while True:
            logger.debug(f'测试代理游标 {cursor}, count {TEST_BATCH}')
            cursor, proxies = self.redis.batch(cursor, count=TEST_BATCH)
            if proxies:
                # proxies = convert_proxy_or_proxies("191.235.98.23:3128")
                tasks = [self.test(proxy) for proxy in proxies]
                self.loop.run_until_complete(asyncio.wait(tasks))
            if not cursor:
                break