Пример #1
0
class Tester(object):
    """Probe the proxies stored in Redis and update the pool accordingly.

    Each proxy is used for one GET request against ``TEST_URL``; depending on
    the outcome the proxy is passed to ``self.redis.max`` or
    ``self.redis.delete``.
    """

    def __init__(self):
        # Client for the proxy pool storage.
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Test a single proxy with one GET request to ``TEST_URL``.

        A response whose status is in ``VALID_STATUS_CODES`` leads to
        ``self.redis.max(proxy)``; any other status, or one of the handled
        request errors, leads to ``self.redis.delete(proxy)``.

        :param proxy: proxy address as ``str`` or UTF-8 encoded ``bytes``,
            without scheme (``http://`` is prepended here).
        :return: None
        """
        headers = {
            'accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
            'accept-encoding':
            'gzip, deflate, br',
            'accept-language':
            'zh-CN,zh;q=0.9',
            'user-agent':
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36',
        }
        try:
            if isinstance(proxy, bytes):
                proxy = proxy.decode('utf-8')
            options = {
                "url": TEST_URL,
                "headers": headers,
                "proxy": 'http://' + proxy,
                "timeout": 5,
                "allow_redirects": False,
                "verify_ssl": False,
            }

            # NOTE(review): `requests` must expose an awaitable `get()` that
            # returns an object with `.status` -- presumably an aiohttp-based
            # wrapper rather than the synchronous `requests` package; confirm.
            response = await requests.get(**options)
            print(response.status)
            if response.status in VALID_STATUS_CODES:
                self.redis.max(proxy)
            else:
                print('请求响应码不合法 ', response.status, 'IP', proxy)
                self.redis.delete(proxy)
        except (ClientError, aiohttp.client_exceptions.ClientConnectorError,
                asyncio.TimeoutError, AttributeError):
            self.redis.delete(proxy)
            print('代理请求失败', proxy)

    def run(self):
        """Test every proxy in the pool, ``BATCH_TEST_SIZE`` at a time.

        Batches are run concurrently on the current event loop, with a five
        second pause after each batch. Any exception raised while driving the
        batches is printed instead of propagated.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            for start in range(0, count, BATCH_TEST_SIZE):
                stop = min(start + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                test_proxies = self.redis.batch(start, stop)
                tasks = [
                    self.test_single_proxy(proxy) for proxy in test_proxies
                ]
                # An empty batch is skipped instead of aborting the whole run.
                if tasks:
                    loop = asyncio.get_event_loop()
                    # asyncio.wait() no longer accepts bare coroutines
                    # (TypeError on Python 3.11+); gather() wraps them itself.
                    # return_exceptions=True keeps one failing proxy from
                    # cancelling the rest of the batch.
                    results = loop.run_until_complete(
                        asyncio.gather(*tasks, return_exceptions=True))
                    for result in results:
                        if isinstance(result, BaseException):
                            print('测试器发生错误', result.args)
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
Пример #2
0
class ValidityTester(object):
    """Asynchronously validate proxies and keep the Redis pool up to date.

    Two workflows are supported:

    * ``set_raw_proxies()`` + ``test()``: probe freshly collected proxies
      against ``test_api`` and store the working ones via ``self._conn.put``.
    * ``set_timing_params()`` + ``TimingCheck()``: re-probe every proxy already
      stored and delete the ones that no longer answer with HTTP 200.

    The matching ``set_*`` method must be called first; it creates
    ``self._conn`` and the inputs the run method reads.
    """

    test_api = TEST_API

    def __init__(self):
        self._raw_proxies = None
        # Not referenced by any method visible in this class.
        self._usable_proxies = []

    def set_raw_proxies(self, proxies):
        """Register the proxies to validate and open the storage client."""
        self._raw_proxies = proxies
        self._conn = RedisClient()

    def set_timing_params(self):
        """Open the storage client and load what the periodic check needs."""
        self._conn = RedisClient()
        # Fetch every stored proxy so that all of them get re-checked.
        self._all_ips_item = self._conn.getAll()
        self._post_url = ALIE_API

    async def test_single_proxy(self, proxy):
        """Test one proxy; if it answers with HTTP 200, store it via ``_conn.put``.

        :param proxy: proxy address as ``str`` or UTF-8 encoded ``bytes``,
            without scheme (``http://`` is prepended here).
        """
        try:
            async with aiohttp.ClientSession() as session:
                try:
                    if isinstance(proxy, bytes):
                        proxy = proxy.decode('utf-8')
                    real_proxy = 'http://' + proxy
                    print('Testing', proxy)
                    async with session.get(
                            self.test_api,
                            proxy=real_proxy,
                            timeout=get_proxy_timeout) as response:
                        if response.status == 200:
                            self._conn.put(proxy)
                            print('Valid proxy', proxy)
                # asyncio.TimeoutError is listed explicitly: before Python 3.11
                # it is a different class from the builtin TimeoutError.
                except (ProxyConnectionError, TimeoutError,
                        asyncio.TimeoutError, ValueError):
                    print('Invalid proxy', proxy)
        except (ServerDisconnectedError, ClientResponseError,
                ClientConnectorError) as s:
            print(s)

    def test(self):
        """Run ``test_single_proxy`` concurrently for every raw proxy.

        Prints ``'Async Error'`` when there is nothing to test.
        """
        print('ValidityTester is working')
        tasks = [
            self.test_single_proxy(proxy) for proxy in self._raw_proxies
        ]
        self._run_all(tasks, 'Async Error')

    async def TimingCheckFunction(self, proxy):
        """Re-check one stored proxy against ``self._post_url``.

        The proxy is deleted from storage when the response status is not 200
        or when the request fails with a proxy/timeout/value error.

        :param proxy: proxy address as ``str`` or UTF-8 encoded ``bytes``,
            without scheme (``http://`` is prepended here).
        """
        try:
            async with aiohttp.ClientSession() as session:
                try:
                    if isinstance(proxy, bytes):
                        proxy = proxy.decode('utf-8')
                    real_proxy = 'http://' + proxy
                    headers = {'User-Agent': choice(AGENTS)}
                    print('Timing Check Async Ip:' + str(proxy))
                    async with session.get(self._post_url,
                                           proxy=real_proxy,
                                           timeout=get_proxy_timeout,
                                           headers=headers) as response:
                        if response.status != 200:
                            self._conn.delete(proxy)
                            print('Delete Old Invalid Proxy', proxy)
                        else:
                            print('Keep Save IP', proxy)
                # asyncio.TimeoutError is listed explicitly: before Python 3.11
                # it is a different class from the builtin TimeoutError.
                except (ProxyConnectionError, TimeoutError,
                        asyncio.TimeoutError, ValueError):
                    print('Foreach Delete Invalid Proxy Error', proxy)
                    self._conn.delete(proxy)
        except (ServerDisconnectedError, ClientResponseError,
                ClientConnectorError) as s:
            # The proxy is kept in storage on these errors; only the error is
            # reported. NOTE(review): confirm keeping it is intended.
            print('-------')
            print(s)

    def TimingCheck(self):
        """Run ``TimingCheckFunction`` concurrently for every stored proxy.

        Prints ``'Timing Check Error'`` when there is nothing to check.
        """
        tasks = [
            self.TimingCheckFunction(proxy) for proxy in self._all_ips_item
        ]
        self._run_all(tasks, 'Timing Check Error')

    def _run_all(self, coroutines, error_message):
        """Drive ``coroutines`` to completion on the current event loop.

        ``error_message`` is printed when the list is empty or a ``ValueError``
        is raised while running, matching what the callers printed before.
        """
        try:
            if not coroutines:
                print(error_message)
                return
            loop = asyncio.get_event_loop()
            # asyncio.wait() no longer accepts bare coroutines (TypeError on
            # Python 3.11+); gather() wraps them itself. return_exceptions=True
            # keeps one failing proxy from cancelling the others.
            outcomes = loop.run_until_complete(
                asyncio.gather(*coroutines, return_exceptions=True))
            for outcome in outcomes:
                if isinstance(outcome, BaseException):
                    print(outcome)
        except ValueError:
            print(error_message)
Пример #3
0
class Tester(object):
    """Probe pooled proxies using settings supplied in a ``data`` mapping.

    Keys read from ``data`` in this class: ``TEST_URL``, ``TEST_tage``
    (``'in'`` or ``'not in'``), ``TEST_if`` (text looked for in the response
    body) and ``BATCH_TEST_SIZE``.
    """

    def __init__(self, data):
        # The same settings mapping is handed to the storage client.
        self.redis = RedisClient(data)
        self.data = data

    async def test_single_proxy(self, proxy):
        """Test a single proxy by fetching ``data['TEST_URL']`` through it.

        The response body is searched for ``data['TEST_if']``. With
        ``TEST_tage == 'in'`` the proxy passes when the text is present, with
        ``'not in'`` when it is absent; any other ``TEST_tage`` value leaves
        the proxy untouched. A passing proxy goes to ``self.redis.max``, a
        failing one to ``self.redis.decrease``, and a proxy whose request
        raises one of the handled errors to ``self.redis.delete``.

        :param proxy: proxy address as ``str`` or UTF-8 encoded ``bytes``,
            without scheme (``http://`` is prepended here).
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(self.data['TEST_URL'],
                                       proxy=real_proxy,
                                       timeout=15,
                                       allow_redirects=False) as response:
                    text = await response.read()
                    mode = self.data['TEST_tage']
                    if mode in ('in', 'not in'):
                        found = bytes(self.data['TEST_if'],
                                      encoding="utf8") in text
                        # 'in' requires the marker, 'not in' forbids it.
                        if found == (mode == 'in'):
                            self.redis.max(proxy)
                            print('代理可用', proxy)
                        else:
                            self.redis.decrease(proxy)
                            print(
                                '不满足条件{}'.format(self.data['TEST_if'] +
                                                 self.data['TEST_tage']),
                                response.status, 'IP', proxy)
            except (ClientError,
                    aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.delete(proxy)
                print('代理请求失败', proxy)

    async def _single_proxy(self, proxy):
        """Placeholder for a single-proxy test; currently has no body.

        :param proxy: unused.
        :return: None
        """

    def run(self):
        """Test every proxy in the pool, ``data['BATCH_TEST_SIZE']`` at a time.

        Batches are run concurrently on the current event loop; stdout is
        flushed and the tester pauses five seconds after each batch. Any
        exception raised while driving the batches is printed instead of
        propagated.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            batch_size = self.data['BATCH_TEST_SIZE']
            for start in range(0, count, batch_size):
                stop = min(start + batch_size, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                test_proxies = self.redis.batch(start, stop)
                tasks = [
                    self.test_single_proxy(proxy) for proxy in test_proxies
                ]
                # An empty batch is skipped instead of aborting the whole run.
                if tasks:
                    loop = asyncio.get_event_loop()
                    # asyncio.wait() no longer accepts bare coroutines
                    # (TypeError on Python 3.11+); gather() wraps them itself.
                    # return_exceptions=True keeps one failing proxy from
                    # cancelling the rest of the batch.
                    results = loop.run_until_complete(
                        asyncio.gather(*tasks, return_exceptions=True))
                    for result in results:
                        if isinstance(result, BaseException):
                            print('测试器发生错误', result.args)
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)


# Tester().run()