Пример #1
0
class ValidityTester(object):
    """Check raw proxies concurrently and store the ones that work.

    Every candidate proxy is used to request ``test_api``; a proxy that
    produces an HTTP 200 response is handed to ``self._conn.put``.
    """

    test_api = TEST_API

    def __init__(self):
        self._raw_proxies = None
        self._usable_proxies = []
        # Created by set_raw_proxies(); declared here so the attribute
        # always exists on an instance.
        self._conn = None

    def set_raw_proxies(self, proxies):
        """Register the proxies to check and create a new ``RedisClient``.

        :param proxies: proxy addresses without a scheme (``str`` or UTF-8
            ``bytes``); ``'http://'`` is prepended before they are used.
        """
        self._raw_proxies = proxies
        self._conn = RedisClient()

    async def test_single_proxy(self, proxy):
        """Test one proxy and store it via ``self._conn.put`` if it works.

        :param proxy: proxy address without a scheme, ``str`` or ``bytes``.
        """
        try:
            async with aiohttp.ClientSession() as session:
                # Decoding stays inside the ``try``: a UnicodeDecodeError is
                # a ValueError and is reported as an invalid proxy below.
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('Testing', proxy)
                async with session.get(self.test_api,
                                       proxy=real_proxy,
                                       timeout=get_proxy_timeout) as response:
                    if response.status == 200:
                        self._conn.put(proxy)
                        print('Valid proxy', proxy)
        except (ProxyConnectionError, TimeoutError, ValueError):
            print('Invalid proxy', proxy)
        except (ServerDisconnectedError, ClientResponseError,
                ClientConnectorError) as s:
            print(s)

    def test(self):
        """Run ``test_single_proxy`` for every raw proxy on the event loop.

        Returns right after the start-up message when no proxies are set.
        """
        print('ValidityTester is working')
        if not self._raw_proxies:
            return
        loop = asyncio.get_event_loop()
        # asyncio.wait() must receive Tasks/Futures: passing bare coroutine
        # objects is deprecated since Python 3.8 and rejected by 3.11+.
        tasks = [loop.create_task(self.test_single_proxy(proxy))
                 for proxy in self._raw_proxies]
        try:
            loop.run_until_complete(asyncio.wait(tasks))
        except ValueError:
            # asyncio.wait() raises ValueError for an empty collection.
            print('Async Error')
Пример #2
0
class ValidityTester(object):
    """Check raw proxies concurrently and store the ones that work.

    Every candidate proxy is used to request ``test_api``; a proxy that
    produces an HTTP 200 response is handed to ``self._conn.put``.
    """

    test_api = TEST_API

    def __init__(self):
        self._raw_proxies = None
        self._usable_proxies = []
        # Created by set_raw_proxies(); declared here so the attribute
        # always exists on an instance.
        self._conn = None

    def set_raw_proxies(self, proxies):
        """Register the proxies to check and create a new ``RedisClient``.

        :param proxies: proxy addresses without a scheme (``str`` or UTF-8
            ``bytes``); ``'http://'`` is prepended before they are used.
        """
        self._raw_proxies = proxies
        self._conn = RedisClient()

    async def test_single_proxy(self, proxy):
        """Test one proxy and store it via ``self._conn.put`` if it works.

        :param proxy: proxy address without a scheme, ``str`` or ``bytes``.
        """
        async with aiohttp.ClientSession() as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('Testing ' + real_proxy)
                # Request the test URL through the proxy; an HTTP 200
                # response means the proxy is usable.
                async with session.get(self.test_api,
                                       proxy=real_proxy,
                                       timeout=15) as response:
                    if response.status == 200:
                        self._conn.put(proxy)
                        print('Vaild proxy', proxy)
            except (ProxyConnectionError, TimeoutError, ValueError):
                print('Invaild proxy', proxy)

    def test(self):
        """Run ``test_single_proxy`` for every raw proxy on the event loop.

        Returns immediately when no proxies are set.
        """
        if not self._raw_proxies:
            return
        loop = asyncio.get_event_loop()
        # asyncio.wait() must receive Tasks/Futures: passing bare coroutine
        # objects is deprecated since Python 3.8 and rejected by 3.11+.
        tasks = [
            loop.create_task(self.test_single_proxy(proxy))
            for proxy in self._raw_proxies
        ]
        try:
            loop.run_until_complete(asyncio.wait(tasks))
        except ValueError:
            # asyncio.wait() raises ValueError for an empty collection.
            print('asyncio error')
Пример #3
0
class ValidityTester(object):
    """Check raw proxies concurrently and store the ones that work.

    Every candidate proxy is used to request ``test_api``; a proxy that
    produces an HTTP 200 response is handed to ``self._conn.put``.
    """

    test_api = TEST_API

    def __init__(self):
        self._raw_proxies = None
        self._usable_proxies = []
        # Created by set_raw_proxies(); declared here so the attribute
        # always exists on an instance.
        self._conn = None

    def set_raw_proxies(self, proxies):
        """Register the proxies to check and create a new ``RedisClient``.

        :param proxies: proxy addresses without a scheme (``str`` or UTF-8
            ``bytes``); ``'http://'`` is prepended before they are used.
        """
        self._raw_proxies = proxies
        self._conn = RedisClient()

    async def test_single_proxy(self, proxy):
        """Test one proxy and store it via ``self._conn.put`` if it works.

        :param proxy: proxy address without a scheme, ``str`` or ``bytes``.
        """
        try:
            async with aiohttp.ClientSession() as session:
                # Decoding stays inside the ``try``: a UnicodeDecodeError is
                # a ValueError and is reported as an invalid proxy below.
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('Testing', proxy)
                async with session.get(self.test_api,
                                       proxy=real_proxy,
                                       timeout=get_proxy_timeout) as response:
                    if response.status == 200:
                        self._conn.put(proxy)
                        print('Valid proxy', proxy)
        except (ProxyConnectionError, TimeoutError, ValueError):
            print('Invalid proxy', proxy)
        except (ServerDisconnectedError, ClientResponseError,
                ClientConnectorError) as s:
            print(s)

    def test(self):
        """Run ``test_single_proxy`` for every raw proxy on the event loop.

        Returns right after the start-up message when no proxies are set,
        instead of failing on ``None`` or reporting an empty batch as an
        error.
        """
        print('ValidityTester is working')
        if not self._raw_proxies:
            return
        loop = asyncio.get_event_loop()
        # asyncio.wait() must receive Tasks/Futures: passing bare coroutine
        # objects is deprecated since Python 3.8 and rejected by 3.11+.
        tasks = [loop.create_task(self.test_single_proxy(proxy))
                 for proxy in self._raw_proxies]
        try:
            loop.run_until_complete(asyncio.wait(tasks))
        except ValueError:
            # asyncio.wait() raises ValueError for an empty collection.
            print('Async Error')
Пример #4
0
class ValidityTester(object):
    """Check raw proxies concurrently and store the ones that work.

    Every candidate proxy is used to request ``test_api``; a proxy that
    produces an HTTP 200 response is handed to ``self._conn.put``.
    """

    test_api = TEST_API

    def __init__(self):
        self._raw_proxies = None
        self._usable_proxies = []
        # Created by set_raw_proxies(); declared here so the attribute
        # always exists on an instance.
        self._conn = None

    def set_raw_proxies(self, proxies):
        """Register the proxies to check and create a new ``RedisClient``.

        :param proxies: proxy addresses without a scheme (``str`` or UTF-8
            ``bytes``); ``'http://'`` is prepended before they are used.
        """
        self._raw_proxies = proxies
        self._conn = RedisClient()

    async def test_single_proxy(self, proxy):
        """Test one proxy and store it via ``self._conn.put`` if it works.

        This is a coroutine; ``test()`` schedules one per proxy.

        :param proxy: proxy address without a scheme, ``str`` or ``bytes``.
        """
        try:
            # One aiohttp client session per check; the request below is
            # issued through it.
            async with aiohttp.ClientSession() as session:
                # Stored entries may arrive as bytes. Decoding stays inside
                # the ``try``: a UnicodeDecodeError is a ValueError and is
                # reported as an invalid proxy below.
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('Testing', proxy)
                async with session.get(self.test_api,
                                       proxy=real_proxy,
                                       timeout=get_proxy_timeout) as response:
                    if response.status == 200:
                        self._conn.put(proxy)
                        print('Valid proxy', proxy)
        except (ProxyConnectionError, TimeoutError, ValueError):
            print('Invalid proxy', proxy)
        except (ServerDisconnectedError, ClientResponseError,
                ClientConnectorError) as s:
            print(s)

    def test(self):
        """Run ``test_single_proxy`` for every raw proxy on the event loop.

        Returns right after the start-up message when no proxies are set;
        without this guard ``None`` raised ``TypeError`` and an empty list
        made ``asyncio.wait`` raise ``ValueError``.
        """
        print('ValidityTester is working')

        if not self._raw_proxies:
            return

        loop = asyncio.get_event_loop()
        # asyncio.wait() must receive Tasks/Futures: passing bare coroutine
        # objects is deprecated since Python 3.8 and rejected by 3.11+.
        tasks = [loop.create_task(self.test_single_proxy(proxy))
                 for proxy in self._raw_proxies]
        # Drive the loop until every scheduled check has finished.
        loop.run_until_complete(asyncio.wait(tasks))
Пример #5
0
class ApiTestCase(unittest.TestCase):
    """Exercise the ``/get`` and ``/count`` endpoints of the HTTP API."""

    def setUp(self):
        self._app = app.test_client()
        self._conn = RedisClient()

    def tearDown(self):
        # Runs after every test so entries stored by one case are not
        # carried into the next (assuming flush() clears the store).
        self._conn.flush()

    def test_get(self):
        """Two stored entries are expected back from two ``/get`` calls, in order."""
        self._conn.put('aaa')
        self._conn.put('bbb')
        # unittest assertion methods are used instead of bare ``assert`` so
        # the checks still run under ``python -O`` and report both operands.
        r = self._app.get('/get')
        self.assertIn('aaa', str(r.data))
        r = self._app.get('/get')
        self.assertIn('bbb', str(r.data))

    def test_count(self):
        """``/count`` is expected to follow puts and pops on the store."""
        self._conn.put('aaa')
        self._conn.put('bbb')
        r = self._app.get('/count')
        self.assertIn('2', str(r.data))
        self._conn.put('ccc')
        self._conn.put('ddd')
        r = self._app.get('/count')
        self.assertIn('4', str(r.data))
        # Only the side effect of pop() matters here; its result is unused.
        self._conn.pop()
        r = self._app.get('/count')
        self.assertIn('3', str(r.data))
Пример #6
0
class RedisClientTestCase(unittest.TestCase):
    """Exercise ``RedisClient``: put, put_many, pop, get and queue_len."""

    def setUp(self):
        self._conn = RedisClient()

    def tearDown(self):
        # Runs after every test so entries stored by one case are not
        # carried into the next (assuming flush() clears the store).
        self._conn.flush()

    def test_put_and_pop(self):
        """A single stored value is expected back from ``pop``."""
        self._conn.put("label")
        # unittest assertion methods are used instead of bare ``assert`` so
        # the checks still run under ``python -O`` and report both operands.
        self.assertEqual(self._conn.pop(), "label")

    def test_put_many(self):
        """Values stored with ``put_many`` are expected back in the same order."""
        self._conn.put_many(['a', 'b'])
        self.assertEqual(self._conn.pop(), "a")
        self.assertEqual(self._conn.pop(), "b")

    def test_len(self):
        """``queue_len`` is expected to equal the number of stored values."""
        self._conn.put_many(['a', 'b', 'c'])
        self.assertEqual(self._conn.queue_len, 3)

    def test_get(self):
        """``get(2)`` is expected to leave two of four stored values behind."""
        self._conn.put_many(['a', 'b', 'c', 'd'])
        # Only the effect of get() on the queue length is checked.
        self._conn.get(2)
        self.assertEqual(self._conn.queue_len, 2)
Пример #7
0
class ValidityTester(object):
    """Check raw proxies concurrently and store the ones that work.

    Every candidate proxy is used to request ``test_api``; a proxy that
    produces an HTTP 200 response is handed to ``self._conn.put``.
    """

    test_api = TEST_API

    def __init__(self):
        self._raw_proxies = None  # proxies waiting to be checked
        self._usable_proxies = []  # usable proxies
        # Created by set_raw_proxies(); declared here so the attribute
        # always exists on an instance.
        self._conn = None

    def set_raw_proxies(self, proxies):
        """Register the proxies to check and create a new ``RedisClient``.

        :param proxies: proxy addresses without a scheme (``str`` or UTF-8
            ``bytes``); ``'http://'`` is prepended before they are used.
        """
        self._raw_proxies = proxies
        self._conn = RedisClient()

    async def test_single_proxy(self, proxy):
        """Test one proxy and store it via ``self._conn.put`` if it works.

        This is a coroutine; ``test()`` schedules one per proxy.

        :param proxy: proxy address without a scheme, ``str`` or ``bytes``.
        """
        try:
            # One client session per check; the request is sent through it.
            async with aiohttp.ClientSession() as session:
                # Normalise bytes to str. Decoding stays inside the ``try``:
                # a UnicodeDecodeError is a ValueError and is reported as an
                # invalid proxy below.
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                # Build the proxy URL expected by aiohttp.
                real_proxy = 'http://' + proxy
                print('Testing', proxy)
                async with session.get(
                        self.test_api,
                        proxy=real_proxy,
                        timeout=get_proxy_timeout) as response:
                    # HTTP 200 means the proxy is usable: store it.
                    if response.status == 200:
                        self._conn.put(proxy)
                        print('Valid proxy', proxy)
        # Connection failure through the proxy, timeout or bad value:
        # report the proxy as invalid.
        except (ProxyConnectionError, TimeoutError, ValueError):
            print('Invalid proxy', proxy)
        # Other client errors are only printed.
        except (ServerDisconnectedError, ClientResponseError,
                ClientConnectorError) as s:
            print(s)

    def test(self):
        """Run ``test_single_proxy`` for every raw proxy on the event loop.

        Returns right after the start-up message when no proxies are set,
        instead of failing on ``None`` or reporting an empty batch as an
        error.
        """
        print('ValidityTester is working')
        if not self._raw_proxies:
            return
        # Coroutines cannot run on their own; they need an event loop.
        loop = asyncio.get_event_loop()
        # asyncio.wait() must receive Tasks/Futures: passing bare coroutine
        # objects is deprecated since Python 3.8 and rejected by 3.11+.
        tasks = [
            loop.create_task(self.test_single_proxy(proxy))
            for proxy in self._raw_proxies
        ]
        try:
            # Drive the loop until every scheduled check has finished.
            loop.run_until_complete(asyncio.wait(tasks))
        except ValueError:
            # asyncio.wait() raises ValueError for an empty collection.
            print('Async Error')
Пример #8
0
class ValidityTester(object):
    """Check raw proxies one at a time with blocking ``requests`` calls.

    A proxy that produces an HTTP 200 response from ``test_api`` is handed
    to ``self._conn.put``.
    """

    test_api = TEST_API

    def __init__(self):
        self._raw_proxies = None
        self._usable_proxies = []
        # Created by set_raw_proxies(); declared here so the attribute
        # always exists on an instance.
        self._conn = None

    def set_raw_proxies(self, proxies):
        """Register the proxies to check and create a new ``RedisClient``.

        :param proxies: proxy URLs (``str`` or UTF-8 ``bytes``).
        """
        self._raw_proxies = proxies
        self._conn = RedisClient()

    def test_single_proxy(self, proxy):
        """Test one proxy URL and store it via ``self._conn.put`` if it works.

        :param proxy: proxy URL shaped like ``scheme://host:port`` (``str``
            or UTF-8 ``bytes``). Any other shape fails the parsing below and
            is reported as unusable.
        """
        try:
            if isinstance(proxy, bytes):
                proxy = proxy.decode('utf-8')
            print('正在测试:', proxy)
            # Splitting 'scheme://host:port' on ':' and '/' yields
            # [scheme, '', '', host, port], hence indices 0, 3 and 4.
            pro = re.split(r'[\:\/\/]', proxy)
            proxy_dict = {pro[0]: pro[3] + ':' + pro[4]}
            # Use the class attribute rather than the module constant so an
            # overridden ``test_api`` is honoured.
            response = requests.get(url=self.test_api,
                                    proxies=proxy_dict,
                                    timeout=5)
            if response.status_code == 200:
                self._conn.put(proxy)
                print('可用代理:', proxy)
        except Exception as e:
            # Deliberately broad: any failure (malformed proxy, network
            # error, storage error) only marks this proxy as unusable.
            print(e)
            print('不可用代理:', proxy)

    def test(self):
        """Run ``test_single_proxy`` sequentially for every raw proxy.

        Does nothing beyond the start-up message when no proxies are set.
        """
        print('进行代理可用性测试:')
        try:
            for proxy in self._raw_proxies or ():
                self.test_single_proxy(proxy)
        except Exception as e:
            # test_single_proxy() handles its own errors, so this only
            # fires when ``_raw_proxies`` itself cannot be iterated.
            print(e)
            print('Async Error!')
Пример #9
0
class ValidityTester(object):
    """Validate new proxies and re-check the ones already stored.

    ``test()`` checks raw proxies against ``test_api`` and stores the working
    ones; ``TimingCheck()`` re-checks stored proxies against ``ALIE_API`` and
    deletes the ones that fail.
    """

    test_api = TEST_API

    def __init__(self):
        self._raw_proxies = None
        self._usable_proxies = []
        # Populated by set_raw_proxies() / set_timing_params(); declared
        # here so the attributes always exist on an instance.
        self._conn = None
        self._all_ips_item = None
        self._post_url = None

    def set_raw_proxies(self, proxies):
        """Register the proxies to check and create a new ``RedisClient``.

        :param proxies: proxy addresses without a scheme (``str`` or UTF-8
            ``bytes``); ``'http://'`` is prepended before they are used.
        """
        self._raw_proxies = proxies
        self._conn = RedisClient()

    def set_timing_params(self):
        """Load every stored proxy so ``TimingCheck`` can re-check it."""
        self._conn = RedisClient()
        # Fetch all currently stored proxies for checking.
        self._all_ips_item = self._conn.getAll()
        self._post_url = ALIE_API

    async def test_single_proxy(self, proxy):
        """Test one proxy and store it via ``self._conn.put`` if it works.

        :param proxy: proxy address without a scheme, ``str`` or ``bytes``.
        """
        try:
            async with aiohttp.ClientSession() as session:
                # Decoding stays inside the ``try``: a UnicodeDecodeError is
                # a ValueError and is reported as an invalid proxy below.
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('Testing', proxy)
                async with session.get(
                        self.test_api,
                        proxy=real_proxy,
                        timeout=get_proxy_timeout) as response:
                    if response.status == 200:
                        self._conn.put(proxy)
                        print('Valid proxy', proxy)
        except (ProxyConnectionError, TimeoutError, ValueError):
            print('Invalid proxy', proxy)
        except (ServerDisconnectedError, ClientResponseError,
                ClientConnectorError) as s:
            print(s)

    def test(self):
        """Run ``test_single_proxy`` for every raw proxy on the event loop.

        Returns right after the start-up message when no proxies are set.
        """
        print('ValidityTester is working')
        if not self._raw_proxies:
            return
        loop = asyncio.get_event_loop()
        # asyncio.wait() must receive Tasks/Futures: passing bare coroutine
        # objects is deprecated since Python 3.8 and rejected by 3.11+.
        tasks = [
            loop.create_task(self.test_single_proxy(proxy))
            for proxy in self._raw_proxies
        ]
        try:
            loop.run_until_complete(asyncio.wait(tasks))
        except ValueError:
            # asyncio.wait() raises ValueError for an empty collection.
            print('Async Error')

    async def TimingCheckFunction(self, proxy):
        """Re-check one stored proxy and delete it if it no longer works.

        The proxy is removed via ``self._conn.delete`` when the request to
        ``self._post_url`` returns a non-200 status or fails with a proxy
        connection, timeout or value error.

        :param proxy: proxy address without a scheme, ``str`` or ``bytes``.
        """
        try:
            async with aiohttp.ClientSession() as session:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                # A random User-Agent from AGENTS is sent with each check.
                headers = {'User-Agent': choice(AGENTS)}
                print('Timing Check Async Ip:' + str(proxy))
                async with session.get(self._post_url,
                                       proxy=real_proxy,
                                       timeout=get_proxy_timeout,
                                       headers=headers) as response:
                    if response.status != 200:
                        self._conn.delete(proxy)
                        print('Delete Old Invalid Proxy', proxy)
                    else:
                        print('Keep Save IP', proxy)
        except (ProxyConnectionError, TimeoutError, ValueError):
            print('Foreach Delete Invalid Proxy Error', proxy)
            self._conn.delete(proxy)
        except (ServerDisconnectedError, ClientResponseError,
                ClientConnectorError) as s:
            # These failures are only printed; the proxy is not deleted.
            print('-------')
            print(s)

    def TimingCheck(self):
        """Run ``TimingCheckFunction`` for every proxy loaded by ``set_timing_params``.

        Returns immediately when nothing has been loaded.
        """
        if not self._all_ips_item:
            return
        loop = asyncio.get_event_loop()
        # See test(): asyncio.wait() needs Tasks, not bare coroutines.
        tasks = [
            loop.create_task(self.TimingCheckFunction(proxy))
            for proxy in self._all_ips_item
        ]
        try:
            loop.run_until_complete(asyncio.wait(tasks))
        except ValueError:
            # asyncio.wait() raises ValueError for an empty collection.
            print('Timing Check Error')
Пример #10
0
class ValidityTester(object):
    """Proxy validity tester.

    Every candidate proxy is used to request ``test_api``; a proxy that
    produces an HTTP 200 response is handed to ``self._conn.put``.
    """

    # URL requested through each proxy.
    test_api = TEST_API

    def __init__(self):
        # Proxies whose validity has not been tested yet.
        self._raw_proxies = None
        # Usable proxies.
        self._usable_proxies = []
        # Created by set_raw_proxies(); declared here so the attribute
        # always exists on an instance.
        self._conn = None

    def set_raw_proxies(self, proxies):
        """Set the proxies to test and create a new ``RedisClient``.

        :param proxies: proxy addresses without a scheme (``str`` or UTF-8
            ``bytes``); ``'http://'`` is prepended before they are used.
        :return: None
        """
        self._raw_proxies = proxies
        self._conn = RedisClient()

    async def test_single_proxy(self, proxy):
        """Test one proxy and store it via ``self._conn.put`` if it works.

        :param proxy: proxy address without a scheme, ``str`` or ``bytes``.
        :return: None
        """
        try:
            async with aiohttp.ClientSession() as session:
                # Decoding stays inside the ``try``: a UnicodeDecodeError is
                # a ValueError and is reported as an invalid proxy below.
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('Testing', proxy)
                async with session.get(
                        self.test_api,
                        proxy=real_proxy,
                        timeout=get_proxy_timeout) as response:
                    if response.status == 200:
                        self._conn.put(proxy)
                        print('Valid proxy', proxy)
        # Every handled failure is reported the same way, so a single
        # clause covers all of them.
        except (ProxyConnectionError, TimeoutError, ValueError,
                ClientOSError, ServerDisconnectedError, ClientResponseError,
                ClientConnectorError):
            print('Invalid proxy', proxy)

    def test(self):
        """Test all raw proxies asynchronously on the event loop.

        Returns right after the start-up message when no proxies are set,
        instead of failing on ``None`` or reporting an empty batch as an
        error.
        """
        print('ValidityTester is working')
        if not self._raw_proxies:
            return
        loop = asyncio.get_event_loop()
        # asyncio.wait() must receive Tasks/Futures: passing bare coroutine
        # objects is deprecated since Python 3.8 and rejected by 3.11+.
        tasks = [
            loop.create_task(self.test_single_proxy(proxy))
            for proxy in self._raw_proxies
        ]
        try:
            loop.run_until_complete(asyncio.wait(tasks))
        except ValueError:
            # asyncio.wait() raises ValueError for an empty collection.
            print('Async Error')