Пример #1
0
class Tester(object):
    """Check the proxies held by ``self.redis`` and adjust their scores.

    ``run`` tests proxies in asyncio/aiohttp batches; ``new_run`` tests them
    one by one with ``requests``.
    """

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request TEST_URL through ``proxy`` and record the outcome.

        Calls ``self.redis.max(proxy)`` when the response status is in
        VALID_STATUS_CODES, otherwise ``self.redis.decrease(proxy)``.

        :param proxy: proxy address, either ``str`` or UTF-8 encoded ``bytes``
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=10) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应不合法', proxy)
            # aiohttp signals an expired timeout with asyncio.TimeoutError,
            # which is not a ClientError, so it has to be listed explicitly.
            except (ClientConnectionError, ClientError, ConnectTimeout,
                    asyncio.TimeoutError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Test every stored proxy in batches of BATCH_TEST_SIZE.

        Any exception raised while scheduling is printed, not propagated.
        """
        print('测试器开始运行')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutine objects on
                # Python 3.11+, so wrap each coroutine in a Task first.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)

    def test_single_tread(self, proxy):
        """Synchronously test one proxy with ``requests``.

        :param proxy: proxy address, either ``str`` or UTF-8 encoded ``bytes``
        """
        # Decode first: concatenating str and bytes raises TypeError.
        if isinstance(proxy, bytes):
            proxy = proxy.decode('utf-8')
        real_proxy = {'https': 'https://' + proxy}
        print('测试', real_proxy)
        try:
            res = requests.get(TEST_URL, proxies=real_proxy, timeout=10)
            if res.status_code in VALID_STATUS_CODES:
                self.redis.max(proxy)
            else:
                self.redis.decrease(proxy)
        except (ConnectionError, ConnectTimeout):
            self.redis.decrease(proxy)
            print('代理请求失败', proxy)

    def new_run(self):
        """Test every stored proxy sequentially via ``test_single_tread``."""
        for ip in self.redis.all():
            self.test_single_tread(ip)
Пример #2
0
class ValidTester(object):
    """Base class that walks stored cookies and hands each one to ``test``."""

    def __init__(self, website='default'):
        """
        Set up the two Redis-backed stores used by the tester.
        :param website: site name passed to both RedisClient instances
        """
        self.website = website
        self.cookies_db = RedisClient('cookies', self.website)
        self.accounts_db = RedisClient('accounts', self.website)

    def test(self, username, cookies):
        """
        Validate one cookies entry; subclasses must override this.
        :param username: account name the cookies belong to
        :param cookies: the stored cookies value
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError

    def run(self):
        """
        Call ``test`` for every (username, cookies) pair in ``cookies_db``.
        """
        stored = self.cookies_db.all()
        for entry in stored.items():
            account, value = entry
            self.test(account, value)
Пример #3
0
class Test_ip(object):
    """Test stored proxies in background threads and update their scores."""

    def __init__(self):
        self.db = RedisClient()
        self.headers = headers
        self.url = test_url

    def get_url(self, proxy, timeout=10):
        """Return True when ``self.url`` answers 200 through ``proxy``.

        :param proxy: ``proxies`` mapping handed to ``requests.get``
        :param timeout: seconds to wait; without it a dead proxy can hang
            the calling thread indefinitely
        :return: True on HTTP 200, False on any other status or any error
        """
        try:
            con = requests.get(self.url, headers=self.headers,
                               proxies=proxy, timeout=timeout)
        except Exception:
            # Best effort: any failure means the proxy is unusable.
            # (A bare ``except:`` would also trap KeyboardInterrupt.)
            return False
        return con.status_code == 200

    def test(self, ip):
        """Test one proxy; call ``db.max`` on success, ``db.decrease`` otherwise.

        :param ip: proxy address, either ``str`` or UTF-8 encoded ``bytes``
        """
        if isinstance(ip, bytes):
            ip = ip.decode('utf-8')
        proxy = {'http': 'http://' + ip}
        if self.get_url(proxy):
            self.db.max(ip)
        else:
            self.db.decrease(ip)

    def run(self):
        """Start one daemon thread per stored proxy, pausing periodically."""
        proxies = self.db.all()
        for i, ip in enumerate(proxies):
            # ``daemon=True`` replaces the deprecated ``setDaemon(True)``.
            t = threading.Thread(target=self.test, args=(ip,), daemon=True)
            t.start()
            random_time()
            if i % 100 == 0:
                time.sleep(5)
Пример #4
0
class Tester(object):
    """Asynchronously check stored proxies and adjust their scores."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request TEST_URL through ``proxy`` and record the outcome.

        Calls ``self.redis.max(proxy)`` when the status is in
        VALID_STATUS_CODES; otherwise, or on any exception, calls
        ``self.redis.decrease(proxy)``.

        :param proxy: proxy address, either ``str`` or UTF-8 encoded ``bytes``
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print("代理可用", proxy)
                    else:
                        self.redis.decrease(proxy)
                        print("请求响应码不合法", proxy)
            except Exception:
                self.redis.decrease(proxy)
                print("代理请求失败", proxy)

    def run(self):
        """Test every stored proxy in batches of BATCH_TEST_SIZE."""
        print("测试器开始运行")
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutine objects on
                # Python 3.11+, so wrap each coroutine in a Task first.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print("测试器发生错误", e.args)
Пример #5
0
class PoolTester(object):
    """Validate stored proxies concurrently with a thread pool."""

    def __init__(self):
        self.redis = RedisClient()

    def test_single_proxy(self, proxy):
        """
        Test one proxy and update the store accordingly.

        Calls ``self.redis.max(proxy)`` when ``test_proxy_vaild(proxy)`` is
        truthy, otherwise ``self.redis.drop(proxy)``.
        :param proxy: the proxy to test
        :return: None
        """
        if test_proxy_vaild(proxy):
            self.redis.max(proxy)
            print("[+] 代理可用", proxy)
        else:
            self.redis.drop(proxy)
            print("[-] 代理不可用", proxy)

    def run(self):
        """
        Test every stored proxy using FilterTreadCount worker threads.
        :return: None
        """
        print("测试器开始运行.......")
        try:
            count = self.redis.count()
            print("当前剩余%d个代理" % (count))
            with ThreadPoolExecutor(FilterTreadCount) as pool:
                # Executor.map only re-raises a worker's exception when its
                # result is fetched; drain the iterator so failures reach
                # the handler below instead of vanishing silently.
                for _ in pool.map(self.test_single_proxy, self.redis.all()):
                    pass
        except Exception as e:
            print("测试器发生错误", e)
Пример #6
0
class PoolTester(object):
    """Validate stored proxies concurrently with a thread pool."""

    def __init__(self):
        self.redis = RedisClient()

    def testSingleProxy(self, proxy):
        """
        Test one proxy and update the store accordingly.

        Calls ``self.redis.max(proxy)`` when ``testProxyVaild(proxy)`` is
        truthy, otherwise ``self.redis.drop(proxy)``.
        :param proxy: the proxy to test
        :return: None
        """
        if testProxyVaild(proxy):
            self.redis.max(proxy)
            print(Fore.GREEN + "[+] 代理可用", proxy)
        else:
            self.redis.drop(proxy)
            print(Fore.RED + "[-] 代理不可用", proxy)

    def run(self):
        """
        Test every stored proxy using FILTER_THREAD_COUNT worker threads.
        :return: None
        """
        print(Fore.GREEN + "测试器开始运行.......")
        try:
            count = self.redis.count()
            print(Fore.GREEN + "当前剩余%d个代理" % count)
            with ThreadPoolExecutor(FILTER_THREAD_COUNT) as pool:
                # Executor.map only re-raises a worker's exception when its
                # result is fetched; drain the iterator so failures reach
                # the handler below instead of vanishing silently.
                for _ in pool.map(self.testSingleProxy, self.redis.all()):
                    pass
        except Exception as e:
            print(Fore.RED + "测试器发生错误", e)
Пример #7
0
class Tester():
    """
    Validate stored proxies one at a time with ``requests``.
    """
    def __init__(self):
        self.redis = RedisClient()

    def test_single_proxy(self, proxy):
        """
        Request TEST_URL through ``proxy`` and record the outcome.

        Calls ``self.redis.max(proxy)`` when the status code is in
        VALID_STATUS_CODES; otherwise, or on a RequestException, calls
        ``self.redis.decrease(proxy)``.
        :param proxy: proxy address, either ``str`` or UTF-8 encoded ``bytes``
        """
        if isinstance(proxy, bytes):
            proxy = proxy.decode('utf-8')
        # Route the request through the proxy under test. Without
        # ``proxies=`` the request goes out directly and every proxy
        # appears to work.
        proxies = {'http': 'http://' + proxy, 'https': 'http://' + proxy}
        try:
            response = requests.get(url=TEST_URL, proxies=proxies, timeout=5)
            if response.status_code in VALID_STATUS_CODES:
                self.redis.max(proxy)
                print('测试成功', proxy, time.strftime('%Y-%m-%d %H:%M', time.localtime(time.time())))
            else:
                self.redis.decrease(proxy)
        except RequestException:
            print('代理测试请求异常', proxy)
            self.redis.decrease(proxy)

    def run(self):
        """Test every proxy returned by ``self.redis.all()`` sequentially."""
        print('测试器开始 测试代理%d个' % self.redis.count(), time.strftime('%Y-%m-%d %H-%M',time.localtime(time.time())))
        proxies = self.redis.all()
        for proxy in proxies:
            self.test_single_proxy(proxy)
Пример #8
0
class Tester(object):
    """Asynchronously check stored proxies and adjust their scores."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        '''
        Request test_url through ``proxy`` and record the outcome.

        Calls ``self.redis.max(proxy)`` when the status is in
        valid_status_codes; otherwise, or on any exception, calls
        ``self.redis.decrease(proxy)``.
        :param proxy: proxy address, either ``str`` or UTF-8 encoded ``bytes``
        :return: None
        '''
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                # Proxies may arrive as bytes; normalise to str.
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试...')
                async with session.get(test_url, proxy=real_proxy,
                                       timeout=15) as response:
                    if response.status in valid_status_codes:
                        self.redis.max(proxy)
                        print('代理可用')
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法,代理检测失败')
            except Exception:
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        '''
        Test every stored proxy in batches of batch_test_size.
        :return: None
        '''
        print('测试器开始运行>>>>>>')
        try:
            proxie = self.redis.all()
            loop = asyncio.get_event_loop()
            # Each batch becomes a list of tasks registered on the event
            # loop, so the requests within a batch overlap while each one
            # awaits I/O.
            for i in range(0, len(proxie), batch_test_size):
                test_proxies = proxie[i:i + batch_test_size]
                # asyncio.wait() rejects bare coroutine objects on
                # Python 3.11+, so wrap each coroutine in a Task first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
            print('检测完成')

        except Exception as e:
            print('测试发生错误!!', e)
Пример #9
0
class Checker(object):
    """Periodically test stored proxies and remove those that keep failing."""

    def __init__(self):
        self.db = RedisClient()
        # Failure tally per proxy address.
        self.counts = defaultdict(int)

    def check(self, proxy):
        """
        Request settings.TEST_URL through ``proxy``.
        :param proxy: proxy address (``host:port`` string)
        :return: True on HTTP 200; False on any other status, or when a
            ConnectionError / ReadTimeout is raised
        """
        try:
            response = requests.get(settings.TEST_URL,
                                    proxies={
                                        'http': 'http://' + proxy,
                                        'https': 'https://' + proxy
                                    },
                                    timeout=settings.TEST_TIMEOUT)
            logger.debug(f'Using {proxy} to test {settings.TEST_URL}...')
        except (ConnectionError, ReadTimeout):
            return False
        # Always return a bool rather than falling through with None.
        return response.status_code == 200

    def run(self):
        """
        Run one test round over every entry of ``self.db.all()``.

        An entry is removed once its proxy's failure tally reaches
        settings.TEST_MAX_ERROR_COUNT.
        :return: None
        """
        proxies = self.db.all()
        logger.info(f'Try to get all proxies {proxies}')
        expired = set()
        for name, proxy in proxies.items():
            if not self.check(proxy):
                logger.info(f'Proxy {proxy} invalid')
                self.counts[proxy] += 1
            else:
                logger.info(f'Proxy {proxy} valid')
            count = self.counts.get(proxy) or 0
            logger.debug(
                f'Count {count}, TEST_MAX_ERROR_COUNT {settings.TEST_MAX_ERROR_COUNT}'
            )
            if count >= settings.TEST_MAX_ERROR_COUNT:
                self.db.remove(name)
                expired.add(proxy)
        # Drop tallies of removed proxies after the round. Otherwise a proxy
        # that is added again later would be removed immediately, even if
        # its check succeeds, because the stale tally is still at the limit.
        for proxy in expired:
            self.counts.pop(proxy, None)

    def loop(self):
        """
        Run test rounds forever, sleeping settings.TEST_CYCLE seconds
        between rounds.
        :return: never returns
        """
        while True:
            logger.info('Check for infinite')
            self.run()
            logger.info(f'Tested, sleeping for {settings.TEST_CYCLE}s...')
            time.sleep(settings.TEST_CYCLE)
Пример #10
0
class Tester(object):
    """Asynchronously check stored proxies and adjust their scores."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Request TEST_URL through ``proxy`` and record the outcome.

        Calls ``self.redis.max(proxy)`` when the status is in
        VALID_STATUS_CODES, otherwise ``self.redis.decrease(proxy)``.
        :param proxy: proxy address, either ``str`` or UTF-8 encoded ``bytes``
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL, proxy=real_proxy,
                                       timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法', proxy)
            except (ClientError,
                    aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """
        Test every stored proxy in batches of BATCH_TEST_SIZE.
        :return: None
        """
        print('测试器开始运行')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            count = self.redis.count()
            print(count)
            # Step over the list actually fetched. If the store's count
            # exceeded it, the surplus slices would be empty and
            # asyncio.wait() raises ValueError on an empty set.
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutine objects on
                # Python 3.11+, so wrap each coroutine in a Task first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
Пример #11
0
class Tester:
    """Asynchronously check stored proxies and adjust their scores."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Request settings.target_url through ``proxy`` and record the outcome.

        Calls ``self.redis.max(proxy)`` on HTTP 200; otherwise, or on any
        exception, calls ``self.redis.decrease(proxy)`` exactly once.
        :param proxy: proxy address, either ``str`` or UTF-8 encoded ``bytes``
        :return: None
        """
        if isinstance(proxy, bytes):
            proxy = proxy.decode('utf-8')
        real_proxy = 'http://' + proxy
        conn = aiohttp.TCPConnector(verify_ssl=False)
        try:
            async with aiohttp.ClientSession(headers=settings.headers,
                                             connector=conn) as session:
                print('正在测试', proxy)
                # ``async with`` releases the response when the block ends.
                async with session.get(settings.target_url,
                                       proxy=real_proxy,
                                       timeout=5) as rsp:
                    if rsp.status == 200:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        # Penalise once. Raising here would land in the
                        # handler below and decrease the score a second
                        # time for the same failed check.
                        self.redis.decrease(proxy)
                        print('代理请求失败', proxy)
        except Exception as e:
            self.redis.decrease(proxy)
            print('代理请求失败', proxy)
            print(e.__cause__)

    def run(self):
        """
        Test every stored proxy in batches of settings.test_request_count.
        :return: None
        """
        print('开始测试...')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            for i in range(0, len(proxies), settings.test_request_count):
                test_proxies = proxies[i:i + settings.test_request_count]
                # asyncio.wait() rejects bare coroutine objects on
                # Python 3.11+, so wrap each coroutine in a Task first.
                task = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(task))
                time.sleep(5)
        except Exception as e:
            print('测试出现异常', e.args)
Пример #12
0
class ValidTester(object):
    """Base class that walks stored cookies and hands each one to ``test``."""

    def __init__(self, website='default'):
        """
        :param website: site name passed to both RedisClient instances
        """
        self.website = website
        self.cookies_db = RedisClient('cookies', self.website)
        self.accounts_db = RedisClient('accounts', self.website)

    def test(self, username, cookies):
        """
        Validate one cookies entry; subclasses must override this.
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError

    def run(self):
        """Call ``test`` for every (username, cookies) pair in ``cookies_db``."""
        cookies_group = self.cookies_db.all()
        # Iterating a mapping directly yields only its keys, so unpacking
        # into (username, cookies) fails or mis-splits the key. Use
        # ``.items()`` when available; an iterable of pairs still works.
        if hasattr(cookies_group, 'items'):
            pairs = cookies_group.items()
        else:
            pairs = cookies_group
        for username, cookies in pairs:
            self.test(username, cookies)
Пример #13
0
class Tester():
    """Base class that walks stored cookies and hands each one to ``test``."""

    def __init__(self, website="default"):
        """
        :param website: site name passed to both RedisClient instances
        """
        self.website = website
        self.cookie_db = RedisClient('cookies', self.website)
        self.account_db = RedisClient('accounts', self.website)

    def test(self, username, cookie):
        """
        Validate one cookie entry; subclasses must override this.
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError

    def run(self):
        """Call ``test`` for every (username, cookie) pair in ``cookie_db``."""
        stored = self.cookie_db.all()
        for entry in stored.items():
            account, value = entry
            self.test(account, value)
Пример #14
0
class Tester(object):
    """Asynchronously check stored proxies and adjust their scores."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request TEST_URL through ``proxy`` and record the outcome.

        Calls ``self.redis.max(proxy)`` when the status is in
        VALID_STATUS_CODES, otherwise ``self.redis.decrease(proxy)``.

        :param proxy: proxy address, either ``str`` or UTF-8 encoded ``bytes``
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print( 'IP', proxy,'请求响应码不合法 ')
            except (ClientError, aiohttp.client_exceptions.ClientConnectorError, asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Test every stored proxy in batches of BATCH_TEST_SIZE."""
        print('测试器运行')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutine objects on
                # Python 3.11+, so wrap each coroutine in a Task first.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print(e.args)
Пример #15
0
class Tester(object):
    """
    Tester that requests TEST_URL through each stored proxy to decide
    whether the proxy is usable.
    """

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Request TEST_URL through ``proxy`` and record the outcome.

        Calls ``self.redis.max(proxy)`` when the status is in
        VALID_STATUS_CODES, otherwise ``self.redis.decrease(proxy)``.
        @param proxy: proxy address, either ``str`` or UTF-8 encoded ``bytes``
        return None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                # %-formatting bytes into a str yields "b'...'" and so an
                # invalid proxy URL; decode first.
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://%s' % proxy
                async with session.get(TEST_URL, proxy=real_proxy,
                                       timeout=2) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                    else:
                        self.redis.decrease(proxy)
            # asyncio.TimeoutError is a separate class from the builtin
            # TimeoutError before Python 3.11, so list it explicitly.
            except (ClientError, ClientConnectorError, TimeoutError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)

    def run(self):
        """
        Test every stored proxy in batches of BATCH_TEST_SIZE.
        """
        print('Starting test')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]

                # asyncio.wait() rejects bare coroutine objects on
                # Python 3.11+, so wrap each coroutine in a Task first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]

                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('Tester errors', e.args)
class Tester(object):
    """Asynchronously check stored proxies and adjust their scores."""

    def __init__(self):
        # Shared RedisClient used by the other methods of this object.
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Request TEST_URL through ``proxy`` and record the outcome.

        Calls ``self.redis.max(proxy)`` when the status is in
        VALID_STATUS_CODES, otherwise ``self.redis.decrease(proxy)``.
        :param proxy: proxy address, either ``str`` or UTF-8 encoded ``bytes``
        :return:  None
        """
        # ``proxy`` must be a parameter: ``run`` passes it positionally and
        # the body reads it before assigning.
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                # TEST_URL is the page fetched through the proxy; point it
                # at the target site when scraping a specific website.
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求相应码不合法', proxy)
            # asyncio.TimeoutError is a separate class from the builtin
            # TimeoutError before Python 3.11, so list it explicitly.
            except (ClientError, ClientConnectorError, TimeoutError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """
        Test every stored proxy in batches of BATCH_TEST_SIZE.
        :return: None
        """
        print('测试器开始运行')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutine objects on
                # Python 3.11+, so wrap each coroutine in a Task first.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
Пример #17
0
class Tester(object):
    """Asynchronously check stored proxies and adjust their scores."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request TEST_URL through ``proxy`` and record the outcome.

        Calls ``self.redis.max(proxy)`` when the status is in
        VALID_STATUS_CODES, otherwise ``self.redis.decrease(proxy)``.

        :param proxy: proxy address, either ``str`` or UTF-8 encoded ``bytes``
        """
        conn = aiohttp.TCPConnector(ssl=False)

        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode("utf-8")

                real_proxy = "http://" + proxy
                print("正在测试", proxy)

                async with session.get(TEST_URL, proxy=real_proxy,
                                       timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print("代理可用", proxy)
                    else:
                        self.redis.decrease(proxy)
                        print("请求响应码不合法", proxy)
            except (aiohttp.ClientError, aiohttp.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print("代理请求失败", proxy)

    def run(self):
        """Test every stored proxy in batches of BATCH_TEST_SIZE."""
        print("测试器开始执行")
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()

            # Step over the list actually fetched. A separately queried
            # count could exceed it, producing empty slices on which
            # asyncio.wait() raises ValueError.
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutine objects on
                # Python 3.11+, so wrap each coroutine in a Task first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print("测试器发生错误", e.args)
Пример #18
0
class Sync(object):
    """
    Two-way data synchronisation between MySQL and Redis.
    """
    def __init__(self):
        self.redis = RedisClient()
        self.pool = ProxyPool.objects.filter(is_exsist=True)

    def sync_start(self):
        """Copy ``self.pool`` rows into Redis, then push Redis entries back."""
        # MySQL -> Redis
        for record in self.pool:
            self.redis.add(record.proxy, record.score, mysql_save=False)
        # Redis -> MySQL
        for entry in self.redis.all():
            self.redis.mysql_add(entry)
Пример #19
0
class Tester:
    """Check whether pooled proxies work and adjust their scores.

    A working proxy gets ``self.redis.max``; a failing one gets
    ``self.redis.decrease``.
    """
    def __init__(self):
        self.redis = RedisClient()

    async def single_test(self, proxy):
        """Request TEST_URL through ``proxy`` and record the outcome.

        :param proxy: proxy address, either ``str`` or UTF-8 encoded ``bytes``
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = "http://" + proxy
                print("正在测试:", proxy)
                async with session.get(TEST_URL, proxy=real_proxy,
                                       timeout=15) as response:
                    # aiohttp responses expose ``status``. ``status_code``
                    # raises AttributeError, which the handler below turned
                    # into a penalty for every proxy, working or not.
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print(proxy, '代理可用')
                    else:
                        self.redis.decrease(proxy)
                        print(proxy, 'IP 请求响应码不合法')
            # aiohttp signals an expired timeout with asyncio.TimeoutError,
            # which is not a ClientError.
            except (ClientError, asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Test every stored proxy in batches of BATCH_TEST_SIZE."""
        print('开始测试代理')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')

            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            # Step over the list actually fetched. The earlier count may
            # differ from it, and an empty slice makes asyncio.wait()
            # raise ValueError.
            for start in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[start:start + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutine objects on
                # Python 3.11+, so wrap each coroutine in a Task first.
                tasks = [loop.create_task(self.single_test(proxy))
                         for proxy in test_proxies]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(randint(1, 5))
        except Exception as e:
            print('测试器发生错误', e.args)
Пример #20
0
class Tester:
    """Asynchronously check stored proxies and adjust their scores."""

    def __init__(self):
        """Create the RedisClient instance stored on ``self.redis``."""
        self.redis = RedisClient()

    async def test_one_proxy(self, proxy):
        """Request TEST_URL through ``proxy`` and record the outcome.

        Calls ``self.redis.max(proxy)`` when the status is in
        TRUE_STATUS_CODE; otherwise, or on any exception, calls
        ``self.redis.decrease(proxy)``.

        :param proxy: proxy address, either ``str`` or UTF-8 encoded ``bytes``
        """
        conn = aiohttp.TCPConnector(ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                async with session.get(TEST_URL, proxy=real_proxy,
                                       timeout=30) as response:
                    if response.status in TRUE_STATUS_CODE:
                        self.redis.max(proxy)
                        print(proxy, 100, '可用')
                    else:
                        self.redis.decrease(proxy)
                        print(proxy, -1, "状态码错误")
            except Exception as e:
                self.redis.decrease(proxy)
                print(proxy, -1, e.args)

    async def start(self):
        """Test every stored proxy in batches of BATCH_TEST_SIZE."""
        try:
            proxies = self.redis.all()
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                tasks = [self.test_one_proxy(proxy) for proxy in test_proxies]
                await asyncio.gather(*tasks)
                # Pause without blocking the event loop; time.sleep() here
                # would stall every other task scheduled on it.
                await asyncio.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)

    def run(self):
        """Run ``start`` to completion on a fresh event loop."""
        asyncio.run(self.start())
Пример #21
0
class Tester(object):
    """Asynchronously check stored proxies and adjust their scores."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request TEST_URL through ``proxy`` (no redirects) and record the outcome.

        Calls ``self.redis.max(proxy)`` when the status is in
        VALID_STATUS_CODES, otherwise ``self.redis.decrease(proxy)``.

        :param proxy: proxy address, either ``str`` or UTF-8 encoded ``bytes``
        """
        conn = aiohttp.TCPConnector(ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                async with session.get(TEST_URL,
                                       proxy=real_proxy,
                                       timeout=15,
                                       allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                    else:
                        self.redis.decrease(proxy)
            except (aiohttp.ClientError,
                    aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)

    def run(self):
        """Test every stored proxy in batches of BATCH_TEST_SIZE."""
        print('测试器开始运行!')
        try:
            proxies = self.redis.all()
            count = len(proxies)
            print('当前共有{0}个代理'.format(count))
            # Obtain the loop once instead of on every batch.
            loop = asyncio.get_event_loop()
            for start in range(0, count, BATCH_TEST_SIZE):
                test_proxies = proxies[start:start + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutine objects on
                # Python 3.11+, so wrap each coroutine in a Task first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
Пример #22
0
class Tester_2:
    """Sequentially check stored proxies with ``requests``."""

    def __init__(self):
        self.redis = RedisClient()
        self.headers = {
            'user-agent':
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/73.0.3683.103 Safari/537.36'
        }

    def single_test(self, proxy, timeout=10):
        """Request TEST_URL through ``proxy`` and record the outcome.

        Calls ``self.redis.max(proxy)`` on HTTP 200, otherwise
        ``self.redis.decrease(proxy)``.

        :param proxy: proxy address, either ``str`` or UTF-8 encoded ``bytes``
        :param timeout: seconds to wait; without it a dead proxy can block
            the whole run indefinitely
        """
        # Decode first: concatenating str and bytes raises TypeError.
        if isinstance(proxy, bytes):
            proxy = proxy.decode('utf-8')
        proxies = {'http': 'http://' + proxy, 'https': 'https://' + proxy}
        try:
            resp = requests.get(TEST_URL,
                                proxies=proxies,
                                headers=self.headers,
                                timeout=timeout)
            if resp.status_code == 200:
                print(proxy, '代理可用')
                self.redis.max(proxy)
            else:
                self.redis.decrease(proxy)
                print(proxy, 'IP 请求响应码不合法')
        # Timeouts and other request failures must count against this
        # proxy, not escape and abort the loop in ``run``.
        except (ConnectionError, requests.exceptions.RequestException):
            print('代理请求失败', proxy)
            self.redis.decrease(proxy)

    def run(self):
        """Test at most the first 15 stored proxies, pausing between them."""
        count = self.redis.count()
        print('共有', count, '个代理')
        print('开始检测代理')
        proxies = self.redis.all()
        try:
            for tested, proxy in enumerate(proxies, start=1):
                print(proxy)
                self.single_test(proxy)
                time.sleep(randint(1, 5))
                if tested == 15:
                    break
        except Exception as e:
            print('测试器发生错误', e)
Пример #23
0
class Tester(object):
    """Asynchronously check stored proxies and adjust their scores."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request TEST_URL through ``proxy`` and record the outcome.

        Calls ``self.redis.max(proxy)`` when the status is in
        VALID_STATUS_CODES, otherwise ``self.redis.decrease(proxy)``.

        :param proxy: proxy address, either ``str`` or UTF-8 encoded ``bytes``
        """
        try:
            conn = TCPConnector(verify_ssl=False)
            async with ClientSession(connector=conn) as session:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试: ', proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=10) \
                        as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用: ', proxy)
                    else:
                        # Was misspelled ``derease``; the resulting
                        # AttributeError fell into the handler below and
                        # was reported as a request failure.
                        self.redis.decrease(proxy)
                        print('请求响应不合法: ', proxy)
        # asyncio.TimeoutError is a separate class from the builtin
        # TimeoutError before Python 3.11, so list it explicitly.
        except (ClientError, ClientConnectorError, TimeoutError,
                asyncio.TimeoutError, AttributeError):
            self.redis.decrease(proxy)
            print('代理请求失败: ', proxy)

    def run(self):
        """Test every stored proxy in batches of BATCH_TEST_SIZE."""
        print('开始测试...')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            for index in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[index:index + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutine objects on
                # Python 3.11+, so wrap each coroutine in a Task first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试发生错误', e.args)
Пример #24
0
class Getter(object):
    """Fill the Redis-backed pool with proxies returned by the crawler."""

    def __init__(self):
        self.redis = RedisClient()
        self.crawler = Crawler()

    def is_over_threshhold(self):
        """Return True when the pool count has reached POOL_UPPER_THRESHOLD."""
        return bool(self.redis.count() >= POOL_UPPER_THRESHOLD)

    def run(self):
        """Crawl and store proxies unless the pool is full, then print scores."""
        print('获取器开始执行')
        pool_is_full = self.is_over_threshhold()
        if not pool_is_full:
            for fetched in self.crawler.run():
                self.redis.add(fetched)
                print('已抓取', fetched)
        total = self.redis.count()
        print('结束,共抓取', total)
        # Dump every stored entry together with its current score.
        for entry in self.redis.all():
            print(entry, '当前分数', self.redis.score(entry))
Пример #25
0
class Tester(object):
    """Check the proxies held by ``RedisClient`` and update their scores."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Test a single proxy against ``TEST_URL``.

        Args:
            proxy: Proxy address without a scheme, as ``str`` or UTF-8
                encoded ``bytes``.

        A status contained in ``VALID_STATUS_CODES`` leads to
        ``self.redis.max(proxy)``; any other status, or one of the listed
        exceptions, leads to ``self.redis.decrease(proxy)``.
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL, proxy=real_proxy,
                                       timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法', proxy)
            # asyncio.TimeoutError is listed explicitly because it is a
            # distinct class from the builtin TimeoutError before 3.11.
            except (ClientProxyConnectionError, ServerDisconnectedError,
                    ClientOSError, ClientHttpProxyError, TimeoutError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Main test routine: test all proxies in ``BATCH_TEST_SIZE`` batches.

        Each batch is run to completion on the event loop, followed by a
        5 second pause. Any exception raised here is printed, not re-raised.
        """
        print('测试器开始运行')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            # Start at 0: starting at 1 skipped the first proxy entirely.
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutines on Python 3.11+,
                # so wrap each one in a Task first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
Пример #26
0
class Tester():
    """Check the proxies held by ``RedisClient`` and update their scores."""

    def __init__(self):
        self.redis = RedisClient()

    async def single_test(self, proxy):
        """Try one request to ``TEST_URL`` through a single proxy.

        Args:
            proxy: The stored proxy entry. ``bytes`` are decoded as UTF-8;
                an object exposing ``string()`` is converted through it;
                anything else is converted with ``str()``.

        A status contained in ``VALID_STATUS_CODE`` leads to
        ``self.redis.max(proxy)``; any other status, or one of the listed
        exceptions, leads to ``self.redis.decrease(proxy)``.
        """
        conn = aiohttp.TCPConnector(ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                # NOTE(review): the original called proxy.string()
                # unconditionally, which raises AttributeError for plain
                # str entries and marked every such proxy as failed. The
                # call is kept for objects that provide it -- confirm what
                # RedisClient.all() actually yields.
                if hasattr(proxy, 'string'):
                    address = proxy.string()
                else:
                    address = str(proxy)
                real_proxy = "https://" + address
                print("testing", proxy)
                async with session.get(TEST_URL,
                                       allow_redirects=False,
                                       proxy=real_proxy,
                                       timeout=15) as response:
                    if response.status in VALID_STATUS_CODE:
                        self.redis.max(proxy)
                        print("Available proxy:", proxy)
                    else:
                        self.redis.decrease(proxy)
                        print("Not Available Status:", proxy, " Score -1")
            # asyncio.TimeoutError is listed explicitly because it is a
            # distinct class from the builtin TimeoutError before 3.11.
            except (aiohttp.ClientError, aiohttp.ClientConnectorError,
                    TimeoutError, asyncio.TimeoutError, AttributeError,
                    aiohttp.ClientOSError, aiohttp.ClientHttpProxyError):
                self.redis.decrease(proxy)
                print("Error detected!", proxy)

    def run(self):
        """Test every stored proxy in batches of ``BATCH_TEST_SIZE``.

        Each batch is run to completion on the event loop, followed by a
        5 second pause. Any exception raised here is printed, not re-raised.
        """
        print("Starts running tester")
        try:
            entries = self.redis.all()
            loop = asyncio.get_event_loop()
            # Step by the same size used for slicing; the former hard-coded
            # step of 200 skipped or re-tested entries whenever
            # BATCH_TEST_SIZE differed from 200.
            for i in range(0, len(entries), BATCH_TEST_SIZE):
                test_proxies = entries[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutines on Python 3.11+,
                # so wrap each one in a Task first.
                tasks = [
                    loop.create_task(self.single_test(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print(' Error with tester ', e.args)
Пример #27
0
def main():
    """Endlessly re-check the proxy pool with five ``TestProxy`` workers.

    Every cycle loads all proxies from Redis, hands the full list to five
    ``TestProxy`` instances, starts them all, waits for all of them, and
    then sleeps ``TEST_CYCLE`` seconds. This function never returns.
    """
    redis_cli = RedisClient()
    while True:
        proxy_list = redis_cli.all()  # fetch every stored proxy

        if not proxy_list:
            print('代理池为空')
        else:
            # Multi-threaded check; presumably TestProxy is a Thread
            # subclass, since it is driven through start()/join().
            workers = [TestProxy(proxy_list) for _ in range(5)]
            for worker in workers:
                worker.start()
            for worker in workers:
                worker.join()

        print(f'检测完毕,暂停{TEST_CYCLE}秒')
        time.sleep(TEST_CYCLE)
Пример #28
0
class Tester(object):
    """Check the proxies held by ``RedisClient`` and update their scores."""

    def __init__(self):
        self.redis = RedisClient()

    async def testSingleProxy(self, proxy):
        """Request ``TEST_URL`` through one proxy and record the outcome.

        Args:
            proxy: Proxy address without a scheme, as ``str`` or UTF-8
                encoded ``bytes``.

        A status contained in ``VAILD_STATUS_CODE`` leads to
        ``self.redis.max(proxy)``; any other status, or one of the listed
        exceptions, leads to ``self.redis.decrease(proxy)``.
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')

                # The request used to be nested under the isinstance()
                # check above, so proxies that were already str were
                # silently never tested.
                real = 'http://' + proxy
                print('Testing')

                async with session.get(TEST_URL, proxy=real,
                                       timeout=10) as resp:
                    # NOTE(review): constant name is spelled VAILD here;
                    # it must match the definition elsewhere in the file.
                    if resp.status in VAILD_STATUS_CODE:
                        self.redis.max(proxy)
                    else:
                        self.redis.decrease(proxy)
            # asyncio.TimeoutError is listed explicitly because it is a
            # distinct class from the builtin TimeoutError before 3.11.
            except (ClientError, ClientConnectorError, TimeoutError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)

    def run(self):
        """Test every stored proxy in batches of ``BATCH_TEST_SIZE``.

        Each batch is run to completion on the event loop, followed by a
        5 second pause. Any exception raised here is printed, not re-raised.
        """
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()

            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                batch = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutines on Python 3.11+,
                # so wrap each one in a Task first.
                tasks = [
                    loop.create_task(self.testSingleProxy(proxy))
                    for proxy in batch
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print(e.args)
Пример #29
0
class Tester:
    """Validate stored cookies by requesting ``TEST_URL`` with each of them."""

    def __init__(self, website='tianyancha'):
        """Initialise the database manager object.

        Args:
            website: Name passed to ``RedisClient`` together with
                ``'accounts'``.
        """
        self.website = website
        self.redis = RedisClient('accounts', self.website)

    async def test_one_proxy(self, key, proxy):
        """Test whether one cookie is usable against the target site.

        Args:
            key: Redis key the value was read from; passed back to
                ``self.redis.max`` / ``self.redis.decrease``.
            proxy: Despite its name, this value is sent as the ``Cookie``
                header, with its last character stripped.

        The cookie is treated as usable when the response status is in
        ``TRUE_STATUS_CODE`` and the page contains text under a
        ``span.ni-sp-name`` element. Errors are printed, never raised.
        """
        try:
            headers = {
                "Accept":
                "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
                "Accept-Encoding":
                "gzip, deflate, br",
                "Accept-Language":
                "zh-CN,zh;q=0.9",
                "Cache-Control":
                "max-age=0",
                "Connection":
                "keep-alive",
                "Cookie":
                proxy[:-1],
                "Host":
                "www.tianyancha.com",
                "Upgrade-Insecure-Requests":
                "1",
                "User-Agent":
                "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36"
            }
        except Exception as e:
            print(key, '测试错误', -20, e)
            return

        try:
            # requests.get() blocks; run it in the default executor so the
            # coroutines gathered by start() really overlap instead of
            # stalling the event loop one after another.
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(
                None,
                lambda: requests.get(TEST_URL, headers=headers, timeout=30))
            html = etree.HTML(response.text)
            user = "".join(
                html.xpath('//span[@class="ni-sp-name"]//text()'))
            print(user, '*' * 20)
            if response.status_code in TRUE_STATUS_CODE and user:
                # Cookie is usable.
                self.redis.max(key, proxy)
                print(key, 100, '可用')
            else:
                # Cookie is not usable.
                self.redis.decrease(key, proxy)
                print(key, -20, "状态码错误")
        except Exception as e:
            # The score is deliberately left untouched on request errors,
            # as in the original code.
            print(key, '请求错误', -20, e)

    async def start(self):
        """Test all cookies, key by key, in batches of ``BATCH_TEST_SIZE``.

        Keys containing ``"tianyancha"`` are skipped. Each batch is awaited
        to completion and followed by a 5 second pause. Any exception
        raised here is printed, not re-raised.
        """
        try:
            keys = self.redis.get()
            for key in keys:
                if "tianyancha" in key:
                    continue
                proxies = self.redis.all(key)
                print(key)
                # Step by the batch size; a step of 1 produced overlapping
                # batches that re-tested almost every entry repeatedly.
                for i in range(0, len(proxies), BATCH_TEST_SIZE):
                    test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                    tasks = [
                        self.test_one_proxy(key, proxy)
                        for proxy in test_proxies
                    ]
                    # Without ``await`` the gathered coroutines never got
                    # a chance to run before asyncio.run() finished.
                    await asyncio.gather(*tasks)
                    # Non-blocking pause; time.sleep() would freeze the loop.
                    await asyncio.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)

    def run(self):
        """Run ``start()`` to completion on a fresh event loop."""
        asyncio.run(self.start())
Пример #30
0
        except Exception as e:
            print('测试器发生错误', e.args)

    def test_single_tread(self, proxy):
        """Synchronously test one proxy with ``requests`` and update its score.

        Args:
            proxy: Proxy address without a scheme, as ``str`` or UTF-8
                encoded ``bytes``.

        A status contained in ``VALID_STATUS_CODES`` leads to
        ``self.redis.max(proxy)``; any other status, or a
        ``ConnectionError`` / ``ConnectTimeout``, leads to
        ``self.redis.decrease(proxy)``.
        """
        # Decode first, as the async tester of this class does; a bytes
        # value would otherwise raise an uncaught TypeError on the
        # concatenation below.
        if isinstance(proxy, bytes):
            proxy = proxy.decode('utf-8')
        real_proxy = {'https': 'https://' + proxy}
        print('测试', real_proxy)
        try:
            res = requests.get(TEST_URL, proxies=real_proxy, timeout=10)
            if res.status_code in VALID_STATUS_CODES:
                self.redis.max(proxy)
            else:
                self.redis.decrease(proxy)
        except (ConnectionError, ConnectTimeout):
            self.redis.decrease(proxy)
            print('代理请求失败', proxy)

    def new_run(self):
        """Run the synchronous single-proxy test on every stored proxy."""
        stored = self.redis.all()
        for candidate in stored:
            self.test_single_tread(candidate)


if __name__ == '__main__':
    # Script entry point: test each stored proxy once, one after another,
    # using the synchronous tester.
    db = RedisClient()
    tester = Tester()
    # A disabled alternative looped forever, calling tester.run() and
    # then sleeping 5 seconds between rounds.
    for proxy in db.all():
        tester.test_single_tread(proxy)