Пример #1
0
class Getter():
    """Collects proxies from every crawl function of a Crawler into Redis."""

    def __init__(self):
        """Create the Redis client and the crawler used by ``run``."""
        self.redis = RedisClient()
        self.crawler = Crawler()

    def is_over_threshold(self):
        """
        Tell whether the proxy pool has reached its size limit.

        :return: True when ``self.redis.count()`` is at least
            ``POOL_UPPER_THRESHOLD``, otherwise False.
        """
        return True if self.redis.count() >= POOL_UPPER_THRESHOLD else False

    def run(self):
        """
        Run every registered crawl function and store the proxies it yields.

        Nothing is crawled when the pool is already at its limit.
        """
        print('获取器开始执行')
        if self.is_over_threshold():
            return
        for index in range(self.crawler.__CrawlFuncCount__):
            crawl_func = self.crawler.__CrawlFunc__[index]
            # Fetch the proxies produced by this crawl function.
            found = self.crawler.get_proxies(crawl_func)
            sys.stdout.flush()
            for proxy in found:
                self.redis.add(proxy)
Пример #2
0
class Tester(object):
    """
    Runs the per-proxy test coroutine over the stored proxies in batches.

    NOTE(review): ``run`` calls ``self.test_single_proxy``, but the only
    definition visible in this class is the commented-out one below. Unless
    the method is supplied elsewhere (e.g. by a subclass), ``run`` will end
    up reporting an AttributeError through its error handler - confirm.
    """

    def __init__(self):
        """Create the Redis client the tester reads proxies from."""
        self.redis = RedisClient()

    # Disabled implementation of the per-proxy check, kept for reference:
    #
    # async def test_single_proxy(self, proxy):
    #     """
    #     Test a single proxy
    #     :param proxy:
    #     :return:
    #     """
    #     conn = aiohttp.TCPConnector(verify_ssl=False)
    #     async with aiohttp.ClientSession(connector=conn) as session:
    #         try:
    #             if isinstance(proxy, bytes):
    #                 proxy = proxy.decode('utf-8')
    #             real_proxy = 'http://' + proxy
    #             print('正在测试', proxy)
    #             async with session.get(TEST_URL, proxy=real_proxy, timeout=15, allow_redirects=False) as response:
    #                 if response.status in VALID_STATUS_CODES:
    #                     self.redis.max(proxy)
    #                     print('代理可用', proxy)
    #                 else:
    #                     self.redis.decrease(proxy)
    #                     print('请求响应码不合法 ', response.status, 'IP', proxy)
    #         except (ClientError, aiohttp.client_exceptions.ClientConnectorError, asyncio.TimeoutError, AttributeError):
    #             self.redis.decrease(proxy)
    #             print('代理请求失败', proxy)

    @staticmethod
    def _event_loop():
        """
        Return the current event loop, creating and installing one if needed.

        ``asyncio.get_event_loop()`` raises RuntimeError when no loop is
        current (always the case on recent Python versions unless one was
        set), so a fresh loop is created and registered in that case.
        """
        try:
            return asyncio.get_event_loop()
        except RuntimeError:
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            return loop

    def run(self):
        """
        Main entry point of the tester.

        Reads the proxy count, then walks the stored proxies in slices of
        ``BATCH_TEST_SIZE``, running ``test_single_proxy`` concurrently for
        every proxy of a slice and pausing five seconds between slices.
        Any exception is caught and printed instead of being propagated.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            loop = self._event_loop()
            for start in range(0, count, BATCH_TEST_SIZE):
                stop = min(start + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                test_proxies = self.redis.batch(start, stop)
                # asyncio.wait() rejects bare coroutine objects on
                # Python 3.11+, so wrap each check in a Task first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                if not tasks:
                    # asyncio.wait() raises ValueError on an empty
                    # collection; an empty slice must not abort the run.
                    continue
                loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
Пример #3
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @contact: [email protected]
# @software: PyCharm
# @time: 2019/12/26 下午7:34
# @site: www.gongyanli.com
# @file: importer.py
from ProxyPool.proxypool.db import RedisClient

# Module-level RedisClient instance, created when this module is imported.
conn = RedisClient()


def set(proxy):
    """
    Add ``proxy`` through the module-level client and print the outcome.

    A success message is printed when ``conn.add`` returns a truthy value,
    a failure message otherwise.

    :param proxy: value handed to ``conn.add`` unchanged.
    :return: None
    """
    if conn.add(proxy):
        print('录入成功')
    else:
        print('录入失败')


def scan():
    """
    Read proxies from standard input and pass each one to ``set``.

    Reading stops, without storing anything further, as soon as a line
    equal to ``exit`` is entered.

    :return: None
    """
    print('请输入代理,输入exit退出读取')
    # iter(callable, sentinel) keeps calling input() until it returns 'exit'.
    for entered in iter(input, 'exit'):
        set(entered)


if __name__ == '__main__':
    # Start the interactive input loop only when executed as a script.
    scan()
Пример #4
0
 def __init__(self):
     """Create a RedisClient and keep it on the instance as ``self.redis``."""
     self.redis = RedisClient()
Пример #5
0
class Tester(object):
    """Tests stored proxies in batches and updates each proxy's score in Redis."""

    def __init__(self):
        """Create the Redis client the tester reads proxies from."""
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Test a single proxy by requesting ``TEST_URL`` through it.

        The proxy is decoded from UTF-8 when given as bytes. A response whose
        status is in ``VALID_STATUS_CODES`` leads to ``self.redis.max(proxy)``;
        any other status, or one of the caught request errors, leads to
        ``self.redis.decrease(proxy)``.

        :param proxy: proxy address as ``str`` or ``bytes``, without scheme.
        :return: None
        """
        headers = {
            "Connection": "keep-alive",
            "Host": "www.sogou.com",
            "Pragma": "no-cache",
            "User-Agent": 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36',
        }
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试' + proxy)
                async with session.get(TEST_URL,
                                       proxy=real_proxy,
                                       timeout=15,
                                       allow_redirects=False,
                                       headers=headers) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用' + proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法 ' + str(response.status) + 'IP' +
                              proxy)
            except (ClientError,
                    aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                # A failed request counts against the proxy.
                self.redis.decrease(proxy)
                print('代理请求失败{}'.format(proxy))

    @staticmethod
    def _event_loop():
        """
        Return the current event loop, creating and installing one if needed.

        ``asyncio.get_event_loop()`` raises RuntimeError when no loop is
        current (always the case on recent Python versions unless one was
        set), so a fresh loop is created and registered in that case.
        """
        try:
            return asyncio.get_event_loop()
        except RuntimeError:
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            return loop

    def run(self):
        """
        Main entry point of the tester.

        Reads the proxy count, then walks the stored proxies in slices of
        ``BATCH_TEST_SIZE``, running ``test_single_proxy`` concurrently for
        every proxy of a slice and pausing five seconds between slices.
        Any exception is caught and printed instead of being propagated.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余{}个代理'.format(count))
            loop = self._event_loop()
            for start in range(0, count, BATCH_TEST_SIZE):
                stop = min(start + BATCH_TEST_SIZE, count)
                print('正在测试第{}-{}个代理'.format(start + 1, stop))
                test_proxies = self.redis.batch(start, stop)
                # asyncio.wait() rejects bare coroutine objects on
                # Python 3.11+, so wrap each check in a Task first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                if not tasks:
                    # asyncio.wait() raises ValueError on an empty
                    # collection; an empty slice must not abort the run.
                    continue
                loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误{}'.format(e.args))
Пример #6
0
 def __init__(self):
     """Create the RedisClient (``self.redis``) and Crawler (``self.crawler``) instances."""
     self.redis = RedisClient()
     self.crawler = Crawler()
Пример #7
0
def get_conn():
    """
    Return the RedisClient stored on ``g``, creating it on first use.

    :return: the object held in ``g.redis``.
    """
    if hasattr(g, 'redis'):
        return g.redis
    # No client on ``g`` yet: create one and store it for later calls.
    g.redis = RedisClient()
    return g.redis