Exemplo n.º 1
0
class Getter():
    """Fetch proxies through the crawler and add them to the pool storage."""

    def __init__(self):
        self.db = RedisClient()
        self.crawler = Crawler()

    def is_over_threshold(self):
        """Report whether the stored count is greater than MAX_POOL_COUNT."""
        return self.db.count() > MAX_POOL_COUNT

    def run(self):
        """Call each registered crawl function and store what it returns.

        The pool limit is checked once, up front; when the pool is already
        over the limit nothing is crawled.

        :return: None
        """
        print('start to get proxy')
        if self.is_over_threshold():
            return
        crawler = self.crawler
        # The crawler exposes its crawl functions as an indexable collection
        # plus a separate count; walk it by index exactly as many times as
        # the count says.
        for index in range(crawler.__CrawlFuncCount__):
            crawl_func = crawler.__CrawlFunc__[index]
            for found in crawler.get_proxy(crawl_func):
                self.db.add(found)
Exemplo n.º 2
0
class Getter():
    """Pull proxies from the crawler and push them into the Redis store."""

    def __init__(self):
        self.redis = RedisClient()
        self.crawler = Crawler()

    def is_over_threshold(self):
        """Tell whether the stored count has reached POOL_UPPER_THRESHOLD."""
        return self.redis.count() >= POOL_UPPER_THRESHOLD

    def _collect(self, callback):
        """Store every proxy the crawler returns for one crawl callback."""
        for candidate in self.crawler.get_proxies(callback):
            self.redis.add(candidate)

    def run_all(self):
        """Run every crawl callback registered on the crawler.

        The threshold is checked once before the first callback; nothing is
        fetched when the pool has already reached it.
        """
        print(' 获取器开始执行 ')
        if self.is_over_threshold():
            return
        for position in range(self.crawler.__CrawlFuncCount__):
            self._collect(self.crawler.__CrawlFunc__[position])

    def run_specific(self, callback):
        """Run a single crawl callback, unless the pool is already full.

        :param callback: the crawl callback handed to ``crawler.get_proxies``.
        """
        print(' 获取器开始执行 ')
        if self.is_over_threshold():
            return
        self._collect(callback)
Exemplo n.º 3
0
class Tester():
    """Check stored proxies against TEST_API and record the result in Redis."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request TEST_API through one proxy and update its standing.

        :param proxy: proxy address as ``str`` or UTF-8 encoded ``bytes``.
            It is prefixed with ``http://`` before use (presumably a
            ``host:port`` string; confirm against what RedisClient stores).
        :return: None. ``redis.enable(proxy)`` is called when the response
            status is in VALID_STATUS_CODE; ``redis.decrease(proxy)`` is
            called for any other status or when the request fails.
        """
        # NOTE(review): aiohttp documents ``verify_ssl`` as deprecated in
        # favour of ``ssl=False``. Left unchanged here because the aiohttp
        # version this project pins is not visible; switch once confirmed.
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试',real_proxy)
                async with session.get(TEST_API, proxy=real_proxy, timeout=15,
                                       allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODE:
                        self.redis.enable(proxy)
                        print('代理 ',proxy,' 可用')
                    else:
                        self.redis.decrease(proxy)
                        print('代理 ',proxy,' 请求失败')
            except Exception:
                # Any ordinary failure (connect error, timeout, bad address)
                # counts against the proxy. Catching Exception instead of
                # using a bare except lets KeyboardInterrupt, SystemExit and
                # task cancellation propagate instead of being recorded as a
                # bad proxy.
                self.redis.decrease(proxy)
                print('代理 ' ,proxy, ' 不可用')

    def run(self):
        """Test every stored proxy, BATCH_TEST_COUNT at a time.

        Each batch is fetched with ``redis.batch(start, end)`` and tested
        concurrently on the event loop, followed by a 5 second pause. Any
        exception raised while driving the batches is printed, not re-raised.

        :return: None
        """
        print('开始测试')
        try:
            # Snapshot the size once: the tests themselves change the pool,
            # and re-reading the count per batch could yield end < start.
            total = self.redis.count()
            for start in range(0, total, BATCH_TEST_COUNT):
                end = min(start + BATCH_TEST_COUNT, total)
                proxies = self.redis.batch(start, end)
                print('正在测试第 ',start,'到',end,'个代理')
                loop = asyncio.get_event_loop()
                # asyncio.wait() no longer accepts bare coroutines
                # (deprecated in 3.8, TypeError from 3.11), so wrap each one
                # in a Task bound to this loop first.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in proxies]
                if not tasks:
                    # asyncio.wait() raises ValueError on an empty set, which
                    # would abort the remaining batches.
                    continue
                loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试错误 ', e.args)
Exemplo n.º 4
0
from crawl import XiCiProxyHelper
from storage import Booter, RedisClient
from detector import Detector

if __name__ == "__main__":
    # Build the crawl helper and the Redis client up front; the client is
    # shared by both the booter and the detector below.
    helper = XiCiProxyHelper(quantity=40, threshold=1.000)
    client = RedisClient()

    # First stage: Booter is given the client and the crawl helper.
    booter = Booter(client, helper)
    booter.run()

    # Second stage: Detector works on the same client.
    detector = Detector(client)
    detector.run()

    # Report how many entries the client holds, then list them.
    total = client.count()
    print('一共有{}'.format(total))

    client.show()

    # Finish by removing the range 0..100 from the store.
    # NOTE(review): whether the bounds are ranks or scores depends on
    # RedisClient.remove_by_range, which is defined elsewhere; confirm.
    client.remove_by_range(0, 100)
Exemplo n.º 5
0
        'Please input config file path(if you use default file type \'d\'.): ')
    if path == 'd':
        path = 'proxy.conf'
    sure = input(
        'Are you sure the config file in \'{}\'. [y/n]: '.format(path))
    if sure == 'y':
        break

# Read the Redis connection settings from the config file chosen above.
cfg = ConfigParser()
cfg.read(path)
REDIS_HOST = try_to_get_options(cfg.get, 'redis', 'host')
REDIS_PORT = try_to_get_options(cfg.getint, 'redis', 'port')
REDIS_PASSWORD = try_to_get_options(cfg.get, 'redis', 'password')
REDIS_KEY = try_to_get_options(cfg.get, 'redis', 'key')

redis_client = RedisClient(
    host=REDIS_HOST,
    port=REDIS_PORT,
    password=REDIS_PASSWORD,
    s_key=REDIS_KEY,
)

# Only prompt for removal when the store actually holds something.
count = redis_client.count()
if count != 0:
    redis_client.show()
    prompt = 'Are you sure remove that data? amount {} items! [y/n]: '.format(
        count)
    sure = input(prompt)
    if sure == 'y':
        # NOTE(review): the range is fixed at 0..100 regardless of `count`;
        # what the bounds mean depends on RedisClient.remove_by_range.
        redis_client.remove_by_range(0, 100)
    else:
        print('Good luck! Bye Bye')
else:
    print('Already cleaning!')