Exemplo n.º 1
0
class Getter():
    """Collects proxies through a Crawler and stores them via a RedisClient."""

    def __init__(self):
        self.db = RedisClient()
        self.crawler = Crawler()

    def is_over_threshold(self):
        """
        Report whether the proxy pool has exceeded its size limit.

        :return: True when the stored count is greater than MAX_POOL_COUNT,
                 False otherwise.
        """
        return self.db.count() > MAX_POOL_COUNT

    def run(self):
        """
        Fetch proxies with each registered crawl callback and store them.

        The pool limit is checked once, before any crawling starts; when the
        pool is already over the limit nothing is fetched.

        :return: None
        """
        print('start to get proxy')
        if self.is_over_threshold():
            return

        crawler = self.crawler
        # Only the first __CrawlFuncCount__ entries of __CrawlFunc__ are used.
        for index in range(crawler.__CrawlFuncCount__):
            crawl_func = crawler.__CrawlFunc__[index]
            for found in crawler.get_proxy(crawl_func):
                self.db.add(found)
Exemplo n.º 2
0
class Getter():
    """Collects proxies through a Crawler and stores them via a RedisClient."""

    def __init__(self):
        self.redis = RedisClient()
        self.crawler = Crawler()

    def is_over_threshold(self):
        """Return True once the stored count has reached POOL_UPPER_THRESHOLD."""
        return self.redis.count() >= POOL_UPPER_THRESHOLD

    def _store_from(self, callback):
        """Fetch proxies for a single crawl callback and add each to the store."""
        for found in self.crawler.get_proxies(callback):
            self.redis.add(found)

    def run_all(self):
        """
        Fetch and store proxies from every registered crawl callback.

        The pool limit is checked once, before any crawling starts; when it
        has already been reached nothing is fetched.
        """
        print(' 获取器开始执行 ')
        if self.is_over_threshold():
            return

        # Only the first __CrawlFuncCount__ entries of __CrawlFunc__ are used.
        for index in range(self.crawler.__CrawlFuncCount__):
            self._store_from(self.crawler.__CrawlFunc__[index])

    def run_specific(self, callback):
        """
        Fetch and store proxies from one crawl callback.

        :param callback: value handed to the crawler's get_proxies().
        """
        print(' 获取器开始执行 ')
        if self.is_over_threshold():
            return

        self._store_from(callback)
Exemplo n.º 3
0
from storage import RedisClient

# Open the store; the three positional arguments are handed to RedisClient
# as-is (their meaning is defined by storage.RedisClient).
rc = RedisClient('url', '127.0.0.1', None)

# Read the whole file up front so it is closed before anything is stored.
# Each line loses its trailing newline and any other trailing whitespace.
with open('ul.txt', 'r') as source:
    entries = list(map(str.rstrip, source))

# Push every line of the file into the store, in file order.
for entry in entries:
    rc.add(entry)