def __init__(self, threshold):
    """Initialize the adder with a pool-size threshold."""
    # threshold: target size (critical value) of the proxy pool
    self._threshold = threshold
    self._conn = RedisClient()
    self._tester = ValidityTester()
    # crawler for free proxies
    self._crawler = FreeProxyGetter()
class PoolAdder(object):
    """Replenish the proxy pool with freshly crawled free proxies.

    Iterates over every crawl callback registered on the crawler, feeds the
    crawled candidates to the tester, and stops once the Redis-backed pool
    holds at least ``threshold`` proxies.
    """

    def __init__(self, threshold):
        # Target size of the proxy pool.
        self._threshold = threshold
        self._conn = RedisClient()
        self._tester = ValidityTester()
        self._crawler = FreeProxyGetter()

    def is_over_threshold(self):
        """Return True if the pool already holds at least ``threshold`` proxies."""
        # A comparison already yields a bool; no need for if/else returning True/False.
        return self._conn.queue_len >= self._threshold

    def add_to_queue(self):
        """Crawl, test, and enqueue proxies until the pool is full.

        Raises:
            ResourceDepletionError: if a full crawl pass yields no proxies at all.
        """
        print('PoolAdder is working')
        proxy_count = 0
        while not self.is_over_threshold():
            # Invoke every crawl function registered on the crawler.
            for callback_label in range(self._crawler.__CrawlFuncCount__):
                callback = self._crawler.__CrawlFunc__[callback_label]
                raw_proxies = self._crawler.get_raw_proxies(callback)
                # Test crawled proxies before they can enter the pool.
                self._tester.set_raw_proxies(raw_proxies)
                self._tester.test()
                proxy_count += len(raw_proxies)
                if self.is_over_threshold():
                    print('IP is enough, waiting to be used')
                    break
            if proxy_count == 0:
                raise ResourceDepletionError
class GetNewProxy(object):
    """Top up the proxy pool with newly crawled proxy IPs."""

    def __init__(self, max):
        # Upper bound on how many proxies the pool should hold.
        self._max_count = max
        self._redis = RedisClient()
        self._tester = ValidityTester()
        self._getter = FreeProxyGetter()

    def is_over(self):
        """Return True once the pool size has reached the configured maximum."""
        print(self._redis.len())
        # The comparison itself is the boolean result; no if/else needed.
        return self._redis.len() >= self._max_count

    def add_new_proxy(self):
        """Crawl raw proxies from every registered source and test them into the pool."""
        print('Add and get new proxy')
        while not self.is_over():
            for callback in range(self._getter.__CrawlFuncCount__):
                # Each crawl function yields a batch of raw (untested) proxies.
                self._tester.set_raw_proxies(
                    self._getter.get_raw_proxies(self._getter.__CrawlFunc__[callback]))
                self._tester.check_some_proxies()
                if self.is_over():
                    print('IP is enough, waiting to be used')
                    break
class PoolAdder(object):
    """Keep the proxy pool topped up to an upper threshold."""

    def __init__(self, upper_threshold):
        # Pool capacity, e.g. 150.
        self._upper_threshold = upper_threshold
        self._conn = RedisClient()
        self._tester = ValidityTester()
        self._crawler = FreeProxyGetter()

    def over_upper_threshold(self):
        """Return True when the pool holds at least ``upper_threshold`` proxies."""
        # `True if X else False` is redundant: the comparison is already a bool.
        return self._conn.list_len >= self._upper_threshold

    def add_to_pool(self):
        """Crawl, test, and add proxies until the pool is full.

        Raises:
            ResourceDepletionError: if a complete crawl pass produced no proxies.
        """
        print('PoolAdder is working...')
        raw_proxies_count = 0
        while not self.over_upper_threshold():
            for callback_label in range(self._crawler.__CrawlFuncCount__):
                callback = self._crawler.__CrawlFunc__[callback_label]
                # Run the crawl callback to fetch a batch of raw proxies.
                raw_proxies = self._crawler.get_raw_proxies(callback=callback)
                # Hand the raw proxies to the tester for validation.
                self._tester.set_raw_proxies(raw_proxies)
                self._tester.test()
                raw_proxies_count += len(raw_proxies)
                if self.over_upper_threshold():
                    # Enough IPs gathered — stop crawling.
                    print('IPs are enough, waiting to be used')
                    break
            if raw_proxies_count == 0:
                raise ResourceDepletionError
class PoolAdder(object):
    """Adder: responsible for replenishing the pool with proxies."""

    def __init__(self, threshold):
        # Target size of the proxy pool.
        self._threshold = threshold
        self._conn = RedisClient()
        # NOTE(review): 'VaildityTester' spelling matches the class name defined
        # elsewhere in this project — do not "fix" it here without renaming both.
        self._tester = VaildityTester()
        self._crawler = FreeProxyGetter()

    def is_over_threshold(self):
        """Return True if the number of proxies in the pool has reached the threshold."""
        # The comparison already yields the bool we want.
        return self._conn.queue_len >= self._threshold

    def add_to_queue(self):
        """Crawl a batch of unchecked proxies, test them, and add survivors to the pool.

        Raises:
            ResourceDepletionError: when a full crawl pass yields zero proxies.
        """
        Logger.log_normal('PoolAdder is working')
        proxy_count = 0
        while not self.is_over_threshold():
            for callback_label in range(self._crawler.__CrawlFuncCount__):
                callback = self._crawler.__CrawlFunc__[callback_label]
                raw_proxies = self._crawler.get_raw_proxies(callback)
                # Validate the crawled proxies.
                self._tester.set_raw_proxies(raw_proxies)
                self._tester.test()
                proxy_count += len(raw_proxies)
                if self.is_over_threshold():
                    Logger.log_high('IP is enough, waiting to be used')
                    break
            if proxy_count == 0:
                raise ResourceDepletionError
class PoolAdder(object):
    """Add proxies to the pool until it reaches the configured threshold.

    The crawler is produced by a metaclass that exposes its crawl functions via
    ``__CrawlFunc__`` and their count via ``__CrawlFuncCount__``.
    """

    def __init__(self, threshold):
        # threshold: target size of the proxy pool.
        self._threshold = threshold
        self._conn = RedisClient()
        self._tester = ValidityTester()
        # Instantiate the (metaclass-built) free-proxy crawler.
        self._crawler = FreeProxyGetter()

    def is_over_threshold(self):
        """Return True when the number of proxies stored has reached the threshold."""
        # Return the comparison directly instead of if/else returning True/False.
        return self._conn.queue_len >= self._threshold

    def add_to_queue(self):
        """Crawl and test proxies in a loop until the pool is full.

        Raises:
            ResourceDepletionError: if a full crawl pass collected no proxies.
        """
        print('PoolAdder is working')
        proxy_count = 0
        while not self.is_over_threshold():
            # One iteration per registered crawl function.
            for callback_label in range(self._crawler.__CrawlFuncCount__):
                callback = self._crawler.__CrawlFunc__[callback_label]
                raw_proxies = self._crawler.get_raw_proxies(callback)
                # Feed the freshly crawled proxies to the tester.
                self._tester.set_raw_proxies(raw_proxies)
                self._tester.test()
                proxy_count += len(raw_proxies)
                if self.is_over_threshold():
                    print('IP is enough, waiting to be used')
                    break
            if proxy_count == 0:
                raise ResourceDepletionError
def __init__(self, threshold):
    """Initialize the adder with a pool-size threshold."""
    self._threshold = threshold
    self._conn = RedisClient()
    self._tester = ValidityTester()
    self._crawler = FreeProxyGetter()  # class that crawls the major free-proxy sites
def __init__(self, threshold):
    """Initialize the adder with a pool-size threshold."""
    self._threshold = threshold
    self._conn = RedisClient()  # Redis connection
    self._tester = ValidityTester()  # checks whether a proxy is usable
    self._crawler = FreeProxyGetter()  # crawls proxies from the major sites
def __init__(self, max):
    """Initialize with the maximum number of proxies the pool should hold."""
    self._max_count = max
    self._redis = RedisClient()
    self._tester = ValidityTester()
    self._getter = FreeProxyGetter()
def __init__(self, threshold): self._threshold = threshold # 代理数量上限 self._conn = RedisClient() self._tester = ValidityTester() # 检测代理并存入redis self._crawler = FreeProxyGetter() # 从网页中获取免费代理
def __init__(self, threshold): self._threshold = threshold # 代理数量的上线 self._conn = RedisClient() self._tester = ValidityTester() self._crawler = FreeProxyGetter() # 动态获取代理
def __init__(self, threshold):
    """Initialize the adder with a pool-size threshold."""
    self._threshold = threshold
    self._conn = RedisClient()
    self._tester = ValidityTester()
    self._crawler = FreeProxyGetter()
def __init__(self, upper_threshold): self._upper_threshold = upper_threshold #150 self._conn = RedisClient() self._tester = ValidityTester() self._crawler = FreeProxyGetter()