class Getter():
    """Runs every registered crawl function and feeds the results into the pool."""

    def __init__(self):
        self.db = RedisClient()
        self.crawler = Crawler()

    def is_over_threshold(self):
        """Return True when the proxy pool already exceeds its size limit."""
        return self.db.count() > MAX_POOL_COUNT

    def run(self):
        """Crawl proxies from every source and store them, unless the pool is full.

        :return: None
        """
        print('start to get proxy')
        if self.is_over_threshold():
            return
        for index in range(self.crawler.__CrawlFuncCount__):
            crawl_func = self.crawler.__CrawlFunc__[index]
            for proxy in self.crawler.get_proxy(crawl_func):
                self.db.add(proxy)
class Getter():
    """Collects proxies via the Crawler and pushes them into Redis storage."""

    def __init__(self):
        self.redis = RedisClient()
        self.crawler = Crawler()

    def is_over_threshold(self):
        """Return True once the pool has reached its upper size limit."""
        return self.redis.count() >= POOL_UPPER_THRESHOLD

    def run_all(self):
        """Invoke every registered crawl function unless the pool is full."""
        print(' 获取器开始执行 ')
        if self.is_over_threshold():
            return
        for idx in range(self.crawler.__CrawlFuncCount__):
            fetch = self.crawler.__CrawlFunc__[idx]
            for proxy in self.crawler.get_proxies(fetch):
                self.redis.add(proxy)

    def run_specific(self, callback):
        """Invoke a single crawl callback unless the pool is full.

        :param callback: crawl function to run
        """
        print(' 获取器开始执行 ')
        if self.is_over_threshold():
            return
        for proxy in self.crawler.get_proxies(callback):
            self.redis.add(proxy)
class Tester():
    """Validates stored proxies against TEST_API and adjusts their scores."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Make one request through *proxy*; promote it on success, demote on failure.

        :param proxy: proxy address (``host:port``) as ``str`` or ``bytes``
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', real_proxy)
                async with session.get(TEST_API, proxy=real_proxy, timeout=15,
                                       allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODE:
                        self.redis.enable(proxy)
                        print('代理 ', proxy, ' 可用')
                    else:
                        self.redis.decrease(proxy)
                        print('代理 ', proxy, ' 请求失败')
            # FIX: was a bare ``except:``, which also swallows SystemExit and
            # KeyboardInterrupt; network/decoding failures are Exceptions.
            except Exception:
                self.redis.decrease(proxy)
                print('代理 ', proxy, ' 不可用')

    def run(self):
        """Test the whole pool in batches of BATCH_TEST_COUNT proxies."""
        print('开始测试')
        try:
            for i in range(0, self.redis.count(), BATCH_TEST_COUNT):
                start = i
                end = min(i + BATCH_TEST_COUNT, self.redis.count())
                proxies = self.redis.batch(start, end)
                print('正在测试第 ', start, '到', end, '个代理')
                loop = asyncio.get_event_loop()
                # FIX: wrap coroutines in Tasks — passing bare coroutines to
                # asyncio.wait() is deprecated and removed in Python 3.11.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in proxies]
                loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试错误 ', e.args)
# Entry point: crawl proxies, validate them, then show and trim the pool.
from crawl import XiCiProxyHelper
from storage import Booter, RedisClient
from detector import Detector

if __name__ == "__main__":
    helper = XiCiProxyHelper(quantity=40, threshold=1.000)
    client = RedisClient()
    booter = Booter(client, helper)
    booter.run()
    detector = Detector(client)
    detector.run()
    print('一共有{}'.format(client.count()))
    client.show()
    client.remove_by_range(0, 100)
'Please input config file path(if you use default file type \'d\'.): ') if path == 'd': path = 'proxy.conf' sure = input( 'Are you sure the config file in \'{}\'. [y/n]: '.format(path)) if sure == 'y': break cfg = ConfigParser() cfg.read(path) REDIS_HOST = try_to_get_options(cfg.get, 'redis', 'host') REDIS_PORT = try_to_get_options(cfg.getint, 'redis', 'port') REDIS_PASSWORD = try_to_get_options(cfg.get, 'redis', 'password') REDIS_KEY = try_to_get_options(cfg.get, 'redis', 'key') redis_client = RedisClient(host=REDIS_HOST, port=REDIS_PORT, password=REDIS_PASSWORD, s_key=REDIS_KEY) count = redis_client.count() if count == 0: print('Already cleaning!') else: redis_client.show() sure = input( 'Are you sure remove that data? amount {} items! [y/n]: '.format( count)) if sure == 'y': redis_client.remove_by_range(0, 100) else: print('Good luck! Bye Bye')