def scan():
    """Read proxies from standard input and pass each one to ``set``.

    Lines are read with ``input()`` until one equals ``'exit'``; that
    terminating line is not passed on.
    """
    logger.debug('请输入代理, 输入exit退出读入')
    # iter(callable, sentinel) keeps calling input() until it returns 'exit'.
    for proxy in iter(input, 'exit'):
        set(proxy)
def max(self, proxy):
    """Set the score of a proxy to MAX_SCORE.

    :param proxy: proxy to update
    :return: whatever the underlying ``zadd`` call returns
    """
    logger.debug(f'代理 {proxy}, 可用,设置为 {MAX_SCORE}')
    # NOTE(review): arguments are passed as (key, score, member) here; confirm
    # this matches the zadd signature of the Redis client behind self.db.
    outcome = self.db.zadd(REDIS_KEY, MAX_SCORE, proxy)
    return outcome
def schedule_getter(self, cycle=GETTER_CYCLE):
    """Run the proxy getter forever, pausing between rounds.

    :param cycle: seconds to sleep after each round (GETTER_CYCLE by default)
    """
    fetcher = Getter()

    def one_round():
        # A single fetch pass: announce it, then let the getter do its work.
        logger.debug('开始抓取代理')
        fetcher.run()

    while True:
        one_round()
        time.sleep(cycle)
def schedule_tester(self, cycle=TESTER_CYCLE):
    """Run the proxy tester forever, pausing between rounds.

    :param cycle: seconds to sleep after each round (TESTER_CYCLE by default)
    """
    checker = Tester()

    def one_round():
        # A single test pass: announce it, then let the tester do its work.
        logger.debug('测试器开始运行')
        checker.run()

    while True:
        one_round()
        time.sleep(cycle)
def decrease(self, proxy):
    """Lower a proxy's score by one, or remove the proxy.

    The proxy is removed when its stored score is falsy (missing or zero)
    or is not greater than MIN_SCORE; otherwise the score is decremented.

    :param proxy: proxy to penalise
    :return: result of ``zincrby`` when decremented, result of ``zrem``
        when removed
    """
    score = self.db.zscore(REDIS_KEY, proxy)
    keep = bool(score) and score > MIN_SCORE
    if not keep:
        logger.debug(f'代理 {proxy} 当前分数 {score} 移除')
        return self.db.zrem(REDIS_KEY, proxy)
    logger.debug(f'代理 {proxy} 当前分数 {score} 减1')
    return self.db.zincrby(REDIS_KEY, proxy, -1)
def add(self, proxy, score=INITIAL_SCORE):
    """Add a proxy with the given score unless it already has one.

    :param proxy: proxy string; it must begin with ``<d.d.d.d>:<port>``
        (digits only are checked, not value ranges) or it is discarded
    :param score: score to store, INITIAL_SCORE by default
    :return: result of ``zadd`` when the proxy is added; ``None`` when the
        proxy is malformed or already has a truthy score
    """
    # Raw string: the escapes \d, \. and \: are invalid in a normal string
    # literal and trigger a warning on modern Python. The pattern value is
    # unchanged.
    if not re.match(r'\d+\.\d+\.\d+\.\d+\:\d+', proxy):
        logger.debug(f'代理不符合规范 {proxy} 丢弃')
        return None
    if not self.db.zscore(REDIS_KEY, proxy):
        logger.info(f'ZADD {REDIS_KEY} {score} {proxy}')
        # NOTE(review): arguments are passed as (key, member, score) here,
        # while max() passes (key, score, member); confirm which order the
        # Redis client behind self.db expects.
        return self.db.zadd(REDIS_KEY, proxy, score)
    return None
async def test_single_proxy(self, proxy):
    """Probe TEST_URL through one proxy and update its score.

    A response whose status is in VALID_STATUS_CODES promotes the proxy via
    ``self.redis.max``; any other status, or one of the caught
    connection/timeout errors, penalises it via ``self.redis.decrease``.

    :param proxy: proxy as ``str`` or UTF-8 encoded ``bytes``
    :return: None
    """
    connector = aiohttp.TCPConnector(verify_ssl=False)
    async with aiohttp.ClientSession(connector=connector) as session:
        try:
            # Values may arrive as bytes; normalise them to text first.
            if isinstance(proxy, bytes):
                proxy = proxy.decode('utf-8')
            proxy_url = 'http://' + proxy
            logger.debug(f'正在测试 {proxy}')
            async with session.get(
                TEST_URL,
                proxy=proxy_url,
                timeout=15,
                allow_redirects=False,
            ) as response:
                if response.status not in VALID_STATUS_CODES:
                    self.redis.decrease(proxy)
                    logger.debug(f'请求响应码不合法 {response.status}, IP {proxy}')
                else:
                    self.redis.max(proxy)
                    logger.debug(f'代理可用 {proxy}')
        except (ClientError, aiohttp.client_exceptions.ClientConnectorError, asyncio.TimeoutError, AttributeError):
            self.redis.decrease(proxy)
            logger.debug(f'代理请求失败 {proxy}')
def run(self):
    """Start one child process for each enabled component.

    The tester, getter and API schedulers are each launched in their own
    ``Process`` when the matching ``*_ENABLED`` flag is truthy.
    """

    def spawn(target):
        # Create and start a child process running the given scheduler.
        worker = Process(target=target)
        worker.start()

    logger.debug('代理池开始运行')
    if TESTER_ENABLED:
        spawn(self.schedule_tester)
    if GETTER_ENABLED:
        spawn(self.schedule_getter)
    if API_ENABLED:
        spawn(self.schedule_api)
def run(self):
    """Test every stored proxy, one batch at a time.

    The current proxy count is read once, then proxies are fetched in
    slices of BATCH_TEST_SIZE and each slice is tested concurrently.
    After each batch stdout is flushed and the method sleeps 5 seconds.
    Any exception is logged and ends the run.

    :return: None
    """
    logger.debug('测试器开始运行')
    try:
        count = self.redis.count()
        logger.debug(f'当前剩余 {count} 个代理')
        for start in range(0, count, BATCH_TEST_SIZE):
            stop = min(start + BATCH_TEST_SIZE, count)
            # Log the 1-based range "first-last" of the batch. The previous
            # f-string evaluated start + 1 - stop as arithmetic.
            logger.debug(f'正在测试第 {start + 1}-{stop} 个代理')
            test_proxies = self.redis.batch(start, stop)
            # Skip empty batches (proxies may have been removed since the
            # count was taken) so no event loop is spun up for nothing.
            if test_proxies:
                asyncio.run(self._test_batch(test_proxies))
            sys.stdout.flush()
            time.sleep(5)
    except Exception as e:
        logger.debug(f'测试器发生错误 {e.args}')

async def _test_batch(self, proxies):
    """Run test_single_proxy concurrently for every proxy in ``proxies``."""
    # asyncio.wait() no longer accepts bare coroutines (Python 3.11+), so
    # gather is used. return_exceptions=True keeps one failing check from
    # aborting the rest of the batch, as asyncio.wait did.
    await asyncio.gather(
        *(self.test_single_proxy(proxy) for proxy in proxies),
        return_exceptions=True,
    )
return self.db.zadd(REDIS_KEY, MAX_SCORE, proxy)

    def count(self):
        """
        Get the number of stored proxies (``zcard`` of REDIS_KEY).
        :return: count
        """
        return self.db.zcard(REDIS_KEY)

    def all(self):
        """
        Get all proxies whose score lies between MIN_SCORE and MAX_SCORE.
        :return: list of all proxies
        """
        return self.db.zrangebyscore(REDIS_KEY, MIN_SCORE, MAX_SCORE)

    def batch(self, start, stop):
        """
        Get a slice of proxies via ``zrevrange``.
        :param start: start index
        :param stop: end index; ``stop - 1`` is passed as the last index,
            so ``stop`` itself is treated as exclusive
        :return: list of proxies
        """
        return self.db.zrevrange(REDIS_KEY, start, stop - 1)


# Manual check when run as a script: fetch one slice and log it.
if __name__ == '__main__':
    conn = RedisClient()
    result = conn.batch(680, 688)
    logger.debug(result)
def run(self):
    """Fetch proxies unless the pool is already over its threshold.

    Logs the start of the run, then executes ``self.get_proxies()`` with
    ``asyncio.run`` only when ``self.is_over_threshold()`` is falsy.
    """
    logger.debug('获取器开始执行')
    if self.is_over_threshold():
        return
    asyncio.run(self.get_proxies())
import requests
from proxypool.settings import TEST_URL
from proxypool.log import logger

# Manual check: request TEST_URL through one hard-coded proxy and log the
# response body when the status is 200.
proxy = '96.9.90.90:8080'
proxies = {
    'http': 'http://' + proxy,
    # NOTE(review): an 'https://' proxy URL means TLS to the proxy itself;
    # confirm this proxy supports it, otherwise 'http://' is the usual form.
    'https': 'https://' + proxy,
}
logger.debug(TEST_URL)
# Without a timeout requests can wait indefinitely on a dead proxy; 15 s
# matches the timeout used by the async tester for the same check.
response = requests.get(TEST_URL, proxies=proxies, verify=False, timeout=15)
if response.status_code == 200:
    logger.debug('Successfully')
    logger.debug(response.text)
def set(proxy):
    """Store one proxy through ``conn.add`` and log the outcome.

    :param proxy: proxy string to add
    :return: None
    """
    result = conn.add(proxy)
    logger.debug(proxy)
    # A truthy result from add() is reported as success, anything else as
    # failure.
    if result:
        outcome = '录入成功'
    else:
        outcome = '录入失败'
    logger.debug(outcome)