def FetchJob():
    """Collect proxies from every fetcher on ``FetchFreeProxy`` and store them.

    The storage backend is chosen from the Flask config key
    ``DATA_STORE_TYPE``: ``'mysql'`` writes ``ProxyRaw`` rows through
    ``sql.session``; any other value writes to a ``RedisHelper`` bound to
    ``jobs.PROXY_RAW_KEY``.

    Each fetcher is a plain function found on ``FetchFreeProxy`` and is called
    with no arguments; it is expected to yield proxy strings.  A proxy is
    skipped when it is empty after stripping, fails ``verifyProxyFormat``, or
    was already seen during this run.  An exception raised by one fetcher (or
    while persisting one of its proxies) is logged and aborts only that
    fetcher; the remaining fetchers still run.
    """
    # Get the Flask app from the scheduler so the configuration can be read.
    app = schedule.app
    with app.app_context():
        store_type = current_app.config.get('DATA_STORE_TYPE')
    use_mysql = store_type == 'mysql'
    db = sql if use_mysql else RedisHelper(jobs.PROXY_RAW_KEY)

    import inspect
    member_list = inspect.getmembers(FetchFreeProxy,
                                     predicate=inspect.isfunction)

    # Proxy address strings stored so far in this run.  The set holds the
    # strings themselves (not Proxy objects) so that the ``proxy in proxy_set``
    # test below compares like with like and actually detects duplicates.
    proxy_set = set()
    for func_name, func in member_list:
        log.debug(u"开始获取代理: {}".format(func_name))
        try:
            for proxy in func():
                proxy = proxy.strip()
                if not proxy or not verifyProxyFormat(proxy):
                    log.error('ProxyFetch - {func}: '
                              '{proxy} illegal'.format(func=func_name,
                                                       proxy=proxy.ljust(20)))
                    continue
                if proxy in proxy_set:
                    log.debug('ProxyFetch - {func}: '
                              '{proxy} exist'.format(func=func_name,
                                                     proxy=proxy.ljust(20)))
                    continue

                log.debug('ProxyFetch - {func}: '
                          '{proxy} success'.format(func=func_name,
                                                   proxy=proxy.ljust(20)))
                # Build the record to save.
                last_time = datetime.datetime.now().strftime(
                    "%Y-%m-%d %H:%M:%S")
                p = Proxy(name=func_name, proxy=proxy, last_time=last_time)
                # Persist it to the configured backend.
                if use_mysql:
                    record = ProxyRaw(name=func_name, proxy=proxy,
                                      https=False, gmt_create=last_time,
                                      gmt_modified=last_time)
                    with app.app_context():
                        db.session.add(record)
                        db.session.commit()
                else:
                    db.add(p.proxy, p.Json)
                # Remember the address so later duplicates are skipped.
                proxy_set.add(proxy)
        except Exception:
            # Best effort: one broken fetcher must not stop the others, but
            # keep the traceback so the failure can be diagnosed.
            # NOTE(review): assumes `log` follows the logging.Logger interface
            # (accepts exc_info) - confirm.
            log.error(u"代理获取函数 {} 运行出错!".format(func_name),
                      exc_info=True)
    # Report how many proxies were stored in this run.
    log.debug('本次插入代理总数: %s', len(proxy_set))
class CheckProcess(object):
    """Worker that re-validates proxies taken from a queue of stored proxies.

    The backend is chosen from the Flask config key ``DATA_STORE_TYPE``:
    ``'mysql'`` uses ``sql.session`` with the ``ProxyValid`` model; any other
    value uses a ``RedisHelper`` bound to ``jobs.PROXY_VALID_KEY``.
    """

    def __init__(self, queue: Queue):
        """Store the work queue and resolve the storage backend.

        Args:
            queue: queue of items to validate.  ``run`` decodes each item with
                ``Proxy.fromJson`` for the Redis backend and uses it as-is for
                MySQL; a ``None`` item stops the worker.
        """
        self.queue = queue
        app = schedule.app
        with app.app_context():
            self.store_type = current_app.config.get('DATA_STORE_TYPE')
        if self.store_type == 'mysql':
            self.db = sql
        else:
            self.db = RedisHelper(jobs.PROXY_VALID_KEY)

    def run(self):
        """Validate queued proxies until the queue is empty or yields ``None``.

        A proxy is kept (its stored statistics are refreshed) when validation
        succeeds or while its ``fail`` counter is still below ``FAIL_COUNT``;
        otherwise it is removed from the store.
        """
        use_mysql = self.store_type == 'mysql'
        if not use_mysql:
            self.db.change(jobs.PROXY_VALID_KEY)
        while True:
            if self.queue.empty():
                break
            proxy_data = self.queue.get()
            if proxy_data is None:
                break
            proxyObj = proxy_data if use_mysql else Proxy.fromJson(proxy_data)
            proxy, status = proxyObj.validateProxy()
            if status or proxy.fail < FAIL_COUNT:
                # Write the refreshed statistics back to the store.
                if use_mysql:
                    self._update_record(proxy)
                elif self.db.exists(proxy.proxy):
                    log.debug('ValidProxyCheck - {} : {} validation exists'.format(proxy.name, proxy.proxy.ljust(20)))
                    self.db.add(proxy.proxy, proxy.Json)
                log.debug('ValidProxyCheck - {} : {} validation pass'.format(proxy.name, proxy.proxy.ljust(20)))
            else:
                log.debug('ValidProxyCheck - {} : {} validation fail'.format(proxy.name, proxy.proxy.ljust(20)))
                if use_mysql:
                    self._delete_record(proxy)
                else:
                    self.db.delete(proxy.proxy)

    def _update_record(self, proxy):
        """Copy the validation statistics of *proxy* onto its ProxyValid row."""
        with schedule.app.app_context():
            proxy_info = self.db.session.query(ProxyValid).filter(
                ProxyValid.proxy == proxy.proxy).first()
            # first() returns None when no row matches (for example the row
            # was removed in the meantime); skip it instead of raising
            # AttributeError and killing the worker.  This mirrors the
            # exists() guard used for the Redis backend.
            if proxy_info is None:
                return
            proxy_info.success = proxy.success
            proxy_info.fail = proxy.fail
            proxy_info.total = proxy.total
            proxy_info.quality = proxy.quality
            proxy_info.last_status = proxy.last_status
            proxy_info.gmt_modified = proxy.last_time
            self.db.session.commit()

    def _delete_record(self, proxy):
        """Delete the ProxyValid row(s) whose address matches *proxy*."""
        with schedule.app.app_context():
            self.db.session.query(ProxyValid).filter(
                ProxyValid.proxy == proxy.proxy).delete()
            # Query.delete() only issues the DELETE inside the current
            # transaction; commit so the removal is persisted.
            # NOTE(review): assumes the session is not in autocommit mode -
            # the update path commits explicitly, which suggests it is not.
            self.db.session.commit()
class CheckProcess(object):
    """Worker that validates queued raw proxies and stores the ones that pass.

    The backend is chosen from the Flask config key ``DATA_STORE_TYPE``:
    ``'mysql'`` inserts ``ProxyValid`` rows through ``sql.session``; any other
    value writes to a ``RedisHelper`` bound to ``jobs.PROXY_VALID_KEY``.
    """

    def __init__(self, queue: Queue):
        """Store the work queue and resolve the storage backend.

        Args:
            queue: queue of items to validate.  ``run`` decodes each item with
                ``Proxy.fromJson`` for the Redis backend and uses it as-is for
                MySQL; a ``None`` item stops the worker.
        """
        self.queue = queue
        flask_app = schedule.app
        with flask_app.app_context():
            self.store_type = current_app.config.get('DATA_STORE_TYPE')
        self.db = (sql if self.store_type == 'mysql'
                   else RedisHelper(jobs.PROXY_VALID_KEY))

    def run(self):
        """Validate queued proxies until the queue is empty or yields ``None``.

        Proxies that pass validation are written to the valid-proxy store;
        failures are only logged.
        """
        redis_backed = self.store_type != 'mysql'
        if redis_backed:
            self.db.change(jobs.PROXY_VALID_KEY)

        while not self.queue.empty():
            item = self.queue.get()
            if item is None:
                break

            candidate = Proxy.fromJson(item) if redis_backed else item
            proxy, passed = candidate.validateProxy()

            if not passed:
                log.error(
                    'RawProxyCheck - {} : {}, into time: {} validation fail'.format(
                        proxy.name, proxy.proxy.ljust(20), proxy.last_time))
                continue

            # Persist the proxy that passed validation.
            if redis_backed:
                self.db.add(proxy.proxy, proxy.Json)
            else:
                row = ProxyValid(
                    name=proxy.name,
                    proxy=proxy.proxy,
                    https=proxy.https,
                    proxy_type=proxy.type,
                    china=proxy.china,
                    location=proxy.location,
                    success=proxy.success,
                    fail=proxy.fail,
                    total=proxy.total,
                    quality=proxy.quality,
                    last_status=proxy.last_status,
                    gmt_modified=proxy.last_time,
                )
                flask_app = schedule.app
                with flask_app.app_context():
                    self.db.session.add(row)
                    self.db.session.commit()
            log.debug('RawProxyCheck - {} : {} validation pass'.format(
                proxy.name, proxy.proxy.ljust(20)))