def sever_chan(title: str = None, message: str = None) -> bool:
    """
    Push a notification through the ServerChan WeChat relay.

    @param title: message title, at most 256 chars
    @param message: body text, Markdown supported, at most 64 KB
    @return: True if ServerChan acknowledged the push, False otherwise
    """
    # Reject non-string input early (both params default to None).
    if not isinstance(title, str) or not isinstance(message, str):
        return False
    import requests

    url = f"http://sc.ftqq.com/{SERVER_CHAN_SCKEY}.send"
    params = {'text': title, 'desp': message}
    try:
        res = requests.get(url, params=params)
        res.raise_for_status()
        if res.status_code == 200 and res.json().get("errmsg") == 'success':
            logger.success("Server酱设备通知已发送~")
            return True
    except requests.exceptions.HTTPError:
        err_ = "Server酱404!!!可能原因为您的SCKEY未填写或已重置,请访问 http://sc.ftqq.com/3.version 查看解决方案\n" \
               "工作流将保存此漏洞数据至error.log 并继续运行,希望您常来看看……"
        logger.error(err_)
        send_email(err_, to_='self')
    except requests.exceptions.RequestException as e:
        # BUGFIX: DNS failures / timeouts / refused connections previously
        # propagated out of this helper and crashed the calling workflow.
        logger.error(f"ServerChan push failed || {e}")
    # BUGFIX: previously fell off the end and returned None on any
    # non-success path; make the advertised bool contract explicit.
    return False
def run(self) -> None:
    """Drive the RSS → Clash-config pipeline: parse the subscription sources,
    render them into the startup template, and dump the result to YAML."""
    # Guard clause: nothing to parse, nothing to do.
    if not self._rss_pool:
        self._debug_printer("可解析参数为空")
        return None

    # Parse subscription sources, then merge them into the config template.
    parsed_sources = self._analyze_rss()
    template = self._load_startup_config(
        url=self.DEFAULT_CONFIG_URL, path=self.LOCAL_CONFIG_PATH)
    rendered = self._generate_model(parsed_sources, template)

    # Persist the rendered configuration to the local YAML cache.
    self._generate_yaml(cache_path=self.CLASH_CONFIG_YAML, config_content=rendered)

    # self._debug_printer(f"配置文件已导出 -- {self.CLASH_CONFIG_YAML}")
    logger.success(f"配置文件已导出 -- {self.CLASH_CONFIG_YAML}")
    return
def interface(self, power: int = 8) -> None:
    """
    Spin up coroutine workers and block until the task queue is drained.

    @param power: fallback concurrency, used only when ``self.power`` is unset
    @return:
    """
    # Reload pending tasks before spawning workers.
    self.offload_task()

    # Effective concurrency: the instance setting wins over the argument,
    # capped by the queue size when one is configured.
    concurrency = self.power or power
    if self.max_queue_size != 0 and concurrency > self.max_queue_size:
        concurrency = self.max_queue_size

    # Launch the workers and wait for all of them to finish.
    workers = [gevent.spawn(self.launch) for _ in range(concurrency)]
    gevent.joinall(workers)

    self.killer()
    if self.debug_logger:
        logger.success(
            f'<Gevent> mission completed -- <{self.__class__.__name__}>')
def push_info(self, user: "dict | List[dict]"):
    """
    Insert one or more user records into the ``v2raycs`` table.

    @param user: a single user dict or a list of user dicts; each dict must
        carry the keys domain/subs/class_/end_life/res_time/passable/
        username/password/email/uuid
    """
    if isinstance(user, dict):
        user = [user, ]
    elif not isinstance(user, list):
        logger.warning('MySQL add_user 调用格式有误')
        # BUGFIX: previously execution fell through and iterated the bad
        # value (TypeError for ints, per-character for strings). Abort.
        return
    try:
        for user_ in user:
            try:
                # Parameterized INSERT -- values are bound by the driver,
                # never interpolated, which guards against SQL injection.
                sql = f'INSERT INTO v2raycs (' \
                      f'domain, subs, class_,end_life,res_time,passable,username,password,email,uuid) VALUES (' \
                      f'%s, %s, %s,%s, %s, %s,%s, %s, %s,%s)'
                val = (user_["domain"], user_["subs"], user_['class_'], user_['end_life'], user_["res_time"],
                       user_['passable'], user_['username'], user_["password"], user_['email'], user_['uuid'])
                self.cursor.execute(sql, val)
            except KeyError as e:
                logger.warning(f"MySQL数据解析出错,user:dict必须同时包含username、password以及email的键值对{e}")
                # return 702
            except pymysql.err.IntegrityError as e:
                logger.warning(f'{user_["username"]} -- 用户已在库,若需修改用户信息,请使用更新指令{e}')
                # return 701
            else:
                logger.success(f'{user_["username"]} -- 用户添加成功')
                # return 700
    finally:
        # Commit whatever succeeded and release the connection in all cases.
        self.conn.commit()
        self.conn.close()
def startup() -> None:
    """
    Launch the configured system processes (timed-deployment worker and/or
    the Flask API) and block until they exit.
    """
    process_list = []
    try:
        # Timed-deployment task (single process, multi-threaded inside).
        if ENABLE_DEPLOY['global']:
            process_list.append(
                multiprocessing.Process(target=_SystemEngine.run_deploy, name='deploymentTimingTask'))
        # Flask API server.
        if ENABLE_SERVER:
            process_list.append(
                multiprocessing.Process(target=_SystemEngine.run_server, name='deploymentFlaskAPI'))
        # Start every configured process ...
        for process_ in process_list:
            logger.success(f'<SystemProcess> Startup -- {process_.name}')
            process_.start()
        # ... then block until they all finish.
        for process_ in process_list:
            process_.join()
    # BUGFIX: `except TypeError or AttributeError` evaluates the boolean
    # expression first and therefore only ever caught TypeError; catching
    # multiple exception types requires a tuple.
    except (TypeError, AttributeError) as e:
        logger.exception(e)
        send_email(f"[程序异常终止]{str(e)}", to_='self')
    except KeyboardInterrupt:
        # FIXME ensure termination is safe while no inter-process channel exists
        logger.debug('<SystemProcess> Received keyboard interrupt signal')
        for process_ in process_list:
            process_.terminate()
    finally:
        logger.success('<SystemProcess> End the V2RayCloudSpider')
def quick_deploy_(docker=GeventSchedule, interface: str = 'interface', crontab_seconds: int = 100):
    """
    Run one docker class on a fixed-interval schedule, forever.

    @param crontab_seconds: seconds between two consecutive runs of the task
    @param interface: name of the entry method to invoke on the docker instance
    @param docker: a Python class object, e.g. SubscribesCleaner (NOT an
        instance like SubscribesCleaner())
    @return: never returns -- blocks in the scheduling loop
    """
    logger.success(f'<GeventSchedule>启动成功 -- {docker.__name__}')

    def release_docker():
        """
        Unpack the container and run its main-line entry once.
        @return:
        """
        logger.info(f'<GeventSchedule> Release docker || Do {docker.__name__}')
        # BUGFIX: replaced `exec(f'docker().{interface}()')` -- exec on a
        # format string breaks for any non-identifier method name and is
        # eval-injection prone; getattr dispatch is the safe equivalent.
        getattr(docker(), interface)()

    schedule.every(crontab_seconds).seconds.do(release_docker)
    while True:
        schedule.run_pending()
        time.sleep(1)
def startup(self):
    """
    Spawn the virtual-station server and the business adapter as separate
    processes, then reap the server once the business process exits.

    @return: True once the business process finished and the server was
        terminated; None when process creation failed.
    """
    # --------------------------------------------------
    # Spawn the processes
    # --------------------------------------------------
    try:
        # Flask server process.
        process_server = multiprocessing.Process(target=self.run_server, name="VirtualStation")
        # Business workload process.
        process_business = multiprocessing.Process(
            target=self.run_business, name="Adapter")

        process_server.start()
        process_business.start()

        # Simple watchdog: once the business process dies, take the server
        # down with it. IMPROVEMENT: sleep between polls -- the old loop was
        # a busy-wait pinning one CPU core.
        import time
        while True:
            if not process_business.is_alive():
                process_server.terminate()
                return True
            time.sleep(0.5)
    # BUGFIX: `except TypeError or AttributeError` only caught TypeError;
    # a tuple is required to catch both exception types.
    except (TypeError, AttributeError) as e:
        logger.exception(e)
    finally:
        logger.success('<ScaffoldGuider> End the V2RayCloudSpider')
        print(">>> 程序執行結束 請手動關閉交互窗口")
def startup() -> None:
    """
    Launch the configured system processes (timed-deployment worker and/or
    the Flask API) and block until they all exit.
    """
    process_list = []
    try:
        # Timed-deployment task (single process, multi-threaded inside).
        if ENABLE_DEPLOY['global']:
            process_list.append(
                multiprocessing.Process(target=_SystemEngine.run_deploy, name='deploymentTimingTask'))
        # Flask API server.
        if ENABLE_SERVER:
            process_list.append(
                multiprocessing.Process(target=_SystemEngine.run_server, name='deploymentFlaskAPI'))
        # Start every configured process ...
        for process_ in process_list:
            logger.success(f'<SystemProcess> Startup -- {process_.name}')
            process_.start()
        # ... then block until they all finish.
        for process_ in process_list:
            process_.join()
    except (TypeError, AttributeError) as e:
        # BUGFIX: was a bare `pass`, silently swallowing startup failures;
        # record the traceback so misconfiguration is diagnosable.
        logger.exception(e)
    except (KeyboardInterrupt, SystemExit):
        # FIXME ensure termination is safe while no inter-process channel exists
        logger.debug('<SystemProcess> Received keyboard interrupt signal.')
        for process_ in process_list:
            process_.terminate()
    finally:
        logger.success('<SystemProcess> The system exits completely.')
def pop_subs_to_admin(class_: str):
    """
    Hand one subscription of the requested type to the administrator.

    @param class_: subscription type, e.g. ``ssr`` / ``v2ray`` / ``trojan``
    @return: ``{'msg': 'success', 'subscribe': ..., 'subsType': ...}`` on
        success, otherwise ``{'msg': 'failed', 'info': ...}``
    """
    logger.debug("<SuperAdmin> -- 获取订阅")
    from src.BusinessLogicLayer.cluster.sailor import manage_task

    try:
        # Remaining links of this type still held in the pool.
        pool_snapshot: list = RedisClient().sync_remain_subs(
            REDIS_SECRET_KEY.format(class_))
        while True:
            # Pool exhausted -- report failure upward.
            if not pool_snapshot:
                logger.error(f'<SuperAdmin> -- 无可用<{class_}>订阅')
                return {'msg': 'failed', 'info': f"无可用<{class_}>订阅"}

            # Take the most recently added (subscribe, end_life) pair.
            # The link is NOT removed from the pool here: it is flagged into
            # the apollo buffer queue, which the ddt refresh workflow sweeps
            # together with expired links. Buffering keeps the beat in sync
            # and shields Redis from hot / erroneous operations.
            subscribe_link, expire_at = pool_snapshot.pop()

            # Beat-synchronized rollback of the link pool: generate/sync
            # exactly one atomic task.
            threading.Thread(target=manage_task, kwargs={
                "class_": class_,
                "only_sync": True
            }).start()
            logger.success('管理员模式--链接分发成功')

            # Detach at once: drop every subscription tied to this account.
            # beat_sync=True refreshes immediately; False defers to the beat.
            threading.Thread(target=detach, kwargs={
                "subscribe": subscribe_link,
                'beat_sync': True
            }).start()

            return {
                'msg': 'success',
                'subscribe': subscribe_link,
                'subsType': class_
            }
    except Exception as e:
        logger.exception(e)
        return {'msg': 'failed', 'info': str(e)}
def load_any_subscribe(self, api: Chrome, element_xpath_str: str, href_xpath_str: str, class_: str, retry=0):
    """
    Capture a subscription link and push it into the persistence pool.

    @param api: ChromeDriver object
    @param element_xpath_str: XPath locating the tag that carries the link
    @param href_xpath_str: attribute whose value is the subscription link
    @param class_: subscription type, e.g. `ssr` / `v2ray` / `trojan`
    @param retry: failed-capture counter driving the recursive retry below
    @todo replace the hand-rolled `retry` parameter with the `retrying`
        module (network reconnect, assertion retry, behaviour rollback ...)
    @return: None when persistence failed after 3 attempts; otherwise falls
        through after storing (the captured link stays in self.subscribe)
    """
    # Wait up to 30s for the target element, then read the link attribute.
    self.subscribe = WebDriverWait(api, 30).until(expected_conditions.presence_of_element_located((
        By.XPATH, element_xpath_str
    ))).get_attribute(href_xpath_str)
    # If the link was captured, parse the record and persist it.
    if self.subscribe:
        # Up to 3 persistence attempts.
        for x in range(3):
            # ['domain', 'subs', 'class_', 'end_life', 'res_time', 'passable','username', 'password', 'email']
            try:
                # Airport (provider) domain.
                domain = urlparse(self.register_url).netloc
                # Capture timestamp, sub-second precision stripped.
                res_time = str(datetime.now(TIME_ZONE_CN)).split('.')[0]
                # Link usability flag, defaults to true.
                passable = 'true'
                # Record fields, in the column order listed above.
                docker = [domain, self.subscribe, class_, self.end_life, res_time, passable,
                          self.username, self.password, self.email]
                # Persist according to the configured beat_sync mode.
                FlexibleDistribute(docker=docker, beat_sync=self.beat_sync)
                # Stored successfully -- leave the retry loop.
                logger.success(">> GET <{}> -> {}:{}".format(self.action_name, class_, self.subscribe))
                # TODO ADD v5.1.0 feature: cache the airport-domain -> subscription-domain mapping
                # set_task2url_cache(task_name=self.__class__.__name__, register_url=self.register_url,
                #                    subs=self.subscribe)
                break
            except Exception as e:
                logger.debug(">> FAILED <{}> -> {}:{}".format(self.action_name, class_, e))
                time.sleep(1)
                continue
        # for/else: no attempt succeeded -- give up on this link.
        else:
            return None
    # Otherwise enter the robustness path and retry the capture.
    # TODO classify the failure cause and build a recovery plan; if none is
    #   reliable, make sure the task exits safely.
    else:
        if retry >= 3:
            raise TimeoutException
        retry += 1
        self.load_any_subscribe(api, element_xpath_str, href_xpath_str, class_, retry)
def __init__(self) -> None:
    """Echo the effective engine configuration to the terminal on boot."""
    # Static configuration banners.
    for banner in (
            f"[SystemEngineIO] CONFIG_COLLECTOR_PERMISSION:{CRAWLER_SEQUENCE}",
            f"[SystemEngineIO] CONFIG_ENABLE_DEPLOY:{ENABLE_DEPLOY}",
            "[SystemEngineIO] CONFIG_COROUTINE:True",
    ):
        terminal_echo(banner, 1)
    # One banner per registered action image.
    for action_image in ACTIONS_IO:
        terminal_echo(f"[SystemEngineIO] CONFIG_ACTIONS:{action_image}", 1)
    logger.success(
        "<SystemEngineIO> System core initialized successfully.")
def run(self):
    """
    Verify the project file tree, rebuild whatever is missing, then validate
    the config. If anything had to be (re)created, ask for a restart and exit.
    """
    try:
        # IDIOM/PERF: any() short-circuits on the first missing path; the
        # old code built a full list first, and its reversed() had no effect
        # on a pure existence check.
        if any(not os.path.exists(node_) for node_ in self.root):
            logger.warning('系统文件残缺!')
            logger.debug("启动<工程重构>模块...")
            self.set_up_file_tree(self.root)
        self.check_config()
    finally:
        # self.flag is raised by set_up_file_tree() when it created anything;
        # a restart is then required to pick the rebuilt tree up.
        if self.flag:
            logger.success(">>> 运行环境链接完成,请重启项目")
            logger.warning(">>> 提醒您正确配置Chrome及对应版本的ChromeDriver")
            sys.exit()
def send_email(msg, to_: "List[str] | str | set", headers: str = None):
    """
    Send a plain-text ops notification mail.

    :param msg: body text
    :param to_: recipient(s)
        1. str 'self' -- send to the configured sender account itself
        2. str -- a single recipient address
        3. List[str] / set -- bulk send (identical content)
    :param headers: subject line; defaults to '<V2Ray云彩姬>运维日志'
    :@todo attach log files (open file) and rich-text (html) bodies
    :return: False when ``to_`` is invalid, otherwise None after delivery
    """
    headers = headers if headers else '<V2Ray云彩姬>运维日志'
    sender = SMTP_ACCOUNT.get('email')
    password = SMTP_ACCOUNT.get('sid')
    smtp_server = 'smtp.qq.com'

    # Normalize recipients to a set BEFORE opening the SMTP connection, so
    # an invalid argument can no longer leak a live connection.
    if to_ == 'self':
        # BUGFIX: was `set(sender, )`, which explodes the address string
        # into a set of single characters; `{sender}` is the intended
        # one-element set.
        to_ = {sender}
    if isinstance(to_, str):
        to_ = [
            to_,
        ]
    if isinstance(to_, list):
        to_ = set(to_)
    if not isinstance(to_, set):
        return False

    message = MIMEText(msg, 'plain', 'utf-8')
    message['From'] = Header('ARAI.DM', 'utf-8')  # sender
    message['Subject'] = Header(f"{headers}", 'utf-8')

    server = smtplib.SMTP_SSL(smtp_server, 465)
    try:
        server.login(sender, password)
        for to in to_:
            try:
                # BUGFIX: Message.__setitem__ APPENDS headers, so setting
                # 'To' in a loop accumulated every previous recipient;
                # delete the stale header before writing the new one.
                del message['To']
                message['To'] = Header(to, 'utf-8')  # recipient
                server.sendmail(sender, to, message.as_string())
                logger.success("发送成功->{}".format(to))
            except smtplib.SMTPRecipientsRefused:
                logger.warning('邮箱填写错误或不存在->{}'.format(to))
            except Exception as e:
                logger.error('>>> 发送失败 || {}'.format(e))
    finally:
        server.quit()
def run(beat_sync=True, force_run=None) -> None:
    """
    Local run -- sweep the task queues for gaps.

    Node actions of every task type are either launched simultaneously or
    executed in declared order; individual node tasks never affect each
    other (v2rayChain/ssrChain each own their vNode_x/sNode_x children).

    @param beat_sync: when False, flush the persistence buffer immediately
        after the sync pass instead of waiting for the next beat
    @param force_run: forwarded to sailor.manage_task to escape the
        queue-saturation check (debug / standalone deployments)
    @return:
    """
    # Breadth-first sync of every task queue. This deliberately oversteps
    # local permissions: a coroutine workspace is created no matter whether
    # this host holds collector rights.
    for mission_type in CRAWLER_SEQUENCE:
        sailor.manage_task(class_=mission_type, beat_sync=beat_sync, force_run=force_run)

    # FIXME beat synchronisation -- flush buffered records right away.
    if not beat_sync:
        from src.BusinessCentralLayer.middleware.subscribe_io import FlexibleDistribute
        FlexibleDistribute().start()

    # One data-migration pass.
    # TODO hook the cluster into multi-sentinel mode to cut the extra CPU
    #   cost of raw data copying.
    _cd.startup_ddt_overdue()

    # All missions dispatched.
    logger.success('<Gevent>任务结束')
def set_up_file_tree(self, root):
    """
    Depth-first initialisation of the system file tree: create whatever is
    missing and raise ``self.flag`` when anything had to be created.

    --/qinse/V2RaycSpider{verNum}
        --BCL
        --BLL
        --BVL
        --Database
            --client_depot
                --vcs.csv
            --logs
                --*error.log
                --*runtime.log
            --temp_cache
                --*AnyTempCacheFile...
                --*CrawlFetchHistory.txt
            --fake_useragent_0.1.11.json
        --*tests

    @param root: iterable of paths; parents must precede children so that
        ``os.mkdir`` always finds an existing parent
    """
    for child_ in root:
        if os.path.exists(child_):
            continue
        # Something is missing -- the caller must be told to restart.
        self.flag = True
        try:
            # No file extension -> treat the path as a directory.
            # (BUGFIX/cleanup: the old `os.path.isdir(child_)` guard was
            # dead code -- isdir() is always False for a path that does
            # not exist, which is the only way to reach this branch.)
            if not os.path.splitext(child_)[-1]:
                os.mkdir(child_)
                logger.success(f"系统文件链接成功->{child_}")
            # Seed the version-control csv with its header row.
            elif child_ == SERVER_PATH_DEPOT_VCS:
                try:
                    with open(child_, 'w', encoding='utf-8', newline='') as fpx:
                        csv.writer(fpx).writerow(['version', 'title'])
                    logger.success(f"系统文件链接成功->{child_}")
                except Exception as ep:
                    logger.exception(f"Exception{child_}{ep}")
        except Exception as ep:
            logger.exception(ep)
def __init__(self) -> None:
    """Log the effective engine configuration at construction time."""
    # Boot report: platform, crawler sequence, deployment switches,
    # coroutine acceleration, container unpacking, and the task queue.
    boot_report = (
        f'<系统初始化> SystemEngine -> {platform}',
        f'<定位配置> check_sequence:{CRAWLER_SEQUENCE}',
        f'<部署设置> enable_deploy:{ENABLE_DEPLOY}',
        f"<协程加速> Coroutine:{enable_coroutine}",
        "<解压容器> DockerEngineInterface",
        f'<加载队列> IndexQueue:{actions.__entropy__}',
    )
    for line in boot_report:
        logger.info(line)
    logger.success('<Gevent> 工程核心准备就绪 任务即将开始')
def refresh(self, key_name: str, cross_threshold: int = None) -> None:
    """
    Atomic pool refresh: drop every expired subscribe stored under key_name
    in a single pass.

    @param cross_threshold: expiry threshold -- entries past it are deleted
    @param key_name: secret key of the hash to sweep
    @return:
    """
    # Snapshot of the hash (subscribe -> end_life).
    stale_pool: dict = self.db.hgetall(key_name)
    # Empty pool: nothing to sweep, just report it.
    if self.get_len(key_name) == 0:
        logger.warning('<{}> EMPTY - {}({})'.format(
            self.__class__.__name__, key_name, self.get_len(key_name)))
        return
    # Delete every entry whose end-of-life crossed the threshold.
    for subscribe, end_life in stale_pool.items():
        if not self.is_stale(end_life, cross_threshold):
            continue
        logger.debug(f'del-({key_name})--{subscribe}')
        self.db.hdel(key_name, subscribe)
    logger.success('<{}> UPDATE - {}({})'.format(
        self.__class__.__name__, key_name, self.get_len(key_name)))
def _scaffold_exile(task_sequential=4):
    """Run the 'exile' maintenance pipeline: queue audit, subscribe-pool
    decoupling, overdue cleanup, then a debug summary."""
    # (stage index, stage banner, stage action) -- executed in order.
    stages = (
        (0, "Running scaffold exile...", None),
        (1, "Checking the task queue...",
         lambda: _ScaffoldGuider._scaffold_entropy(_debug=True)),
        (2, "Cleaning the subscribe pool...",
         _ScaffoldGuider._scaffold_decouple),
        (3, "Cleaning timed out subscribes...",
         _ScaffoldGuider._scaffold_overdue),
    )
    for seq, banner, action in stages:
        logger.debug(f"<ScaffoldGuider> Exile[{seq}/{task_sequential}] || {banner}")
        time.sleep(0.3)
        if action is not None:
            action()
    # Wrap-up: emit task-queue and remaining-subscribe debug data.
    logger.debug(
        f"<ScaffoldGuider> Exile[{task_sequential}/{task_sequential}] || Outputting debug data..."
    )
    _ScaffoldGuider._scaffold_entropy()
    _ScaffoldGuider._scaffold_remain()
    logger.success("<ScaffoldGuider> Exile[Mission Completed] || exile")
def killer(self):
    """Emit the decouple-completion notice unless running in debug mode."""
    if self.debug:
        return
    logger.success("<SubscribesCleaner> --> decouple compete.")
def sync_actions(
        class_: str,
        mode_sync: str = None,
        only_sync=False,
        beat_sync=True,
):
    """
    Synchronise the <class_> task queue according to mode_sync.

    @param class_: task type, one of the crawler sequence (ssr/v2ray/trojan)
    @param mode_sync: 'upload' pushes task markers to the Redis message
        queue, 'download' pulls atomic tasks into the local Poseidon queue,
        'force_run' loads local collector entities directly (standalone /
        debug). NOTE(review): the None default crashes on
        ``mode_sync.title()`` below -- callers appear to always pass a
        value; confirm before relying on the default.
    @param only_sync: beat-sync thread lock -- transfer exactly one atomic task
    @param beat_sync: forwarded to ActionShunt when building collector entities
    @return: an overflow-state string ('stop'/'offload'/...), None on upload
        hijack, or falls through (None) after a completed upload
    """
    logger.info(
        f"<TaskManager> Sync{mode_sync.title()} || 正在同步<{class_}>任务队列...")
    # ================================================
    # Beat pause -- atomic synchronisation
    # ================================================
    rc = RedisClient()
    _state = _is_overflow(task_name=class_, rc=rc)
    if _state == 'stop':
        return _state
    # ================================================
    # Refresh task metadata
    # ================================================
    # Publish the entropy of the collection tasks about to be launched.
    _update_entropy(rc=rc, entropy=__entropy__)
    # Let the factory read the mapping table and mass-produce collector
    # runtime entities for this task type.
    sync_queue: list = ActionShunt(class_, silence=True, beat_sync=beat_sync).shunt()
    # Shuffle so tasks are not always emitted in declaration order.
    random.shuffle(sync_queue)
    # ================================================
    # $ Core business
    # ================================================
    if mode_sync == 'upload':
        # fixme: interim guard against link-pool overflow
        if round(rc.get_len(REDIS_SECRET_KEY.format(class_)) * 1.25) > SINGLE_TASK_CAP:
            logger.warning("<TaskManager> UploadHijack -- 连接池任务即将溢出,上传任务被劫持")
            return None
        # Emit one message per produced collector entity.
        for _ in range(sync_queue.__len__()):
            rc.sync_message_queue(mode='upload', message=class_)
            # Beat-sync thread lock: stop after one atomic task.
            if only_sync:
                logger.warning("<TaskManager> OnlySync -- 触发节拍同步线程锁,仅上传一枚原子任务")
                break
        logger.success("<TaskManager> UploadTasks -- 任务上传完毕")
    elif mode_sync == 'download':
        async_queue: list = []
        while True:
            # Fetch one atomic task from the distributed queue.
            atomic = rc.sync_message_queue(mode='download')
            # Only sync data for a recognised task type.
            if atomic and atomic in CRAWLER_SEQUENCE:
                # Overload guard: stop syncing when the local buffer nears
                # its capacity. _state is one of continue/offload/stop.
                _state = _is_overflow(task_name=atomic, rc=rc)
                if _state != 'continue':
                    return _state
                # Lazily (re)fill the local entity queue for this type.
                if async_queue.__len__() == 0:
                    async_queue = ActionShunt(atomic, silence=True, beat_sync=beat_sync).shunt()
                    random.shuffle(async_queue)
                # Push one collector entity into the local Poseidon queue.
                Middleware.poseidon.put_nowait(async_queue.pop())
                logger.info(
                    f'<TaskManager> offload atomic<{atomic}>({Middleware.poseidon.qsize()})'
                )
                # Beat-sync thread lock: stop after one atomic task.
                if only_sync:
                    logger.warning(
                        f"<TaskManager> OnlySync -- <{atomic}>触发节拍同步线程锁,仅下载一枚原子任务"
                    )
                    return 'offload'
            else:
                return 'offload'
    elif mode_sync == 'force_run':
        for slave_ in sync_queue:
            # ================================================================================================
            # TODO v5.4.r new feature: scaffold spawn
            # 1. Previously neither `run` nor `force-run` issued through scaffold could start a collection
            #    task while the queue was saturated -- the lines below take the overflow lock.
            # 2. The new `spawn` command bypasses this module: SpawnBooster compiles the underlying code
            #    and starts the collector directly.
            # ================================================================================================
            # force_run: for standalone deployment or single-step debugging.
            # Even in force_run mode the number of executed tasks must not
            # exceed the queue capacity, hence the overflow check.
            _state = _is_overflow(task_name=class_, rc=rc)
            if _state != 'continue':
                return _state
            # Push the collector entity into the local Poseidon queue.
            Middleware.poseidon.put_nowait(slave_)
            # Beat-sync thread lock: stop after one atomic task.
            if only_sync:
                logger.warning(
                    f"<TaskManager> OnlySync -- <{class_}>触发节拍同步线程锁,仅下载一枚原子任务")
                return 'stop'
        return 'offload'
def manage_task(class_: str = 'v2ray', only_sync=False, run_collector=None, beat_sync=True, force_run=None) -> bool:
    """
    Load and dispatch collection tasks for one subscription type.

    @param force_run: debug-mode forced run; escapes the queue-saturation check
    @param run_collector: create the coroutine workspace and run the queued
        tasks concurrently; None falls back to the deployment-config permission
    @param only_sync: beat-sync thread lock; when the local task count > 0,
        push exactly one atomic task into the Poseidon coroutine space
    @param class_: task type -- must be in the crawler sequence
        (ssr / v2ray / trojan)
    @param beat_sync: forwarded to the task-sync layer
    @return: True when a collection run was started, False when the type is
        unauthorised or this node lacks collector permission
    """
    # ----------------------------------------------------
    # Parameter review and translation
    # ----------------------------------------------------
    # Reject task types this host is not authorised for.
    if class_ not in CRAWLER_SEQUENCE:
        return False
    # Collector permission: an explicit argument overrides the deployment
    # config (manual override is intentionally allowed).
    collector_permission: bool = ENABLE_DEPLOY.get('tasks').get(
        'collector') if run_collector is None else run_collector
    # force_run: when unspecified, follow the standalone-deployment flag
    # (standalone deployments force-run by default; if the deployment form
    # is also undefined/null, force_run stays off).
    force_run = force_run if force_run else SINGLE_DEPLOYMENT
    # ----------------------------------------------------
    # Resolve the sync mode
    # ----------------------------------------------------
    # Collector nodes download tasks; non-collector nodes upload them.
    mode_sync = "download" if collector_permission is True else "upload"
    # force_run has the highest priority and replaces either mode.
    mode_sync = "force_run" if force_run is True else mode_sync
    # ----------------------------------------------------
    # Synchronise the message (task) queue
    # ----------------------------------------------------
    # IF this host may collect, tasks are synced for local execution
    # (self-produced and self-consumed in standalone deployments);
    # ELSE generated tasks are pushed to the message queue.
    response: "str | bool" = sync_actions(
        class_=class_,
        only_sync=only_sync,
        beat_sync=beat_sync,
        mode_sync=mode_sync,
    )
    # ----------------------------------------------------
    # Initialise the coroutine space (execute the tasks)
    # ----------------------------------------------------
    # A coroutine space is created only on nodes with collector permission;
    # entry via control-deploy implies that permission exists.
    if force_run:
        if (response == 'offload') and (Middleware.poseidon.qsize() > 0):
            logger.info(f'<TaskManager> ForceRun || <{class_}>采集任务启动')
            ShuntRelease(work_queue=Middleware.poseidon).interface()
            logger.success(f'<TaskManager> ForceWorkFinish || <{class_}>采集任务结束')
            return True
    # if 'force_run' is False and the node has the permissions of collector
    if collector_permission:
        # if task queue can be work
        if (response == 'offload') and (Middleware.poseidon.qsize() > 0):
            logger.info('<TaskManager> Run || 采集任务启动')
            ShuntRelease(work_queue=Middleware.poseidon).interface()
            logger.success('<TaskManager> Finish || 采集任务结束')
            return True
    # logger.warning(f"<TaskManager> Hijack<{class_}> || 当前节点不具备采集权限")
    return False
def _sync_actions(
        class_: str,
        mode_sync: str = None,
        only_sync=False,
        beat_sync=True,
):
    """
    Synchronise the <class_> task queue according to mode_sync.

    @param class_: task type (ssr / v2ray / trojan)
    @param mode_sync: 'upload' pushes exec-style task statements to the
        Redis message queue, 'download' pulls them into the local Poseidon
        queue, 'force_run' loads the local preset tasks directly.
        NOTE(review): the None default crashes on ``mode_sync.title()``
        below -- callers appear to always pass a value.
    @param only_sync: beat-sync thread lock -- transfer exactly one atomic task
    @param beat_sync: propagated into the generated task statement
    @return: an overflow-state string, None on upload hijack, or falls
        through after a completed upload/download pass
    """
    logger.info(f"<TaskManager> Sync{mode_sync.title()} || 正在同步<{class_}>任务队列...")

    # TODO make the synchronisation behaviour atomic
    rc = RedisClient()

    # Work on a copy: the pop() below must not mutate the actions list.
    # [A-Cloud, B-Cloud, ...]
    task_list: list = actions.__all__.copy()
    random.shuffle(task_list)

    # Generate tasks locally and feed them into the message queue.
    if mode_sync == 'upload':
        # Interim guard against link-pool overflow.
        if round(rc.__len__(REDIS_SECRET_KEY.format(class_)) * 1.25) > SINGLE_TASK_CAP:
            logger.warning("<TaskManager> UploadHijack -- 连接池任务已溢出,上传任务被劫持")
            return None
        # Keep instantiating collection tasks until the list is drained.
        while True:
            if task_list.__len__() == 0:
                logger.success("<TaskManager> EmptyList -- 本机任务为空或已完全生成")
                break
            else:
                slave_ = task_list.pop()
                # Translate the task into an exec-able statement.
                expr = f'from src.BusinessLogicLayer.cluster.slavers.actions import {slave_}\n' \
                       f'{slave_}(beat_sync={beat_sync}).run()'
                # Push the statement onto the message queue.
                rc.sync_message_queue(mode='upload', message=expr)
                # Beat-sync thread lock: stop after one atomic task.
                if only_sync:
                    logger.warning("<TaskManager> OnlySync -- 触发节拍同步线程锁,仅上传一枚原子任务")
                    break
        logger.info(f"<TaskManager> 本节点任务({actions.__all__.__len__()})已同步至消息队列,"
                    f"待集群接收订阅后既可完成后续任务")
    # Consume tasks from the distributed message queue.
    elif mode_sync == 'download':
        while True:
            # Overload guard: stop syncing when the local buffer nears its
            # capacity. _state is one of continue/offload/stop.
            _state = _is_overflow(task_name=class_, rc=rc)
            if _state != 'continue':
                return _state
            # Fetch one atomic task (already wrapped as an exec statement).
            # todo move enqueueing into the redis layer to get a clean loop-exit condition
            atomic = rc.sync_message_queue(mode='download')
            # A valid atom is forwarded to the local queue.
            if atomic:
                Middleware.poseidon.put_nowait(atomic)
                logger.info(f'<TaskManager> offload atomic<{class_}>')
                # Beat-sync thread lock: stop after one atomic task.
                if only_sync:
                    logger.warning(f"<TaskManager> OnlySync -- <{class_}>触发节拍同步线程锁,仅下载一枚原子任务")
                    return 'offload'
            # No task available -- warn and leave the sync early.
            else:
                logger.warning(f"<TaskManager> SyncFinish -- <{class_}>无可同步任务")
                break
    elif mode_sync == 'force_run':
        for slave_ in task_list:
            # force_run: for standalone deployment or single-step debugging.
            # Even in force_run mode the number of executed tasks must not
            # exceed the queue capacity, hence the overflow check.
            _state = _is_overflow(task_name=class_, rc=rc)
            if _state == 'stop':
                return 'stop'
            # Translate the task into an exec-able statement.
            expr = f'from src.BusinessLogicLayer.cluster.slavers.actions import {slave_}\n' \
                   f'{slave_}(beat_sync={beat_sync}).run()'
            # Push the statement into the local Poseidon queue.
            Middleware.poseidon.put_nowait(expr)
            # Still constrained by the beat-sync thread lock: this serves
            # the host's subscription top-up and pre-empts the capacity
            # check, forcibly interrupting the sync.
            if only_sync:
                logger.warning(f"<TaskManager> OnlySync -- <{class_}>触发节拍同步线程锁,仅下载一枚原子任务")
                return 'stop'
        else:
            logger.success(f"<TaskManager> ForceCollect"
                           f" -- 已将本地预设任务({actions.__all__.__len__()})录入待执行队列")
            return 'offload'
def manage_task(
        class_: str = 'v2ray',
        speedup: bool = True,
        only_sync=False,
        startup=None,
        beat_sync=True,
        force_run=None
) -> bool:
    """
    Load and dispatch collection tasks for one subscription type.

    @param force_run: debug-mode forced run; escapes the queue-saturation check
    @param startup: create the coroutine workspace and run the queued tasks
        concurrently; None falls back to the deployment-config permission
    @param only_sync: beat-sync thread lock; when the local task count > 0,
        push exactly one atomic task into the Poseidon coroutine space
    @param class_: task type -- must be in the crawler sequence
        (ssr / v2ray / trojan)
    @param speedup: use the acceleration plugin (coroutine-speedup by default)
    @param beat_sync: forwarded to the task-sync layer
    @return: True when a collection run was started, False otherwise
    """
    # ----------------------------------------------------
    # Parameter review and translation
    # ----------------------------------------------------
    # Validate the requested task type.
    if class_ not in CRAWLER_SEQUENCE or not isinstance(class_, str):
        return False
    # Collector permission: an explicit argument overrides the deployment
    # config (manual override is intentionally allowed).
    local_work: bool = startup if startup else ENABLE_DEPLOY.get('tasks').get('collector')
    # force_run: an explicit argument wins; otherwise follow the standalone
    # flag (standalone deployments force-run by default; undefined/null
    # means off).
    force_run = force_run if force_run else SINGLE_DEPLOYMENT
    # ----------------------------------------------------
    # Resolve the sync mode
    # ----------------------------------------------------
    # Collector nodes download tasks; non-collector nodes upload them.
    mode_sync = "download" if local_work else "upload"
    # force_run has the highest priority and replaces either mode.
    mode_sync = "force_run" if force_run else mode_sync
    # ----------------------------------------------------
    # Synchronise the message (task) queue
    # ----------------------------------------------------
    # Collector nodes sync tasks for local execution; other nodes only
    # generate tasks and enqueue them.
    response: "str | bool" = _sync_actions(
        class_=class_,
        only_sync=only_sync,
        beat_sync=beat_sync,
        mode_sync=mode_sync,
    )
    # ----------------------------------------------------
    # Initialise the coroutine space (execute the tasks)
    # ----------------------------------------------------
    # A coroutine space is created only on nodes with collector permission;
    # entry via control-deploy implies that permission exists.
    if force_run:
        if response == 'offload':
            logger.info(f'<TaskManager> ForceRun || <{class_}>采集任务启动')
            vsu(core=PuppetCore(), docker=Middleware.poseidon).run(speedup)
            logger.success(f'<TaskManager> ForceWorkFinish || <{class_}>采集任务结束')
            return True
    # if 'force_run' is False and the node has the permissions of collector
    if local_work:
        # if task queue can be work
        if response == 'offload':
            logger.info(f'<TaskManager> Run || <{class_}>采集任务启动')
            vsu(core=PuppetCore(), docker=Middleware.poseidon).run(speedup)
            logger.success(f'<TaskManager> Finish || <{class_}>采集任务结束')
            return True
    else:
        logger.warning(f"<TaskManager> Hijack<{class_}> || 当前节点不具备采集权限")
        return False
def _sync_actions(
        class_: str,
        mode_sync: str = None,
        only_sync=False,
        beat_sync=True,
):
    """
    Synchronise the <class_> task queue according to mode_sync.

    @param class_: task type (ssr / v2ray / trojan)
    @param mode_sync: 'upload' pushes task markers to the Redis message
        queue, 'download' pulls atomic tasks into the local Poseidon queue,
        'force_run' loads local collector entities directly.
        NOTE(review): the None default crashes on ``mode_sync.title()``
        below -- callers appear to always pass a value.
    @param only_sync: beat-sync thread lock -- transfer exactly one atomic task
    @param beat_sync: forwarded to ActionShunt when building collector entities
    @return: an overflow-state string, None on upload hijack, or falls
        through after a completed upload
    """
    logger.info(
        f"<TaskManager> Sync{mode_sync.title()} || 正在同步<{class_}>任务队列...")

    # TODO make the synchronisation behaviour atomic
    rc = RedisClient()

    # Beat pause: bail out early when the pool is saturated.
    _state = _is_overflow(task_name=class_, rc=rc)
    if _state == 'stop':
        return _state

    # Factory-produce collector entities for this task type, shuffled so
    # they are not always emitted in declaration order.
    sync_queue: list = ActionShunt(class_, silence=True, beat_sync=beat_sync).shunt()
    random.shuffle(sync_queue)

    # Generate tasks locally and feed them into the message queue.
    if mode_sync == 'upload':
        # fixme: interim guard against link-pool overflow
        if round(rc.__len__(REDIS_SECRET_KEY.format(class_)) * 1.25) > SINGLE_TASK_CAP:
            logger.warning("<TaskManager> UploadHijack -- 连接池任务即将溢出,上传任务被劫持")
            return None
        # Emit one message per produced collector entity.
        for _ in range(sync_queue.__len__()):
            rc.sync_message_queue(mode='upload', message=class_)
            # Beat-sync thread lock: stop after one atomic task.
            if only_sync:
                logger.warning("<TaskManager> OnlySync -- 触发节拍同步线程锁,仅上传一枚原子任务")
                break
        logger.success("<TaskManager> UploadTasks -- 任务上传完毕")
    # Consume tasks from the distributed message queue.
    elif mode_sync == 'download':
        async_queue: list = []
        while True:
            # Fetch one atomic task.
            atomic = rc.sync_message_queue(mode='download')
            # Only sync data for a recognised task type.
            if atomic and atomic in CRAWLER_SEQUENCE:
                # Overload guard: stop syncing when the local buffer nears
                # its capacity. _state is one of continue/offload/stop.
                _state = _is_overflow(task_name=atomic, rc=rc)
                if _state != 'continue':
                    return _state
                # Lazily (re)fill the local entity queue for this type.
                if async_queue.__len__() == 0:
                    async_queue = ActionShunt(atomic, silence=True, beat_sync=beat_sync).shunt()
                    random.shuffle(async_queue)
                # Push one entity into the local Poseidon queue.
                Middleware.poseidon.put_nowait(async_queue.pop())
                logger.info(
                    f'<TaskManager> offload atomic<{atomic}>({Middleware.poseidon.qsize()})'
                )
                # Beat-sync thread lock: stop after one atomic task.
                if only_sync:
                    logger.warning(
                        f"<TaskManager> OnlySync -- <{atomic}>触发节拍同步线程锁,仅下载一枚原子任务"
                    )
                    return 'offload'
            # No (valid) task available -- leave the sync early.
            else:
                # logger.warning(f"<TaskManager> SyncFinish -- <{atomic}>无可同步任务")
                return 'offload'
    elif mode_sync == 'force_run':
        for slave_ in sync_queue:
            # force_run: for standalone deployment or single-step debugging.
            # Even in force_run mode the number of executed tasks must not
            # exceed the queue capacity, hence the overflow check.
            _state = _is_overflow(task_name=class_, rc=rc)
            if _state != 'continue':
                return _state
            # Push the entity into the local Poseidon queue.
            Middleware.poseidon.put_nowait(slave_)
            # Still constrained by the beat-sync thread lock: this serves
            # the host's subscription top-up and pre-empts the capacity
            # check, forcibly interrupting the sync.
            if only_sync:
                logger.warning(
                    f"<TaskManager> OnlySync -- <{class_}>触发节拍同步线程锁,仅下载一枚原子任务")
                return 'stop'
        return 'offload'