Example #1
    def _deploy_jobs(self):
        """

        @return:
        """

        try:
            for docker in self.dockers:
                # Add the job (note: jitter is an IntervalTrigger option; passed
                # to add_job alongside a trigger instance it is silently dropped)
                self.scheduler_.add_job(
                    func=docker['api'],
                    trigger=IntervalTrigger(
                        seconds=self.interval_[docker['name']],
                        jitter=5),
                    id=docker['name'])
                # Log the registration
                logger.info(
                    f'<BlockingScheduler> Add job -- <{docker["name"]}>'
                    f' IntervalTrigger: {self.interval_[docker["name"]]}s')
            # Start the scheduler (blocks)
            self.scheduler_.start()
        except KeyboardInterrupt as err:
            logger.warning('Forced stop ||{}'.format(err))
        except Exception as err:
            logger.exception(f'<BlockingScheduler>||{err}')
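For context, a minimal sketch of the scaffolding Example #1 assumes: an APScheduler BlockingScheduler plus the `dockers`/`interval_` structures read by the loop above. The `Deployer` class name and the inline job body are hypothetical.

from apscheduler.schedulers.blocking import BlockingScheduler
from apscheduler.triggers.interval import IntervalTrigger


class Deployer:
    def __init__(self):
        self.scheduler_ = BlockingScheduler()
        # Each docker maps a job id ('name') to the callable the scheduler runs ('api')
        self.dockers = [{'name': 'collector', 'api': lambda: print('collect once')}]
        self.interval_ = {'collector': 120}  # seconds between runs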
Example #2
 def deploy_jobs(self):
     try:
         for docker in self.dockers:
             # Add the job
             job = self.scheduler_.add_job(
                 func=docker['api'],
                 trigger=IntervalTrigger(
                     seconds=self.interval_[docker['name']],
                     # Apply up to 5s of random jitter (a trigger option; it is
                     # silently ignored if passed to add_job with a trigger instance)
                     jitter=5),
                 id=docker['name'],
                 # Cap concurrent running instances of this job
                 max_instances=16,
                 # Coalesce a backlog of queued runs of this job into a single run
                 coalesce=True,
             )
             self.jobs.append(job)
             # Log the registration
             logger.info(
                 f'<BlockingScheduler> Add job -- <{docker["name"]}>'
                 f' IntervalTrigger: {self.interval_[docker["name"]]}s')
         # Start the scheduler (blocks)
         self.scheduler_.start()
     except KeyboardInterrupt:
         self.scheduler_.shutdown(wait=False)
         logger.warning(
             "<BlockingScheduler> The admin forcibly terminated the scheduled task"
         )
     except Exception as err:
         logger.exception(f'<BlockingScheduler>||{err}')
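Two APScheduler details in Example #2 are worth spelling out: max_instances=16 caps how many instances of the same job may run concurrently when a run overlaps the next trigger, and coalesce=True collapses a backlog of missed runs into one run instead of firing each of them. Both matter for long-running collection jobs that can outlast their interval.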
Example #3
    def run(self, speed_up=True, use_bar=False) -> None:
        """
        协程任务接口
        :return:
        """
        task_list = []

        if isinstance(self.docker, list):
            # Refresh the task queue
            self.load_tasks(tasks=self.docker)
        else:
            # Hand over the work queue directly
            self.work_Q = self.docker

        # Elastic coroutine sizing
        if not speed_up:
            self.power = 1
        else:
            self.flexible_power()
        logger.info(
            f'<Gevent> Flexible Power:{self.power} || Queue Capacity:{self.max_queue_size}'
        )

        # Start the progress bar
        if use_bar:
            import threading
            threading.Thread(target=self.progress_manager,
                             args=(self.max_queue_size, self.progress_name +
                                   '[{}]'.format(self.power))).start()

        for _ in range(self.power):
            task = gevent.spawn(self.launch)
            task_list.append(task)
        gevent.joinall(task_list)
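The workers spawned above all call `self.launch`, which is not shown; presumably it drains `work_Q` until the queue is empty. A minimal sketch of such a worker loop, assuming the queue holds callables:

from gevent.queue import Empty, Queue


def launch(work_Q: Queue) -> None:
    # Drain the shared queue; exit when no task is immediately available
    while True:
        try:
            task = work_Q.get_nowait()
        except Empty:
            break
        task()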
Example #4
 def release_docker():
     """
     由接口解压容器主线功能
     @return:
     """
     logger.info(f'<GeventSchedule> Release docker || Do {docker.__name__}')
     exec(f'docker().{interface}()')
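`exec` on an interpolated string defeats linting and is risky if `interface` is ever externally influenced; the same dynamic dispatch can be written with `getattr`. An equivalent sketch of the body above:

 def release_docker():
     logger.info(f'<GeventSchedule> Release docker || Do {docker.__name__}')
     # Look up the bound method by name and call it -- same effect, no exec
     getattr(docker(), interface)()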
Example #5
    def _scaffold_parse(url, _unused_mode: str = "subscribe"):
        logger.info(f">>> PARSE --> {url}")
        from src.BusinessLogicLayer.plugins.accelerator import cleaner

        # Ensure the cache directory exists
        if not os.path.exists(SERVER_DIR_DATABASE_CACHE):
            os.mkdir(SERVER_DIR_DATABASE_CACHE)

        # Call the cleaner API to parse the link
        result = cleaner.subs2node(url)
        if result and isinstance(result, dict):
            # Unpacking relies on subs2node returning keys in a fixed insertion order
            _, info, nodes = result.values()

            # Node count, minus the invalid comment entries
            _unused_node_num = max(len(nodes) - 2, 0)
            token_ = info.get('token') or ''

            # Cache the data
            cache_sub2node = os.path.join(SERVER_DIR_DATABASE_CACHE,
                                          f'sub2node_{token_}.txt')
            with open(cache_sub2node, 'w', encoding="utf8") as f:
                for node in nodes:
                    f.write(f"{node}\n")

            # Auto-open the cache file; enable only when parsing a single link
            # os.startfile(cache_sub2node)

            cleaner.node2detail(nodes[0])

        else:
            return False
Example #6
    def __init__(self):
        super(RedisDataDisasterTolerance, self).__init__()

        from src.BusinessCentralLayer.setting import REDIS_SLAVER_DDT
        if not REDIS_SLAVER_DDT.get('host'):
            logger.warning('No disaster-tolerance server configured; the Master will take over this duty')
            # Copy the parameters (a real copy, so the global REDIS_MASTER is not mutated)
            redis_virtual = dict(REDIS_MASTER)
            # Shift the copy to the next database index
            redis_virtual.update({'db': redis_virtual['db'] + 1})
            logger.debug("Backup redirected --> {}".format(redis_virtual))
        else:
            redis_virtual = REDIS_SLAVER_DDT
        # Container initialization
        self.docker = {}
        try:
            self.acm = RedisClient(host=redis_virtual['host'],
                                   port=redis_virtual['port'],
                                   password=redis_virtual['password'])
            logger.info("DDT: Master({}) -> Slaver({})".format(
                REDIS_MASTER['host'], redis_virtual['host']))
        except redis.exceptions.ConnectionError as e:
            logger.exception(e)
        finally:
            self.redis_virtual = redis_virtual
Example #7
 def _scaffold_spawn():
     _ConfigQuarantine.check_config(call_driver=True)
     logger.info("<ScaffoldGuider> Spawn || MainCollector")
     from src.BusinessLogicLayer.cluster.slavers import __entropy__
     from src.BusinessLogicLayer.plugins.accelerator import booster
     booster(docker=__entropy__,
             silence=True,
             power=DEFAULT_POWER,
             assault=True)
Example #8
    def run_deploy() -> None:
        """
        定时任务,建议使用if而非for构造任务线程池
        @return:
        """
        # Load the permission config for the scheduled tasks
        tasks = ENABLE_DEPLOY['tasks']
        task2function = {
            'ddt_decouple': _cd.startup_ddt_decouple,
            'ddt_overdue': _cd.startup_ddt_overdue,
        }
        try:

            # Initialize the schedulers
            docker_of_based_scheduler = TasksScheduler()
            docker_of_collector_scheduler = CollectorScheduler()
            # Sanitize the config so scheduling intervals are more reasonable
            interval = _cd.sync_launch_interval()
            # Add jobs
            for docker_name, permission in tasks.items():
                logger.info(
                    f"[Job] {docker_name} -- interval: {interval[docker_name]}s -- run: {permission}"
                )
                # If the collector is enabled, map its task onto CollectorScheduler;
                # to use the old strategy, simply comment out this branch
                if docker_name == "collector":
                    docker_of_collector_scheduler.mapping_config({
                        'interval': interval[docker_name],
                        'permission': permission,
                    })
                    continue
                if permission:
                    docker_of_based_scheduler.add_job({
                        "name": docker_name,
                        "api": task2function[docker_name],
                        'interval': interval[docker_name],
                        'permission': True
                    })
            # Start the scheduled tasks; a collection task must ship with at least one other deployment task
            docker_of_collector_scheduler.deploy_jobs()
            docker_of_based_scheduler.deploy_jobs()
        except ConnectionError:
            logger.warning(
                "<RedisIO> Network communication failure, please check the network connection."
            )
        except KeyError:
            logger.critical(f'The config hub has been tampered with; ENABLE_DEPLOY holds no matching key-value pair {tasks}')
            sys.exit()
        except NameError:
            logger.critical('eval()/exec() syntax error; check whether variable names are inconsistent.')
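For context, the `ENABLE_DEPLOY['tasks']` mapping consumed above presumably carries one boolean per docker name; a sketch inferred from the keys used in this snippet (values illustrative):

ENABLE_DEPLOY = {
    'tasks': {
        'collector': True,      # routed to CollectorScheduler
        'ddt_decouple': True,   # -> _cd.startup_ddt_decouple
        'ddt_overdue': True,    # -> _cd.startup_ddt_overdue
    }
}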
Example #9
 def run(self, api=None):
     logger.info("DO -- <{}>:beat_sync:{}".format(self.action_name, self.beat_sync))
     api = self.set_spider_option() if api is None else api
     try:
         self.get_html_handle(api, self.register_url, 60)
         self.verify(api)
         print(self.identity)
         self.divide_and_rule()
         self.sign_up(api)
     finally:
         api.quit()
Example #10
    def run(self):
        logger.info("DO -- <{}>:beat_sync:{}".format(self.action_name, self.beat_sync))

        # ======================================
        # Fetch task settings
        # ======================================
        api = self.set_spider_option()
        # ======================================
        # Execute the core business logic
        # ======================================
        try:
            # Navigate to the page
            # A timeout (bastion host/Cloudflare/WebError/traffic hijack/IP firewall) raises TimeoutException
            self.get_html_handle(api=api, url=self.register_url, wait_seconds=15)

            # Register an account
            self.sign_up(api)

            # Wait for the core element to load/render
            self.wait(api, 20, "//div[@class='card-body']")

            # Capture each subscription type
            if self.hyper_params['v2ray']:
                self.load_any_subscribe(
                    api,
                    "//div[@class='buttons']//a[contains(@class,'v2ray')]",
                    'data-clipboard-text',
                    'v2ray'
                )
            elif self.hyper_params['ssr']:
                self.load_any_subscribe(
                    api,
                    """//a[@onclick="importSublink('ssr')"]/..//a[contains(@class,'copy')]""",
                    'data-clipboard-text',
                    'ssr'
                )
            # elif self.hyper_params['trojan']: ...
            # elif self.hyper_params['kit']: ...
            # elif self.hyper_params['qtl']: ...
        except TimeoutException:
            logger.error(f'>>> TimeoutException <{self.action_name}> -- {self.register_url}')
        except WebDriverException as e:
            logger.error(f">>> WebDriverException <{self.action_name}> -- {e}")
        except Exception as e:
            logger.exception(f">>> Exception <{self.action_name}> -- {e}")
        finally:
            api.quit()
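The `wait` helper used above is not shown; with standard Selenium it is presumably a thin wrapper over WebDriverWait. A minimal sketch of such a helper:

from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


def wait(api, timeout: int, xpath: str):
    # Block until the element located by xpath is present, or raise TimeoutException
    return WebDriverWait(api, timeout).until(
        EC.presence_of_element_located((By.XPATH, xpath)))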
Example #11
    def _scaffold_ash():
        """
        无尽套娃
        """
        from src.BusinessLogicLayer.apis import scaffold_api
        logger.info("<ScaffoldGuider> ash | Clash订阅堆一键生成脚本")

        # --------------------------------------------------
        # Platform guard (Windows only). Note `'win' in sys.platform`
        # would also match 'darwin', so test the prefix instead.
        # --------------------------------------------------
        if not sys.platform.startswith('win'):
            return

        # --------------------------------------------------
        # Run the script
        # --------------------------------------------------
        return scaffold_api.ash(debug=True, decouple=True)
Example #12
    def collector(self,
                  silence: bool = True,
                  debug: bool = False,
                  page_num: int = 26,
                  sleep_node: int = 5):
        """
        STAFF site collector

        Use Selenium to obtain small batch samples through Google Search Engine
        (according to statistics, there are about 245 legal sites worldwide)

        The collection principle is roughly as follows:
        Use the characteristic word SEO to detect whether the target site exists `/staff` page content.

        :param silence: True starts headless (default); False shows the browser (use only when debugging)
        :param debug:
        :param page_num: number of "pages" to collect; one page holds about 10 search results, so per the note above set page_num to around 26
        :param sleep_node: sleep for a random duration every this many pages; defaults to 5
        :return:
        """
        logger.info(
            f"Successfully obtained interface permissions -> {StaffCollector.__name__}"
        )

        try:
            # Instantiate the collector
            StaffCollector(
                silence=silence,
                # cache_path is the output directory for collected site links
                cache_path=self._cache_path_staff_hosts,
                chromedriver_path=CHROMEDRIVER_PATH,
                debug=debug).run(page_num=page_num, sleep_node=sleep_node)
        except CollectorSwitchError:
            logger.error(
                "<StaffCollector> Traffic interception is detected, and the system is taking a backup plan"
            )
        except IndexError:
            logger.warning(
                "<StaffCollector> An error occurred while switching the page number"
            )
        except NoSuchWindowException:
            logger.error("<StaffCollector> The Chromedriver exited abnormally")
        except Exception as e:
            logger.exception(f"<StaffCollector> {e}")
Example #13
    def run(self):
        logger.info("DO -- <{}>:beat_sync:{}".format(self.__class__.__name__,
                                                     self.beat_sync))

        api = self.set_spider_option()

        api.get(self.register_url)

        try:
            self.sign_up(api)

            self.wait(api, 20, "//div[@class='card-body']")

            # get v2ray link
            if self.hyper_params['v2ray']:
                self.load_any_subscribe(
                    api,
                    "//div[@class='buttons']//a[contains(@class,'v2ray')]",
                    'data-clipboard-text', 'v2ray')

            # get ssr link
            elif self.hyper_params['ssr']:
                self.load_any_subscribe(
                    api,
                    """//a[@onclick="importSublink('ssr')"]/..//a[contains(@class,'copy')]""",
                    'data-clipboard-text', 'ssr')
            # if self.hyper_params['trojan']: ...
            # if self.hyper_params['kit']: ...
            # if self.hyper_params['qtl']: ...
        except TimeoutException:
            logger.error(
                f'>>> TimeoutException <{self.__class__.__name__}> -- {self.register_url}'
            )
        # except WebDriverException as e:
        #     logger.exception(f">>> Exception <{self.__class__.__name__}> -- {e}")
        except Exception as e:
            logger.exception(
                f">>> Exception <{self.__class__.__name__}> -- {e}")
        finally:
            # Middleware.hera.put_nowait("push")
            api.quit()
Example #14
    def run_business(self):
        # 1. Clean expired subscriptions
        if self.decouple:
            logger.info("<ClashTaskAsh> ash | Cleaning the subscription pool...")
            SubscribesCleaner(debug=False).interface()
        # 2. Pull the subscription pool
        logger.info("<ClashTaskAsh> ash | Pulling the subscription heap...")
        rc = RedisClient().get_driver()
        rss_pool = [subscribe for key_ in CRAWLER_SEQUENCE for subscribe, _ in
                    rc.hgetall(REDIS_SECRET_KEY.format(key_)).items()]
        # 2.1 Filter subscriptions to avoid duplicate names
        rss_dict = {}
        for url in rss_pool:
            rss_dict.update({f"{urlparse(url).netloc}@{urlparse(url).query}": url})
        rss_pool = list(rss_dict.values())

        # 2.2 Detach the selected subscriptions (take them out); skipped in debug mode
        if not self.debug:
            for subscribe in rss_pool:
                detach(subscribe=subscribe)
        # 3. Subscription conversion
        logger.info("<ClashTaskAsh> ash | Converting the subscription mode...")
        # 4. Run the conversion and cache the config file
        clash_adapter.api.run(subscribe=rss_pool)
        # 5. Open a local connection and launch Clash
        webbrowser.open(clash_adapter.api.url_scheme_download()['info'].format("http://127.0.0.1:8847/V2Ray云彩姬"))
        time.sleep(5)
        return True
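The 2.1 dedup step keys each subscription by netloc plus query string, so two links differing only in path collapse into one entry; for example:

from urllib.parse import urlparse

url = "https://airport.example/link/abcd?sub=3"
key = f"{urlparse(url).netloc}@{urlparse(url).query}"
# key == "airport.example@sub=3"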
Example #15
def _is_overflow(task_name: str, rc=None):
    """
    判断当前缓存是否已达单机采集极限
    @param task_name: class_
    @param rc: RedisClient Object Driver API
    @return:
        --stop: 停止任务同步并结束本轮采集任务
        --offload:停止任务同步并开始执行采集任务
        --continue:继续同步任务
    """

    # TODO make the cache operations atomic
    cap: int = SINGLE_TASK_CAP

    # Get the current occupancy of the remote store
    storage_remain: int = rc.__len__(REDIS_SECRET_KEY.format(task_name))

    # Get the local task cache size
    cache_size: int = Middleware.poseidon.qsize()

    # Check whether the task queue is full or already overflowing
    if storage_remain >= cap:
        logger.warning(
            f'<TaskManager> OverFlow || task overflow <{task_name}>({storage_remain}/{cap})'
        )
        return 'stop'

    # Check whether the buffer queue has hit the single-machine collection limit.
    # To guard against absolute overflow, cap the local task count at ~x% of full load,
    # where x = 1 for a single collector, else x = 1/(number of processes)
    elif storage_remain + cache_size > round(cap * 0.8):
        # At or beyond the limit: pause the task
        logger.info(
            f'<TaskManager> BeatPause || beat pause <{task_name}>({storage_remain + cache_size}/{cap})'
        )
        return 'offload'

    # Otherwise keep syncing tasks
    else:
        return 'continue'
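For instance, with SINGLE_TASK_CAP = 1000, storage_remain = 700 and cache_size = 150, the store is below cap but 700 + 150 > round(1000 * 0.8) = 800, so the function returns 'offload'.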
Example #16
 def run(self):
     logger.info("DO -- <{}>:beat_sync:{}".format(self.action_name, self.beat_sync))
     # Fetch task settings
     api = self.set_spider_option()
     # Execute the core business logic
     try:
         # Set an elastic timer: if the target site fails to render as expected within
         # the allotted time, the instance destroys itself, keeping the bot from
         # blundering into a "battle" and getting shoulder-thrown by the site admin
         self.get_html_handle(api=api, url=self.register_url, wait_seconds=45)
         # Register an account
         self.sign_up(api)
         # Enter the site and wait for the core element to finish rendering
         self.wait(api, 40, "//div[@class='card-body']")
         # Capture subscriptions in priority order of atomic type
         if self.hyper_params['v2ray']:
             self.load_any_subscribe(
                 api,
                 "//div[@class='buttons']//a[contains(@class,'v2ray')]",
                 'data-clipboard-text',
                 'v2ray'
             )
         elif self.hyper_params['ssr']:
             self.load_any_subscribe(
                 api,
                 """//a[@onclick="importSublink('ssr')"]/..//a[contains(@class,'copy')]""",
                 'data-clipboard-text',
                 'ssr'
             )
         # elif self.hyper_params['trojan']: ...
         # elif self.hyper_params['kit']: ...
         # elif self.hyper_params['qtl']: ...
     except TimeoutException:
         logger.error(f'>>> TimeoutException <{self.action_name}> -- {self.register_url}')
     except WebDriverException as e:
         logger.error(f">>> WebDriverException <{self.action_name}> -- {e}")
     except Exception as e:
         logger.exception(f">>> Exception <{self.action_name}> -- {e}")
     finally:
         api.quit()
Example #17
    def startup(self, driver_command_set: List[str]):
        """
        仅支持单进程使用
        @param driver_command_set: 在空指令时列表仅有1个元素,表示启动路径
        @return:
        """
        # logger.info(f">>> {' '.join(driver_command_set)}")

        # -------------------------------
        # TODO Priority 0: preprocess the command set
        # -------------------------------
        # CommandId or List[CommandId]
        driver_command: List[str] = []

        # No command given: print the scaffold overview
        if len(driver_command_set) == 1:
            print("\n".join([
                f">>> {menu[0].ljust(20, '-')}|| {menu[-1]}"
                for menu in command_set.items()
            ]))
            return True
        # A single immediate command: translate it
        if len(driver_command_set) == 2:
            driver_command = [
                driver_command_set[-1].lower(),
            ]
        # A command set: translate the whole set
        elif len(driver_command_set) > 2:
            driver_command = list(
                {command.lower()
                 for command in driver_command_set[1:]})

        # Catch unexpected cases
        if not isinstance(driver_command, list):
            return True
        # -------------------------------
        # TODO Priority 1: parse runtime flags
        # -------------------------------

        # TODO --help menu (related functionality still being fleshed out)
        # When this flag is present, the system does not parse run commands
        if '--help' in driver_command:
            logger.info(">>>GuiderHelp || Help menu")
            driver_command.remove("--help")
            for command_ in driver_command:
                introduction = command_set.get(command_)
                if introduction:
                    print(f"> {command_.ljust(20, '-')}|| {introduction}")
                else:
                    print(f"> {command_}指令不存在")
            return True

        # 智能采集 解析目标
        if '--parse' in driver_command:
            driver_command.remove('--parse')
            task_list = []
            for url_ in reversed(driver_command):
                if url_.startswith(("http", "ssr", "vmess")):
                    task_list.append(
                        gevent.spawn(self._scaffold_parse, url=url_))
            gevent.joinall(task_list)
            return True

        # Clear the system cache
        if 'clear' in driver_command:
            driver_command.remove('clear')
            self._scaffold_clear()
            return True
        # -------------------------------
        # TODO Priority 2: run single-step commands
        # -------------------------------

        # Coroutine task queue
        task_list = []

        # The dispatch loop below drains driver_command, so detach the blocking
        # 'deploy' directive first and run it once the gevent tasks complete
        should_deploy = 'deploy' in driver_command
        if should_deploy:
            driver_command.remove('deploy')

        # Dispatch the remaining single-step commands
        while driver_command:
            _pending_command = driver_command.pop()
            try:
                task_list.append(
                    gevent.spawn(self.command2solution[_pending_command]))
            except KeyError as e:
                logger.warning(f'The scaffold has not yet authorized command <{_pending_command}> {e}')

        # Run the commands above concurrently
        gevent.joinall(task_list)

        # -------------------------------
        # TODO Priority 3: deployment with custom parameters (blocks the thread)
        # -------------------------------
        if should_deploy:
            self._scaffold_deploy()
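Usage sketch: the method expects the raw argv-style list, so a typical call (with a hypothetical `guider` instance) looks like:

import sys

# e.g. `python main.py ping decouple` -> sys.argv == ['main.py', 'ping', 'decouple']
guider.startup(driver_command_set=sys.argv)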
Example #18
def sync_actions(
    class_: str,
    mode_sync: str = None,
    only_sync=False,
    beat_sync=True,
):
    """

    @param class_:
    @param mode_sync: sync mode -- 'upload' | 'download' | 'force_run' (see the branches below)
    @param only_sync:
    @param beat_sync:
    @return:
    """
    logger.info(
        f"<TaskManager> Sync{mode_sync.title()} || Syncing the <{class_}> task queue...")

    # ================================================
    # Beat pause, atomic sync
    # ================================================
    rc = RedisClient()
    _state = _is_overflow(task_name=class_, rc=rc)
    if _state == 'stop':
        return _state

    # ================================================
    # Update task information
    # ================================================
    # Publish the data of the collection tasks about to launch
    _update_entropy(rc=rc, entropy=__entropy__)
    # Have the factory read the mapping table and mass-produce collector runtime entities
    sync_queue: list = ActionShunt(class_, silence=True,
                                   beat_sync=beat_sync).shunt()
    # Shuffle the task sequence
    random.shuffle(sync_queue)

    # ================================================
    # Execute the core business
    # ================================================
    if mode_sync == 'upload':
        # fixme: interim fix for the link overflow problem
        if round(rc.get_len(REDIS_SECRET_KEY.format(class_)) *
                 1.25) > SINGLE_TASK_CAP:
            logger.warning("<TaskManager> UploadHijack -- the pool is about to overflow; the upload task was hijacked")
            return None
        # Keep instantiating collection tasks
        for _ in range(len(sync_queue)):
            rc.sync_message_queue(mode='upload', message=class_)
            # Beat-sync thread lock
            if only_sync:
                logger.warning("<TaskManager> OnlySync -- beat-sync thread lock triggered; uploading a single atomic task only")
                break
        logger.success("<TaskManager> UploadTasks -- upload complete")
    elif mode_sync == 'download':
        async_queue: list = []
        while True:
            # Fetch an atomic task
            atomic = rc.sync_message_queue(mode='download')
            # If the atom is valid, sync the data
            if atomic and atomic in CRAWLER_SEQUENCE:
                # Check the sync state to guard against overload: stop syncing
                # when the local buffer is about to breach its capacity limit
                # _state is one of continue/offload/stop
                _state = _is_overflow(task_name=atomic, rc=rc)
                if _state != 'continue':
                    return _state
                if not async_queue:
                    async_queue = ActionShunt(atomic,
                                              silence=True,
                                              beat_sync=beat_sync).shunt()
                    random.shuffle(async_queue)
                # Push the collector entity onto the local Poseidon message queue
                Middleware.poseidon.put_nowait(async_queue.pop())
                logger.info(
                    f'<TaskManager> offload atomic<{atomic}>({Middleware.poseidon.qsize()})'
                )
                # Beat-sync thread lock
                if only_sync:
                    logger.warning(
                        f"<TaskManager> OnlySync -- <{atomic}> triggered the beat-sync thread lock; downloading a single atomic task only"
                    )
                    return 'offload'
            else:
                return 'offload'
    elif mode_sync == 'force_run':
        for slave_ in sync_queue:
            # ================================================================================================
            # TODO new in v5.4.r: scaffold spawn
            # 1. In earlier versions, neither the scaffold `run` nor `force-run` command could start a
            #    collection task while the queue was full, mainly because the lines below take a lock.
            # 2. The new `spawn` command bypasses this module: SpawnBooster compiles the underlying code
            #    directly and starts the collector.
            # ================================================================================================
            # force_run: suited to single-machine deployment or single-step debugging.
            # Overflow must still be ruled out, so even in force_run mode the executing
            # task count must not exceed the task capacity.
            _state = _is_overflow(task_name=class_, rc=rc)
            if _state != 'continue':
                return _state

            # Push the collector entity onto the local Poseidon message queue
            Middleware.poseidon.put_nowait(slave_)

            # Beat-sync thread lock
            if only_sync:
                logger.warning(
                    f"<TaskManager> OnlySync -- <{class_}> triggered the beat-sync thread lock; downloading a single atomic task only")
                return 'stop'

        return 'offload'
Example #19
def manage_task(class_: str = 'v2ray',
                only_sync=False,
                run_collector=None,
                beat_sync=True,
                force_run=None) -> bool:
    """
    加载任务
    @param force_run: debug模式下的强制运行,可逃逸队列满载检测
    @param run_collector:创建协程工作空间,并开始并发执行队列任务。
    @param only_sync:节拍同步线程锁。当本机任务数大于0时,将1枚原子任务推送至Poseidon协程空间。
    @param class_: 任务类型,必须在 crawler seq内,如 ssr,v2ray or trojan。
    @param beat_sync:
    @return:
    """

    # ----------------------------------------------------
    # Parameter review and translation
    # ----------------------------------------------------
    # If the requested task type is not authorized on this machine, end the task
    if class_ not in CRAWLER_SEQUENCE:
        return False

    # collector_permission reviews collection permission; override by argument is allowed: a manually passed value grants local collection permission, otherwise the configured permission applies
    collector_permission: bool = ENABLE_DEPLOY.get('tasks').get(
        'collector') if run_collector is None else run_collector

    # force_run: forced run; when unspecified, single-machine deployment decides whether force_run applies,
    # i.e. force_run defaults to on in single-machine mode.
    # If neither the argument nor the deployment form is defined (null), force_run defaults to off
    force_run = force_run if force_run else SINGLE_DEPLOYMENT

    # ----------------------------------------------------
    # Resolve the sync mode
    # ----------------------------------------------------
    # Local collection permission decides between the download and upload sync modes
    mode_sync = "download" if collector_permission is True else "upload"

    # The higher-priority force_run overrides the regular sync mode and forces collection
    mode_sync = "force_run" if force_run is True else mode_sync

    # ----------------------------------------------------
    # Sync the message (task) queue
    # ----------------------------------------------------
    # IF this machine has collection permission, sync tasks to run locally; under single-machine deployment tasks are self-produced and self-consumed.
    # ELSE push the generated tasks onto the message queue
    response: str or bool = sync_actions(
        class_=class_,
        only_sync=only_sync,
        beat_sync=beat_sync,
        mode_sync=mode_sync,
    )

    # ----------------------------------------------------
    # Initialize the coroutine space (execute tasks)
    # ----------------------------------------------------
    # Create the coroutine space if collector permission is enabled locally;
    # entering this function from control-deploy implies this machine may create one
    if force_run:
        if (response == 'offload') and (Middleware.poseidon.qsize() > 0):
            logger.info(f'<TaskManager> ForceRun || <{class_}> collection task started')
            ShuntRelease(work_queue=Middleware.poseidon).interface()
        logger.success(f'<TaskManager> ForceWorkFinish || <{class_}> collection task finished')
        return True

    # if 'force_run' is False and the node has the permissions of collector
    if collector_permission:
        # if task queue can be work
        if (response == 'offload') and (Middleware.poseidon.qsize() > 0):
            logger.info('<TaskManager> Run || collection task started')
            ShuntRelease(work_queue=Middleware.poseidon).interface()
        logger.success('<TaskManager> Finish || collection task finished')
        return True
    # logger.warning(f"<TaskManager> Hijack<{class_}> || this node lacks collection permission")
    return False
Example #20
def _sync_actions(
        class_: str,
        mode_sync: str = None,
        only_sync=False,
        beat_sync=True,
):
    """

    @param class_:
    @param mode_sync: sync mode -- 'upload' | 'download' | 'force_run' (see the branches below)
    @param only_sync:
    @param beat_sync:
    @return:
    """
    logger.info(f"<TaskManager> Sync{mode_sync.title()} || 正在同步<{class_}>任务队列...")

    # TODO 原子化同步行为
    rc = RedisClient()

    # 拷贝生成队列,需使用copy()完成拷贝,否则pop()会影响actions-list本体
    # [A-Cloud,B-Cloud, ...]
    task_list: list = actions.__all__.copy()
    random.shuffle(task_list)

    # Generate tasks locally and push them onto the message queue
    if mode_sync == 'upload':

        # Interim fix for the link overflow problem
        if round(rc.__len__(REDIS_SECRET_KEY.format(class_)) * 1.25) > SINGLE_TASK_CAP:
            logger.warning("<TaskManager> UploadHijack -- the pool has overflowed; the upload task was hijacked")
            return None

        # Keep instantiating collection tasks
        while True:
            if len(task_list) == 0:
                logger.success("<TaskManager> EmptyList -- local tasks are empty or fully generated")
                break
            else:
                slave_ = task_list.pop()

                # Convert the task invocation into exec-able source
                expr = f'from src.BusinessLogicLayer.cluster.slavers.actions import {slave_}\n' \
                       f'{slave_}(beat_sync={beat_sync}).run()'

                # Sync the statement to the message queue
                rc.sync_message_queue(mode='upload', message=expr)

                # Beat-sync thread lock
                if only_sync:
                    logger.warning("<TaskManager> OnlySync -- beat-sync thread lock triggered; uploading a single atomic task only")
                    break

        logger.info(f"<TaskManager> 本节点任务({actions.__all__.__len__()})已同步至消息队列,"
                    f"待集群接收订阅后既可完成后续任务")

    # Sync tasks from the distributed message queue
    elif mode_sync == 'download':
        while True:

            # Check the sync state to guard against overload: stop syncing
            # when the local buffer is about to breach its capacity limit
            # _state is one of continue/offload/stop
            _state = _is_overflow(task_name=class_, rc=rc)
            if _state != 'continue':
                return _state

            # Fetch an atomic task; it should already be wrapped as exec-able source
            # todo wrap the enqueue operation inside redis to get a sane loop-exit condition
            atomic = rc.sync_message_queue(mode='download')

            # If the atom is valid, sync the data
            if atomic:
                # Push the statement onto the local Poseidon message queue
                Middleware.poseidon.put_nowait(atomic)
                logger.info(f'<TaskManager> offload atomic<{class_}>')

                # Beat-sync thread lock
                if only_sync:
                    logger.warning(f"<TaskManager> OnlySync -- <{class_}> triggered the beat-sync thread lock; downloading a single atomic task only")
                    return 'offload'

            # Otherwise log a warning and exit the sync early
            else:
                logger.warning(f"<TaskManager> SyncFinish -- <{class_}> has no tasks to sync")
                break

    elif mode_sync == 'force_run':
        for slave_ in task_list:

            # force_run: suited to single-machine deployment or single-step debugging
            _state = _is_overflow(task_name=class_, rc=rc)

            # Overflow must be ruled out, so even in force_run mode the executing task count must not exceed capacity
            if _state == 'stop':
                return 'stop'

            # Convert the task invocation into exec-able source
            expr = f'from src.BusinessLogicLayer.cluster.slavers.actions import {slave_}\n' \
                   f'{slave_}(beat_sync={beat_sync}).run()'

            # Push the statement onto the local Poseidon message queue
            Middleware.poseidon.put_nowait(expr)

            # force_run mode remains constrained by the beat-sync thread lock;
            # this serves the host's subscription top-up operation and, at higher
            # priority, interrupts the sync regardless of available queue capacity
            if only_sync:
                logger.warning(f"<TaskManager> OnlySync -- <{class_}> triggered the beat-sync thread lock; downloading a single atomic task only")
                return 'stop'
        else:
            logger.success(f"<TaskManager> ForceCollect"
                           f" -- 已将本地预设任务({actions.__all__.__len__()})录入待执行队列")
            return 'offload'
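For reference, the exec-able source generated above looks like this for a hypothetical action named SomeCloud; the Poseidon consumer is presumably expected to run each atom verbatim:

expr = ("from src.BusinessLogicLayer.cluster.slavers.actions import SomeCloud\n"
        "SomeCloud(beat_sync=True).run()")
# Consumer side (assumed):
# exec(expr)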
Example #21
 def _scaffold_ping():
     from src.BusinessCentralLayer.middleware.redis_io import RedisClient
     logger.info(f"<ScaffoldGuider> Ping || {RedisClient().test()}")
Example #22
 def _scaffold_decouple():
     logger.info("<ScaffoldGuider> Decouple || General startup")
     from src.BusinessLogicLayer.plugins.accelerator import SubscribesCleaner
     SubscribesCleaner(debug=True).interface(power=DEFAULT_POWER)
Example #23
def _sync_actions(
    class_: str,
    mode_sync: str = None,
    only_sync=False,
    beat_sync=True,
):
    """

    @param class_:
    @param mode_sync: sync mode -- 'upload' | 'download' | 'force_run' (see the branches below)
    @param only_sync:
    @param beat_sync:
    @return:
    """
    logger.info(
        f"<TaskManager> Sync{mode_sync.title()} || 正在同步<{class_}>任务队列...")

    # TODO 原子化同步行为
    rc = RedisClient()

    # 节拍停顿
    _state = _is_overflow(task_name=class_, rc=rc)
    if _state == 'stop':
        return _state

    sync_queue: list = ActionShunt(class_, silence=True,
                                   beat_sync=beat_sync).shunt()
    random.shuffle(sync_queue)

    # Generate tasks locally and push them onto the message queue
    if mode_sync == 'upload':

        # fixme: interim fix for the link overflow problem
        if round(rc.__len__(REDIS_SECRET_KEY.format(class_)) *
                 1.25) > SINGLE_TASK_CAP:
            logger.warning("<TaskManager> UploadHijack -- the pool is about to overflow; the upload task was hijacked")
            return None

        # Keep instantiating collection tasks
        for _ in range(len(sync_queue)):

            rc.sync_message_queue(mode='upload', message=class_)

            # Beat-sync thread lock
            if only_sync:
                logger.warning("<TaskManager> OnlySync -- beat-sync thread lock triggered; uploading a single atomic task only")
                break
        logger.success("<TaskManager> UploadTasks -- upload complete")

    # Sync tasks from the distributed message queue
    elif mode_sync == 'download':
        async_queue: list = []

        while True:

            # Fetch an atomic task
            atomic = rc.sync_message_queue(mode='download')

            # If the atom is valid, sync the data
            if atomic and atomic in CRAWLER_SEQUENCE:

                # Check the sync state to guard against overload: stop syncing
                # when the local buffer is about to breach its capacity limit
                # _state is one of continue/offload/stop
                _state = _is_overflow(task_name=atomic, rc=rc)
                if _state != 'continue':
                    return _state

                if not async_queue:
                    async_queue = ActionShunt(atomic,
                                              silence=True,
                                              beat_sync=beat_sync).shunt()
                    random.shuffle(async_queue)

                # Push the statement onto the local Poseidon message queue
                Middleware.poseidon.put_nowait(async_queue.pop())

                logger.info(
                    f'<TaskManager> offload atomic<{atomic}>({Middleware.poseidon.qsize()})'
                )

                # Beat-sync thread lock
                if only_sync:
                    logger.warning(
                        f"<TaskManager> OnlySync -- <{atomic}> triggered the beat-sync thread lock; downloading a single atomic task only"
                    )
                    return 'offload'

            # Otherwise exit the sync early
            else:
                # logger.warning(f"<TaskManager> SyncFinish -- <{atomic}> has no tasks to sync")
                return 'offload'

    elif mode_sync == 'force_run':
        for slave_ in sync_queue:

            # force_run: suited to single-machine deployment or single-step debugging.
            # Overflow must still be ruled out, so even in force_run mode the executing task count must not exceed capacity
            _state = _is_overflow(task_name=class_, rc=rc)
            if _state != 'continue':
                return _state

            # Push the statement onto the local Poseidon message queue
            Middleware.poseidon.put_nowait(slave_)

            # force_run mode remains constrained by the beat-sync thread lock;
            # this serves the host's subscription top-up operation and, at higher
            # priority, interrupts the sync regardless of available queue capacity
            if only_sync:
                logger.warning(
                    f"<TaskManager> OnlySync -- <{class_}> triggered the beat-sync thread lock; downloading a single atomic task only")
                return 'stop'

        return 'offload'
Example #24
 def _scaffold_overdue():
     logger.info("<ScaffoldGuider> Overdue || Redis DDT")
     from src.BusinessCentralLayer.middleware.interface_io import SystemInterface
     SystemInterface.ddt()
Example #25
 def _scaffold_run():
     _ConfigQuarantine.check_config(call_driver=True)
     logger.info("<ScaffoldGuider> Run || MainCollector")
     from src.BusinessCentralLayer.middleware.interface_io import SystemInterface
     SystemInterface.run(deploy_=False)
Example #26
def manage_task(
        class_: str = 'v2ray',
        speedup: bool = True,
        only_sync=False,
        startup=None,
        beat_sync=True,
        force_run=None
) -> bool:
    """
    加载任务
    @param force_run: debug模式下的强制运行,可逃逸队列满载检测
    @param startup:创建协程工作空间,并开始并发执行队列任务。
    @param only_sync:节拍同步线程锁。当本机任务数大于0时,将1枚原子任务推送至Poseidon协程空间。
    @param class_: 任务类型,必须在 crawler seq内,如 ssr,v2ray or trojan。
    @param speedup: 使用加速插件。默认使用coroutine-speedup。
    @param beat_sync:
    @return:
    """

    # ----------------------------------------------------
    # Parameter review and translation
    # ----------------------------------------------------

    # Validate input
    if class_ not in CRAWLER_SEQUENCE or not isinstance(class_, str):
        return False

    # Review collection permission; override by argument is allowed: a manually passed value grants local collection permission, otherwise the configured permission applies
    local_work: bool = startup if startup else ENABLE_DEPLOY.get('tasks').get('collector')

    # Forced run: an explicit argument takes priority; when unspecified, single-machine deployment decides whether force_run is enabled,
    # i.e. force_run defaults to on in single-machine mode.
    # If neither the argument nor the deployment form is defined (null), force_run defaults to off
    force_run = force_run if force_run else SINGLE_DEPLOYMENT

    # ----------------------------------------------------
    # Resolve the sync mode
    # ----------------------------------------------------
    # Local collection permission decides between the download and upload sync modes
    mode_sync = "download" if local_work else "upload"

    # The higher-priority force_run overrides the regular sync mode and forces collection
    mode_sync = "force_run" if force_run else mode_sync

    # ----------------------------------------------------
    # Sync the message (task) queue
    # ----------------------------------------------------
    # When this machine can collect, sync tasks to run locally;
    # when it cannot, generate tasks and push them onto the message queue
    response: str or bool = _sync_actions(
        class_=class_,
        only_sync=only_sync,
        beat_sync=beat_sync,
        mode_sync=mode_sync,
    )

    # ----------------------------------------------------
    # Initialize the coroutine space (execute tasks)
    # ----------------------------------------------------
    # Create the coroutine space if collector permission is enabled locally;
    # entering this function from control-deploy implies this machine may create one
    if force_run:
        if response == 'offload':
            logger.info(f'<TaskManager> ForceRun || <{class_}> collection task started')
            vsu(core=PuppetCore(), docker=Middleware.poseidon).run(speedup)
        logger.success(f'<TaskManager> ForceWorkFinish || <{class_}> collection task finished')
        return True

    # if 'force_run' is False and the node has the permissions of collector
    if local_work:
        # if task queue can be work
        if response == 'offload':
            logger.info(f'<TaskManager> Run || <{class_}> collection task started')
            vsu(core=PuppetCore(), docker=Middleware.poseidon).run(speedup)
        logger.success(f'<TaskManager> Finish || <{class_}> collection task finished')
        return True
    else:
        logger.warning(f"<TaskManager> Hijack<{class_}> || this node lacks collection permission")
        return False
Example #27
    def __init__(self) -> None:
        logger.info(f'<SystemInit> SystemEngine -> {platform}')

        # Read the config sequence
        logger.info(f'<ConfigLocate> check_sequence:{CRAWLER_SEQUENCE}')

        # Auto-deploy by default on Linux
        logger.info(f'<DeploySettings> enable_deploy:{ENABLE_DEPLOY}')

        # Coroutine acceleration config
        logger.info(f"<CoroutineSpeedup> Coroutine:{enable_coroutine}")

        # Unpack the interface containers
        logger.info("<UnpackContainer> DockerEngineInterface")

        # Initialize processes
        logger.info(f'<LoadQueue> IndexQueue:{actions.__entropy__}')

        logger.success('<Gevent> Engine core ready; tasks are about to begin')
Example #28
 def _scaffold_force_run():
     _ConfigQuarantine.check_config(call_driver=True)
     logger.info("<ScaffoldGuider> ForceRun || MainCollector")
     from src.BusinessLogicLayer.plugins.accelerator import ForceRunRelease
     ForceRunRelease(task_docker=CRAWLER_SEQUENCE).interface()