def get_init_objs(cls) -> dict:
    """
    Build the objects the tool needs at start-up.

    Reads ``../conf/server.xml`` relative to this file's directory, creates a
    logger when the configuration has a ``logger`` section, and creates the
    QAManager from the ``answerdb`` / ``milvus`` / ``bert_client`` sections.

    @returns {dict} - ``{'logger': <Logger or None>, 'qa_manager': <QAManager>}``
    """
    # Load the server configuration
    _base_dir = os.path.realpath(FileTool.get_file_path(__file__))
    _conf_file = os.path.join(_base_dir, '../conf/server.xml')
    _conf = SimpleXml(_conf_file, encoding='utf-8').to_dict()['server']

    # The logger section is optional
    _log: Logger = None
    if 'logger' in _conf:
        _log = Logger.create_logger_by_dict(_conf['logger'])

    # Database access object
    _manager = QAManager(
        _conf['answerdb'],
        _conf['milvus'],
        _conf['bert_client'],
        logger=_log,
        excel_batch_num=_conf['excel_batch_num'],
        excel_engine=_conf['excel_engine'],
        load_para=False,
    )

    return {'logger': _log, 'qa_manager': _manager}
def __init__(self, html: str, use_xpath2=False):
    """
    Create the parser for an HTML document.

    @param {str} html - HTML source text to parse
    @param {bool} use_xpath2=False - forwarded to SimpleXml as ``use_xpath2``
    """
    _doc_options = {
        'obj_type': EnumXmlObjType.String,
        'parser': 'html',
        'use_xpath2': use_xpath2,
    }
    self.html_doc = SimpleXml(html, **_doc_options)
def init_pipeline_plugins():
    """
    Load the pipeline plugins and create the pipelines used for testing.

    Reads ``conf/server_jade.xml`` under the ``search_by_image`` directory,
    publishes the pipeline parameters as global variables, loads every
    configured plugin path, creates the optional logger, and registers the
    ``EMPTY_PIPELINE`` and ``JADE_PIPELINE`` global variables.
    """
    # Locate the application directory and load its configuration
    _root = os.path.realpath(
        os.path.join(os.path.dirname(__file__), os.path.pardir, 'search_by_image')
    )
    RunTool.set_global_var('EXECUTE_PATH', _root)

    _conf_file = os.path.join(_root, 'conf/server_jade.xml')
    _server = SimpleXml(_conf_file, encoding='utf-8').to_dict()['server']

    RunTool.set_global_var(
        'PIPELINE_PROCESSER_PARA', _server['pipeline']['processer_para']
    )
    RunTool.set_global_var('PIPELINE_ROUTER_PARA', _server['pipeline']['router_para'])

    # plugins_path is a comma separated list of paths relative to the root
    for _rel_path in _server['pipeline']['plugins_path'].split(','):
        Pipeline.load_plugins_by_path(os.path.join(_root, _rel_path.strip()))

    # Optional logger
    _log: Logger = None
    if 'logger' in _server:
        _log_conf = _server['logger']
        for _key in ('conf_file_name', 'logfile_path'):
            # A leading '.' marks a path relative to the application directory
            if _log_conf[_key].startswith('.'):
                _log_conf[_key] = os.path.join(_root, _log_conf[_key])
        _log = Logger.create_logger_by_dict(_log_conf)

    # Empty pipeline used for testing
    RunTool.set_global_var('EMPTY_PIPELINE', Pipeline('empty', '{}', logger=_log))

    # Test pipeline built from the JadeSearch configuration
    _jade = Pipeline(
        'jade_search',
        _server['pipeline']['pipeline_config']['JadeSearch'],
        logger=_log
    )
    RunTool.set_global_var('JADE_PIPELINE', _jade)
def auto_install(self, is_init: bool):
    """
    Scan the plugin directory and install what is found there.

    Every sub directory of ``self.plugin_path`` is read through its
    ``plugin.xml``. A plugin that is already installed is skipped unless this
    is the first run and ``self.always_update`` is set. Afterwards every
    plugin registered in ``t_installed`` that was not seen in the directory
    is uninstalled.

    @param {bool} is_init - whether this is the first run
    """
    _seen_names = list()
    for _sub_dir in FileTool.get_dirlist(self.plugin_path, is_fullpath=False):
        _xml_file = os.path.join(self.plugin_path, _sub_dir, 'plugin.xml')
        _name = SimpleXml(_xml_file).to_dict()['config']['info']['plugin_name']

        # Remember the plugin so it is not uninstalled below
        _seen_names.append(_name)

        # Already installed and no forced update requested: nothing to do
        if self.is_installed(_name) and (not is_init or not self.always_update):
            continue

        self.install(_sub_dir)

    # Uninstall registered plugins whose directory no longer exists
    _rows = self._exec_sql('select plugin_name from t_installed', is_fetchall=True)
    for _row in _rows:
        _registered = _row[0]
        if _registered in _seen_names:
            continue
        self.uninstall(_registered)
def __init__(self):
    """
    Initialise the test parameters.

    Loads ``conf/server.xml`` under the ``search_by_image`` directory,
    publishes the pipeline parameters as global variables, loads the
    configured pipeline plugins, creates the optional logger and finally the
    search engine.
    """
    # Configuration
    self.execute_path = os.path.realpath(
        os.path.join(os.path.dirname(__file__), os.path.pardir, 'search_by_image')
    )
    RunTool.set_global_var('EXECUTE_PATH', self.execute_path)

    _conf_file = os.path.join(self.execute_path, 'conf/server.xml')
    self.server_config = SimpleXml(_conf_file, encoding='utf-8').to_dict()['server']

    # Pipeline plugins
    RunTool.set_global_var(
        'PIPELINE_PROCESSER_PARA', self.server_config['pipeline']['processer_para']
    )
    RunTool.set_global_var(
        'PIPELINE_ROUTER_PARA', self.server_config['pipeline']['router_para']
    )
    # plugins_path is a comma separated list relative to the execute path
    for _rel_path in self.server_config['pipeline']['plugins_path'].split(','):
        Pipeline.load_plugins_by_path(
            os.path.join(self.execute_path, _rel_path.strip())
        )

    # Optional logger
    self.logger: Logger = None
    if 'logger' in self.server_config:
        _log_conf = self.server_config['logger']
        for _key in ('conf_file_name', 'logfile_path'):
            # A leading '.' marks a path relative to the execute path
            if _log_conf[_key].startswith('.'):
                _log_conf[_key] = os.path.join(self.execute_path, _log_conf[_key])
        self.logger = Logger.create_logger_by_dict(_log_conf)

    # Search engine
    self.search_engine = SearchEngine(self.server_config, logger=self.logger)
def test_xml_namespace(self):
    """
    Exercise SimpleXml on a document that uses XML namespaces:
    reading values through prefixed xpaths and creating a new prefixed node.
    """
    print('测试命名空间处理')

    print('测试命名空间处理 - 装载文件并访问信息')
    _doc = SimpleXml(
        self.file_path + '/with_namespace.xml', obj_type=EnumXmlObjType.File,
        encoding=None, use_chardet=True, remove_blank_text=True
    )

    # Prefix -> namespace URI mapping used by every xpath below
    _ns_map = dict()
    _ns_map['people'] = 'http://people.example.com'
    _ns_map['role'] = 'http://characters.example.com'

    # Relative xpath
    _got = _doc.get_value(
        'people:actor[1]/people:name', default='None', namespaces=_ns_map
    )
    _fail_msg = '失败:测试命名空间处理 - 装载文件并访问信息 - 步骤1获取值错误:%s' % _got
    self.assertTrue(_got == 'John Cleese - 中文', _fail_msg)

    # Absolute xpath mixing both namespaces
    _got = _doc.get_value(
        '/people:actors/people:actor[1]/role:character[1]',
        default='None', namespaces=_ns_map
    )
    _fail_msg = '失败:测试命名空间处理 - 装载文件并访问信息 - 步骤2获取值错误:%s' % _got
    self.assertTrue(_got == 'Lancelot', _fail_msg)

    print('测试命名空间处理 - 新增节点')
    _doc.set_value('people:test/role:myname', '新值', namespaces=_ns_map)
    _got = _doc.get_value('people:test/role:myname', default='None', namespaces=_ns_map)
    _fail_msg = '失败:测试命名空间处理 - 新增节点 - 值错误:%s' % _got
    self.assertTrue(_got == '新值', _fail_msg)
def find_elements(self, by: str = MobileBy.ID, value=None, timeout: float = 0.0,
                  interval: float = 0.5) -> list:
    """
    Find elements on the current page, optionally waiting for them to appear.

    The page source is re-read and re-parsed on every attempt, so elements
    that show up while waiting can be found. The search is attempted at
    least once, and the function returns as soon as something matches.

    @param {str|MobileBy} by=MobileBy.ID - lookup type
    @param {str|dict} value=None - lookup argument, depending on the type:
        MobileBy.ID {str} - id of the element without the package prefix, e.g. 'foo_id'
            (matched as ``resource-id="<current_package>:id/<value>"``)
        any other type {str} - value is used as the xpath, e.g. '//div/td[1]'
    @param {float} timeout=0.0 - maximum time to wait, in seconds
    @param {float} interval=0.5 - pause between two attempts, in seconds

    @returns {list[AppElement]} - elements found, empty list when nothing
        matched before the timeout
    """
    _list = list()
    _current_package = self.current_package
    _start = datetime.datetime.now()
    while True:
        # Take a fresh snapshot of the page; reusing the first snapshot
        # would make every retry return the same (empty) result
        _doc = SimpleXml(self.page_source, obj_type=EnumXmlObjType.String)

        if by == MobileBy.ID:
            # Lookup by resource id
            _nodes = _doc.get_nodes('//*[@resource-id="%s:id/%s"]' % (_current_package, value))
        else:
            # Lookup by xpath
            _nodes = _doc.get_nodes(value)

        _list = [AppElement(_element, self, _doc) for _element in _nodes]
        if _list:
            # Found something: return immediately without a trailing sleep
            break

        # Nothing yet: give up once the timeout has elapsed
        if (datetime.datetime.now() - _start).total_seconds() >= timeout:
            break

        time.sleep(interval)

    return _list
def install(self, dir_name: str):
    """
    Install a plugin.

    Copies the plugin's ``static``, ``templates`` and ``config`` sub
    directories (when present) below the matching server directory, records
    the plugin in ``t_installed`` and loads it.

    @param {str} dir_name - directory name of the plugin inside ``self.plugin_path``
    """
    _src_root = os.path.join(self.plugin_path, dir_name)

    # Plugin description
    _info = SimpleXml(
        os.path.join(_src_root, 'plugin.xml')
    ).to_dict()['config']['info']

    # (plugin sub directory, attribute holding the destination root):
    # static files (js/css/img/html...), templates, configuration files
    _copy_plan = (
        ('static', 'static_path'),
        ('templates', 'templates_path'),
        ('config', 'config_path'),
    )
    for _sub_dir, _dest_attr in _copy_plan:
        _src = os.path.join(_src_root, _sub_dir)
        if not os.path.exists(_src):
            continue

        # Create the destination first, then copy everything into it
        _dest = os.path.join(getattr(self, _dest_attr), 'plugin', _info['plugin_name'])
        FileTool.create_dir(_dest, exist_ok=True)
        FileTool.copy_all_with_path(src_path=_src, dest_path=_dest, exist_ok=True)

    # Record the installation
    self._exec_sql(
        "replace into t_installed values(?, ?, ?)",
        para=(_info['plugin_name'], _info['show_name'], dir_name)
    )

    # Load the plugin into the running service
    self.load_plugin(_info['plugin_name'])
def test_xml_to_dict(self):
    """
    Check the conversion of an xml document to a dictionary.
    """
    print('测试SimpleXml - to_dict')
    _doc = SimpleXml(
        self.file_path + '/to_dict.xml', obj_type=EnumXmlObjType.File,
        encoding=None, use_chardet=True, remove_blank_text=True
    )
    _converted = _doc.to_dict(item_dict_xpaths={'/data/country[2]/list': None})
    print(_converted)

    # Check 1
    _value = _converted['data'][0][0]
    self.assertTrue(_value == 2, '失败:测试SimpleXml - to_dict - 检查1:%d' % _value)

    # Check 2
    _value = _converted['data'][0][3][3]
    self.assertTrue(
        _value is True, '失败:测试SimpleXml - to_dict - 检查2:%s' % str(_value)
    )

    # Check 3
    _value = _converted['data'][1]['list'][0]['b1']
    self.assertTrue(
        _value == 'b1', '失败:测试SimpleXml - to_dict - 检查3:%s' % str(_value)
    )
def _setlanguage_cmd_dealfun(self, message='', cmd='', cmd_para='', prompt_obj=None, **kwargs):
    """
    Change the display language.

    Stores the language in the console global parameters, applies it to the
    global i18n object when there is one, and writes it to
    ``/console/language`` in the configuration file.

    @param {string} message='' - prompt message
    @param {string} cmd - key of the command being executed
    @param {string} cmd_para - command argument string (text after the command,
        without the first space); the new language
    @param {PromptPlus} prompt_obj=None - PromptPlus object of the caller, used for printing
    @param {kwargs} - initialisation kwargs passed in by the main process

    @returns {CResult} - always a result with code '00000'; an empty language
        only prints a hint and changes nothing
    """
    _ret = CResult(code='00000')
    _para = self._console_global_para

    _new_lang = cmd_para.strip()
    if not _new_lang:
        prompt_obj.prompt_print(_('language must be not null'))
        return _ret

    _para['language'] = _new_lang
    _i18n = get_global_i18n()
    if _i18n is not None:
        _i18n.lang = _new_lang

    # Persist the new default language in the configuration file
    _conf = SimpleXml(_para['config_file'], encoding=_para['config_encoding'])
    _conf.set_value('/console/language', _new_lang)
    _conf.save()

    prompt_obj.prompt_print(_("Current language set to '$1'", _new_lang))
    return _ret
def console_main(execute_file_path=None, default_config_file=None,
                 console_self_config: dict = None, **kwargs):
    """
    Main entry point that starts the command-line console framework.

    Command-line options are read as ``key=value`` pairs, for example:
    ``console.py config=/conf/config.xml encoding=utf-8 help=y
    shell_cmd=<command> shell_cmdfile=cmdfile.txt cmdfile_encoding=utf-8``

    Depending on the options the function
      - prints the help text (``help``) and returns,
      - runs one command or a JSON array of commands (``shell_cmd``) and exits,
      - runs the commands of a file, one per line (``shell_cmdfile``) and exits,
      - or starts the interactive console.
    In the two shell modes the process exits with status 1 at the first
    failing command and with status 0 when all commands succeeded.

    @param {string} execute_file_path=None - program main directory; callers should
        pass it so the configuration file can be found (defaults to this file's directory)
    @param {string} default_config_file=None - configuration file used when no
        ``config`` option is given (defaults to ``conf/config.xml`` under the main directory)
    @param {dict} console_self_config=None - custom console configuration, stored
        in the global parameters under ``console_self_config``
    """
    # sys.exit() is used instead of the exit() helper, which the site module
    # only injects for interactive use
    import sys

    CONSOLE_GLOBAL_PARA = RunTool.get_global_var('CONSOLE_GLOBAL_PARA')
    if CONSOLE_GLOBAL_PARA is None:
        CONSOLE_GLOBAL_PARA = {}
        RunTool.set_global_var('CONSOLE_GLOBAL_PARA', CONSOLE_GLOBAL_PARA)

    # Custom configuration
    CONSOLE_GLOBAL_PARA['console_self_config'] = console_self_config

    # Program main directory
    if execute_file_path is None:
        CONSOLE_GLOBAL_PARA['execute_file_path'] = os.path.realpath(
            FileTool.get_file_path(__file__))
    else:
        CONSOLE_GLOBAL_PARA['execute_file_path'] = execute_file_path

    # Working directory (the cd / pwd commands operate on it)
    CONSOLE_GLOBAL_PARA['work_path'] = os.getcwd()

    _cmd_opts = RunTool.get_kv_opts()

    _default_config_file = default_config_file
    if _default_config_file is None:
        _default_config_file = os.path.join(
            CONSOLE_GLOBAL_PARA['execute_file_path'], 'conf/config.xml')

    _config = _cmd_opts['config'] if 'config' in _cmd_opts else _default_config_file
    _encoding = _cmd_opts['encoding'] if 'encoding' in _cmd_opts else 'utf-8'
    CONSOLE_GLOBAL_PARA['config_encoding'] = _encoding
    CONSOLE_GLOBAL_PARA['config_file'] = _config

    # Load the configuration file
    _config_xml = SimpleXml(os.path.realpath(_config), encoding=_encoding)
    _config_dict = _config_xml.to_dict()

    # Create the console server
    _server = ConsoleServer(_config_dict['console'])

    if 'help' in _cmd_opts:
        # Print the help text
        _lang = _config_dict['console']['language']
        _help_tips = StringTool.json_to_object(
            _config_dict['console']['shell_cmd_help']
        )

        # Configured language first, then English, then the first language defined
        if _lang in _help_tips:
            _tips = _help_tips[_lang]
        elif 'en' in _help_tips:
            _tips = _help_tips['en']
        else:
            # dict views cannot be indexed on Python 3; take the first key
            _tips = _help_tips[next(iter(_help_tips))]

        _print_str = '\r\n'.join(_tips).replace(
            '{{VERSION}}', _config_dict['console']['version']
        ).replace(
            '{{NAME}}', _config_dict['console']['name']
        ).replace(
            '{{SHELL_CMD_NAME}}', _config_dict['console']['shell_cmd_name']
        )

        print(_print_str)
    elif 'shell_cmd' in _cmd_opts:
        # Run the command(s) given on the command line
        _shell_cmd = _cmd_opts['shell_cmd']
        if _shell_cmd[0: 1] == '[' and _shell_cmd[-1:] == ']':
            # JSON array of commands
            _cmd_list = StringTool.json_to_object(_shell_cmd)
        else:
            _cmd_list = [_shell_cmd]

        for _cmd in _cmd_list:
            _result = _server.call_cmd_directly(_cmd, shell_cmd=True)
            if not _result.is_success():
                # Stop at the first failing command
                sys.exit(1)

        # All commands succeeded
        sys.exit(0)
    elif 'shell_cmdfile' in _cmd_opts:
        # Run the commands of a file, one command per line
        _file_encoding = None
        if 'cmdfile_encoding' in _cmd_opts and _cmd_opts['cmdfile_encoding'] != '':
            _file_encoding = _cmd_opts['cmdfile_encoding']
        _cmd_text = FileTool.get_file_text(_cmd_opts['shell_cmdfile'], encoding=_file_encoding)
        # Normalise line endings before splitting
        _cmd_text = _cmd_text.replace('\r\n', '\n').replace('\r', '\n')
        _cmd_list = _cmd_text.split('\n')

        for _cmd in _cmd_list:
            _result = _server.call_cmd_directly(_cmd, shell_cmd=True)
            if not _result.is_success():
                # Stop at the first failing command
                sys.exit(1)

        # All commands succeeded
        sys.exit(0)
    else:
        _server.start_console()
def test_xml_gbk(self):
    """
    Exercise SimpleXml on a GBK encoded file: reading attributes and values,
    resolving a node's xpath, setting values and attributes on existing and
    new nodes, removing a node and saving the document as utf-8.
    """
    print('测试SimpleXml - gbk编码文件')
    _file = self.file_path + '/encode_gbk.xml'
    _pfile = SimpleXml(_file, obj_type=EnumXmlObjType.File, encoding=None,
                       use_chardet=True, remove_blank_text=True)
    _text = _pfile.get_attr('country[1]', 'name', default='None')
    self.assertTrue(_text == '中国',
                    '失败:测试SimpleXml - gbk编码文件 - 获取属性值错误:%s' % _text)
    _text = _pfile.get_value('country[1]/year')
    self.assertTrue(_text == '2008',
                    '失败:测试SimpleXml - gbk编码文件 - 获取值错误:%s' % _text)

    print('测试SimpleXml - 获取xpath')
    _node = _pfile.get_nodes('/data/country[1]/year')[0]
    _xpath = _pfile.get_xpath(_node)
    self.assertTrue(_xpath == '/data/country[1]/year',
                    '失败:测试SimpleXml - 获取xpath - 路径错误:%s' % _xpath)

    print('测试SimpleXml - 设置值')
    # Value on an existing node
    _pfile.set_value('country[@name="Singapore"]/year', '设置值-原节点')
    _text = _pfile.get_value('country[@name="Singapore"]/year')
    self.assertTrue(
        _text == '设置值-原节点',
        '失败:测试SimpleXml - 测试SimpleXml - 设置值-原节点 - 值错误:%s' % _text)

    # Value on a node that does not exist yet
    _pfile.set_value('new_node/my_node/test', '设置值-新节点')
    _text = _pfile.get_value('new_node/my_node/test')
    self.assertTrue(
        _text == '设置值-新节点',
        '失败:测试SimpleXml - 测试SimpleXml - 设置值-新节点 - 值错误:%s' % _text)

    # Attribute on an existing node
    _pfile.set_attr('country[@name="Singapore"]/rank', 'updated', '设置属性值-原节点')
    _text = _pfile.get_attr('country[@name="Singapore"]/rank', 'updated')
    self.assertTrue(
        _text == '设置属性值-原节点',
        '失败:测试SimpleXml - 测试SimpleXml - 设置属性值-原节点 - 值错误:%s' % _text)

    # Attribute on a node that does not exist yet. The expected value must
    # not be handed to get_attr as an extra argument: the sibling call above
    # uses that slot as ``default=``, which would make this check pass even
    # when the attribute was never written.
    _pfile.set_attr('new_node/my_node/test1', 'updated', '设置属性值-新节点')
    _text = _pfile.get_attr('new_node/my_node/test1', 'updated')
    self.assertTrue(
        _text == '设置属性值-新节点',
        '失败:测试SimpleXml - 测试SimpleXml - 设置属性值-新节点 - 值错误:%s' % _text)

    print('测试SimpleXml - 删除节点')
    _pfile.remove('new_node', hold_tail=True)
    _node = _pfile.get_nodes('new_node')
    self.assertTrue(
        len(_node) == 0,
        '失败:测试SimpleXml - 测试SimpleXml - 删除节点 - 删除错误:%d' % len(_node))

    print('测试SimpleXml - 保存文件')
    _file = _TEMP_DIR + '/encode_gbk_temp.xml'
    _pfile.save(file=_file, encoding='utf-8', xml_declaration=True, pretty_print=True)
# Command-line options given as key=value pairs
_opts = RunTool.get_kv_opts()

_import = _opts.get('import')  # excel file to import
_config = _opts.get('config')  # explicit configuration file
_encoding = _opts.get('encoding', 'utf-8')  # encoding of the configuration file
# "clear first" flag, combined with the requested operation
_truncate = 'true' == _opts.get('truncate', 'false')
_milvus = _opts.get('del_milvus')  # comma separated question collections to delete
_db = 'true' == _opts.get('del_db', 'false')  # reset the database

# Load the configuration; fall back to conf/server.xml next to this file
_execute_path = os.path.realpath(FileTool.get_file_path(__file__))
if _config is None:
    _config = os.path.join(_execute_path, 'conf/server.xml')

_config_xml = SimpleXml(_config, encoding=_encoding)
_server_config = _config_xml.to_dict()['server']

# Optional logger
_logger: Logger = None
if 'logger' in _server_config:
    _logger = Logger.create_logger_by_dict(_server_config['logger'])

# Database access object
_qa_manager = QAManager(
    _server_config['answerdb'],
    _server_config['milvus'],
    _server_config['bert_client'],
    logger=_logger,
    excel_batch_num=_server_config['excel_batch_num'],
    excel_engine=_server_config['excel_engine'],
    load_para=False
)
def load_plugin(self, plugin_name: str):
    """
    Load an installed plugin into the running service.

    Looks up the plugin directory in ``t_installed``, imports every module
    listed in the ``import`` section of its ``plugin.xml``, instantiates the
    configured classes, registers their routes and the plugin's index route,
    and records the plugin in the in-memory plugin tables.

    @param {str} plugin_name - name of the plugin
    """
    _rows = self._exec_sql(
        'select dir_name from t_installed where plugin_name=?',
        para=(plugin_name, ), is_fetchall=True
    )
    _plugin_dir = _rows[0][0]

    # Plugin description
    _plugin_config = SimpleXml(
        os.path.join(self.plugin_path, _plugin_dir, 'plugin.xml')
    ).to_dict()['config']

    # Backend modules
    _loaded = dict()
    self.imported_moudles[plugin_name] = _loaded
    for _module_name, _module_conf in _plugin_config['import'].items():
        # Import the module from the plugin's lib directory
        _lib_path = os.path.join(
            self.plugin_path, _plugin_dir, 'lib', _module_conf['extend_path']
        )
        _module = ImportTool.import_module(_module_name, extend_path=_lib_path)
        _loaded[_module_name] = _module

        # Initialise the classes and add them to the Restful service
        for _class_name, _class_conf in _module_conf['init_class'].items():
            _target = ImportTool.get_member_from_module(_module, _class_name)
            if _class_conf['init_type'] == 'instance':
                # Routes are registered on an instance instead of the class
                _target = _target(
                    config_services=self.config_services,
                    device_services=self.device_services,
                    config_path=os.path.join(
                        self.config_path, 'plugin',
                        _plugin_config['info']['plugin_name']
                    ),
                    plugin_path=self.plugin_path,
                    logger=self.logger
                )

            if not _class_conf.get('add_route_by_class', False):
                continue

            # Blacklist entries are prefixed with the class name
            _blacklist = _class_conf.get('blacklist', [])
            for _pos, _entry in enumerate(_blacklist):
                _blacklist[_pos] = '%s/%s' % (_class_name, _entry)

            self.flask_server.add_route_by_class([_target], blacklist=_blacklist)

    # Entry route of the plugin's home page
    _info = _plugin_config['info']
    if _info.get('url', '') != '':
        self.flask_server.add_route(
            _info['url'], self.plugin_index, endpoint=plugin_name
        )

    # In-memory plugin information
    _info['plugin_name'] = plugin_name  # guard against the name having been changed
    self.plugins_dict[plugin_name] = _info
    if _info['entrance_type'] == 'toolbar':
        _group = self.plugins_toolbar
    else:
        _group = self.plugins_tab
    _group.append(plugin_name)
def _graph_cmd_dealfun(self, message='', cmd='', cmd_para='', prompt_obj=None, **kwargs):
    """
    Generate the APP network relation graph with graphviz.

    The command arguments are merged over a set of defaults; the first
    positional argument ('{para}1') is the input file and is mandatory.
    Nodes and edges come from ``self._formatter_col_list``; when a
    ``trace_id`` is given, nodes and edges of the call chain get the
    ``trace`` attributes of the selected style template.

    @param {string} message='' - prompt message
    @param {string} cmd - key of the command being executed
    @param {string} cmd_para - command argument string (text after the command,
        without the first space)
    @param {PromptPlus} prompt_obj=None - PromptPlus object of the caller, used for printing
    @param {kwargs} - initialisation kwargs passed in by the main process

    @returns {CResult} - code '00000' on success; code '20999' when the file
        argument is missing or any exception was raised (the exception and
        its traceback are printed through prompt_obj)
    """
    _ok_result = CResult(code='00000')
    try:
        # Defaults of the run parameters, overridden by the command arguments
        _execute_path = self._console_global_para['execute_file_path']
        _run_para = {
            'file': '',
            'formatter': 'col-list',
            'comment': 'graph',
            'outformat': 'pdf',
            'direction': 'forward',
            'trace_id': '',
            'display_no_trace': 'false',
            'down_flow_first': 'true',
            'trace_relations': None,
            'save': '',
            'temp': os.path.join(_execute_path, 'temp'),
            'view': 'true',
            'cleanup': 'false',
            'graph_config': os.path.join(_execute_path, 'conf/graph_config.xml'),
            'template': 'default'
        }
        _in_para = self._cmd_para_to_dict(cmd_para, name_with_sign=False)
        _run_para.update(_in_para)

        # The first positional argument is the input file
        if '{para}1' not in _run_para.keys():
            prompt_obj.prompt_print(_('you must give the $1 para!', 'file'))
            return CResult(code='20999')

        _run_para['file'] = _run_para['{para}1']

        if _run_para['save'] == '':
            _run_para['save'] = 'graph.pdf'

        # Style template; an unknown template name falls back to empty styles
        _style_xml = SimpleXml(_run_para['graph_config'], encoding='utf-8')
        _style_dict = _style_xml.to_dict()['graphviz']['templates'].get(
            _run_para['template'], {
                'graph': {},
                'node': {},
                'edge': {},
                'edge_color': {},
                'trace': {}
            }
        )

        # Colors of the relation lines
        _name_mapping = _style_dict['edge_color'].get('name_mapping', {})
        _use_round_color = _style_dict['edge_color'].get('use_round_color', False)
        _round_color = _style_dict['edge_color'].get(
            'round_color', '').replace(' ', '').split(',')

        # Build the relation dictionary
        _graph_json = self._formatter_col_list(_run_para['file'], prompt_obj=prompt_obj)

        # Call-chain (trace) handling
        # NOTE(review): when display_no_trace is 'true', nodes and edges outside
        # the trace are skipped below, which reads as the opposite of the
        # parameter name - confirm the intended semantics
        _display_no_trace = _run_para['display_no_trace'] == 'true'
        if _run_para['trace_id'] != '':
            # Relation names that take part in the trace
            if _run_para['trace_relations'] is None:
                # All relations
                _run_para['trace_relations'] = _graph_json['relation_name']
            else:
                _run_para['trace_relations'] = _run_para['trace_relations'].split(',')

            # Trace information dictionary
            _trace_info = self._get_trace_info(
                _graph_json, _run_para['trace_id'], _run_para['down_flow_first'] == 'true',
                _run_para['direction'], _run_para['trace_relations']
            )
        else:
            # No trace requested: empty trace information
            _trace_info = {
                'trace_id': '',
                'up_nodes': list(),
                'down_nodes': list(),
                'up_edges': list(),
                'down_edges': list()
            }

        # Attributes applied to the nodes and edges of the call chain
        _attr_center_node = _style_dict.get('trace', {}).get('center_node', {})
        _attr_up_node = _style_dict.get('trace', {}).get('up_node', {})
        _attr_down_node = _style_dict.get('trace', {}).get('down_node', {})
        _attr_up_edge = _style_dict.get('trace', {}).get('up_edge', {})
        _attr_down_edge = _style_dict.get('trace', {}).get('down_edge', {})

        # Color of every relation: explicit mapping first, then the round
        # colors in turn (when enabled), otherwise no color
        _relation_color = dict()
        _index = 0
        for _relation_name in _graph_json['relation_name']:
            if _relation_name in _name_mapping:
                _relation_color[_relation_name] = _name_mapping[_relation_name]
            elif _use_round_color:
                _color_index = _index % len(_round_color)
                _relation_color[_relation_name] = _round_color[_color_index]
                _index += 1
            else:
                _relation_color[_relation_name] = ''

        # Start drawing
        _dot = Digraph(
            comment=_run_para['comment'],
            format=_run_para['outformat'],
            encoding='utf-8',
            directory=_run_para['temp'],
            graph_attr=_style_dict['graph'],
            node_attr=_style_dict['node'],
            edge_attr=_style_dict['edge']
        )

        # Draw the nodes
        _added_rank = list()  # rank groups already added
        for _id, _info in _graph_json['list'].items():
            if _info['rank'] != '':
                # Node belongs to a rank group: the whole group is drawn once,
                # in one subgraph with rank='same'
                if _info['rank'] not in _added_rank:
                    _added_rank.append(_info['rank'])
                    with _dot.subgraph() as _sub_dot:
                        _sub_dot.attr(rank='same')
                        for _sub_id in _graph_json['rank'][_info['rank']]:
                            # Node attributes
                            if _sub_id == _trace_info['trace_id']:
                                # Center node of the trace
                                _attr = _attr_center_node
                            elif _sub_id in _trace_info['up_nodes']:
                                _attr = _attr_up_node
                            elif _sub_id in _trace_info['down_nodes']:
                                _attr = _attr_down_node
                            elif _display_no_trace and _trace_info['trace_id'] != '':
                                # Not part of the chain and must not be shown
                                continue
                            else:
                                _attr = {}

                            _sub_dot.node(
                                _sub_id, label=_graph_json['list'][_sub_id]['name'], **_attr
                            )
            else:
                # Node attributes
                if _id == _trace_info['trace_id']:
                    # Center node of the trace
                    _attr = _attr_center_node
                elif _id in _trace_info['up_nodes']:
                    _attr = _attr_up_node
                elif _id in _trace_info['down_nodes']:
                    _attr = _attr_down_node
                elif _display_no_trace and _trace_info['trace_id'] != '':
                    # Not part of the chain and must not be shown
                    continue
                else:
                    _attr = {}

                _dot.node(_id, label=_info['name'], **_attr)

        # Draw the relation lines; every key other than 'name' and 'rank' is
        # a relation name whose value is iterated as a list of node ids
        for _id, _info in _graph_json['list'].items():
            for _key, _list in _info.items():
                if _key in ('name', 'rank'):
                    continue

                # Color of this relation
                _edge_color = _relation_color[_key]

                for _temp_id in _list:
                    if _temp_id == '':
                        continue

                    # 'reverse' swaps the two ends of the edge
                    if _run_para['direction'] == 'reverse':
                        _head_id = _temp_id
                        _tail_id = _id
                    else:
                        _head_id = _id
                        _tail_id = _temp_id

                    # Edge attributes; trace edges are looked up as 'head&&tail'
                    _findstr = '%s&&%s' % (_head_id, _tail_id)
                    if _findstr in _trace_info['up_edges']:
                        _attr = _attr_up_edge
                    elif _findstr in _trace_info['down_edges']:
                        _attr = _attr_down_edge
                    elif _display_no_trace and _trace_info['trace_id'] != '':
                        continue
                    else:
                        _attr = {'color': _edge_color}

                    _dot.edge(_head_id, _tail_id, **_attr)

        # Save the picture; the extension of 'save' is dropped and the output
        # format is passed to render separately
        _dir, _filename = os.path.split(os.path.abspath(_run_para['save']))
        _filename = FileTool.get_file_name_no_ext(_filename)
        _dot.render(
            filename=_filename, directory=_dir,
            format=_run_para['outformat'],
            view=(_run_para['view'] == 'true'),
            cleanup=(_run_para['cleanup'] == 'true')
        )
    except Exception as e:
        # Report the failure to the console instead of raising
        _prin_str = '%s (%s):\n%s' % (
            _('execution exception'), str(e), traceback.format_exc()
        )
        prompt_obj.prompt_print(_prin_str)
        return CResult(code='20999')

    # Done
    return _ok_result
import sys
from HiveNetLib.base_tools.file_tool import FileTool
from HiveNetLib.simple_xml import SimpleXml
# Add the package path derived from this file's location so the package can be
# imported without being installed
# NOTE(review): `os` is used here and below but its import is not visible in
# this part of the file - confirm it is imported above
sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)))
from chat_robot.lib.data_manager import QAManager
from chat_robot.lib.qa import QA
from chat_robot.lib.loader import QAServerLoader

# Data management module: load the chat_robot server configuration
_file_path = os.path.realpath(FileTool.get_file_path(__file__))
_execute_path = os.path.join(_file_path, os.path.pardir, 'chat_robot')
# _config = os.path.join(_execute_path, os.path.pardir, 'chat_robot/conf/server.xml')
_config = os.path.join(_execute_path, './conf/server.xml')
_config_xml = SimpleXml(_config, encoding='utf-8')
SERVER_CONFIG = _config_xml.to_dict()['server']
# Values added on top of the file content before the loader is created
SERVER_CONFIG['debug'] = True
SERVER_CONFIG['config'] = _config
SERVER_CONFIG['encoding'] = 'utf-8'
SERVER_CONFIG['execute_path'] = _execute_path

# Load the service
_loader = QAServerLoader(SERVER_CONFIG)
_qa_manager: QAManager = _loader.qa_manager

# Import the Excel data
# _qa_manager.import_questions_by_xls(
#     os.path.join(_execute_path, '../test/questions.xlsx'), reset_questions=True
# )
def _load_msg(cls, obj, msg_id=None, obj_type=EnumMsgObjType.String, **kwargs):
    """
    Load a message and convert it to the base processing object (a SimpleXml
    document).

    @param {object} obj - message carrier, interpreted according to obj_type:
        obj_type = EnumMsgObjType.File - obj is a file path
        obj_type = EnumMsgObjType.String - obj is the message text
        obj_type = EnumMsgObjType.Bytes - obj is the encoded message text
    @param {EnumMsgObjType} msg_id=None - message id (not used by this function)
    @param {object} obj_type=EnumMsgObjType.String - type of the message carrier
    @param {**kwargs} kwargs - load parameters:
        encoding='utf-8' - character encoding used to read / decode the message
        has_sign_info=False - whether signature information is appended at the
            end of the xml document; if so it is cut off before parsing
        sign_begin_tag='' - string marking the start of the signature
            information; everything from its last occurrence on is removed
        register_namespace - namespace aliases ``{prefix: uri, ...}`` (both
            strings), forwarded to SimpleXml; None when not given
        every other item is forwarded unchanged to the SimpleXml constructor

    @returns {object} - message body object (SimpleXml)

    @throws {UnboundLocalError} - standard error code 21001, raised when
        obj_type is not supported
    """
    if obj_type not in (EnumMsgObjType.File, EnumMsgObjType.String, EnumMsgObjType.Bytes):
        # Unsupported carrier type
        raise UnboundLocalError

    _encoding = cls._get_para_from_kwargs('encoding', default_value='utf-8', kwargs=kwargs)
    _has_sign_info = cls._get_para_from_kwargs(
        'has_sign_info', default_value=False, kwargs=kwargs)
    _sign_begin_tag = cls._get_para_from_kwargs(
        'sign_begin_tag', default_value='', kwargs=kwargs)

    # Bring every supported carrier to a text string
    if obj_type == EnumMsgObjType.File:
        with open(obj, 'rt', encoding=_encoding) as f:
            _xmlstr = f.read()
    elif obj_type == EnumMsgObjType.Bytes:
        _xmlstr = obj.decode(_encoding)
    else:
        _xmlstr = obj

    if _has_sign_info:
        # Cut off the signature information at the end
        _sign_begin = _xmlstr.rfind(_sign_begin_tag)
        if _sign_begin != -1:
            _xmlstr = _xmlstr[0:_sign_begin]

    # Merge the fixed arguments into a copy of kwargs. Passing them explicitly
    # next to **kwargs raises TypeError ("multiple values for keyword
    # argument") as soon as the caller supplies encoding / register_namespace
    # / obj_type / use_chardet, which are documented load parameters.
    _xml_kwargs = dict(kwargs)
    _xml_kwargs.setdefault('register_namespace', None)
    _xml_kwargs.update(
        obj_type=EnumXmlObjType.String,  # the message is always text at this point
        encoding=_encoding,
        use_chardet=False
    )

    return SimpleXml(_xmlstr, **_xml_kwargs)
class HtmlParser(object):
    """
    Html parser

    Parses html documents with lxml (through SimpleXml). Elements are looked
    up in a way similar to WebDriver so the parsing functions stay compatible.
    """

    def __init__(self, html: str, use_xpath2=False):
        """
        Create the parser for an html document.

        @param {str} html - html source text to parse
        @param {bool} use_xpath2=False - whether to use xpath 2.0
        """
        self.html_doc = SimpleXml(html, obj_type=EnumXmlObjType.String, parser='html',
                                  use_xpath2=use_xpath2)

    #############################
    # Element lookup
    #############################
    def find_elements(self, steps: list, parent: HtmlElement = None) -> list:
        """
        Find elements by applying the lookup steps one after another.

        The first step is executed against ``parent`` (or the whole document);
        every following step is executed against each element found by the
        previous step. The lookup stops early when a step finds nothing.

        @param {list} steps - list of steps; every step is a list whose item 0
            is the operation and whose following items are its arguments:
            [
                ['pos', 0],  # element at the given position of the current list
                             # (only valid from the second step on)
                ['children'],  # all elements below the current elements (xpath './/*')
                ['id', 'myId'],  # by id attribute
                ['xpath', '//img[@id="dracga" and @style]'],  # by xpath
                ['name', 'myName'],  # by name attribute
                ['tag_name', 'img'],  # by tag name
                ['class_name', 'styleClass'],  # by class name
                ['css_selector', '#kw'],  # by css selector (#kw for an id, .s_ipt for a class)
                ['link_text', 'hao123'],  # by the exact text of a hyperlink
                ['partial_link_text', 'hao']  # by a part of the text of a hyperlink
            ]
        @param {HtmlElement} parent=None - element to start from; None starts
            from the whole document

        @returns {list} - elements found, as HtmlElement objects

        @throws {KeyError} - when a step uses an unsupported operation
        """
        # Convert to the underlying lxml element
        _parent = None
        if parent is not None:
            _parent = parent.element

        # First step
        _elements = self._find_elements_step(steps[0], parent=_parent)

        # Remaining steps
        for _step in steps[1:]:
            if len(_elements) == 0:
                # Nothing left to search in
                break

            if _step[0] == 'pos':
                # Element at the given position of the current list
                _elements = [_elements[_step[1]]]
            else:
                # Search below every element found so far
                _new_elements = list()
                for _element in _elements:
                    _new_elements.extend(
                        self._find_elements_step(_step, parent=_element))
                _elements = _new_elements

        # Convert to the public element type
        return [HtmlElement(_element) for _element in _elements]

    #############################
    # Internal functions
    #############################
    def _find_elements_step(self, step: list, parent: ET._Element = None) -> list:
        """
        Execute a single lookup step.

        @param {list} step - step definition; item 0 is the operation, the
            following items are its arguments:
            ['children']  # all elements below the parent (xpath './/*')
            ['id', 'myId']  # by id attribute
            ['xpath', '//img[@id="dracga" and @style]']  # by xpath
            ['name', 'myName']  # by name attribute
            ['tag_name', 'img']  # by tag name
            ['class_name', 'styleClass']  # by class name
            ['css_selector', '#kw']  # by css selector (#kw for an id, .s_ipt for a class)
            ['link_text', 'hao123']  # by the exact text of a hyperlink
            ['partial_link_text', 'hao']  # by a part of the text of a hyperlink
        @param {lxml.etree._Element} parent=None - element to search below;
            None searches the whole document

        @returns {list} - elements found, as lxml.etree._Element objects

        @throws {KeyError} - when the operation is not supported
        """
        _elements = list()
        _op = step[0]  # lookup operation
        if _op == 'children':
            _elements = self._find_elements_by_xpath('.//*', parent=parent)
        elif _op == 'id':
            _elements = self._find_elements_by_xpath('.//*[@id="%s"]' % step[1], parent=parent)
        elif _op == 'xpath':
            _elements = self._find_elements_by_xpath(step[1], parent=parent)
        elif _op == 'name':
            _elements = self._find_elements_by_xpath('.//*[@name="%s"]' % step[1], parent=parent)
        elif _op == 'tag_name':
            _elements = self._find_elements_by_xpath('.//%s' % step[1], parent=parent)
        elif _op == 'class_name':
            # Match one whitespace separated token of the class attribute.
            # ends-with() is deliberately not used: it is an XPath 2.0 function
            # and fails on the default XPath 1.0 engine of lxml; this
            # expression works with both versions.
            _elements = self._find_elements_by_xpath(
                './/*[contains(concat(" ", normalize-space(@class), " "), " {0} ")]'
                .format(step[1]),
                parent=parent)
        elif _op == 'link_text':
            _elements = self._find_elements_by_xpath(
                './/*[@href and text()="%s"]' % step[1], parent=parent)
        elif _op == 'partial_link_text':
            _elements = self._find_elements_by_xpath(
                './/*[@href and contains(string(), "%s")]' % step[1], parent=parent)
        elif _op == 'css_selector':
            if parent is None:
                parent = self.html_doc.root
            _elements = parent.cssselect(step[1])
        else:
            # Unknown operation
            raise KeyError('not support find elements operator [%s]' % _op)

        return _elements

    def _find_elements_by_xpath(self, xpath: str, parent: ET._Element = None) -> list:
        """
        Find elements by xpath.

        @param {str} xpath - xpath expression
        @param {lxml.etree.Element} parent=None - element to search below;
            None searches the whole document

        @returns {list} - elements found, as lxml.etree.Element objects
        """
        if parent is None:
            # Search the whole document
            return self.html_doc.get_nodes(xpath)

        # Search below the given element
        return self.html_doc.get_childnodes_on_node(parent, xpath)