def white_black_valid(self):
    """
    Check whether the current file passes the white-list / black-list filters.

    The filters are read from the 'dstotheroption' value of the storage
    record. When that option text is empty or missing, the file is accepted.
    Directory filters are matched against file_path_with_rel_path for every
    file type except FileType_Unknown; file filters are matched against
    file_name_without_path only when the type is FileType_File.

    :return: truthy when the file passes every configured filter
    """
    storage_option = self._ds_storage.value_by_name(0, 'dstotheroption', None)
    if storage_option is None or storage_option == '':
        return True

    white_dirs = CJson.json_attr_value(
        storage_option, self.Path_SO_Inbound_Filter_Dir_WhiteList, '')
    black_dirs = CJson.json_attr_value(
        storage_option, self.Path_SO_Inbound_Filter_Dir_BlackList, '')
    white_files = CJson.json_attr_value(
        storage_option, self.Path_SO_Inbound_Filter_File_WhiteList, '')
    black_files = CJson.json_attr_value(
        storage_option, self.Path_SO_Inbound_Filter_File_BlackList, '')

    # Directory-level filters: the white list is checked first, the black
    # list only when the white list did not already reject the path.
    dir_accepted = True
    if self.file_type != self.FileType_Unknown:
        if white_dirs != '':
            dir_accepted = CFile.file_match(
                self.file_path_with_rel_path, white_dirs)
        if dir_accepted and black_dirs != '':
            dir_accepted = not CFile.file_match(
                self.file_path_with_rel_path, black_dirs)
    if not dir_accepted:
        return dir_accepted

    # File-level filters only apply to plain files.
    if self.file_type != self.FileType_File:
        return True

    file_accepted = True
    if white_files != '':
        file_accepted = CFile.file_match(
            self.file_name_without_path, white_files)
    if file_accepted and black_files != '':
        file_accepted = not CFile.file_match(
            self.file_name_without_path, black_files)
    return file_accepted
def params_value_by_name(self, attr_name: str, default_value):
    """
    Read a job-level parameter from the parameters passed to this object.

    The value is looked up under the path '<NAME_JOB>.<attr_name>'.

    :param attr_name: name of the attribute below the job node
    :param default_value: value returned when no parameters were supplied,
        and passed to CJson.json_attr_value as its default
    :return: the parameter value, or default_value
    """
    if self.__params is None:
        return default_value

    attr_path = '{0}.{1}'.format(self.NAME_JOB, attr_name)
    return CJson.json_attr_value(self.__params, attr_path, default_value)
def execute(self) -> str:
    """
    Monitor the notification progress of inbound batches.

    Every row of dm2_storage_inbound whose dsi_na_status equals
    ProcStatus_WaitConfirm is inspected. For each batch, the notification
    records in dm2_storage_obj_na (restricted to the relevant modules) are
    counted: all records, finished-or-error records, and error records.
    While the counts differ, the progress is written through
    update_inbound_na_progress; once they match, or when nothing can be
    notified, or on an exception, the outcome is written through
    update_inbound_na_result.

    :return: a merged result text describing the outcome of this run
    """
    inbound_ib_n_list = CFactory().give_me_db(
        self.get_mission_db_id()).all_row(
        '''
        select dsiid as query_ib_id
             , dsiotheroption as query_ib_option
             , dsidirectory as query_ib_relation_dir
             , dsidirectoryid as query_ib_dir_id
             , dsitargetstorageid as query_ib_target_storage_id
        from dm2_storage_inbound
        where dsi_na_status = {0}
        '''.format(self.ProcStatus_WaitConfirm))
    if inbound_ib_n_list.is_empty():
        return CResult.merge_result(CResult.Success, '本次没有需要检查的通知任务!')

    for data_index in range(inbound_ib_n_list.size()):
        ds_ib_id = inbound_ib_n_list.value_by_name(data_index, 'query_ib_id', '')
        ds_ib_option = CUtils.any_2_str(
            inbound_ib_n_list.value_by_name(data_index, 'query_ib_option', ''))

        # The batch option may name the modules to notify; otherwise every
        # module file found in the data-access modules directory is used.
        module_name_list = CJson.json_attr_value(
            ds_ib_option, self.Path_IB_Opt_Notify_module, None)
        if module_name_list is None:
            modules_root_dir = CSys.get_metadata_data_access_modules_root_dir()
            module_file_list = CFile.file_or_subpath_of_path(
                modules_root_dir,
                '{0}_*.{1}'.format(self.Name_Module, self.FileExt_Py))
            module_name_list = [
                CFile.file_main_name(module_file)
                for module_file in module_file_list
            ]

        # Fixed: the message had no placeholder, so the batch id was never logged.
        CLogger().debug('正在检查入库批次[{0}]的通知进度...'.format(ds_ib_id))
        try:
            # The quoted module list and the bound parameters are identical
            # for all three counting queries, so they are built once.
            placeholder_values = {
                'module_name_list':
                    CUtils.list_2_str(module_name_list, "'", ',', "'")
            }
            sql_params = {'ib_id': ds_ib_id}

            # Number of all notification records of this batch.
            sql_record_total_count = CUtils.replace_placeholder(
                '''
                select count(*)
                from dm2_storage_obj_na
                where dson_app_id in ($module_name_list)
                  and dson_object_id in (
                    select dsoid
                    from dm2_storage_object
                    where dso_ib_id = :ib_id
                  )
                ''', placeholder_values)
            record_total_count = CFactory().give_me_db(
                self.get_mission_db_id()).one_value(
                sql_record_total_count, sql_params, 0)
            if record_total_count == 0:
                # Fixed: the batch id was passed to format() but never rendered.
                self.update_inbound_na_result(
                    ds_ib_id,
                    CResult.merge_result(
                        self.Failure,
                        '入库任务[{0}]下没有可通知的数据, 请检查异常情况! '.format(ds_ib_id)))
                continue

            # Number of records already processed, finished or failed.
            sql_record_finished_count = CUtils.replace_placeholder(
                '''
                select count(*)
                from dm2_storage_obj_na
                where dson_notify_status in ({0}, {1})
                  and dson_app_id in ($module_name_list)
                  and dson_object_id in (
                    select dsoid
                    from dm2_storage_object
                    where dso_ib_id = :ib_id
                  )
                '''.format(self.ProcStatus_Finished, self.ProcStatus_Error),
                placeholder_values)
            record_finished_count = CFactory().give_me_db(
                self.get_mission_db_id()).one_value(
                sql_record_finished_count, sql_params, 0)

            # Number of failed records.
            sql_record_error_count = CUtils.replace_placeholder(
                '''
                select count(*)
                from dm2_storage_obj_na
                where dson_notify_status = {0}
                  and dson_app_id in ($module_name_list)
                  and dson_object_id in (
                    select dsoid
                    from dm2_storage_object
                    where dso_ib_id = :ib_id
                  )
                '''.format(self.ProcStatus_Error),
                placeholder_values)
            record_error_count = CFactory().give_me_db(
                self.get_mission_db_id()).one_value(
                sql_record_error_count, sql_params, 0)

            if record_total_count != record_finished_count:
                message = '入库任务[{0}]下的数据正在通知其他子系统, 共有[{1}]个, 已处理[{2}]个, 失败[{3}]个...'.format(
                    ds_ib_id, record_total_count, record_finished_count,
                    record_error_count)
                CLogger().debug(message)
                self.update_inbound_na_progress(
                    ds_ib_id, CResult.merge_result(self.Failure, message))
            else:
                # NOTE(review): the batch is reported as Success even when
                # record_error_count > 0 — confirm this is intended.
                message = '入库任务[{0}]下的数据已经通知其他子系统, 共有[{1}]个, 已处理[{2}]个, 失败[{3}]个, 请检查修正! '.format(
                    ds_ib_id, record_total_count, record_finished_count,
                    record_error_count)
                CLogger().debug(message)
                self.update_inbound_na_result(
                    ds_ib_id, CResult.merge_result(self.Success, message))
        except Exception as error:
            # Fixed: the batch id is now part of the message; it was passed
            # to format() before but only '{1}' was rendered.
            self.update_inbound_na_result(
                ds_ib_id,
                CResult.merge_result(
                    self.Failure,
                    '入库任务[{0}]下的数据通知其他子系统过程中出现异常情况, 详细错误信息为: [{1}]'.format(
                        ds_ib_id, str(error))))
            continue

    return CResult.merge_result(self.Success, '本次通知监控任务成功结束!')
def result_message(cls, result_text) -> str:
    """
    Extract the message part of a result text.

    :param result_text: result text to read from
    :return: the value stored under cls.Name_Message, or '' as the default
    """
    message = CJson.json_attr_value(result_text, cls.Name_Message, '')
    return message
def result_info(cls, result_text, info_name: str, default_value):
    """
    Read an arbitrary named attribute from a result text.

    :param result_text: result text to read from
    :param info_name: name of the attribute to read
    :param default_value: default handed to CJson.json_attr_value
    :return: whatever CJson.json_attr_value yields for info_name
    """
    info_value = CJson.json_attr_value(result_text, info_name, default_value)
    return info_value
def result_success(cls, result_text) -> bool:
    """
    Tell whether a result text represents a successful outcome.

    The value stored under cls.Name_Result is read with cls.Failure as the
    default, so a result text without that attribute counts as a failure.

    :param result_text: result text to inspect
    :return: True when the stored result equals cls.Success
    """
    outcome = CJson.json_attr_value(result_text, cls.Name_Result, cls.Failure)
    return outcome == cls.Success
def test_get_chn_attr_by_class_method(self):
    """Reading an attribute with a Chinese name through the class-level call yields 5."""
    expected_value = 5
    actual_value = CJson.json_attr_value(self.test_text, '中文属性', 1)
    assert actual_value == expected_value
def process_mission(self, dataset) -> str:
    """
    Notify the data-access modules about every object of one inbound batch.

    The modules are taken from the batch option (Path_IB_Opt_Notify_module)
    or, when that is not set, discovered from the module files in the
    data-access modules directory. Every object of the batch is passed to
    notify_object of every enabled module; failures are collected and
    reported together. The final result is stored via update_notify_result.

    :param dataset: dataset whose first row describes the inbound batch
    :return: the merged result text that was also stored for the batch
    """
    storage_id = dataset.value_by_name(0, 'query_storage_id', '')
    storage_title = dataset.value_by_name(0, 'query_storage_title', '')
    ib_id = dataset.value_by_name(0, 'query_ib_id', '')
    ib_directory_name = dataset.value_by_name(0, 'query_ib_relation_dir', '')
    ib_batch_no = dataset.value_by_name(0, 'query_ib_batchno', '')
    ib_option = CUtils.any_2_str(
        dataset.value_by_name(0, 'query_ib_option', ''))

    CLogger().debug(
        '与第三方模块同步的目录为: {0}.{1}'.format(ib_id, ib_directory_name))

    # Kept outside the try block because the exception message reports it.
    object_count = 0
    try:
        module_names = CJson.json_attr_value(
            ib_option, self.Path_IB_Opt_Notify_module, None)
        if module_names is None:
            # No explicit module list: use every module file on disk.
            module_files = CFile.file_or_subpath_of_path(
                CSys.get_metadata_data_access_modules_root_dir(),
                '{0}_*.{1}'.format(self.Name_Module, self.FileExt_Py))
            module_names = [
                CFile.file_main_name(module_file)
                for module_file in module_files
            ]

        object_rows = CFactory().give_me_db(self.get_mission_db_id()).all_row(
            ''' select dsoid, dsoobjecttype, dsoobjectname, dso_da_result from dm2_storage_object where dso_ib_id = :ib_id ''',
            {'ib_id': ib_id})
        if object_rows.is_empty():
            empty_result = CResult.merge_result(
                self.Success,
                '存储[{0}]下, 批次为[{1}]的目录[{2}]下无任何对象, 不再通知给第三方应用!'.format(
                    storage_title, ib_batch_no, ib_directory_name))
            self.update_notify_result(ib_id, empty_result)
            return empty_result

        CLogger().debug(
            '存储[{0}]下, 批次为[{1}]的目录[{2}]下有[{3}]个对象等待通知给第三方应用!'.format(
                storage_title, ib_batch_no, ib_directory_name,
                object_rows.size()))
        object_count = object_rows.size()

        collected_errors = ''
        for row_index in range(object_count):
            row = object_rows.record(row_index)
            object_id = CUtils.dict_value_by_name(row, 'dsoid', '')
            object_type = CUtils.dict_value_by_name(row, 'dsoobjecttype', '')
            object_name = CUtils.dict_value_by_name(row, 'dsoobjectname', '')
            da_result_text = CUtils.any_2_str(
                CUtils.dict_value_by_name(row, 'dso_da_result', ''))
            da_result = CJson()
            da_result.load_json_text(da_result_text)

            for module_name in module_names:
                module_obj = CObject.create_module_instance(
                    CSys.get_metadata_data_access_modules_root_name(),
                    module_name, self.get_mission_db_id())
                module_id = module_name
                module_title = CUtils.dict_value_by_name(
                    module_obj.information(), self.Name_Title, '')
                module_enable = CUtils.dict_value_by_name(
                    module_obj.information(), self.Name_Enable, True)
                if not module_enable:
                    continue

                access_path = '{0}.{1}'.format(module_id, self.Name_Result)
                module_access = da_result.xpath_one(
                    access_path, self.DataAccess_Forbid)
                memo_path = '{0}.{1}'.format(module_id, self.Name_Message)
                module_access_memo = da_result.xpath_one(memo_path, '')

                CLogger().debug(
                    '存储[{0}]下, 批次为[{1}]的目录[{2}]下的对象[{3}], 与模块[{4}]的访问权限为[{5}]!'
                    .format(storage_title, ib_batch_no, ib_directory_name,
                            object_name, module_title, module_access))

                # TODO(王西亚): decide whether objects should be filtered
                # here so that only those with access "pass", or "pass" and
                # "wait", are notified. Currently every object is notified
                # regardless of module_access.
                notify_result = module_obj.notify_object(
                    ib_id, module_access, module_access_memo, object_id,
                    object_name, object_type, None)
                if CResult.result_success(notify_result):
                    continue

                notify_message = CResult.result_message(notify_result)
                CLogger().debug(
                    '存储[{0}]下, 批次为[{1}]的目录[{2}]下的对象[{3}], 与模块[{4}]的通知处理结果出现错误, 详细情况: [{5}]!'
                    .format(storage_title, ib_batch_no, ib_directory_name,
                            object_name, module_title, notify_message))
                collected_errors = CUtils.str_append(
                    collected_errors, notify_message)

        if CUtils.equal_ignore_case(collected_errors, ''):
            final_result = CResult.merge_result(
                self.Success,
                '存储[{0}]下, 批次为[{1}]的目录[{2}]下有[{3}]个对象成功通知给第三方应用!'.format(
                    storage_title, ib_batch_no, ib_directory_name,
                    object_count))
        else:
            final_result = CResult.merge_result(
                self.Failure,
                '存储[{0}]下, 批次为[{1}]的目录[{2}]下有[{3}]个对象在通知给第三方应用时, 部分出现错误! 错误信息如下: \n{4}'
                .format(storage_title, ib_batch_no, ib_directory_name,
                        object_count, collected_errors))
        self.update_notify_result(ib_id, final_result)
        return final_result
    except Exception as error:
        failure_result = CResult.merge_result(
            self.Failure,
            '存储[{0}]下, 批次为[{1}]的目录[{2}]下有[{3}]个对象通知给第三方应用时出现异常! 错误原因为: {4}!'
            .format(storage_title, ib_batch_no, ib_directory_name,
                    object_count, error.__str__()))
        self.update_notify_result(ib_id, failure_result)
        return failure_result
def process_mission(self, dataset):
    """
    Parse the tags of one storage object.

    Steps:
    1. Load the object information; when the object is flagged as not
       valid, the tag-parse status columns are updated directly and the
       mission ends successfully.
    2. Load the scan rule of the closest parent directory that has one.
    3. Resolve the plugin for the object type; without a plugin the mission
       fails.
    4. Determine the tag parser rule, in order of priority: plugin
       information, storage option, application settings. When no rule is
       found the mission ends successfully without parsing.
    5. Run the plugin's tag parser and store the outcome through
       db_update_object_status.

    :param dataset: dataset whose first row describes the object
    :return: a merged result text
    """
    dso_id = dataset.value_by_name(0, 'dsoid', '')
    dso_data_type = dataset.value_by_name(0, 'dsodatatype', '')
    dso_object_type = dataset.value_by_name(0, 'dsoobjecttype', '')
    dso_object_name = dataset.value_by_name(0, 'dsoobjectname', '')

    CLogger().debug('开始处理对象: {0}.{1}.{2}.{3}的元数据'.format(
        dso_id, dso_data_type, dso_object_type, dso_object_name))

    ds_object_info = self.get_object_info(dso_id, dso_data_type)
    ds_object_storage_option = ds_object_info.value_by_name(
        0, 'query_object_storage_option', None)
    object_fullname = ds_object_info.value_by_name(
        0, 'query_object_fullname', '')
    object_storage_id = ds_object_info.value_by_name(
        0, 'query_object_storage_id', '')

    if ds_object_info.value_by_name(
            0, 'query_object_valid', self.DB_False) == self.DB_False:
        CFactory().give_me_db(self.get_mission_db_id()).execute(
            '''
            update dm2_storage_object
            set dsotagsparsestatus = 0
              , dsolastmodifytime = now()
              , dsotagsparsememo = '文件或目录不存在,标签无法解析'
            where dsoid = :dsoid
            ''', {'dsoid': dso_id})
        return CResult.merge_result(
            self.Success,
            '文件或目录[{0}]不存在,标签处理正常结束!'.format(object_fullname))

    # The column is aliased so the name used to read the value back is
    # unambiguous. Previously the query selected dsdScanRule while the value
    # was read as 'dsScanRule', so the rule always fell back to ''.
    sql_get_rule = '''
        select dsdScanRule as query_scan_rule
        from dm2_storage_directory
        where dsdStorageid = :dsdStorageID
          and Position(dsddirectory || '{0}' in :dsdDirectory) = 1
          and dsdScanRule is not null
        order by dsddirectory desc
        limit 1
        '''.format(CFile.sep())
    rule_ds = CFactory().give_me_db(self.get_mission_db_id()).one_row(
        sql_get_rule, {
            'dsdStorageID': object_storage_id,
            'dsdDirectory': ds_object_info.value_by_name(
                0, 'query_object_relation_path', '')
        })
    ds_rule_content = rule_ds.value_by_name(0, 'query_scan_rule', '')

    file_info_obj = CDMFilePathInfoEx(
        dso_data_type,
        object_fullname,
        object_storage_id,
        ds_object_info.value_by_name(0, 'query_object_file_id', ''),
        ds_object_info.value_by_name(0, 'query_object_file_parent_id', ''),
        ds_object_info.value_by_name(0, 'query_object_owner_id', ''),
        self.get_mission_db_id(),
        ds_rule_content)

    plugins_obj = CPluginsMng.plugins(file_info_obj, dso_object_type)
    if plugins_obj is None:
        return CResult.merge_result(
            self.Failure,
            '文件或目录[{0}]的类型插件[{1}]不存在,对象详情无法解析, 处理结束!'.format(
                object_fullname, dso_object_type))

    try:
        plugins_information = plugins_obj.get_information()

        # Rule priority: plugin information, then storage option, then
        # application settings.
        tags_parser_rule = CUtils.dict_value_by_name(
            plugins_information, plugins_obj.Plugins_Info_TagsEngine, None)
        if tags_parser_rule is None:
            tags_parser_rule = CJson.json_attr_value(
                CUtils.any_2_str(ds_object_storage_option),
                self.Path_Setting_MetaData_Tags_Rule, None)
        if tags_parser_rule is None:
            tags_parser_rule = settings.application.xpath_one(
                self.Path_Setting_MetaData_Tags_Rule, None)

        if tags_parser_rule is None:
            process_result = CResult.merge_result(
                CResult.Success, '系统未设置标签库和识别模式, 标签解析将自动结束')
        else:
            process_result = plugins_obj.parser_tags(
                CTagsParser(dso_id, dso_object_name, file_info_obj,
                            dataset.value_by_name(0, 'dsoaliasname', ''),
                            tags_parser_rule))

        self.db_update_object_status(dso_id, process_result)
        return process_result
    except Exception as err:
        process_result = CResult.merge_result(
            self.Failure,
            '文件或目录[{0}]对象业务分类解析过程出现错误! 错误原因为: {1}'.format(
                object_fullname, str(err)))
        self.db_update_object_status(dso_id, process_result)
        return process_result
def process_mission(self, dataset) -> str:
    """
    Move one inbound batch into its target storage.

    The detailed algorithm is described in readme.md, chapter
    "数据入库调度" (inbound scheduling). In short:
    1. Verify that all inbound files/paths exist and write the inbound log.
    2. If the source storage is a core or mixed storage, only the metadata
       status is updated and the mission ends.
    3. Otherwise the dataset type is read from the business metadata file
       (when present), the matching inbound schema and the destination
       storage are resolved, and the source files are optionally checked
       for locks.
    4. The files are moved to the destination, then the metadata is moved.
       If either step fails, the files are moved back.
    Every exit path stores its result through update_ib_result.

    :param dataset: dataset whose first row describes the inbound batch
    :return: the merged result text that was also stored for the batch
    """
    ds_src_storage_id = dataset.value_by_name(0, 'query_storage_id', '')
    ds_src_storage_type = dataset.value_by_name(
        0, 'query_storage_type', self.Storage_Type_Mix)
    ds_src_root_path = dataset.value_by_name(0, 'query_rootpath', '')
    ds_src_dir_id = dataset.value_by_name(0, 'query_ib_dir_id', '')
    ds_ib_id = dataset.value_by_name(0, 'query_ib_id', '')
    ds_ib_directory_name = dataset.value_by_name(0, 'query_ib_relation_dir', '')
    ds_ib_batch_no = dataset.value_by_name(0, 'query_ib_batchno', '')
    ds_ib_option = dataset.value_by_name(0, 'query_ib_option', '')

    src_need_storage_size = self.get_storage_size(
        ds_ib_id, ds_src_storage_id, ds_ib_directory_name, ds_ib_option)

    # An empty relative directory means the storage root itself is inbound.
    ib_is_root_dir = CUtils.equal_ignore_case(ds_ib_directory_name, '')

    src_path = ds_src_root_path
    if not ib_is_root_dir:
        src_path = CFile.join_file(src_path, ds_ib_directory_name)
    src_dataset_metadata_filename = CFile.join_file(
        src_path, self.FileName_MetaData_Bus_21AT)

    CLogger().debug('入库的目录为: {0}.{1}'.format(ds_ib_id, ds_ib_directory_name))
    try:
        # Check that every file matches its metadata.
        all_ib_file_or_path_existed = self.check_all_ib_file_or_path_existed(
            ds_ib_id)
        if not CResult.result_success(all_ib_file_or_path_existed):
            self.update_ib_result(ds_ib_id, all_ib_file_or_path_existed)
            return all_ib_file_or_path_existed

        # Record the inbound operation in the log.
        result = self.ib_log(ds_ib_id, ds_src_storage_id, ds_ib_directory_name)
        if not CResult.result_success(result):
            self.update_ib_result(ds_ib_id, result)
            return result

        # Inbound inside a core or mixed storage only changes the metadata
        # status; no files are moved.
        if CUtils.equal_ignore_case(ds_src_storage_type, self.Storage_Type_Mix) \
                or CUtils.equal_ignore_case(ds_src_storage_type, self.Storage_Type_Core):
            result_ib_in_core_or_mix_storage = \
                self.update_ib_data_status_in_core_or_mix_storage(
                    ds_ib_id, ds_src_storage_id, ds_ib_directory_name,
                    ds_src_dir_id)
            self.update_ib_result(ds_ib_id, result_ib_in_core_or_mix_storage)
            return result_ib_in_core_or_mix_storage

        # Load the metadata file of the dataset waiting in the directory.
        src_dataset_xml = CXml()
        src_dataset_type = self.Name_Default
        if CFile.file_or_path_exist(src_dataset_metadata_filename):
            src_dataset_xml.load_file(src_dataset_metadata_filename)
            src_dataset_type = CXml.get_element_text(
                src_dataset_xml.xpath_one(self.Path_MD_Bus_ProductType))
        if CUtils.equal_ignore_case(src_dataset_type, ''):
            src_dataset_type = self.Name_Default

        # Find the matching inbound schema.
        src_ib_schema = self.get_ib_schema(src_dataset_type, ds_ib_option)
        if src_ib_schema is None:
            result = CResult.merge_result(
                self.Failure,
                '目录为[{0}.{1}]的数据集类型为[{2}], 未找到匹配的入库模式, 请检查修正后重试!'.format(
                    ds_ib_id, ds_ib_directory_name, src_dataset_type))
            self.update_ib_result(ds_ib_id, result)
            return result

        # Resolve the destination storage, its root path, the id of the
        # destination directory, the destination sub path and a message.
        dest_ib_storage_id, dest_ib_root_path, dest_ib_dir_id, dest_ib_subpath, message = \
            self.get_dest_storage(
                ds_ib_batch_no, src_need_storage_size, ds_ib_option,
                src_ib_schema, src_dataset_xml)
        if dest_ib_storage_id is None or dest_ib_subpath is None:
            result = CResult.merge_result(self.Failure, message)
            self.update_ib_result(ds_ib_id, result)
            return result

        dest_ib_subpath = CFile.unify(dest_ib_subpath)

        if CJson.json_attr_value(ds_ib_option,
                                 self.Path_IB_Switch_CheckFileLocked,
                                 self.DB_False) == self.DB_True:
            src_ib_files_not_locked, message = \
                self.check_src_ib_files_not_locked(ds_src_root_path, src_path)
            if not src_ib_files_not_locked:
                result = CResult.merge_result(self.Failure, message)
                self.update_ib_result(ds_ib_id, result)
                return result

        proc_ib_src_path = ds_src_root_path
        proc_ib_dest_path = dest_ib_root_path
        if not CUtils.equal_ignore_case(dest_ib_subpath, ''):
            proc_ib_dest_path = CFile.join_file(dest_ib_root_path,
                                                dest_ib_subpath)
        if not ib_is_root_dir:
            proc_ib_src_path = CFile.join_file(proc_ib_src_path,
                                               ds_ib_directory_name)
            proc_ib_dest_path = CFile.join_file(proc_ib_dest_path,
                                                ds_ib_directory_name)

        # ------------------------------------------------------------------
        # All pre-inbound checks are done at this point.

        def move_files_back(failed_result):
            """Move the files back to the source and return the result to report."""
            # The same move routine is used in the opposite direction; in
            # theory this should always succeed.
            sub_result = self.ib_files_move(
                proc_ib_dest_path, proc_ib_src_path, ib_is_root_dir)
            if CResult.result_success(sub_result):
                return failed_result
            # Fixed: both failure paths now join the messages with a real
            # line break; one of them used the literal text '/n'.
            return CResult.merge_result(
                self.Failure,
                '{0}\n{1}'.format(CResult.result_message(failed_result),
                                  CResult.result_message(sub_result)))

        # Move the source directory to the destination; for the root
        # directory only the files are moved.
        result = self.ib_files_move(
            proc_ib_src_path, proc_ib_dest_path, ib_is_root_dir)
        if not CResult.result_success(result):
            result = move_files_back(result)
            self.update_ib_result(ds_ib_id, result)
            return result

        # Move the metadata of the source files to the destination storage;
        # on error the method rolls back internally.
        result = self.src_ib_metadata_move_to_storage(
            ds_ib_id, ds_src_storage_id, ds_src_dir_id, ds_ib_directory_name,
            dest_ib_storage_id, dest_ib_dir_id, dest_ib_subpath)
        if not CResult.result_success(result):
            result = move_files_back(result)
            self.update_ib_result(ds_ib_id, result)
            return result

        result = CResult.merge_result(
            self.Success,
            '目录为[{0}.{1}]入库成功!'.format(ds_ib_id, ds_ib_directory_name))
        self.update_ib_result(ds_ib_id, result)
        return result
    except Exception as error:
        result = CResult.merge_result(
            self.Failure,
            '目录为[{0}.{1}]入库出现异常! 错误原因为: {2}'.format(
                ds_ib_id, ds_ib_directory_name, str(error)))
        self.update_ib_result(ds_ib_id, result)
        return result