Exemplo n.º 1
0
    def classified(self):
        """
        Decide whether the current path is a business dataset this plugin handles.

        The business metadata file (``FileName_MetaData_Bus_21AT``) is looked
        up under the full path of ``self.file_info``. When it exists it is
        loaded, and its product type is compared (case-insensitively) with the
        ``Plugins_Info_Type`` entry of ``get_information()``. On a match the
        object is marked as known and named after the product name found in
        the metadata.

        :return: tuple ``(object_confirm, object_name)``; the defaults are
            ``Object_Confirm_IUnKnown`` and ``None``.
        """
        self._object_confirm = self.Object_Confirm_IUnKnown
        self._object_name = None

        current_path = self.file_info.file_name_with_full_path
        metadata_file_name = CFile.join_file(current_path,
                                             self.FileName_MetaData_Bus_21AT)
        if CFile.file_or_path_exist(metadata_file_name):
            self.__bus_metadata_xml_file_name__ = metadata_file_name
            self.__metadata_xml_obj__ = CXml()
            try:
                self.__metadata_xml_obj__.load_file(metadata_file_name)
                self.__classified_object_type = CXml.get_element_text(
                    self.__metadata_xml_obj__.xpath_one(
                        self.Path_21AT_MD_Content_ProductType))

                if CUtils.equal_ignore_case(
                        self.__classified_object_type,
                        CUtils.dict_value_by_name(self.get_information(),
                                                  self.Plugins_Info_Type,
                                                  None)):
                    self._object_confirm = self.Object_Confirm_IKnown
                    self._object_name = CXml.get_element_text(
                        self.__metadata_xml_obj__.xpath_one(
                            self.Path_21AT_MD_Content_ProductName))
            except Exception:
                # The metadata file exists but could not be read or parsed:
                # drop the half-initialised XML object and report which file
                # was at fault (the placeholder used to be logged unformatted).
                self.__metadata_xml_obj__ = None
                CLogger().warning(
                    '发现文件{0}符合二十一世纪业务数据集标准, 但该文件格式有误, 无法打开! '.format(
                        metadata_file_name))

        return self._object_confirm, self._object_name
Exemplo n.º 2
0
    def __plugins_classified_by_plugin_node_list__(
            cls, file_info: CDMFilePathInfoEx,
            plugin_node_list: list) -> CPlugins:
        """
        Try the plugins named by the given XML nodes, in order, and return the
        first plugin object that recognises the file.

        :param file_info: description of the file or directory to classify
        :param plugin_node_list: XML nodes whose text is a plugin id
        :return: the first plugin object whose ``classified()`` result is not
            ``Object_Confirm_IUnKnown``, or None when no plugin matches
        """
        for plugin_node in plugin_node_list:
            plugin_id = CXml.get_element_text(plugin_node)
            class_classified_obj = cls.plugins(file_info, plugin_id)

            if class_classified_obj is None:
                continue

            try:
                object_confirm, _ = class_classified_obj.classified()
                if object_confirm != cls.Object_Confirm_IUnKnown:
                    CLogger().debug('{0} is classified as {1}.{2}'.format(
                        file_info.file_main_name,
                        class_classified_obj.get_information(),
                        class_classified_obj.get_id()))
                    return class_classified_obj
            except Exception:
                # A faulty plugin must not stop the remaining plugins from
                # being tried; interpreter-level signals (KeyboardInterrupt,
                # SystemExit) are deliberately no longer swallowed here.
                CLogger().debug('插件[{0}]解析出现异常, 请检查!'.format(plugin_id))

        return None
Exemplo n.º 3
0
    def get_information(self) -> dict:
        """
        Extend the parent's plugin information with this plugin's settings.

        When a metadata XML object is loaded, the title is taken from the
        product-name node of that metadata. All remaining entries are fixed
        values taken from constants on ``self``.

        :return: the information dictionary obtained from the parent class,
            with this plugin's entries written into it
        """
        info = super().get_information()

        metadata_xml = self.__metadata_xml_obj__
        if metadata_xml is not None:
            title_node = metadata_xml.xpath_one(
                self.Path_21AT_MD_Content_ProductName)
            info[self.Plugins_Info_Title] = CXml.get_element_text(title_node)

        # Original note next to the type code: '110001'
        info[self.Plugins_Info_Type_Code] = None

        group = self.DataGroup_Industry_Land_DataSet
        info[self.Plugins_Info_Group] = group
        info[self.Plugins_Info_Group_Title] = self.data_group_title(group)

        # Original notes: catalog 'land', catalog title '国土行业'
        catalog = self.DataCatalog_Land
        info[self.Plugins_Info_Catalog] = catalog
        info[self.Plugins_Info_Catalog_Title] = self.data_catalog_title(catalog)

        # Engine selection, child-object flag, and the data-kind / QA switches.
        info.update({
            self.Plugins_Info_MetaDataEngine: None,
            self.Plugins_Info_BusMetaDataEngine: self.Engine_Custom,
            self.Plugins_Info_DetailEngine: self.DetailEngine_Busdataset,
            self.Plugins_Info_HasChildObj: self.DB_True,
            self.Plugins_Info_Is_Spatial: self.DB_False,
            self.Plugins_Info_Is_Dataset: self.DB_True,
            self.Plugins_Info_Spatial_Qa: self.DB_False,
            self.Plugins_Info_Time_Qa: self.DB_True,
            self.Plugins_Info_Visual_Qa: self.DB_False,
        })

        return info
Exemplo n.º 4
0
    def a_xml_element(cls, audit_id, audit_title, audit_group, audit_result,
                      xml_obj: CXml, xpath: str, qa_items: dict) -> list:
        """
        Audit the text of the element found at ``xpath`` in an XML metadata
        object against the checks described by ``qa_items``.

        :param audit_id: passed to ``__init_audit_dict__``
        :param audit_title: passed to ``__init_audit_dict__`` and used in the
            label handed to the value check
        :param audit_group: passed to ``__init_audit_dict__``
        :param audit_result: passed to ``__init_audit_dict__``
        :param xml_obj: XML metadata object; may be None
        :param xpath: xpath of the element to audit
        :param qa_items: check items forwarded to ``__a_check_value__``
        :return: a one-item list holding the audit dictionary with a message
            when the XML object or the element is missing; otherwise whatever
            ``__a_check_value__`` returns
        """
        audit_record = cls.__init_audit_dict__(audit_id, audit_title,
                                               audit_group, audit_result)

        # Guard: no XML object at all.
        if xml_obj is None:
            audit_record[cls.Name_Message] = \
                'XML对象不合法, 节点[{0}]不存在'.format(xpath)
            return [audit_record]

        # Guard: the requested node is absent.
        node = xml_obj.xpath_one(xpath)
        if node is None:
            audit_record[cls.Name_Message] = \
                'XML对象的节点[{0}]不存在, 请检查修正!'.format(xpath)
            return [audit_record]

        node_text = CXml.get_element_text(node)
        value_label = '属性[{0}]'.format(audit_title)
        return cls.__a_check_value__(audit_record, node_text, value_label,
                                     qa_items)
Exemplo n.º 5
0
 def metadata_bus_2_params(self, metadata_xml: CXml, params: dict):
     """
     Copy every direct child of the business metadata root into ``params``.

     Both the element name (used as key) and the element text (used as
     value) are lower-cased and stripped before being stored.

     :param metadata_xml: XML object queried with ``Path_MD_Bus_Root``
     :param params: dictionary updated in place
     """
     children_xpath = '{0}/*'.format(self.Path_MD_Bus_Root)
     for node in metadata_xml.xpath(children_xpath):
         key = CXml.get_element_name(node).lower().strip()
         value = CXml.get_element_text(node).lower().strip()
         params[key] = value
Exemplo n.º 6
0
 def rule_id(self, default_value):
     """
     Return the product type stored in the rule content.

     :param default_value: value returned when the rule content is an empty
         string, or when it cannot be loaded or queried
     :return: text of the node at ``Path_21AT_MD_Content_ProductType``, or
         ``default_value``
     """
     if self.__rule_content == '':
         return default_value

     xml_obj = CXml()
     try:
         xml_obj.load_xml(self.__rule_content)
         return CXml.get_element_text(
             xml_obj.xpath_one(self.Path_21AT_MD_Content_ProductType))
     except Exception:
         # Unreadable rule content falls back to the default on purpose;
         # KeyboardInterrupt / SystemExit are no longer swallowed.
         return default_value
Exemplo n.º 7
0
 def test_get_element_text(self):
     """
     Read the text of a single element node.

     :return:
     """
     document = CXml()
     document.load_xml(
         '<root name="hello world"><element>world</element></root>')
     first_match = document.xpath('/root/element')[0]
     assert CXml.get_element_text(first_match) == 'world'
Exemplo n.º 8
0
    def plugins_classified(cls, file_info: CDMFilePathInfoEx) -> CPlugins:
        """
        Identify the plugin that recognises ``file_info``.

        1. If the rule content of ``file_info`` lists preferred plugins (the
           directory list or the file list, depending on the file type), try
           them in order.
        2. Otherwise, if the rule content declares a rule type, try the
           plugin lists of the application settings whose keyword equals that
           type (case-insensitively).
        3. If there is no rule type, try the plugin lists of the application
           settings whose keyword is empty or is a sub-path of the file's
           relative path.
        4. Finally fall back to ``__plugins_classified_of_directory__``.

        The first plugin that recognises the file is returned. Previously, in
        step 2, a later matching settings entry that failed could overwrite
        an earlier successful result with None.

        :param file_info: description of the file or directory to classify
        :return: the result of the first step that recognises the file;
            otherwise whatever ``__plugins_classified_of_directory__`` returns
        """
        # Step 1: plugin list given explicitly by the rule content.
        if file_info.file_type == cls.FileType_Dir:
            plugin_node_list = CXml.xml_xpath(file_info.rule_content,
                                              cls.Path_MD_Rule_Plugins_Dir)
        else:
            plugin_node_list = CXml.xml_xpath(file_info.rule_content,
                                              cls.Path_MD_Rule_Plugins_File)

        if len(plugin_node_list) > 0:
            class_classified_obj = cls.__plugins_classified_by_plugin_node_list__(
                file_info, plugin_node_list)
            if class_classified_obj is not None:
                return class_classified_obj

        # Steps 2 and 3 walk the same settings array; only the way a settings
        # keyword is matched differs.
        rule_type = CXml.get_element_text(
            CXml.xml_xpath_one(file_info.rule_content, cls.Path_MD_Rule_Type))
        match_by_rule_type = not CUtils.equal_ignore_case(rule_type, '')
        file_path = None
        if not match_by_rule_type:
            file_path = file_info.file_path_with_rel_path

        plugins_json_array = settings.application.xpath_one(
            cls.Path_Setting_MetaData_Plugins_Dir, None)
        if plugins_json_array is not None:
            for plugins_define in plugins_json_array:
                key_word = CUtils.any_2_str(
                    CUtils.dict_value_by_name(plugins_define,
                                              cls.Name_Keyword, None))
                plugin_list = CUtils.dict_value_by_name(
                    plugins_define, cls.Name_Plugin, None)
                if plugin_list is None:
                    continue

                if match_by_rule_type:
                    # The rule type is expected to equal the dataset type.
                    matched = CUtils.equal_ignore_case(key_word, rule_type)
                else:
                    # NOTE: an empty keyword means every sub-directory uses
                    # this plugin list first.
                    matched = CUtils.equal_ignore_case(key_word, '') \
                        or CFile.subpath_in_path(CUtils.any_2_str(key_word),
                                                 file_path)
                if not matched:
                    continue

                class_classified_obj = cls.__plugins_classified_by_plugin_list__(
                    file_info, plugin_list)
                if class_classified_obj is not None:
                    return class_classified_obj

        # Step 4: nothing matched so far.
        return cls.__plugins_classified_of_directory__(file_info)
Exemplo n.º 9
0
    def process_mission(self, dataset) -> str:
        """
        Process one inbound mission taken from the first row of ``dataset``.

        The algorithm is involved; see the [### 数据入库调度] section of
        readme.md. In outline: verify the inbound files, log the inbound
        record, and then either only update the metadata status (core / mix
        storage) or move files and metadata to a destination storage, moving
        the files back when a later step fails.

        Every exit path records its result via ``update_ib_result`` before
        returning it.

        :param dataset: query result; row 0 supplies the ``query_*`` fields
        :return: a result built by ``CResult.merge_result`` or returned by one
            of the helper steps
        """
        ds_src_storage_id = dataset.value_by_name(0, 'query_storage_id', '')
        ds_src_storage_type = dataset.value_by_name(0, 'query_storage_type',
                                                    self.Storage_Type_Mix)
        ds_src_root_path = dataset.value_by_name(0, 'query_rootpath', '')
        ds_src_dir_id = dataset.value_by_name(0, 'query_ib_dir_id', '')

        ds_ib_id = dataset.value_by_name(0, 'query_ib_id', '')
        ds_ib_directory_name = dataset.value_by_name(0,
                                                     'query_ib_relation_dir',
                                                     '')
        ds_ib_batch_no = dataset.value_by_name(0, 'query_ib_batchno', '')
        ds_ib_option = dataset.value_by_name(0, 'query_ib_option', '')

        src_need_storage_size = self.get_storage_size(ds_ib_id,
                                                      ds_src_storage_id,
                                                      ds_ib_directory_name,
                                                      ds_ib_option)
        src_path = ds_src_root_path
        if not CUtils.equal_ignore_case(ds_ib_directory_name, ''):
            src_path = CFile.join_file(src_path, ds_ib_directory_name)
        src_dataset_metadata_filename = CFile.join_file(
            src_path, self.FileName_MetaData_Bus_21AT)

        CLogger().debug('入库的目录为: {0}.{1}'.format(ds_ib_id,
                                                 ds_ib_directory_name))
        try:
            # Check that all files agree with the metadata.
            all_ib_file_or_path_existed = self.check_all_ib_file_or_path_existed(
                ds_ib_id)
            if not CResult.result_success(all_ib_file_or_path_existed):
                self.update_ib_result(ds_ib_id, all_ib_file_or_path_existed)
                return all_ib_file_or_path_existed

            # Save the inbound record to the log.
            result = self.ib_log(ds_ib_id, ds_src_storage_id,
                                 ds_ib_directory_name)
            if not CResult.result_success(result):
                self.update_ib_result(ds_ib_id, result)
                return result

            # Inbound directly inside core or mix storage: only the metadata
            # status has to change.
            if CUtils.equal_ignore_case(ds_src_storage_type, self.Storage_Type_Mix) \
                    or CUtils.equal_ignore_case(ds_src_storage_type, self.Storage_Type_Core):
                result_ib_in_core_or_mix_storage = self.update_ib_data_status_in_core_or_mix_storage(
                    ds_ib_id, ds_src_storage_id, ds_ib_directory_name,
                    ds_src_dir_id)
                self.update_ib_result(ds_ib_id,
                                      result_ib_in_core_or_mix_storage)
                return result_ib_in_core_or_mix_storage

            # Load the metadata file of the dataset waiting in the directory.
            src_dataset_xml = CXml()
            src_dataset_type = self.Name_Default
            if CFile.file_or_path_exist(src_dataset_metadata_filename):
                src_dataset_xml.load_file(src_dataset_metadata_filename)
                src_dataset_type = CXml.get_element_text(
                    src_dataset_xml.xpath_one(self.Path_MD_Bus_ProductType))
            if CUtils.equal_ignore_case(src_dataset_type, ''):
                src_dataset_type = self.Name_Default

            # Find the matching inbound schema.
            src_ib_schema = self.get_ib_schema(src_dataset_type, ds_ib_option)
            if src_ib_schema is None:
                result = CResult.merge_result(
                    self.Failure,
                    '目录为[{0}.{1}]的数据集类型为[{2}], 未找到匹配的入库模式, 请检查修正后重试!'.format(
                        ds_ib_id, ds_ib_directory_name, src_dataset_type))
                self.update_ib_result(ds_ib_id, result)
                return result

            # Work out the destination storage, its root path, the id of the
            # destination sub-directory, the sub-directory, and a message.
            dest_ib_storage_id, dest_ib_root_path, desc_ib_dir_id, dest_ib_subpath, message = self.get_dest_storage(
                ds_ib_batch_no, src_need_storage_size, ds_ib_option,
                src_ib_schema, src_dataset_xml)
            if dest_ib_storage_id is None or dest_ib_subpath is None:
                result = CResult.merge_result(self.Failure, message)
                self.update_ib_result(ds_ib_id, result)
                return result

            dest_ib_subpath = CFile.unify(dest_ib_subpath)
            # Optional check, switched on in the inbound options, that no
            # source file is locked.
            if CJson.json_attr_value(ds_ib_option,
                                     self.Path_IB_Switch_CheckFileLocked,
                                     self.DB_False) == self.DB_True:
                src_ib_files_not_locked, message = self.check_src_ib_files_not_locked(
                    ds_src_root_path, src_path)
                if not src_ib_files_not_locked:
                    result = CResult.merge_result(self.Failure, message)
                    self.update_ib_result(ds_ib_id, result)
                    return result

            proc_ib_src_path = ds_src_root_path
            proc_ib_dest_path = dest_ib_root_path
            if not CUtils.equal_ignore_case(dest_ib_subpath, ''):
                proc_ib_dest_path = CFile.join_file(dest_ib_root_path,
                                                    dest_ib_subpath)

            if not CUtils.equal_ignore_case(ds_ib_directory_name, ''):
                proc_ib_src_path = CFile.join_file(proc_ib_src_path,
                                                   ds_ib_directory_name)
                proc_ib_dest_path = CFile.join_file(proc_ib_dest_path,
                                                    ds_ib_directory_name)

            # ---------------------------------------------------------------
            # All pre-inbound checks are done at this point.
            # Move the source directory to the destination; for the root
            # directory only the files are moved.
            result = self.ib_files_move(
                proc_ib_src_path, proc_ib_dest_path,
                CUtils.equal_ignore_case(ds_ib_directory_name, ''))
            if not CResult.result_success(result):
                # Move the data back with the same method; in theory this
                # should always succeed.
                sub_result = self.ib_files_move(
                    proc_ib_dest_path, proc_ib_src_path,
                    CUtils.equal_ignore_case(ds_ib_directory_name, ''))
                if not CResult.result_success(sub_result):
                    sub_result_message = CResult.result_message(sub_result)
                    result_message = CResult.result_message(result)
                    result = CResult.merge_result(
                        self.Failure,
                        '{0}\n{1}'.format(result_message, sub_result_message))

                self.update_ib_result(ds_ib_id, result)
                return result

            # Move the source metadata to the destination storage; on error
            # the method rolls back internally.
            result = self.src_ib_metadata_move_to_storage(
                ds_ib_id, ds_src_storage_id, ds_src_dir_id,
                ds_ib_directory_name, dest_ib_storage_id, desc_ib_dir_id,
                dest_ib_subpath)
            if not CResult.result_success(result):
                # Move the data back with the same method; in theory this
                # should always succeed.
                sub_result = self.ib_files_move(
                    proc_ib_dest_path, proc_ib_src_path,
                    CUtils.equal_ignore_case(ds_ib_directory_name, ''))
                if not CResult.result_success(sub_result):
                    sub_result_message = CResult.result_message(sub_result)
                    result_message = CResult.result_message(result)
                    # Join both messages with a real newline, as in the
                    # file-move rollback above (this was the literal '/n').
                    result = CResult.merge_result(
                        self.Failure,
                        '{0}\n{1}'.format(result_message, sub_result_message))

                self.update_ib_result(ds_ib_id, result)
                return result

            result = CResult.merge_result(
                self.Success,
                '目录为[{0}.{1}]入库成功!'.format(ds_ib_id, ds_ib_directory_name))
            self.update_ib_result(ds_ib_id, result)
            return result
        except Exception as error:
            # Any unexpected error is reported as a failed inbound result
            # instead of propagating to the caller.
            result = CResult.merge_result(
                self.Failure, '目录为[{0}.{1}]入库出现异常! 错误原因为: {2}'.format(
                    ds_ib_id, ds_ib_directory_name, str(error)))
            self.update_ib_result(ds_ib_id, result)
            return result