def __db_insert(self, ib_id):
    """
    Insert a new dm2_storage_directory record for the current directory.

    The three scan-status columns are written as 1, dsd_directory_valid as -1
    and dsdlastmodifytime as the database's now().

    :param ib_id: value stored in the dsd_ib_id column
    :return:
    """
    # The statement text is kept identical to the original single-line form.
    sql_insert = (
        ' insert into dm2_storage_directory(dsdid, dsdparentid, dsdstorageid, dsddirectory, dsddirtype,'
        ' dsdlastmodifytime, dsddirectoryname, dsdpath, dsddircreatetime, dsddirlastmodifytime,'
        ' dsdparentobjid, dsdscanstatus, dsdscanfilestatus, dsdscandirstatus, dsd_directory_valid, dsd_ib_id)'
        ' values(:dsdid, :dsdparentid, :dsdstorageid, :dsddirectory, :dsddirtype, now(),'
        ' :dsddirectoryname, :dsdpath, :dsddircreatetime, :dsddirlastmodifytime, :dsdparentobjid,'
        ' 1, 1, 1, -1, :ib_id) '
    )
    params = {
        'dsdid': self.my_id,
        'dsdparentid': self.parent_id,
        'dsdstorageid': self.storage_id,
        'dsddirectory': CFile.unify(self.file_name_with_rel_path),
        'dsddirtype': self.Dir_Type_Directory,
        'dsddirectoryname': self.file_name_without_path,
        'dsdpath': CFile.unify(self.file_path_with_rel_path),
        'dsddircreatetime': self.file_create_time,
        'dsddirlastmodifytime': self.file_modify_time,
        'dsdparentobjid': self.owner_obj_id,
        'ib_id': ib_id,
    }
    database = CFactory().give_me_db(self.db_server_id)
    database.execute(sql_insert, params)
def check_src_ib_files_not_locked(self, root_path, parent_path):
    """
    Check that no file under the given directory is locked.

    1. If no file is locked, the first returned value is True.
    2. If any file is locked, the first returned value is False and the
       message names the locked files.

    At most three locked files are listed; when there are more, '...' is
    appended after them.

    TODO(note): every file is checked for locks, which is extremely slow
    when processing tile data.

    :param root_path: root directory
    :param parent_path: directory below root_path; the joined path is scanned
        and is prefixed to each file name in the message
    :return: tuple (all_files_unlocked, message); message is '' when nothing
        is locked
    """
    max_listed_files = 3

    parent_path = CFile.join_file(root_path, parent_path)
    locked_file_list = CFile.find_locked_file_in_path(parent_path)
    locked_file_count = len(locked_file_list)
    if locked_file_count == 0:
        return True, ''

    message = ''
    # Only the first few files are reported; the original loop appended every
    # locked file even though it had already decided to truncate with '...'.
    for locked_file in locked_file_list[:max_listed_files]:
        message = CUtils.str_append(
            message, CFile.join_file(parent_path, locked_file))
    if locked_file_count > max_listed_files:
        message = CUtils.str_append(message, '...')
    message = CUtils.str_append(message, '被其他应用占用了, 无法入库, 请检查解除锁定后重试入库! ')
    return False, message
def classified(self):
    """
    Recognition rule for land-industry dem_part_2 data.

    Status: done. Owner: 李宪.

    A file is recognised when a same-named .tif or .bil exists beside it and
    its main name consists of two decimal texts joined by exactly one '-'.

    :return: tuple (object_confirm, object_name)
    """
    super().classified()
    info = self.file_info
    main_name = info.file_main_name

    base_with_path = CFile.join_file(info.file_path, main_name)
    tif_exists = CFile.file_or_path_exist('{0}.{1}'.format(base_with_path, self.Name_Tif))
    bil_exists = CFile.file_or_path_exist('{0}.{1}'.format(base_with_path, self.Name_Bil))
    if not (tif_exists or bil_exists):
        return self.Object_Confirm_IUnKnown, self._object_name

    # The main name must contain exactly one '-'.
    if main_name.count('-') != 1:
        return self.Object_Confirm_IUnKnown, self._object_name

    # Both sides of the '-' must be decimal texts.
    left_part, right_part = main_name.split('-')
    if CUtils.text_is_decimal(left_part) is False \
            or CUtils.text_is_decimal(right_part) is False:
        return self.Object_Confirm_IUnKnown, self._object_name

    ext = info.file_ext
    is_main_file = CUtils.equal_ignore_case(ext, self.Name_Tif) \
        or CUtils.equal_ignore_case(ext, self.Name_Bil)
    if is_main_file:
        self._object_confirm = self.Object_Confirm_IKnown
        self._object_name = main_name
    else:
        self._object_confirm = self.Object_Confirm_IKnown_Not
        self._object_name = None
    return self._object_confirm, self._object_name
def set_custom_affiliated_file(self):
    """
    Append custom affiliated files to the object's detail file list.

    Each rule returned by get_custom_affiliated_file_character() may carry a
    directory, an include regular expression and an optional exclude regular
    expression. Rules lacking a directory or an include expression are
    skipped. Files in the directory matching the include expression are
    appended, unless their file name matches the exclude expression.
    """
    for rule in self.get_custom_affiliated_file_character():
        search_path = CUtils.dict_value_by_name(rule, self.Name_FilePath, None)
        include_regex = CUtils.dict_value_by_name(rule, self.Name_RegularExpression, None)
        exclude_regex = CUtils.dict_value_by_name(rule, self.Name_No_Match_RegularExpression, None)
        if search_path is None or include_regex is None:
            continue

        # Regex match of the files in the configured directory.
        candidates = CFile.file_or_dir_fullname_of_path(
            search_path, False, include_regex, CFile.MatchType_Regex)
        for candidate in candidates:
            if exclude_regex is None \
                    or not CUtils.text_match_re(CFile.file_name(candidate), exclude_regex):
                self._object_detail_file_full_name_list.append(candidate)
def add_file_to_detail_list(self, match_name):
    """
    Collect the affiliated files of land-industry third-survey data (mosaic imagery).

    Status: done. Owner: 王学谦.

    1. Every file in the object's directory matching '<match_name>*xq.*' is
       added, except files whose main name equals the object's own name.
    2. The title stored in ro_global_dim_space for gdsid = match_name is
       looked up; if '<match_name><title>.mdb' exists in the directory it is
       added as well. This step is best-effort: failures are ignored.

    :param match_name: prefix shared by the affiliated files; also used as
        the gdsid key of the lookup
    :return:
    """
    file_main_name = self._object_name
    file_path = self.file_info.file_path
    if CUtils.equal_ignore_case(file_path, ''):
        return

    # Wildcard match of the affiliated files.
    match_str = '{0}*xq.*'.format(match_name)
    match_file_list = CFile.file_or_dir_fullname_of_path(
        file_path, False, match_str, CFile.MatchType_Common)
    for file_with_path in match_file_list:
        # Skip the object itself and files sharing its main name.
        if not CUtils.equal_ignore_case(CFile.file_main_name(file_with_path), file_main_name):
            self.add_file_to_details(file_with_path)

    try:
        db = CFactory().give_me_db(self.file_info.db_server_id)
        # match_name is bound as a parameter instead of being formatted into
        # the SQL text, so a quote in the name cannot break the statement.
        title = db.one_row(
            'select gdstitle from ro_global_dim_space where gdsid = :gdsid',
            {'gdsid': str(match_name)}
        ).value_by_name(0, 'gdstitle', None)
        # No title means there is no metadata name to build.
        if title is not None:
            metadata_file_name = '{0}{1}.mdb'.format(match_name, title)
            metadata_file_name_with_path = CFile.join_file(file_path, metadata_file_name)
            if CFile.file_or_path_exist(metadata_file_name_with_path):
                self.add_file_to_details(metadata_file_name_with_path)
    except Exception:
        # Deliberately best-effort: the .mdb metadata file is optional.
        pass
def add_file_to_detail_list(self, match_name):
    """
    Collect the affiliated files of land-industry national-conditions data (mosaic imagery).

    Status: done. Owner: 王学谦.

    Among the files in the object's directory matching '<match_name>*.*',
    a file is added when its main name is match_name followed by F, M or P
    (case-insensitive), or when its extension is one of the listed archive
    and document types. Files whose main name equals the object's own name
    are skipped.

    :param match_name: prefix shared by the affiliated files
    :return:
    """
    own_main_name = self._object_name
    directory = self.file_info.file_path
    if CUtils.equal_ignore_case(directory, ''):
        return

    glob_pattern = '{0}*.*'.format(match_name)
    candidates = CFile.file_or_dir_fullname_of_path(
        directory, False, glob_pattern, CFile.MatchType_Common)
    # Main affiliated files: <match_name> + F/M/P.
    main_affiliated_regex = r'(?i)^{0}[FMP]$'.format(match_name)
    affiliated_exts = ('rar', 'zip', 'doc', 'docx', 'xls', 'xlsx', 'txt', 'xml')

    for candidate in candidates:
        # Skip the object itself and files sharing its main name.
        if CUtils.equal_ignore_case(CFile.file_main_name(candidate), own_main_name):
            continue
        if CUtils.text_match_re(CFile.file_main_name(candidate), main_affiliated_regex) \
                or CFile.file_ext(candidate).lower() in affiliated_exts:
            self.add_file_to_details(candidate)
def get_plugins_instance_by_object_id(cls, db_id, object_id):
    """
    Return the recognition plugin instance for the given object id.

    The object's dsoobjecttype (plugin module name, e.g. plugins_8000_dom_10)
    and dsodatatype (dir or file) are read from dm2_storage_object. The
    plugin is instantiated only if '<plugins root>/<dsodatatype>/<dsoobjecttype>.py'
    exists.

    :param db_id: database identifier passed to CFactory().give_me_db
    :param object_id: value of dm2_storage_object.dsoid
    :return: the plugin instance, or None when the plugin file does not exist
    """
    # object_id is bound as a parameter instead of being formatted into the
    # SQL text; str() keeps the value a string, as the quoted literal was.
    sql_query = ' SELECT dsoobjecttype, dsodatatype FROM dm2_storage_object WHERE dsoid = :dsoid '
    dataset = CFactory().give_me_db(db_id).one_row(sql_query, {'dsoid': str(object_id)})
    object_plugin_file_main_name = dataset.value_by_name(0, 'dsoobjecttype', '')  # e.g. plugins_8000_dom_10
    object_plugin_type = dataset.value_by_name(0, 'dsodatatype', '')  # data type: dir or file

    # Package that holds the plugins of this data type.
    plugins_root_package_name = '{0}.{1}'.format(
        CSys.get_plugins_package_root_name(), object_plugin_type)

    # Check that the plugin source file exists before instantiating it.
    plugins_type_root_dir = CFile.join_file(CSys.get_plugins_root_dir(), object_plugin_type)
    plugins_file = CFile.join_file(
        plugins_type_root_dir, '{0}.py'.format(object_plugin_file_main_name))
    if not CFile.file_or_path_exist(plugins_file):
        return None

    return cls.create_plugins_instance(
        plugins_root_package_name,
        object_plugin_file_main_name,
        None
    )
def test_file_info_list(self):
    """
    Return the test case descriptors for this plugin.

    Each descriptor gives a file type, a relative path, the expected
    confirmation value and the expected object name.

    :return: list of dicts, one per test case
    """
    sep = CFile.sep()
    # (relative path template, expected object name)
    # A third case, 'FileGeoNone.gdb' / 'FileGeoNone', is disabled in the
    # original source.
    cases = (
        ('gdb数据集{0}FileGeodb.gdb', 'FileGeodb'),
        ('gdb数据集{0}FileGeodb2.gdb', 'FileGeodb2'),
    )
    return [
        {
            self.Name_Test_File_Type: self.FileType_Dir,
            self.Name_Test_file_path: path_template.format(sep),
            self.Name_Test_object_confirm: self.Object_Confirm_IKnown,
            self.Name_Test_object_name: expected_name
        }
        for path_template, expected_name in cases
    ]
def qa_file_custom(self, parser: CMetaDataParser):
    """
    Custom file-existence quality check, run before metadata parsing.

    Status: done. Owner: 王学谦.

    Looks for a .shp file in '<path before FenFu>/FenFu/<coordinate system
    title>' and records the outcome in the parser's total quality list:
    a warning when none is found, a pass naming the first one otherwise.

    :param parser: metadata parser whose quality list receives the result
    :return:
    """
    file_path = self.file_info.file_path
    # NOTE(review): when 'FenFu' is absent, find() returns -1 and the slice
    # silently drops the last two characters of file_path - confirm callers
    # always pass a path containing 'FenFu'.
    letter_location = file_path.find('FenFu')
    shp_path = CFile.join_file(
        file_path[:letter_location - 1], 'FenFu', self.get_coordinate_system_title())
    # The dot is escaped so only a real '.shp' extension matches; the
    # original pattern also accepted any name merely ending in 'shp'.
    shp_list = CFile.file_or_subpath_of_path(shp_path, r'(?i)\.shp$', CFile.MatchType_Regex)

    if len(shp_list) == 0:
        shp_file_name = ''
        qa_result = self.QA_Result_Warn
        qa_message = '本文件缺少影像时相接边图'
    else:
        shp_file_name = shp_list[0]
        qa_result = self.QA_Result_Pass
        qa_message = '影像时相接边图[{0}]存在'.format(shp_list[0])

    parser.metadata.quality.append_total_quality(
        {
            self.Name_FileName: shp_file_name,
            self.Name_ID: 'shp_file',
            self.Name_Title: '影像时相接边图',
            self.Name_Result: qa_result,
            self.Name_Group: self.QA_Group_Data_Integrity,
            self.Name_Message: qa_message
        }
    )
def classified(self):
    """
    Recognise a directory as a 21AT business dataset.

    The directory is recognised when it contains the 21AT business metadata
    XML file and the product type read from that file equals, ignoring case,
    the type declared by this plugin's get_information(). The object name is
    the product name read from the same file.

    If the metadata file cannot be loaded or read, a warning is logged and
    the object stays unknown.

    :return: tuple (object_confirm, object_name)
    """
    self._object_confirm = self.Object_Confirm_IUnKnown
    self._object_name = None

    current_path = self.file_info.file_name_with_full_path
    metadata_file_name = CFile.join_file(current_path, self.FileName_MetaData_Bus_21AT)
    if CFile.file_or_path_exist(metadata_file_name):
        self.__bus_metadata_xml_file_name__ = metadata_file_name
        self.__metadata_xml_obj__ = CXml()
        try:
            self.__metadata_xml_obj__.load_file(metadata_file_name)
            self.__classified_object_type = CXml.get_element_text(
                self.__metadata_xml_obj__.xpath_one(
                    self.Path_21AT_MD_Content_ProductType))

            plugin_type = CUtils.dict_value_by_name(
                self.get_information(), self.Plugins_Info_Type, None)
            if CUtils.equal_ignore_case(self.__classified_object_type, plugin_type):
                # Read the name before flagging the object as known, so a
                # failure here cannot leave it "known" without a name.
                product_name = CXml.get_element_text(
                    self.__metadata_xml_obj__.xpath_one(
                        self.Path_21AT_MD_Content_ProductName))
                self._object_confirm = self.Object_Confirm_IKnown
                self._object_name = product_name
        except Exception:
            self.__metadata_xml_obj__ = None
            # The original message kept a literal '{0}' because format() was
            # never called.
            CLogger().warning(
                '发现文件{0}符合二十一世纪业务数据集标准, 但该文件格式有误, 无法打开! '.format(
                    metadata_file_name))

    return self._object_confirm, self._object_name
def classified(self):
    """
    Recognition rule for land-industry dem_noframe (non-framed) data.

    Status: done. Owner: 李宪.

    A file is recognised when a same-named .tif or .img exists beside it.
    The .tif/.img file itself is the object and gets '<main name>_21at.xml'
    added to its detail files; any other extension is marked as not an
    object.

    :return: tuple (object_confirm, object_name)
    """
    super().classified()
    info = self.file_info

    base_with_path = CFile.join_file(info.file_path, info.file_main_name)
    tif_exists = CFile.file_or_path_exist('{0}.{1}'.format(base_with_path, self.Name_Tif))
    img_exists = CFile.file_or_path_exist('{0}.{1}'.format(base_with_path, self.Name_Img))
    if not (tif_exists or img_exists):
        return self.Object_Confirm_IUnKnown, self._object_name

    is_main_file = CUtils.equal_ignore_case(info.file_ext, self.Name_Tif) \
        or CUtils.equal_ignore_case(info.file_ext, self.Name_Img)
    if not is_main_file:
        self._object_confirm = self.Object_Confirm_IKnown_Not
        self._object_name = None
    else:
        self._object_confirm = self.Object_Confirm_IKnown
        self._object_name = info.file_main_name
        # Add the metadata file to the affiliated file list.
        self.add_file_to_details('{0}_21at.xml'.format(info.file_main_name_with_full_path))

    return self._object_confirm, self._object_name
def process_metadata_bus_dict(self):
    """
    Convert the object's business metadata XML into a dict and store it.

    The XML text comes from the 'dsometadataxml_bus' field of the dataset.
    The directory of the 'dso_browser' file, below the configured view root,
    is given to the plugin to locate additional business metadata files.
    The resulting dict is stored through set_metadata_bus_dict().

    :return: the result returned by the plugin's metadata_bus_xml_to_dict, or
        a failure result carrying the error text if anything raises
    """
    dataset = self._dataset
    class_plugins = self.get_class_plugins()
    try:
        bus_xml = CXml()
        bus_xml.load_xml(dataset.value_by_name(0, 'dsometadataxml_bus', ''))

        view_root = settings.application.xpath_one(self.Path_Setting_MetaData_Dir_View, None)
        browser_dir = CFile.file_path(dataset.value_by_name(0, 'dso_browser', None))
        bus_filename_dict = class_plugins.get_multiple_metadata_bus_filename_with_path(
            CFile.join_file(view_root, browser_dir))

        result, metadata_bus_dict = class_plugins.metadata_bus_xml_to_dict(
            bus_xml, bus_filename_dict)
        self.set_metadata_bus_dict(metadata_bus_dict)
    except Exception as error:
        return CResult.merge_result(
            self.Failure,
            '卫星数据的业务元数据的详细内容解析出错!原因为{0}'.format(str(error)))
    else:
        return result
def classified(self):
    """
    Recognition rule for mbtiles data.

    - An .mbtiles file whose main name ends in '_0' after a year, or after a
      year plus a two-character period code, is the object; its affiliated
      files are then collected.
    - An .mbtiles file with the same naming but any other numeric suffix is
      marked as not an object.
    - An XML file with a sibling '<main name>_0.mbtiles' is marked as not an
      object.
    - Everything else is unknown.

    :return: tuple (object_confirm, object_name)
    """
    file_main_name = self.file_info.file_main_name
    file_ext = self.file_info.file_ext
    file_main_name_with_path = CFile.join_file(self.file_info.file_path, file_main_name)

    confirm = self.Object_Confirm_IUnKnown
    is_main_object = False
    if CUtils.equal_ignore_case(file_ext, self.FileExt_Mbtiles):
        if CUtils.text_match_re(file_main_name, r'(?i)^\S+[12]\d{3}[01HQ]\d[_][0]$') \
                or CUtils.text_match_re(file_main_name, r'(?i)^\S+[12]\d{3}[_][0]$'):
            # Name ends with _0.
            confirm = self.Object_Confirm_IKnown
            is_main_object = True
        elif CUtils.text_match_re(file_main_name, r'(?i)^\S+[12]\d{3}[01HQ]\d[_]\d+$') \
                or CUtils.text_match_re(file_main_name, r'(?i)^\S+[12]\d{3}[_]\d+$'):
            # Name ends with another _<digits> suffix.
            confirm = self.Object_Confirm_IKnown_Not
    elif CUtils.equal_ignore_case(file_ext, self.Transformer_XML) \
            and CFile.file_or_path_exist('{0}_0.mbtiles'.format(file_main_name_with_path)):
        confirm = self.Object_Confirm_IKnown_Not

    self._object_confirm = confirm
    self._object_name = file_main_name if is_main_object else None
    if is_main_object:
        self.add_file_to_detail_list()
    return self._object_confirm, self._object_name
def register_dm_modules(self):
    """
    Rebuild the dm2_modules table from the data-access module files.

    The table is truncated (cascade) first. Then every *.py file in the
    data-access modules root directory whose name does not start with '_' is
    instantiated, and one row is inserted with the module name as dmid and
    the title from the module's information() as dmtitle (the module name is
    the fallback title).
    """
    sql_clear = ' truncate table dm2_modules cascade '
    sql_register = ' insert into dm2_modules(dmid, dmtitle) values (:dmid, :dmtitle) '

    CFactory().give_me_db().execute(sql_clear)

    modules_root_dir = CSys.get_metadata_data_access_modules_root_dir()
    candidates = CFile.file_or_subpath_of_path(
        modules_root_dir, '*.{0}'.format(self.FileExt_Py))
    for candidate in candidates:
        if not CFile.is_file(CFile.join_file(modules_root_dir, candidate)):
            continue
        if str(candidate).startswith('_'):
            continue

        module_name = CFile.file_main_name(candidate)
        module_obj = CObject.create_module_instance(
            CSys.get_metadata_data_access_modules_root_name(),
            module_name,
            CResource.DB_Server_ID_Default)
        module_info = module_obj.information()
        module_title = CUtils.dict_value_by_name(module_info, CResource.Name_Title, module_name)
        CFactory().give_me_db().execute(
            sql_register, {'dmid': module_name, 'dmtitle': module_title})
def classified(self):
    """
    Recognition rule for land-industry ortho data (single-scene orthoimage).

    Status: done. Owner: 王学谦.

    The object name is the file's main name, minus the trailing '_21at' when
    the file is a '*_21at.xml' metadata file. A same-named .tif or .img must
    exist beside the file. The .tif/.img itself is the object and gets
    '<main name>_21at.xml' added to its detail files; any other extension is
    marked as not an object.

    :return: tuple (object_confirm, object_name)
    """
    super().classified()
    info = self.file_info
    file_main_name = info.file_main_name
    file_ext = info.file_ext

    # Strip the 5-character '_21at' suffix of a metadata file's main name.
    if info.file_name_with_full_path.endswith('_21at.xml'):
        object_base_name = file_main_name[:-5]
    else:
        object_base_name = file_main_name

    base_with_path = CFile.join_file(info.file_path, object_base_name)
    # The main raster file must exist.
    main_file_exists = \
        CFile.file_or_path_exist('{0}.{1}'.format(base_with_path, self.Name_Tif)) or \
        CFile.file_or_path_exist('{0}.{1}'.format(base_with_path, self.Name_Img))
    if not main_file_exists:
        return self.Object_Confirm_IUnKnown, self._object_name

    # Decide by extension.
    is_raster = CUtils.equal_ignore_case(file_ext, self.Name_Tif) \
        or CUtils.equal_ignore_case(file_ext, self.Name_Img)
    if not is_raster:
        self._object_confirm = self.Object_Confirm_IKnown_Not
        self._object_name = None
    else:
        self._object_confirm = self.Object_Confirm_IKnown
        self._object_name = file_main_name
        # Add the metadata file to the affiliated file list.
        self.add_file_to_details('{0}_21at.xml'.format(info.file_main_name_with_full_path))

    return self._object_confirm, self._object_name
def process_mission(self, dataset) -> str:
    """
    Scan one storage directory and register its files and sub-directories.

    Steps:
    1. If the retry count has reached abnormal_job_retry_times(), mark the
       directory as failed and stop.
    2. Mark every file and sub-directory under the directory as
       "validity unknown".
    3. Load the scan rule of the closest ancestor directory that has one and
       process the directory's entries with it.
    4. Whatever the outcome, turn entries still marked "unknown" into
       "invalid".

    :param dataset: mission row; the query_dir_id, query_storage_id,
        query_dir_ib_id, query_subpath, query_rootpath, retry_times and
        last_process_memo fields are read from row 0
    :return: the result produced by CResult.merge_result
    """
    ds_id = dataset.value_by_name(0, 'query_dir_id', '')
    ds_storage_id = dataset.value_by_name(0, 'query_storage_id', '')
    inbound_id = dataset.value_by_name(0, 'query_dir_ib_id', None)
    ds_subpath = dataset.value_by_name(0, 'query_subpath', '')
    ds_root_path = dataset.value_by_name(0, 'query_rootpath', '')
    ds_retry_times = dataset.value_by_name(0, 'retry_times', 0)

    if ds_retry_times >= self.abnormal_job_retry_times():
        ds_last_process_memo = CUtils.any_2_str(
            dataset.value_by_name(0, 'last_process_memo', None))
        process_result = CResult.merge_result(
            self.Failure,
            '{0}, \n系统已经重试{1}次, 仍然未能解决, 请人工检查修正后重试!'.format(
                ds_last_process_memo, ds_retry_times))
        self.update_dir_status(ds_id, process_result, self.ProcStatus_Error)
        return process_result

    # An empty sub-path means the storage root itself.
    if ds_subpath == '':
        ds_subpath = ds_root_path
    else:
        ds_subpath = CFile.join_file(ds_root_path, ds_subpath)

    # Mark the validity of all sub-directories and files as unknown.
    self.init_file_or_subpath_valid_unknown(ds_id)
    try:
        sql_get_rule = (
            ' select dsdScanRule from dm2_storage_directory'
            ' where dsdStorageid = :dsdStorageID'
            " and position((dsddirectory || '{0}') in :dsdDirectory) = 1"
            ' and dsdScanRule is not null'
            ' order by dsddirectory desc limit 1 '
        ).format(CFile.sep())
        rule_ds = CFactory().give_me_db(self.get_mission_db_id()).one_row(
            sql_get_rule,
            {
                'dsdStorageID': ds_storage_id,
                'dsdDirectory': ds_subpath
            })
        # The selected column is dsdScanRule; the original read the
        # misspelled 'dsScanRule' and therefore always got the '' default.
        # Lower case is used like every other field name read in this file.
        ds_rule_content = rule_ds.value_by_name(0, 'dsdscanrule', '')

        CLogger().debug('处理的目录为: {0}'.format(ds_subpath))
        self.parser_file_or_subpath_of_path(
            dataset, ds_id, ds_subpath, ds_rule_content, inbound_id)

        result = CResult.merge_result(
            self.Success, '目录为[{0}]下的文件和子目录扫描处理成功!'.format(ds_subpath))
        self.update_dir_status(ds_id, result)
        return result
    except Exception as err:
        result = CResult.merge_result(
            self.Failure,
            '目录为[{0}]下的文件和子目录扫描处理出现错误!错误原因为: {1}'.format(
                ds_subpath, err.__str__()))
        self.update_dir_status(ds_id, result)
        return result
    finally:
        # Entries not confirmed during this scan become invalid.
        self.exchange_file_or_subpath_valid_unknown2invalid(ds_id)
def parser_file_or_subpath_of_path(self, dataset, ds_id, ds_path, ds_rule_content, inbound_id):
    """
    Process the sub-directories and files directly under a directory.

    Entries named in the configured ignore lists are skipped. Every other
    entry is wrapped in a CDMPathInfo (directory) or CDMFileInfo (file); if
    it passes the white/black list check, its database record is checked and
    updated, otherwise it is only logged.

    :param dataset: mission row; query_storage_id and query_dir_parent_objid
        are read from row 0
    :param ds_id: identifier of the directory being processed
    :param ds_path: full path of the directory being processed
    :param ds_rule_content: scan rule content handed to every entry
    :param inbound_id: inbound identifier passed to db_check_and_update
    :return:
    """
    ds_storage_id = dataset.value_by_name(0, 'query_storage_id', '')
    ds_parent_obj_id = dataset.value_by_name(0, 'query_dir_parent_objid', None)

    ignore_file_array = settings.application.xpath_one(
        self.Path_Setting_MetaData_InBound_ignore_file, None)
    ignore_dir_array = settings.application.xpath_one(
        self.Path_Setting_MetaData_InBound_ignore_dir, None)

    for file_name in CFile.file_or_subpath_of_path(ds_path):
        file_name_with_full_path = CFile.join_file(ds_path, file_name)

        if CFile.is_dir(file_name_with_full_path):
            CLogger().debug('在目录{0}下发现子目录: {1}'.format(ds_path, file_name))
            if CUtils.list_count(ignore_dir_array, file_name) > 0:
                CLogger().debug(
                    '子目录: {0}在指定的忽略入库名单中, 将不入库! '.format(file_name))
                continue

            path_obj = CDMPathInfo(
                self.FileType_Dir, file_name_with_full_path,
                ds_storage_id, None, ds_id, ds_parent_obj_id,
                self.get_mission_db_id(), ds_rule_content)
            if path_obj.white_black_valid():
                path_obj.db_check_and_update(inbound_id)
            else:
                CLogger().info('目录[{0}]未通过黑白名单检验, 不允许入库! '.format(
                    file_name_with_full_path))

        elif CFile.is_file(file_name_with_full_path):
            if CUtils.list_count(ignore_file_array, file_name) > 0:
                # The original message wrongly called the ignored file a
                # sub-directory.
                CLogger().debug(
                    '文件: {0}在指定的忽略入库名单中, 将不入库! '.format(file_name))
                continue

            CLogger().debug('在目录{0}下发现文件: {1}'.format(ds_path, file_name))
            file_obj = CDMFileInfo(
                self.FileType_File, file_name_with_full_path,
                ds_storage_id, None, ds_id, ds_parent_obj_id,
                self.get_mission_db_id(), ds_rule_content)
            if file_obj.white_black_valid():
                file_obj.db_check_and_update(inbound_id)
            else:
                CLogger().info('文件[{0}]未通过黑白名单检验, 不允许入库! '.format(
                    file_name_with_full_path))
def new(self, flag):
    """
    Open the zip archive named by self.__file_name__ in append mode.

    The directory for the archive is created first if needed, and the opened
    archive is stored in self.__zip_obj__.

    :param flag: 0 selects zipfile.ZIP_DEFLATED, 1 selects zipfile.ZIP_STORED
    :raises ValueError: if flag is neither 0 nor 1
    :return:
    """
    if flag == 0:
        compression = zipfile.ZIP_DEFLATED
    elif flag == 1:
        compression = zipfile.ZIP_STORED
    else:
        # Previously an unknown flag left `compression` unbound and failed
        # later with UnboundLocalError; fail early with a clear message.
        raise ValueError(
            'flag must be 0 (ZIP_DEFLATED) or 1 (ZIP_STORED), got {0!r}'.format(flag))

    CFile.check_and_create_directory(self.__file_name__)
    self.__zip_obj__ = zipfile.ZipFile(self.__file_name__, 'a', compression)
def check_all_ib_file_or_path_existed(self, ib_id):
    """
    Check that the stored records of an inbound batch still match the files on disk.

    A file is inconsistent when it no longer exists, or when its modify time
    or size differs from the recorded value. Rows with an empty file name
    are treated as consistent.

    - If every file matches, a success result is returned.
    - Otherwise a failure result is returned whose message lists the first
      inconsistent files, followed by '...' when the scan stopped early
      because more were found.

    :param ib_id: inbound batch identifier (dm2_storage_file.dsf_ib_id)
    :return: the result produced by CResult.merge_result
    """
    sql_all_ib_file = (
        ' select coalesce(dm2_storage.dstownerpath, dm2_storage.dstunipath)'
        ' || dm2_storage_file.dsffilerelationname as file_name'
        ' , dm2_storage_file.dsffilesize as file_size'
        ' , dm2_storage_file.dsffilemodifytime as file_modify_time'
        ' from dm2_storage_file left join dm2_storage'
        ' on dm2_storage.dstid = dm2_storage_file.dsfstorageid'
        ' where dsf_ib_id = :ib_id '
    )
    rows = CFactory().give_me_db(self.get_mission_db_id()).all_row(
        sql_all_ib_file, {'ib_id': ib_id})

    invalid_file_list = []
    more_failure_file = False
    for row_index in range(rows.size()):
        file_name = rows.value_by_name(row_index, 'file_name', '')
        if CUtils.equal_ignore_case(file_name, ''):
            continue

        # Existence, then modify time, then size; the first failing check
        # decides and the later ones are not evaluated.
        file_valid = (
            CFile.file_or_path_exist(file_name)
            and CUtils.equal_ignore_case(
                CFile.file_modify_time(file_name),
                rows.value_by_name(row_index, 'file_modify_time', ''))
            and CFile.file_size(file_name) == rows.value_by_name(row_index, 'file_size', 0)
        )
        if file_valid:
            continue

        if len(invalid_file_list) <= 3:
            invalid_file_list.append(file_name)
        else:
            more_failure_file = True
            break

    if len(invalid_file_list) == 0:
        return CResult.merge_result(self.Success, '所有文件均存在, 且与库中记录统一! ')

    message = ''
    for invalid_file in invalid_file_list:
        message = CUtils.str_append(message, invalid_file)
    if more_failure_file:
        message = CUtils.str_append(message, '...')
    message = CUtils.str_append(message, '上述数据与库中记录不统一, 请重新扫描入库! ')
    return CResult.merge_result(self.Failure, message)
def classified(self):
    """
    Recognition rule for land-industry dem_10_dem data.

    Status: done. Owner: 李宪.

    The main name must be 13 characters long, a same-named .tif or .bil must
    exist beside the file, and the name must follow the positional layout
    checked below. The .tif/.bil file itself is the object; any other
    extension is marked as not an object.

    :return: tuple (object_confirm, object_name)
    """
    super().classified()
    file_main_name = self.file_info.file_main_name
    file_ext = self.file_info.file_ext

    if len(file_main_name) != 13:
        return self.Object_Confirm_IUnKnown, self._object_name

    base_with_path = CFile.join_file(self.file_info.file_path, file_main_name)
    tif_exists = CFile.file_or_path_exist('{0}.{1}'.format(base_with_path, self.Name_Tif))
    bil_exists = CFile.file_or_path_exist('{0}.{1}'.format(base_with_path, self.Name_Bil))
    if not (tif_exists or bil_exists):
        return self.Object_Confirm_IUnKnown, self._object_name

    # Positional layout of the name (1-based positions):
    #   1      letter
    #   2-3    digits
    #   4      letter
    #   5-7    digits
    #   8-10   digits
    #   11-13  the text DEM (case-insensitive)
    segment_rules = (
        (CUtils.text_is_alpha, file_main_name[0:1]),
        (CUtils.text_is_numeric, file_main_name[1:3]),
        (CUtils.text_is_alpha, file_main_name[3:4]),
        (CUtils.text_is_numeric, file_main_name[4:7]),
        (CUtils.text_is_numeric, file_main_name[7:10]),
    )
    name_is_invalid = (
        any(rule(segment) is False for rule, segment in segment_rules)
        or CUtils.equal_ignore_case(file_main_name[10:13], "DEM") is False
    )
    if name_is_invalid:
        return self.Object_Confirm_IUnKnown, self._object_name

    is_main_file = CUtils.equal_ignore_case(file_ext, self.Name_Tif) \
        or CUtils.equal_ignore_case(file_ext, self.Name_Bil)
    if is_main_file:
        self._object_confirm = self.Object_Confirm_IKnown
        self._object_name = file_main_name
    else:
        self._object_confirm = self.Object_Confirm_IKnown_Not
        self._object_name = None
    return self._object_confirm, self._object_name
def process(self) -> str:
    """
    Extract the quick-view images of an image file.

    Status: done. Owners: 张源博, 赵宇飞.

    The browse image, thumbnail and browse GeoTIFF are written below
    self.file_content.view_root_dir, in the relative directory
    <catalog>/<group>/<type>/<year>/<month>/<day>, and are named after the
    object id. Catalog, group and type come from the object's recognition
    plugin and default to 'default'.

    The images are produced through CProcessUtils.processing_method (process
    call mode). Note from the original authors: if a memory leak shows up,
    extract in a new process, write to files and parse them in this process.

    :return: on success, a result that also carries the relative file names
        of the browse image, thumbnail and browse GeoTIFF; otherwise the
        result returned by the view creation
    """
    # Defaults used when no plugin can be resolved for the object.
    # (`plugin_type` replaces a local that shadowed the builtin `type`.)
    plugin_type = 'default'
    plugin_group = 'default'
    plugin_catalog = 'default'

    # Resolve the object's recognition plugin and read its information.
    class_classified_obj = CObject.get_plugins_instance_by_object_id(
        self.file_info.db_server_id, self.object_id)
    if class_classified_obj is not None:
        plugins_info = class_classified_obj.get_information()
        plugin_type = CUtils.dict_value_by_name(
            plugins_info, class_classified_obj.Plugins_Info_Type, 'default')
        plugin_group = CUtils.dict_value_by_name(
            plugins_info, class_classified_obj.Plugins_Info_Group, 'default')
        plugin_catalog = CUtils.dict_value_by_name(
            plugins_info, class_classified_obj.Plugins_Info_Catalog, 'default')

    create_time = CTime.today()
    year = CTime.format_str(create_time, '%Y')
    month = CTime.format_str(create_time, '%m')
    day = CTime.format_str(create_time, '%d')

    sep = CFile.sep()  # the path separator depends on the operating system
    relative_path_part = sep.join(
        [plugin_catalog, plugin_group, plugin_type, year, month, day])

    # Relative file names; each starts with a separator.
    view_relative_path_browse = r'{2}{0}{2}{1}_browse.png'.format(
        relative_path_part, self.object_id, sep)
    view_relative_path_thumb = r'{2}{0}{2}{1}_thumb.jpg'.format(
        relative_path_part, self.object_id, sep)
    view_relative_path_geotiff = r'{2}{0}{2}{1}_browse.tiff'.format(
        relative_path_part, self.object_id, sep)

    view_root_dir = self.file_content.view_root_dir
    browse_full_path = CFile.join_file(view_root_dir, view_relative_path_browse)
    thumb_full_path = CFile.join_file(view_root_dir, view_relative_path_thumb)
    geotiff_full_path = CFile.join_file(view_root_dir, view_relative_path_geotiff)

    # Process call mode: the arguments travel as a JSON object.
    json_out_view = CJson()
    json_out_view.set_value_of_name('image_path', self.file_info.file_name_with_full_path)
    json_out_view.set_value_of_name('browse_full_path', browse_full_path)
    json_out_view.set_value_of_name('thumb_full_path', thumb_full_path)
    json_out_view.set_value_of_name('geotiff_full_path', geotiff_full_path)
    result_view = CProcessUtils.processing_method(self.create_view_json, json_out_view)

    if not CResult.result_success(result_view):
        return result_view

    result = CResult.merge_result(self.Success, '处理完毕!')
    result = CResult.merge_result_info(result, self.Name_Browse, view_relative_path_browse)
    result = CResult.merge_result_info(result, self.Name_Thumb, view_relative_path_thumb)
    result = CResult.merge_result_info(result, self.Name_Browse_GeoTiff, view_relative_path_geotiff)
    return result
def qa_file_custom(self, parser: CMetaDataParser):
    """
    Custom file-existence quality check, run before metadata parsing.

    Status: done. Owner: 王学谦.

    The object name is the file's main name without its trailing
    '<letter>-<digits>' part. The business metadata file is searched as
    '<object name>Y.xml', then 'P.xml', then 'M.xml'; the first one found is
    stored as the business metadata source and recorded as a pass. If none
    exists, or the main name does not follow the expected pattern, an error
    entry is recorded instead.

    :param parser: metadata parser whose quality list receives the result
    :return:
    """
    super().qa_file_custom(parser)

    file_object_name_list = re.findall(
        r'(?i)^([a-z]{2}\S+\d{4}[01]\d[0123]\d)[a-z][-]\d+$',
        self.file_info.file_main_name)

    metadata_bus_file = None
    # A non-matching name used to raise IndexError on [0]; it is now
    # reported as missing business metadata.
    if file_object_name_list:
        # Main name without the trailing F/M/P-<digits> part.
        metadata_main_name_with_path = CFile.join_file(
            self.file_info.file_path, file_object_name_list[0])
        # Priority order: Y, then P, then M.
        for suffix in ('Y', 'P', 'M'):
            candidate = '{0}{1}.xml'.format(metadata_main_name_with_path, suffix)
            if CFile.file_or_path_exist(candidate):
                metadata_bus_file = candidate
                break

    if metadata_bus_file is None:
        parser.metadata.quality.append_total_quality(
            {
                self.Name_FileName: '',
                self.Name_ID: 'metadata_file',
                self.Name_Title: '元数据文件',
                self.Name_Result: self.QA_Result_Error,
                self.Name_Group: self.QA_Group_Data_Integrity,
                self.Name_Message: '本文件缺少业务元数据'
            }
        )
        return

    self.metadata_bus_transformer_type = self.Transformer_XML
    self.metadata_bus_src_filename_with_path = metadata_bus_file
    parser.metadata.quality.append_total_quality(
        {
            self.Name_FileName: self.metadata_bus_src_filename_with_path,
            self.Name_ID: 'metadata_file',
            self.Name_Title: '元数据文件',
            self.Name_Result: self.QA_Result_Pass,
            self.Name_Group: self.QA_Group_Data_Integrity,
            self.Name_Message: '业务元数据[{0}]存在'.format(self.metadata_bus_src_filename_with_path)
        }
    )
def save_file(self, filename, encoding=Encoding_UTF8):
    """
    Write the XML document to a file.

    The target directory is created first if needed. The tree is rebuilt
    from the current root node, kept on the instance, and written with an
    XML declaration.

    :param filename: name of the file to write
    :param encoding: encoding passed to the tree writer
    :return:
    """
    CFile.check_and_create_directory(filename)
    tree = etree.ElementTree(self.__xml_root_node)
    self.__xml_tree = tree
    tree.write(filename, encoding=encoding, xml_declaration=True)
def get_classified_character_of_affiliated_keyword(self):
    """
    Build the recognition characteristics of the affiliated files.

    The entries of the file's directory whose name is the main name with its
    last character replaced by any character are inspected. The first of
    'a', 'b', 'c', 'd' that occurs as the lower-cased last character of
    those names becomes the main letter; the letters after it become the
    affiliated letters. When none occurs, the main letter is 'a' and the
    affiliated letters are 'bcd'.

    :return: list of rule dicts (file name, file path, file extension and
        main file rules)
    """
    file_main_name = self.file_info.file_main_name
    file_path = self.file_info.file_path
    same_name_list = CFile.file_or_subpath_of_path(
        file_path, file_main_name[:-1] + r'.\..*$', CFile.MatchType_Regex)
    last_letters = {
        CFile.file_name(same_file_full_name)[-1:].lower()
        for same_file_full_name in same_name_list
    }

    letter_order = 'abcd'
    main_letter = 'a'
    affiliated_letters = 'bcd'
    for position, letter in enumerate(letter_order):
        if letter in last_letters:
            main_letter = letter
            affiliated_letters = letter_order[position + 1:]
            break

    return [
        {
            # Matching rule for the affiliated file name.
            self.Name_ID: self.Name_FileName,
            self.Name_RegularExpression:
                r'(?i)^.{10}\d{2}[pm]\d{4}[' + affiliated_letters + 'mp]$'
        },
        {
            # Matching rule for the affiliated file path.
            self.Name_ID: self.Name_FilePath,
            self.Name_RegularExpression:
                r'(?i)\d{4}.{2}[\\\\/]FenFu[\\\\/]' + self.get_coordinate_system_title()
        },
        {
            # Matching rule for the affiliated file extension.
            self.Name_ID: self.Name_FileExt,
            self.Name_RegularExpression: '(?i)^(tif|tiff|tfw|xml)$'
        },
        {
            # Directory in which the main file must exist, and the rule its
            # full file name must match.
            self.Name_ID: self.Name_FileMain,
            self.Name_FilePath: file_path,
            self.Name_RegularExpression:
                '(?i)^' + file_main_name[:-1] + '[o' + main_letter + r']\.tif[f]?'
        }
    ]
def classified(self):
    """
    Recognition rule for land-industry mosaic data (mosaic imagery).

    Status: done. Owner: 王学谦.

    The object name is derived from the file's main name: minus '_21at' for
    a '*_21at.xml' metadata file, minus 'xq' for an '*xq' shapefile
    component, unchanged otherwise. Both the main raster
    ('<object name>.tif' or '.img') and '<object name>xq.shp' must exist.
    The .tif/.img itself is the object and has its affiliated files
    collected; any other extension is marked as not an object.

    :return: tuple (object_confirm, object_name)
    """
    super().classified()
    info = self.file_info
    file_main_name = info.file_main_name
    file_ext = info.file_ext
    file_name_with_full_path = info.file_name_with_full_path

    xq_suffixes = ('xq.shp', 'xq.shx', 'xq.dbf', 'xq.sbx', 'xq.prj', 'xq.sbn')
    file_object_name = file_main_name
    if file_name_with_full_path.endswith('_21at.xml'):
        # Metadata file: drop the trailing '_21at'.
        if len(file_main_name) <= 5:
            return self.Object_Confirm_IUnKnown, self._object_name
        file_object_name = file_main_name[:-5]
    elif file_name_with_full_path.lower().endswith(xq_suffixes):
        # Vector file: drop the trailing 'xq'.
        if len(file_main_name) <= 2:
            return self.Object_Confirm_IUnKnown, self._object_name
        file_object_name = file_main_name[:-2]

    base_with_path = CFile.join_file(info.file_path, file_object_name)
    # The main raster file must exist.
    main_file_exists = \
        CFile.file_or_path_exist('{0}.{1}'.format(base_with_path, self.Name_Tif)) or \
        CFile.file_or_path_exist('{0}.{1}'.format(base_with_path, self.Name_Img))
    if not main_file_exists:
        return self.Object_Confirm_IUnKnown, self._object_name

    # The vector file must exist.
    if not CFile.file_or_path_exist('{0}xq.shp'.format(base_with_path)):
        return self.Object_Confirm_IUnKnown, self._object_name

    # Decide by extension.
    is_raster = CUtils.equal_ignore_case(file_ext, self.Name_Tif) \
        or CUtils.equal_ignore_case(file_ext, self.Name_Img)
    if is_raster:
        self._object_confirm = self.Object_Confirm_IKnown
        self._object_name = file_main_name
        self.add_file_to_detail_list()
    else:
        self._object_confirm = self.Object_Confirm_IKnown_Not
        self._object_name = None
    return self._object_confirm, self._object_name
def test_file_info_list(self):
    """
    Return the sample records describing expected classification results.

    Each record holds the file type, a relative path built with the
    platform separator from CFile.sep(), the expected confirmation state
    and the expected object name. All samples share one directory and one
    file-name stem; only the trailing part of the file name differs.

    :return: list of dicts, one per sample file
    """
    # {0} is the path separator, {1} the varying tail of the file name.
    path_template = (
        '202008{0}ZhengJing{0}1990天津任意直角坐标系{0}BJ2{0}'
        'BJ2-1_002E74VI_001_0120200730014001_005-20200828{0}'
        'BJ21_002E74VI_001_0120200730014001_00520200828{1}'
    )
    separator = CFile.sep()

    # (file-name tail, expected confirmation, expected object name)
    samples = (
        ('F.img', self.Object_Confirm_IKnown,
         'BJ21_002E74VI_001_0120200730014001_00520200828F'),
        ('M.img', self.Object_Confirm_IKnown_Not, None),
        ('P.img', self.Object_Confirm_IKnown_Not, None),
        ('T.XML', self.Object_Confirm_IKnown_Not, None),
        ('Y.XML', self.Object_Confirm_IKnown_Not, None),
    )
    return [
        {
            self.Name_Test_File_Type: self.FileType_File,
            self.Name_Test_file_path: path_template.format(separator, tail),
            self.Name_Test_object_confirm: expected_confirm,
            self.Name_Test_object_name: expected_name
        }
        for tail, expected_confirm, expected_name in samples
    ]
def a_file(cls, audit_id, audit_title, audit_group, audit_result,
           file_name_with_path, qa_items: dict) -> list:
    """
    Audit a single file.

    An audit record is initialised from the given id, title, group and
    result. If the file does not exist, that record receives a
    "file does not exist" message and is returned as a one-element list.
    Otherwise the record is handed to ``__a_check_file__`` together with
    the file name and the quality items, and its result is returned.

    :param audit_id: identifier of the audit item
    :param audit_title: title of the audit item
    :param audit_group: group of the audit item
    :param audit_result: result value stored in the initial record
    :param file_name_with_path: file to audit, including its path
    :param qa_items: quality items forwarded to the file check
    :return: list of audit records
    """
    audit_record = cls.__init_audit_dict__(
        audit_id, audit_title, audit_group, audit_result)

    if not CFile.file_or_path_exist(file_name_with_path):
        audit_record[cls.Name_Message] = '文件[{0}]不存在, 请检查'.format(
            CFile.file_name(file_name_with_path))
        return [audit_record]

    return cls.__a_check_file__(audit_record, file_name_with_path, qa_items)
def qa_file_custom(self, parser: CMetaDataParser):
    """
    Custom file-existence quality check, run before metadata parsing.

    Status in the original notes: done; owner: Li Xian.

    Looks next to the data file for a business-metadata file with the
    same main name and one of the extensions xls, xlsx, mat or mdb, in
    that order. The first hit is remembered on the instance (its
    extension as the transformer type, its path as the source file), and
    a passing quality record is appended; if none exists an error record
    is appended instead.

    :param parser: metadata parser whose quality collection receives the
        record
    :return: None
    """
    super().qa_file_custom(parser)

    metadata_stem = CFile.join_file(
        self.file_info.file_path, self.file_info.file_main_name)

    for candidate_ext in ('xls', 'xlsx', 'mat', 'mdb'):
        candidate_file = '{0}.{1}'.format(metadata_stem, candidate_ext)
        if CFile.file_or_path_exist(candidate_file):
            # Remember the first metadata file found and stop searching.
            self.metadata_bus_transformer_type = candidate_ext
            self.metadata_bus_src_filename_with_path = candidate_file
            quality_file_name = self.metadata_bus_src_filename_with_path
            quality_result = self.QA_Result_Pass
            quality_message = '业务元数据[{0}]存在'.format(
                self.metadata_bus_src_filename_with_path)
            break
    else:
        # No candidate exists: report the missing business metadata.
        quality_file_name = ''
        quality_result = self.QA_Result_Error
        quality_message = '本文件缺少业务元数据'

    parser.metadata.quality.append_total_quality({
        self.Name_FileName: quality_file_name,
        self.Name_ID: 'metadata_file',
        self.Name_Title: '元数据文件',
        self.Name_Result: quality_result,
        self.Name_Group: self.QA_Group_Data_Integrity,
        self.Name_Message: quality_message
    })
def __plugins_classified_of_directory__(
        cls, file_info: CDMFilePathInfoEx) -> CPlugins:
    """
    Try every plugin found in the system plugin directory on a target.

    Plugin modules are looked up in the plugin root directory under the
    sub-directory named after the target's file type. Each one is
    instantiated and asked to classify the target; the first plugin whose
    result is not "unknown" is returned.

    A plugin that declares an owning project id is only consulted when
    that id matches (ignoring case) the project id configured for the
    system; plugins without a project id are always consulted.

    A failing plugin is logged and skipped, unless the application debug
    setting is on, in which case the exception is re-raised.

    :param file_info: description of the file or directory to classify
    :return: the plugin instance that recognised the target, or None
    """
    target = file_info.file_main_name
    target_type = file_info.file_type
    package_name = '{0}.{1}'.format(
        CSys.get_plugins_package_root_name(), target_type)
    plugins_dir = CFile.join_file(CSys.get_plugins_root_dir(), target_type)
    plugin_files = CFile.file_or_subpath_of_path(
        plugins_dir, '{0}_*.{1}'.format(cls.Name_Plugins, cls.FileExt_Py))

    for plugin_file in plugin_files:
        file_main_name = CFile.file_main_name(plugin_file)
        try:
            plugin = CObject.create_plugins_instance(
                package_name, file_main_name, file_info)
            plugin_info = plugin.get_information()

            # Logic added by Wang Xiya on 2021-02-12 to make debugging
            # project-specific plugins easier.
            owner_project_id = CUtils.dict_value_by_name(
                plugin_info, CPlugins.Plugins_Info_Project_ID, None)
            system_project_id = settings.application.xpath_one(
                cls.Path_Setting_Project_ID, None)
            plugin_enabled = (
                True if owner_project_id is None
                else CUtils.equal_ignore_case(
                    owner_project_id, system_project_id)
            )

            if plugin_enabled:
                object_confirm, _ = plugin.classified()
                if object_confirm != cls.Object_Confirm_IUnKnown:
                    plugin_id = plugin.get_id()
                    CLogger().debug('{0} is classified as {1}.{2}'.format(
                        target, plugin_info, plugin_id))
                    return plugin
        except Exception as error:
            CLogger().debug('插件[{0}]解析出现异常, 错误信息为: [{1}], 请检查!'.format(
                file_main_name, error.__str__()))
            debug_flag = settings.application.xpath_one(
                '{0}.{1}'.format(cls.Name_Application, cls.Name_Debug),
                cls.DB_False)
            if debug_flag == cls.DB_True:
                # In debug mode surface the plugin failure to the caller.
                raise

    # No plugin recognised the target.
    return None
def classified(self):
    """
    Recognition rules for land-industry data named with a 10-character code.

    Status in the original notes: done; owner: Li Xian. The original notes
    also advise checking the metadata file formats one by one and storing
    the format and name of the file found on the instance, so metadata
    processing can use them directly.

    The target is recognised when all of the following hold:
    1. the main file name is exactly 10 characters long;
    2. '<main name>.tif' or '<main name>.bil' exists next to the file;
    3. the name is laid out as letter, 2 digits, letter, 3 digits,
       3 digits.

    Only a file whose own extension is tif/bil is confirmed as the
    object; any other file is marked as "known, but not the object".

    :return: tuple (object_confirm, object_name)
    """
    super().classified()
    main_name = self.file_info.file_main_name
    extension = self.file_info.file_ext

    if len(main_name) != 10:
        return self.Object_Confirm_IUnKnown, self._object_name

    name_with_path = CFile.join_file(self.file_info.file_path, main_name)
    tif_exists, bil_exists = [
        CFile.file_or_path_exist('{0}.{1}'.format(name_with_path, data_ext))
        for data_ext in (self.Name_Tif, self.Name_Bil)
    ]
    if not (tif_exists or bil_exists):
        return self.Object_Confirm_IUnKnown, self._object_name

    # Expected layout of the 10-character name, checked left to right:
    #   position 1      -> letter
    #   positions 2-3   -> digits
    #   position 4      -> letter
    #   positions 5-7   -> digits
    #   positions 8-10  -> digits
    name_layout = (
        (CUtils.text_is_alpha, main_name[0:1]),
        (CUtils.text_is_numeric, main_name[1:3]),
        (CUtils.text_is_alpha, main_name[3:4]),
        (CUtils.text_is_numeric, main_name[4:7]),
        (CUtils.text_is_numeric, main_name[7:10]),
    )
    # Stops at the first part whose validator returns exactly False.
    if any(validator(part) is False for validator, part in name_layout):
        return self.Object_Confirm_IUnKnown, self._object_name

    is_data_file = (
        CUtils.equal_ignore_case(extension, self.Name_Tif)
        or CUtils.equal_ignore_case(extension, self.Name_Bil)
    )
    if is_data_file:
        self._object_confirm = self.Object_Confirm_IKnown
        self._object_name = main_name
    else:
        self._object_confirm = self.Object_Confirm_IKnown_Not
        self._object_name = None
    return self._object_confirm, self._object_name