def classified(self):
    """
    Identification rule for the land-industry data type guoqing_frame
    (national-conditions imagery, framed imagery).
    Status: done. Owner: Wang Xueqian.

    The rule works in three steps:

    1. The object name is the first 20 characters of the file main name;
       shorter names are rejected as unknown.
    2. The directory of the file must contain an entry matching
       ``<object name><letter>.tif`` (looked up through
       ``CFile.find_file_or_subpath_of_path`` in regex mode).
    3. Characters 1, 4, 11-12, 16 and 21 of the main name must be letters and
       characters 2-3, 5-10, 14-15 and 17-20 must be digits (character 13 is
       not checked).

    A 21-character main name with extension ``tif`` is confirmed as the
    object; every other file that passed the checks is reported as
    "known, not an object" (an affiliated file).

    :return: tuple ``(object_confirm, object_name)``
    """
    # Local import: only needed to escape the name placed into the lookup regex.
    import re

    super().classified()
    file_main_name = self.file_info.file_main_name
    file_ext = self.file_info.file_ext
    file_path = self.file_info.file_path

    # Names shorter than 20 characters cannot belong to this data type.
    if len(file_main_name) < 20:
        return self.Object_Confirm_IUnKnown, self._object_name
    # 20-character names are usually affiliated files; longer names are cut
    # down to their first 20 characters.
    file_object_name = file_main_name[:20]

    # The name is escaped so that regex metacharacters in it (character 13 is
    # never validated) are matched literally instead of changing the pattern.
    match_str = '(?i)^' + re.escape(file_object_name) + r'[a-zA-Z][.]tif'
    check_file_main_name_exist = CFile.find_file_or_subpath_of_path(
        file_path, match_str, CFile.MatchType_Regex)
    if not check_file_main_name_exist:  # the main file must exist
        return self.Object_Confirm_IUnKnown, self._object_name

    # (checker, segment) pairs, evaluated in order; the first failure rejects.
    # NOTE(review): for a 20-character name the last segment is '' - the
    # outcome then depends on how CUtils.text_is_alpha treats an empty string.
    segment_checks = (
        (CUtils.text_is_alpha, file_main_name[0:1]),       # char 1
        (CUtils.text_is_numeric, file_main_name[1:3]),     # chars 2-3
        (CUtils.text_is_alpha, file_main_name[3:4]),       # char 4
        (CUtils.text_is_numeric, file_main_name[4:10]),    # chars 5-10
        (CUtils.text_is_alpha, file_main_name[10:12]),     # chars 11-12
        (CUtils.text_is_numeric, file_main_name[13:15]),   # chars 14-15
        (CUtils.text_is_alpha, file_main_name[15:16]),     # char 16
        (CUtils.text_is_numeric, file_main_name[16:20]),   # chars 17-20
        (CUtils.text_is_alpha, file_main_name[20:21]),     # char 21
    )
    if any(check(segment) is False for check, segment in segment_checks):
        return self.Object_Confirm_IUnKnown, self._object_name

    if len(file_main_name) == 21 and CUtils.equal_ignore_case(file_ext, 'tif'):
        self._object_confirm = self.Object_Confirm_IKnown
        self._object_name = file_main_name
        # Register affiliated files whose names differ from the main file name.
        self.add_file_to_detail_list(file_object_name)
    else:
        self._object_confirm = self.Object_Confirm_IKnown_Not
        self._object_name = None
    return self._object_confirm, self._object_name
def classified(self):
    """
    Confirm the current entry as an object when its name ends with ``.gdb``
    (case-insensitive) and ``CFile.find_file_or_subpath_of_path`` reports a
    ``*.gdbtable`` entry under its full path.

    On success the object name is the file main name; otherwise the state is
    left as unknown with no object name.

    :return: tuple ``(object_confirm, object_name)``
    """
    # Start from the "unknown" state; it is only upgraded at the very end.
    self._object_confirm, self._object_name = self.Object_Confirm_IUnKnown, None

    info = self.file_info
    gdb_path = info.file_name_with_full_path

    if not info.file_name_without_path.lower().endswith('.gdb'):
        return self._object_confirm, self._object_name
    if not CFile.find_file_or_subpath_of_path(gdb_path, '*.gdbtable'):
        return self._object_confirm, self._object_name

    self._object_confirm = self.Object_Confirm_IKnown
    self._object_name = info.file_main_name
    return self._object_confirm, self._object_name
def classified(self):
    """
    Identification rule for the land-industry data type guoqing_scene_noblock
    (national-conditions imagery, not split into blocks: no ``-<number>``
    suffix in the name).
    Status: done. Owner: Wang Xueqian.

    The rule works in four steps:

    1. Derive the object name from the file main name by removing whatever
       follows the 8-digit date (``yyyymmdd``); names ending in
       ``<letter>-<digits>`` are rejected as unknown here.
    2. The directory must contain an entry matching
       ``<object name>[F|M|P].img`` (looked up through
       ``CFile.find_file_or_subpath_of_path`` in regex mode).
    3. The object name must start with two letters and end with eight digits.
    4. The main file is chosen by priority F > M > P: an ``M`` file is the
       object only when no ``F`` file exists, a ``P`` file only when neither
       ``F`` nor ``M`` exists. Every other file that got this far is reported
       as "known, not an object" (an affiliated file).

    :return: tuple ``(object_confirm, object_name)``
    """
    super().classified()
    file_main_name = self.file_info.file_main_name
    file_ext = self.file_info.file_ext
    file_path = self.file_info.file_path

    if len(file_main_name) < 13:
        return self.Object_Confirm_IUnKnown, self._object_name

    # (?i) case-insensitive; two leading letters, any run of non-blank
    # characters, then an 8-digit date: 4-digit year, [01]\d month, [0123]\d day.
    date_stem = r'(?i)^[a-z]{2}\S+\d{4}[01]\d[0123]\d'

    file_object_name = file_main_name
    if CUtils.text_match_re(file_main_name, date_stem + r'[a-z]$'):
        # A single trailing letter: drop it to get the object name.
        file_object_name = file_main_name[:-1]
    elif CUtils.text_match_re(file_main_name, date_stem + r'[a-z][-]\d+$'):
        # "<letter>-<digits>" suffix: rejected by this rule.
        return self.Object_Confirm_IUnKnown, self._object_name
    elif CUtils.text_match_re(file_main_name, date_stem + r'$'):
        # Nothing after the date: the main name already is the object name.
        pass
    elif CUtils.text_match_re(file_main_name, date_stem + r'\S+$'):
        # Several trailing characters: keep everything up to the date.
        file_object_name_list = re.findall(
            r'(?i)^([a-z]{2}\S+\d{4}[01]\d[0123]\d)\S+$', file_main_name)
        file_object_name = file_object_name_list[0]

    # The object name is escaped so that regex metacharacters in it are
    # matched literally, and the extension dot is matched as a real dot.
    pattern_prefix = '(?i)^' + re.escape(file_object_name)
    match_str = pattern_prefix + r'[FMP][.]img$'
    check_file_main_name_exist = CFile.find_file_or_subpath_of_path(
        file_path, match_str, CFile.MatchType_Regex)
    if not check_file_main_name_exist:  # the main file must exist
        return self.Object_Confirm_IUnKnown, self._object_name

    # Object name: characters 1-2 are letters, the last eight are digits.
    if CUtils.text_is_alpha(file_object_name[0:2]) is False \
            or CUtils.text_is_numeric(file_object_name[-8:]) is False:
        return self.Object_Confirm_IUnKnown, self._object_name

    # From here on the file is either the main file or an affiliated file.
    match_str_f = pattern_prefix + r'[F][.]img$'
    match_str_fm = pattern_prefix + r'[FM][.]img$'
    last_letter = file_main_name[-1:].lower()

    is_main_file = False
    if CUtils.equal_ignore_case(file_ext.lower(), 'img'):
        if CUtils.equal_ignore_case(last_letter, 'f'):
            is_main_file = True
        elif CUtils.equal_ignore_case(last_letter, 'm'):
            # M is the main file only when there is no F file.
            is_main_file = not CFile.find_file_or_subpath_of_path(
                file_path, match_str_f, CFile.MatchType_Regex)
        elif CUtils.equal_ignore_case(last_letter, 'p'):
            # P is the main file only when there is neither an F nor an M file.
            is_main_file = not CFile.find_file_or_subpath_of_path(
                file_path, match_str_fm, CFile.MatchType_Regex)

    if is_main_file:
        self._object_confirm = self.Object_Confirm_IKnown
        self._object_name = file_main_name
        self.add_file_to_detail_list(file_object_name)
    else:
        self._object_confirm = self.Object_Confirm_IKnown_Not
        self._object_name = None
    return self._object_confirm, self._object_name
def classified(self):
    """
    Keyword-based identification.

    Two keyword lists are evaluated against the current file: the object
    keywords (``get_classified_character_of_object_keyword``) and the
    affiliated-file keywords
    (``get_classified_character_of_affiliated_keyword``). Each keyword entry
    carries an id and an optional regular expression (``'.*'`` when absent)
    and switches on one of the boolean flags below when its check succeeds.

    Outcome:
      * name, path, extension and affiliated flags of the object list all
        set -> the file is the object (``Object_Confirm_IKnown``);
      * all four affiliated-list flags set, or object name + path flags set
        together with ``object_affiliated_file_main_flag`` -> known, but not
        an object (``Object_Confirm_IKnown_Not``);
      * anything else -> unknown.

    :return: tuple ``(object_confirm, object_name)``
    """
    super().classified()
    # Fetch the file attributes used by the checks below
    file_path = self.file_info.file_path
    file_main_name = self.file_info.file_main_name
    file_ext = self.file_info.file_ext

    # Flags for matching the file as an object (data file)
    object_file_name_flag = False
    object_file_path_flag = False
    object_file_ext_flag = False
    object_affiliated_file_main_flag = False
    object_file_affiliated_flag = False
    object_keyword_list = self.get_classified_character_of_object_keyword()
    if len(object_keyword_list) > 0:
        for keyword_info in object_keyword_list:
            keyword_id = CUtils.dict_value_by_name(keyword_info, self.Name_ID, None)
            regex_match = CUtils.dict_value_by_name(
                keyword_info, self.Name_RegularExpression, '.*')
            # A missing expression means "match anything"
            if regex_match is None:
                regex_match = '.*'

            if CUtils.equal_ignore_case(keyword_id, self.Name_FileName):
                if CUtils.text_match_re(file_main_name, regex_match):
                    object_file_name_flag = True
            elif CUtils.equal_ignore_case(keyword_id, self.Name_FilePath):
                if CUtils.text_match_re(file_path, regex_match):
                    object_file_path_flag = True
            elif CUtils.equal_ignore_case(keyword_id, self.Name_FileExt):
                if CUtils.text_match_re(file_ext, regex_match):
                    object_file_ext_flag = True
                else:
                    # The extension of this file does not match: look for a
                    # same-named entry in the directory whose extension does,
                    # which marks this file as belonging to that main file.
                    same_name_file_list = CFile.file_or_dir_fullname_of_path(
                        file_path, False, '(?i)^' + file_main_name + '[.].*$',
                        CFile.MatchType_Regex)
                    if len(same_name_file_list) > 0:
                        for same_name_file in same_name_file_list:
                            same_name_file_ext = CFile.file_ext(
                                same_name_file)
                            if CUtils.text_match_re(
                                    same_name_file_ext, regex_match):
                                object_affiliated_file_main_flag = True
            elif CUtils.equal_ignore_case(keyword_id, self.Name_FileAffiliated):
                affiliated_file_path = CUtils.dict_value_by_name(
                    keyword_info, self.Name_FilePath, None)
                if affiliated_file_path is not None:
                    if CFile.find_file_or_subpath_of_path(
                            affiliated_file_path, regex_match, CFile.MatchType_Regex):
                        object_file_affiliated_flag = True
                # NOTE(review): this else is read as belonging to the
                # "affiliated_file_path is not None" test (no path configured
                # -> requirement counts as satisfied); the indentation could
                # not be recovered with certainty - confirm against the
                # original file.
                else:
                    object_file_affiliated_flag = True

    # Flags for matching the file as an affiliated file
    affiliated_file_name_flag = False
    affiliated_file_path_flag = False
    affiliated_file_ext_flag = False
    affiliated_file_main_flag = False
    affiliated_keyword_list = self.get_classified_character_of_affiliated_keyword(
    )
    if len(affiliated_keyword_list) > 0:
        for keyword_info in affiliated_keyword_list:
            keyword_id = CUtils.dict_value_by_name(keyword_info, self.Name_ID, None)
            regex_match = CUtils.dict_value_by_name(
                keyword_info, self.Name_RegularExpression, '.*')
            # A missing expression means "match anything"
            if regex_match is None:
                regex_match = '.*'

            if CUtils.equal_ignore_case(keyword_id, self.Name_FileName):
                if CUtils.text_match_re(file_main_name, regex_match):
                    affiliated_file_name_flag = True
            elif CUtils.equal_ignore_case(keyword_id, self.Name_FilePath):
                if CUtils.text_match_re(file_path, regex_match):
                    affiliated_file_path_flag = True
            elif CUtils.equal_ignore_case(keyword_id, self.Name_FileExt):
                if CUtils.text_match_re(file_ext, regex_match):
                    affiliated_file_ext_flag = True
            elif CUtils.equal_ignore_case(keyword_id, self.Name_FileMain):
                # The main file is searched for in the path configured on the keyword
                affiliated_file_path = CUtils.dict_value_by_name(
                    keyword_info, self.Name_FilePath, None)
                if affiliated_file_path is not None:
                    if CFile.find_file_or_subpath_of_path(
                            affiliated_file_path, regex_match, CFile.MatchType_Regex):
                        affiliated_file_main_flag = True

    if object_file_name_flag and object_file_path_flag and \
            object_file_ext_flag and object_file_affiliated_flag:
        # Every object keyword matched: this file is the object itself
        self._object_confirm = self.Object_Confirm_IKnown
        self._object_name = file_main_name
        self.set_custom_affiliated_file()
    elif affiliated_file_name_flag and affiliated_file_path_flag and \
            affiliated_file_ext_flag and affiliated_file_main_flag:
        # Every affiliated keyword matched: known, but not an object
        self._object_confirm = self.Object_Confirm_IKnown_Not
        self._object_name = None
    elif object_file_name_flag and object_file_path_flag and object_affiliated_file_main_flag:
        # Name and path match and a same-named entry has the object extension
        self._object_confirm = self.Object_Confirm_IKnown_Not
        self._object_name = None
    else:
        self._object_confirm = self.Object_Confirm_IUnKnown
        self._object_name = None
    return self._object_confirm, self._object_name
def classified(self):
    """
    Identification rule for the land-industry data type third_survey_noblock
    (third-survey imagery, not split into blocks).
    Status: done. Owner: Wang Xueqian.

    Checks, in order:

    1. The file main name is longer than 6 characters and its first 6
       characters (administrative division code) are numeric.
    2. The directory contains an entry matching
       ``<6-digit code>\\S+dom.img`` (regex lookup through
       ``CFile.find_file_or_subpath_of_path``).
    3. A main name of at least 12 characters whose characters 7-8 are letters
       and whose last three characters are ``dom`` (any case) is the object
       when the extension is ``img``, otherwise a known non-object.
    4. Any other file is a known non-object when its extension is one of
       mdb/shp/shx/dbf/sbx/prj/sbn, and unknown otherwise.

    :return: tuple ``(object_confirm, object_name)``
    """
    super().classified()
    info = self.file_info
    main_name = info.file_main_name
    ext = info.file_ext
    directory = info.file_path

    # The main name must be longer than the 6-character division code.
    if len(main_name) <= 6:
        return self.Object_Confirm_IUnKnown, self._object_name
    division_code = main_name[0:6]
    # The division code must be numeric.
    if not CUtils.text_is_numeric(CUtils.any_2_str(division_code)):
        return self.Object_Confirm_IUnKnown, self._object_name

    # (?i) case-insensitive, ^/$ anchors, \S+ one or more non-blank characters.
    # NOTE(review): the '.' before 'img' is an unescaped regex wildcard.
    main_file_pattern = '(?i)^' + division_code + r'\S+dom.img$'
    if not CFile.find_file_or_subpath_of_path(
            directory, main_file_pattern, CFile.MatchType_Regex):
        # No main file in this directory.
        return self.Object_Confirm_IUnKnown, self._object_name

    named_like_main_file = (
        len(main_name) >= 12
        and CUtils.text_is_alpha(main_name[6:8])
        and CUtils.equal_ignore_case(
            CUtils.any_2_str(main_name[-3:]).lower(), 'dom')
    )
    if named_like_main_file:
        if CUtils.equal_ignore_case(ext, 'img'):
            self._object_confirm = self.Object_Confirm_IKnown
            self._object_name = main_name
            self.add_file_to_detail_list(division_code)
        else:
            self._object_confirm = self.Object_Confirm_IKnown_Not
            self._object_name = None
        return self._object_confirm, self._object_name

    # Files that get here count as affiliated files only for these formats.
    if ext.lower() not in ('mdb', 'shp', 'shx', 'dbf', 'sbx', 'prj', 'sbn'):
        return self.Object_Confirm_IUnKnown, self._object_name
    self._object_confirm = self.Object_Confirm_IKnown_Not
    self._object_name = None
    return self._object_confirm, self._object_name
def classified(self):
    """
    Identification rule for the land-industry data type guoqing_scene_block
    (national-conditions imagery split into blocks: names end in
    ``F-1`` / ``F-2`` style suffixes).
    Status: done. Owner: Wang Xueqian.

    The rule works in four steps:

    1. Derive the object name from the file main name by removing whatever
       follows the 8-digit date (``yyyymmdd``). An ``img`` file ending in a
       single F/M/P letter (no block number) is rejected as unknown here.
    2. The directory must contain an entry matching
       ``<object name>[F|M|P]-<digits>.img`` (looked up through
       ``CFile.find_file_or_subpath_of_path`` in regex mode).
    3. The object name must start with two letters and end with eight digits.
    4. For a name ending in ``<letter>-<number>`` the main file of that block
       is chosen by priority F > M > P: an ``M`` file is the object only when
       no ``F`` file with the same block number exists, a ``P`` file only when
       neither ``F`` nor ``M`` exists. Every other file that got this far is
       reported as "known, not an object" (an affiliated file).

    :return: tuple ``(object_confirm, object_name)``
    """
    super().classified()
    file_main_name = self.file_info.file_main_name
    file_ext = self.file_info.file_ext
    file_path = self.file_info.file_path

    if len(file_main_name) < 13:
        return self.Object_Confirm_IUnKnown, self._object_name

    # (?i) case-insensitive; two leading letters, any run of non-blank
    # characters, then an 8-digit date: 4-digit year, [01]\d month, [0123]\d day.
    date_stem = r'(?i)^[a-z]{2}\S+\d{4}[01]\d[0123]\d'
    # Same pattern with the part up to the date captured as the object name.
    date_stem_captured = r'(?i)^([a-z]{2}\S+\d{4}[01]\d[0123]\d)'

    file_object_name = file_main_name
    if CUtils.text_match_re(file_main_name, date_stem + r'[a-z][-]\d+$'):
        # "<letter>-<digits>" suffix: strip it to get the object name.
        file_object_name_list = re.findall(
            date_stem_captured + r'[a-z][-]\d+$', file_main_name)
        file_object_name = file_object_name_list[0]
    elif CUtils.text_match_re(file_main_name, date_stem + r'[FMP]$') \
            and CUtils.equal_ignore_case(file_ext.lower(), 'img'):
        # An img file with a single F/M/P letter and no block number.
        return self.Object_Confirm_IUnKnown, self._object_name
    elif CUtils.text_match_re(file_main_name, date_stem + r'[a-z]$'):
        # A single trailing letter: drop it.
        file_object_name = file_main_name[:-1]
    elif CUtils.text_match_re(file_main_name, date_stem + r'$'):
        # Nothing after the date: the main name already is the object name.
        pass
    elif CUtils.text_match_re(file_main_name, date_stem + r'\S+$'):
        # Arbitrary extra characters after the date: keep everything up to it.
        file_object_name_list = re.findall(
            date_stem_captured + r'\S+$', file_main_name)
        file_object_name = file_object_name_list[0]

    # The object name is escaped so that regex metacharacters in it are
    # matched literally, and the extension dot is matched as a real dot.
    pattern_prefix = '(?i)^' + re.escape(file_object_name)
    match_str = pattern_prefix + r'[FMP][-]\d+[.]img$'
    check_file_main_name_exist = CFile.find_file_or_subpath_of_path(
        file_path, match_str, CFile.MatchType_Regex)
    if not check_file_main_name_exist:  # the main file must exist
        return self.Object_Confirm_IUnKnown, self._object_name

    # Object name: characters 1-2 are letters, the last eight are digits.
    if CUtils.text_is_alpha(file_object_name[0:2]) is False \
            or CUtils.text_is_numeric(file_object_name[-8:]) is False:
        return self.Object_Confirm_IUnKnown, self._object_name

    # Pull the trailing letter and block number out of the main name.
    block_suffix = re.match(date_stem + r'([a-z])[-](\d+)$', file_main_name)

    is_main_file = False
    block_number = None
    # Without a "<letter>-<number>" suffix the file is an affiliated file.
    if block_suffix is not None:
        block_letter = block_suffix.group(1).lower()
        block_number = block_suffix.group(2)
        # The block number is matched literally. Wrapping it in [...] would
        # turn it into a one-character class and miss multi-digit block
        # numbers such as 12.
        match_str_f = pattern_prefix + r'[F][-]' + block_number + r'[.]img$'
        match_str_fm = pattern_prefix + r'[FM][-]' + block_number + r'[.]img$'

        if CUtils.equal_ignore_case(file_ext.lower(), 'img'):
            if CUtils.equal_ignore_case(block_letter, 'f'):
                is_main_file = True
            elif CUtils.equal_ignore_case(block_letter, 'm'):
                # M is the main file only when there is no F file for this block.
                is_main_file = not CFile.find_file_or_subpath_of_path(
                    file_path, match_str_f, CFile.MatchType_Regex)
            elif CUtils.equal_ignore_case(block_letter, 'p'):
                # P is the main file only when there is neither F nor M.
                is_main_file = not CFile.find_file_or_subpath_of_path(
                    file_path, match_str_fm, CFile.MatchType_Regex)

    if is_main_file:
        self._object_confirm = self.Object_Confirm_IKnown
        self._object_name = file_main_name
        self.add_file_to_detail_list(file_object_name, block_number)
    else:
        self._object_confirm = self.Object_Confirm_IKnown_Not
        self._object_name = None
    return self._object_confirm, self._object_name