Пример #1
0
    def classified(self):
        """
        Recognise files of the ``guoqing_frame`` land-industry dataset
        (national-conditions imagery, framed images).

        A file is the main object file when its main name is exactly 21
        characters long and its extension is ``tif``. Any other file that
        passes the name checks is reported as ``Object_Confirm_IKnown_Not``
        (an attachment of the main file).

        :return: tuple ``(object_confirm, object_name)``
        """
        import re  # function-scope import: only needed to escape the name below

        super().classified()
        file_main_name = self.file_info.file_main_name
        file_ext = self.file_info.file_ext
        file_path = self.file_info.file_path

        # Names shorter than 20 characters cannot belong to this data type.
        if len(file_main_name) < 20:
            return self.Object_Confirm_IUnKnown, self._object_name
        # The first 20 characters are shared by the main file and its attachments.
        file_object_name = file_main_name[:20]

        # The main file is "<20-character name><one letter>.tif". The name is
        # escaped so that regex metacharacters in a file name are matched
        # literally instead of corrupting (or invalidating) the pattern.
        # NOTE(review): the pattern has no trailing '$' - confirm whether names
        # such as "<name>X.tif.ovr" are meant to satisfy this check.
        match_str = '(?i)^' + re.escape(file_object_name) + r'[a-zA-Z][.]tif'
        main_file_exists = CFile.find_file_or_subpath_of_path(
            file_path, match_str, CFile.MatchType_Regex)
        if not main_file_exists:
            return self.Object_Confirm_IUnKnown, self._object_name

        # Characters 1, 4, 11-12, 16 and 21 must be letters; characters 2-3,
        # 5-10, 14-15 and 17-20 must be digits. Character 13 is not checked.
        alpha_parts = (
            file_main_name[0:1],
            file_main_name[3:4],
            file_main_name[10:12],
            file_main_name[15:16],
            file_main_name[20:21],
        )
        numeric_parts = (
            file_main_name[1:3],
            file_main_name[4:10],
            file_main_name[13:15],
            file_main_name[16:20],
        )
        if any(CUtils.text_is_alpha(part) is False for part in alpha_parts) \
                or any(CUtils.text_is_numeric(part) is False for part in numeric_parts):
            return self.Object_Confirm_IUnKnown, self._object_name

        if len(file_main_name) == 21 and CUtils.equal_ignore_case(file_ext, 'tif'):
            self._object_confirm = self.Object_Confirm_IKnown
            self._object_name = file_main_name
            # Attachments are registered under the shared 20-character name.
            self.add_file_to_detail_list(file_object_name)
        else:
            self._object_confirm = self.Object_Confirm_IKnown_Not
            self._object_name = None

        return self._object_confirm, self._object_name
Пример #2
0
    def classified(self):
        """
        Recognise a file geodatabase directory.

        The entry is confirmed as an object when its name ends with ``.gdb``
        (case-insensitive) and a ``*.gdbtable`` entry is found under it;
        otherwise it stays unknown.

        :return: tuple ``(object_confirm, object_name)``
        """
        # Start from the "unknown" state; it is only overwritten on a full match.
        self._object_confirm, self._object_name = self.Object_Confirm_IUnKnown, None

        gdb_path = self.file_info.file_name_with_full_path
        name_is_gdb = self.file_info.file_name_without_path.lower().endswith('.gdb')
        if not name_is_gdb:
            return self._object_confirm, self._object_name

        if CFile.find_file_or_subpath_of_path(gdb_path, '*.gdbtable'):
            self._object_confirm = self.Object_Confirm_IKnown
            self._object_name = self.file_info.file_main_name

        return self._object_confirm, self._object_name
Пример #3
0
    def classified(self):
        """
        Recognise files of the ``guoqing_scene_noblock`` land-industry dataset
        (national-conditions imagery, not split into blocks, i.e. without a
        ``-<number>`` suffix).

        The main file is ``<object name><F|M|P>.img``, where the object name
        starts with two letters and ends with an 8-digit date. When several
        candidates exist the priority is F, then M, then P: an M file is the
        main file only if no F file exists, and a P file only if neither F
        nor M exists. Every other file that passes the checks is reported as
        ``Object_Confirm_IKnown_Not`` (an attachment).

        :return: tuple ``(object_confirm, object_name)``
        """
        super().classified()
        file_main_name = self.file_info.file_main_name
        file_ext = self.file_info.file_ext
        file_path = self.file_info.file_path
        # The "object name" is the main name with trailing letters/characters removed.
        file_object_name = file_main_name

        if len(file_main_name) < 13:
            return self.Object_Confirm_IUnKnown, self._object_name

        # Shared prefix of all name patterns: case-insensitive, two letters,
        # any non-blank characters, then a date as yyyy[01]d[0123]d.
        dated_name = r'(?i)^[a-z]{2}\S+\d{4}[01]\d[0123]\d'
        if CUtils.text_match_re(file_main_name, dated_name + r'[a-z]$'):
            # A single trailing letter: drop it.
            file_object_name = file_main_name[:-1]
        elif CUtils.text_match_re(file_main_name, dated_name + r'[a-z][-]\d+$'):
            # "<letter>-<number>" suffix: not handled by this (no-block) rule.
            return self.Object_Confirm_IUnKnown, self._object_name
        elif CUtils.text_match_re(file_main_name, dated_name + r'$'):
            # No trailing characters: the main name already is the object name.
            pass
        elif CUtils.text_match_re(file_main_name, dated_name + r'\S+$'):
            # Several trailing characters: keep everything up to the date.
            file_object_name = re.findall(
                r'(?i)^([a-z]{2}\S+\d{4}[01]\d[0123]\d)\S+$', file_main_name)[0]

        # The object name comes from a file name, so it is escaped before being
        # embedded in a pattern; the dot before the extension is matched literally.
        name_pattern = '(?i)^' + re.escape(file_object_name)
        if not CFile.find_file_or_subpath_of_path(
                file_path, name_pattern + r'[FMP][.]img$', CFile.MatchType_Regex):
            # No main file for this object name exists in the directory.
            return self.Object_Confirm_IUnKnown, self._object_name

        # The object name must start with two letters and end with eight digits.
        if CUtils.text_is_alpha(file_object_name[0:2]) is False \
                or CUtils.text_is_numeric(file_object_name[-8:]) is False:
            return self.Object_Confirm_IUnKnown, self._object_name

        # Decide whether this file is the main file, honouring the F > M > P priority.
        last_char = file_main_name[-1:]
        is_main_file = False
        if CUtils.equal_ignore_case(file_ext, 'img'):
            if CUtils.equal_ignore_case(last_char, 'f'):
                is_main_file = True
            elif CUtils.equal_ignore_case(last_char, 'm'):
                is_main_file = not CFile.find_file_or_subpath_of_path(
                    file_path, name_pattern + r'[F][.]img$', CFile.MatchType_Regex)
            elif CUtils.equal_ignore_case(last_char, 'p'):
                is_main_file = not CFile.find_file_or_subpath_of_path(
                    file_path, name_pattern + r'[FM][.]img$', CFile.MatchType_Regex)

        if is_main_file:
            self._object_confirm = self.Object_Confirm_IKnown
            self._object_name = file_main_name
            self.add_file_to_detail_list(file_object_name)
        else:
            # Everything that reached this point and is not the main file is an attachment.
            self._object_confirm = self.Object_Confirm_IKnown_Not
            self._object_name = None

        return self._object_confirm, self._object_name
Пример #4
0
    def classified(self):
        """
        Classify the current file using configured keyword rules.

        Two rule lists are evaluated: one describing the object (main) file
        and one describing affiliated files. Each rule has an id (file name,
        file path, file extension, ...) and an optional regular expression; a
        missing expression matches anything.

        Result:
          * ``Object_Confirm_IKnown`` when the object name, path, extension
            and affiliated-file rules all matched (custom affiliated files
            are then registered);
          * ``Object_Confirm_IKnown_Not`` when the affiliated name, path,
            extension and main-file rules all matched, or when the object
            name and path rules matched and a same-named sibling file has an
            extension accepted by the object extension rule;
          * ``Object_Confirm_IUnKnown`` otherwise.

        :return: tuple ``(object_confirm, object_name)``
        """
        import re  # function-scope import: only needed to escape the name below

        super().classified()
        file_path = self.file_info.file_path
        file_main_name = self.file_info.file_main_name
        file_ext = self.file_info.file_ext

        def read_rule(keyword_info):
            """Return ``(keyword id, regex)`` of one rule; no regex means match-all."""
            rule_id = CUtils.dict_value_by_name(keyword_info, self.Name_ID, None)
            rule_regex = CUtils.dict_value_by_name(
                keyword_info, self.Name_RegularExpression, '.*')
            return rule_id, ('.*' if rule_regex is None else rule_regex)

        # ---- rules describing the object (main) file ----
        object_file_name_flag = False
        object_file_path_flag = False
        object_file_ext_flag = False
        object_affiliated_file_main_flag = False
        object_file_affiliated_flag = False
        for keyword_info in self.get_classified_character_of_object_keyword():
            keyword_id, regex_match = read_rule(keyword_info)

            if CUtils.equal_ignore_case(keyword_id, self.Name_FileName):
                if CUtils.text_match_re(file_main_name, regex_match):
                    object_file_name_flag = True
            elif CUtils.equal_ignore_case(keyword_id, self.Name_FilePath):
                if CUtils.text_match_re(file_path, regex_match):
                    object_file_path_flag = True
            elif CUtils.equal_ignore_case(keyword_id, self.Name_FileExt):
                if CUtils.text_match_re(file_ext, regex_match):
                    object_file_ext_flag = True
                else:
                    # This file does not have the object extension; check
                    # whether a sibling with the same main name does. The name
                    # is escaped so that metacharacters in a file name (e.g.
                    # "a(1)") are matched literally.
                    same_name_file_list = CFile.file_or_dir_fullname_of_path(
                        file_path, False,
                        '(?i)^' + re.escape(file_main_name) + '[.].*$',
                        CFile.MatchType_Regex)
                    if any(CUtils.text_match_re(CFile.file_ext(same_name_file), regex_match)
                           for same_name_file in same_name_file_list):
                        object_affiliated_file_main_flag = True
            elif CUtils.equal_ignore_case(keyword_id, self.Name_FileAffiliated):
                affiliated_file_path = CUtils.dict_value_by_name(
                    keyword_info, self.Name_FilePath, None)
                if affiliated_file_path is None:
                    # No path configured for the affiliated file: nothing to verify.
                    object_file_affiliated_flag = True
                elif CFile.find_file_or_subpath_of_path(
                        affiliated_file_path, regex_match, CFile.MatchType_Regex):
                    object_file_affiliated_flag = True

        # ---- rules describing an affiliated file ----
        affiliated_file_name_flag = False
        affiliated_file_path_flag = False
        affiliated_file_ext_flag = False
        affiliated_file_main_flag = False
        for keyword_info in self.get_classified_character_of_affiliated_keyword():
            keyword_id, regex_match = read_rule(keyword_info)

            if CUtils.equal_ignore_case(keyword_id, self.Name_FileName):
                if CUtils.text_match_re(file_main_name, regex_match):
                    affiliated_file_name_flag = True
            elif CUtils.equal_ignore_case(keyword_id, self.Name_FilePath):
                if CUtils.text_match_re(file_path, regex_match):
                    affiliated_file_path_flag = True
            elif CUtils.equal_ignore_case(keyword_id, self.Name_FileExt):
                if CUtils.text_match_re(file_ext, regex_match):
                    affiliated_file_ext_flag = True
            elif CUtils.equal_ignore_case(keyword_id, self.Name_FileMain):
                affiliated_file_path = CUtils.dict_value_by_name(
                    keyword_info, self.Name_FilePath, None)
                if affiliated_file_path is not None \
                        and CFile.find_file_or_subpath_of_path(
                            affiliated_file_path, regex_match, CFile.MatchType_Regex):
                    affiliated_file_main_flag = True

        # ---- final decision ----
        is_object = object_file_name_flag and object_file_path_flag \
            and object_file_ext_flag and object_file_affiliated_flag
        is_affiliated = affiliated_file_name_flag and affiliated_file_path_flag \
            and affiliated_file_ext_flag and affiliated_file_main_flag
        is_same_name_sibling = object_file_name_flag and object_file_path_flag \
            and object_affiliated_file_main_flag

        if is_object:
            self._object_confirm = self.Object_Confirm_IKnown
            self._object_name = file_main_name
            self.set_custom_affiliated_file()
        elif is_affiliated or is_same_name_sibling:
            self._object_confirm = self.Object_Confirm_IKnown_Not
            self._object_name = None
        else:
            self._object_confirm = self.Object_Confirm_IUnKnown
            self._object_name = None

        return self._object_confirm, self._object_name
Пример #5
0
    def classified(self):
        """
        Recognise files of the ``third_survey_noblock`` land-industry dataset
        (third-survey imagery, not split into blocks).

        The main name must be longer than six characters and start with six
        digits (the administrative division code), and a file matching
        ``<code>...dom.img`` must exist in the same directory. The main file
        is at least 12 characters long, has letters at positions 7-8, ends
        with ``dom`` and has the ``img`` extension. Other files are accepted
        as attachments only when their extension is one of the known
        attachment extensions.

        :return: tuple ``(object_confirm, object_name)``
        """
        super().classified()
        main_name = self.file_info.file_main_name
        ext = self.file_info.file_ext
        directory = self.file_info.file_path

        # The main name must be strictly longer than its six-character code prefix.
        if len(main_name) <= 6:
            return self.Object_Confirm_IUnKnown, self._object_name
        region_code = main_name[0:6]

        # The six-character prefix must be numeric.
        if not CUtils.text_is_numeric(CUtils.any_2_str(region_code)):
            return self.Object_Confirm_IUnKnown, self._object_name

        # A main file "<code><non-blank characters>dom.img" (case-insensitive)
        # has to exist next to the current file.
        main_file_pattern = '(?i)^' + region_code + r'\S+dom.img$'
        if not CFile.find_file_or_subpath_of_path(
                directory, main_file_pattern, CFile.MatchType_Regex):
            return self.Object_Confirm_IUnKnown, self._object_name

        has_main_file_name = (
            len(main_name) >= 12
            and CUtils.text_is_alpha(main_name[6:8])
            and CUtils.equal_ignore_case(
                CUtils.any_2_str(main_name[-3:]).lower(), 'dom')
        )

        if has_main_file_name:
            if CUtils.equal_ignore_case(ext, 'img'):
                self._object_confirm = self.Object_Confirm_IKnown
                self._object_name = main_name
                self.add_file_to_detail_list(region_code)
            else:
                self._object_confirm = self.Object_Confirm_IKnown_Not
                self._object_name = None
        elif ext.lower() in ('mdb', 'shp', 'shx', 'dbf', 'sbx', 'prj', 'sbn'):
            # Files that got this far with one of these extensions are
            # treated as attachments by default.
            self._object_confirm = self.Object_Confirm_IKnown_Not
            self._object_name = None
        else:
            return self.Object_Confirm_IUnKnown, self._object_name

        return self._object_confirm, self._object_name
    def classified(self):
        """
        Recognise files of the ``guoqing_scene_block`` land-industry dataset
        (national-conditions imagery split into blocks, e.g. ``...F-1`` / ``...F-2``).

        The main file of a block is ``<object name><F|M|P>-<number>.img``,
        where the object name starts with two letters and ends with an 8-digit
        date. For one block number the priority is F, then M, then P: an M
        file is the main file only if no F file with the same number exists,
        and a P file only if neither F nor M with that number exists. Every
        other file that passes the checks is reported as
        ``Object_Confirm_IKnown_Not`` (an attachment).

        :return: tuple ``(object_confirm, object_name)``
        """
        super().classified()
        file_main_name = self.file_info.file_main_name
        file_ext = self.file_info.file_ext
        file_path = self.file_info.file_path
        # The "object name" is the main name with trailing letters/characters removed.
        file_object_name = file_main_name

        if len(file_main_name) < 13:
            return self.Object_Confirm_IUnKnown, self._object_name

        # Shared prefix of all name patterns: case-insensitive, two letters,
        # any non-blank characters, then a date as yyyy[01]d[0123]d.
        dated_name = r'(?i)^[a-z]{2}\S+\d{4}[01]\d[0123]\d'
        if CUtils.text_match_re(file_main_name, dated_name + r'[a-z][-]\d+$'):
            # "<letter>-<number>" suffix: keep everything up to the date.
            file_object_name = re.findall(
                r'(?i)^([a-z]{2}\S+\d{4}[01]\d[0123]\d)[a-z][-]\d+$', file_main_name)[0]
        elif CUtils.text_match_re(file_main_name, dated_name + r'[FMP]$') \
                and CUtils.equal_ignore_case(file_ext, 'img'):
            # "<F|M|P>.img" without a block number is not a block file.
            return self.Object_Confirm_IUnKnown, self._object_name
        elif CUtils.text_match_re(file_main_name, dated_name + r'[a-z]$'):
            # A single trailing letter: drop it.
            file_object_name = file_main_name[:-1]
        elif CUtils.text_match_re(file_main_name, dated_name + r'$'):
            # No trailing characters: the main name already is the object name.
            pass
        elif CUtils.text_match_re(file_main_name, dated_name + r'\S+$'):
            # Arbitrary trailing characters: keep everything up to the date.
            file_object_name = re.findall(
                r'(?i)^([a-z]{2}\S+\d{4}[01]\d[0123]\d)\S+$', file_main_name)[0]

        # The object name comes from a file name, so it is escaped before being
        # embedded in a pattern; the dot before the extension is matched literally.
        name_pattern = '(?i)^' + re.escape(file_object_name)
        if not CFile.find_file_or_subpath_of_path(
                file_path, name_pattern + r'[FMP][-]\d+[.]img$', CFile.MatchType_Regex):
            # No main file for this object name exists in the directory.
            return self.Object_Confirm_IUnKnown, self._object_name

        # The object name must start with two letters and end with eight digits.
        if CUtils.text_is_alpha(file_object_name[0:2]) is False \
                or CUtils.text_is_numeric(file_object_name[-8:]) is False:
            return self.Object_Confirm_IUnKnown, self._object_name

        # Extract the trailing letter and block number, if the name has them.
        block_suffix = re.match(
            r'(?i)^[a-z]{2}\S+\d{4}[01]\d[0123]\d([a-z])[-](\d+)$', file_main_name)

        is_main_file = False
        block_number = None
        if block_suffix is not None and CUtils.equal_ignore_case(file_ext, 'img'):
            block_letter, block_number = block_suffix.groups()
            # The block number is matched as a literal sequence of digits.
            # Wrapping it in "[...]" would turn e.g. "10" into a one-character
            # class matching "1" or "0", checking the wrong block's files.
            same_block = '[-]' + block_number + r'[.]img$'
            if CUtils.equal_ignore_case(block_letter, 'f'):
                is_main_file = True
            elif CUtils.equal_ignore_case(block_letter, 'm'):
                is_main_file = not CFile.find_file_or_subpath_of_path(
                    file_path, name_pattern + '[F]' + same_block, CFile.MatchType_Regex)
            elif CUtils.equal_ignore_case(block_letter, 'p'):
                is_main_file = not CFile.find_file_or_subpath_of_path(
                    file_path, name_pattern + '[FM]' + same_block, CFile.MatchType_Regex)

        if is_main_file:
            self._object_confirm = self.Object_Confirm_IKnown
            self._object_name = file_main_name
            self.add_file_to_detail_list(file_object_name, block_number)
        else:
            # Files without a "<letter>-<number>" suffix, or outranked by a
            # higher-priority file, are treated as attachments.
            self._object_confirm = self.Object_Confirm_IKnown_Not
            self._object_name = None

        return self._object_confirm, self._object_name