def parse_code_strings(self) -> None:
    """
    Parse every style sheet code string and store the collected values.

    For each entry of ``self.code_strings_to_process`` the first and last
    character are dropped, the style code is read, control words are
    consumed for as long as a backslash remains in the string, the style
    name is read, and the resulting dictionary is handed to
    ``dict_updater.json_dict_updater`` for "style_sheet_table_file.json".
    """
    style_values = {}
    for raw_string in self.code_strings_to_process:
        parser = StyleParser(code_dict=style_values)
        remainder, active_key = StyleParser.check_stylecode(
            self=parser, code_string=raw_string[1:-1])

        # Each pass is expected to consume one control word; stop as soon
        # as no backslash is left in the remaining text.
        while re.search(r"\\", remainder) is not None:
            remainder, active_key = StyleParser.check_control_words(
                self=StyleParser(code_dict=style_values),
                code_string=remainder,
                current_key=active_key)

        StyleParser.check_style_name(
            self=parser, code_string=remainder, current_key=active_key)
        dict_updater.json_dict_updater(
            dict_name="style_sheet_table_file.json",
            dict_update=style_values,
            main_dict=self.main_dict)
        # The next code string starts from an empty dictionary.
        style_values = {}
def check_info_code(self):
    """
    Record the text of an info-group entry found in ``self.code_string``.

    Each known info keyword is looked up as a control word, i.e. preceded
    by a backslash and not followed by another letter. Matching only the
    bare keyword would also fire on ordinary words inside the text (for
    example "title" inside a subject line) and store a bogus entry.

    For a keyword that is present, the stored value is everything after
    the first whitespace character of the string, left-stripped and with
    its final character removed (presumably the closing brace - verify
    against the caller).

    The collected values are passed to ``dict_updater.json_dict_updater``
    for "info_group_file.json" and ``self.code_dict`` is reset afterwards.
    """
    info_part_list = [
        "title", "subject", "author", "manager", "company", "operator",
        "category", "comment", "doccomm", "hlinkbase", "keywords"
    ]
    # Text payload: from the first whitespace character to the end.
    text_pattern = re.compile(r'\s(\w+|\s|\W)+')

    for info_part in info_part_list:
        control_word = re.compile(
            r"\\" + re.escape(info_part) + r"(?![A-Za-z])")
        if control_word.search(self.code_string) is None:
            continue
        info_text = text_pattern.search(self.code_string)
        if info_text is None:
            # Control word without any text after it: nothing to record.
            continue
        pre_result = info_text[0].lstrip()
        self.code_dict.update({info_part: pre_result[:-1]})

    dict_updater.json_dict_updater(dict_name="info_group_file.json",
                                   dict_update=self.code_dict,
                                   main_dict=self.main_dict)
    self.code_dict = {}
def check_time_components(self):
    """
    Extract the numeric date/time fields of a time group.

    When ``self.code_string`` mentions one of the time groups ("creatim",
    "revtim", "printim", "buptim"), every unit ("yr", "mo", "dy", "hr",
    "min", "sec") that is followed by at least one digit is stored in
    ``self.code_dict`` under the key "<group>_<unit>" as an int, and the
    dictionary is passed to ``dict_updater.json_dict_updater`` for
    "info_group_file.json" after each stored value.

    A unit name without digits is skipped; previously it matched with an
    empty number and ``int("")`` raised ValueError.

    ``self.code_dict`` is reset to an empty dict before returning.
    """
    time_part_list = ["creatim", "revtim", "printim", "buptim"]
    time_data_list = ["yr", "mo", "dy", "hr", "min", "sec"]

    for time_part in time_part_list:
        if re.search(time_part, self.code_string) is None:
            continue
        for ele in time_data_list:
            # "+" (not "*"): the unit must carry a numeric parameter.
            measure = re.search(fr'{ele}([0-9]+)', self.code_string)
            if measure is None:
                continue
            key = time_part + "_" + ele
            self.code_dict.update({key: int(measure[1])})
            dict_updater.json_dict_updater(
                dict_name="info_group_file.json",
                dict_update=self.code_dict,
                main_dict=self.main_dict)
    self.code_dict = {}
def check_nofchars(self):
    """
    Record the value of the "\\nofchars" control word, if present.

    When ``self.code_string`` contains "\\nofchars" followed by digits, the
    number is stored in ``self.code_dict`` under "nofchars" and the
    dictionary is passed to ``dict_updater.json_dict_updater`` for
    "info_group_file.json". ``self.code_dict`` is reset to an empty dict
    in every case.
    """
    found = re.search(r'\\nofchars[0-9]+', self.code_string)
    if found is not None:
        digits = found[0].replace('\\nofchars', "")
        self.code_dict.update({"nofchars": int(digits)})
        dict_updater.json_dict_updater(dict_name="info_group_file.json",
                                       dict_update=self.code_dict,
                                       main_dict=self.main_dict)
    self.code_dict = {}
def parse_namespace(code_strings_to_process: list, main_dict: dict) -> None:
    """
    Parse each XML namespace code string and store the result.

    Every string is first passed to ``namespace_code_parse`` (starting
    from an empty dictionary), its output is passed to
    ``namespace_parse``, and the resulting dictionary is handed to
    ``dict_updater.json_dict_updater`` for "xml_namespace_table_file.json".
    """
    for raw_string in code_strings_to_process:
        remainder, active_key, namespace_values = namespace_code_parse(
            code_string=raw_string, code_dict={})
        namespace_values = namespace_parse(
            code_string=remainder,
            current_key=active_key,
            code_dict=namespace_values)
        dict_updater.json_dict_updater(
            dict_name="xml_namespace_table_file.json",
            dict_update=namespace_values,
            main_dict=main_dict)
def parse_code_strings(self):
    """
    Parse every font table code string and store the collected values.

    Each string first goes through ``FontParser.delete_themes`` and is
    then passed through the parsing steps in a fixed order. A step
    returning None stops the remaining steps for that string. The
    dictionary collected for the string is handed to
    ``dict_updater.json_dict_updater`` for "font_table_file.json" either
    way.
    """
    font_values = {}
    for raw_string in self.code_strings_to_process:
        parser = FontParser(code_dict=font_values)
        remainder = FontParser.delete_themes(code_string=raw_string)

        # Steps whose return value is the input of the following step.
        chained_steps = (
            FontParser.check_fontnum,
            FontParser.check_fontfamily,
            FontParser.check_fcharset,
            FontParser.check_fprq,
            FontParser.check_panose,
            FontParser.check_fname,
            FontParser.check_altname,
            FontParser.check_fontemb,
            FontParser.check_fontname_tagged,
        )
        for step in chained_steps:
            if remainder is None:
                break
            remainder = step(self=parser, code_string=remainder)

        # The last step's return value is not used.
        if remainder is not None:
            FontParser.check_cpg(self=parser, code_string=remainder)

        dict_updater.json_dict_updater(dict_name="font_table_file.json",
                                       dict_update=font_values,
                                       main_dict=self.main_dict)
        # The next code string starts from an empty dictionary.
        font_values = {}
def parse_code_strings(code_strings_list: iter, main_dict: dict) -> None:
    """
    Parse each color table string.

    The first element of ``code_strings_list`` is taken as the list of
    color strings. Each string is keyed by its position in that list,
    parsed by ``parse_control_word`` and ``parse_theme_control_word``, and
    the result is handed to ``dict_updater.json_dict_updater`` for
    "color_table_file.json".

    The position comes from ``enumerate`` rather than ``list.index``:
    ``index`` returns the first equal entry, so two identical color
    strings would share one key and the later position would be lost.
    """
    cs_list = list(code_strings_list)[0]
    for key, code_string in enumerate(cs_list):
        code_dict = {key: {}}
        code_dict = parse_control_word(code_string=code_string,
                                       code_dict=code_dict, key=key)
        code_dict = parse_theme_control_word(code_string=code_string,
                                             code_dict=code_dict, key=key)
        dict_updater.json_dict_updater(dict_name="color_table_file.json",
                                       dict_update=code_dict,
                                       main_dict=main_dict)
def check_stat_code(self):
    """
    Record the numeric document statistics found in ``self.code_string``.

    For each of "version", "edmins", "nofpages", "nofwords", "nofcharsws"
    and "vern", a control word "\\<name>" followed by at least one digit
    is stored in ``self.code_dict`` as an int under <name>, and the
    dictionary is passed to ``dict_updater.json_dict_updater`` for
    "info_group_file.json" after each stored value.

    A control word without digits is skipped; previously it matched with
    an empty number and ``int("")`` raised ValueError.

    ``self.code_dict`` is reset to an empty dict before returning.
    """
    stat_code_list = [
        "version", "edmins", "nofpages", "nofwords", "nofcharsws", "vern"
    ]
    for stat in stat_code_list:
        # "+" (not "*"): the control word must carry a numeric parameter.
        test = re.search(fr'\\{stat}([0-9]+)', self.code_string)
        if test is None:
            continue
        self.code_dict.update({stat: int(test[1])})
        dict_updater.json_dict_updater(dict_name="info_group_file.json",
                                       dict_update=self.code_dict,
                                       main_dict=self.main_dict)
    self.code_dict = {}
def parse_pgp_string(code_strings_to_process: list, main_dict: dict) -> None:
    """
    Parse each paragraph group properties code string and store the result.

    Every string goes through ``pgp_parse`` and ``ipgpn_parse``; control
    words are then consumed with ``check_control_words`` for as long as a
    backslash remains. The resulting dictionary is handed to
    ``dict_updater.json_dict_updater`` for "pgp_table_file.json".
    """
    # NOTE(review): the same counter value is passed for every string;
    # confirm with ipgpn_parse whether it is meant to advance.
    counter = 1000
    for raw_string in code_strings_to_process:
        remainder, active_key, pgp_values = pgp_parse(
            code_string=raw_string, code_dict={})
        pgp_values, active_key, remainder = ipgpn_parse(
            code_string=remainder,
            counter=counter,
            current_key=active_key,
            code_dict=pgp_values)

        # Each pass is expected to consume one control word; stop as soon
        # as no backslash is left in the remaining text.
        while re.search(r"\\", remainder) is not None:
            remainder, active_key, pgp_values = check_control_words(
                code_string=remainder,
                current_key=active_key,
                code_dict=pgp_values)

        dict_updater.json_dict_updater(dict_name="pgp_table_file.json",
                                       dict_update=pgp_values,
                                       main_dict=main_dict)
def code_strings_file_update(header_table: str, main_dict: dict,
                             code_strings_list: list):
    """
    Store the code strings of one header table.

    The list is wrapped in an outer list, keyed by ``header_table``, and
    handed to ``dict_updater.json_dict_updater`` for
    "code_strings_file.json".
    """
    dict_updater.json_dict_updater(
        dict_name="code_strings_file.json",
        main_dict=main_dict,
        dict_update={header_table: [code_strings_list]})
def parse_rsid_string(code_string: str, main_dict: dict) -> None:
    """
    Parse the rsid table code string and store the result.

    ``rsid_parse`` is called with an empty dictionary, and what it returns
    is handed to ``dict_updater.json_dict_updater`` for
    "rsid_table_file.json".
    """
    rsid_values = rsid_parse(code_string=code_string, code_dict={})
    dict_updater.json_dict_updater(
        dict_name="rsid_table_file.json",
        dict_update=rsid_values,
        main_dict=main_dict)
def build_header_tables_dict(main_dict: dict) -> None:
    """
    Check header for existence and location of sections: <first line>,
    <font table>, <file table>, <color table>, <stylesheet>, <list table>,
    <rev table>, <rsid table>, <generator>, <info>, <xmlnstbl>.

    For every known table name, the lines of
    ``main_dict["working_input_file"]`` are scanned in order. At the first
    line whose stripped text contains the table's opening sequence, the
    start position is written into ``main_dict``,
    ``group_boundaries.define_boundaries`` is called to find the end of
    the group, and the four boundary values are handed to
    ``dict_updater.json_dict_updater`` for "header_tables_dict.json".
    Tables that are not found are skipped.

    The start line is the position reached by the scan, and the start
    index is where the opening sequence matched in the stripped line.
    Looking the stripped line up in the list raised ValueError whenever
    the stored line carried surrounding whitespace, and ``str.find`` on the
    bare table name could return an earlier, unrelated occurrence.
    """
    plain_lead = r"{\\"
    starred_lead = r"{\\\*\\"
    # Table name -> regular expression for the text that precedes it.
    table_leads = {
        "colortbl": plain_lead,
        "filetbl": plain_lead,
        "fonttbl": plain_lead,
        "generator": plain_lead,
        "info": plain_lead,
        "listtables": plain_lead,
        "pgptbl": starred_lead,
        "revtbl": plain_lead,
        "rsidtbl": starred_lead,
        "stylesheet": plain_lead,
        "xmlnstbl": starred_lead,
    }
    working_input_file = main_dict["working_input_file"]

    for table, lead in table_leads.items():
        table_pattern = re.compile(lead + table)
        for listpos, raw_line in enumerate(working_input_file):
            line = raw_line.strip()
            table_search = table_pattern.search(line)
            if table_search is None:
                continue

            # The match starts at the opening brace of the group.
            main_dict["group_start_line"] = listpos
            main_dict["group_start_index"] = table_search.start()
            main_dict["parse_text"] = line[main_dict["group_start_index"]:]
            main_dict["parse_index"] = main_dict["group_start_index"]
            main_dict["line_to_parse"] = main_dict["group_start_line"]
            main_dict = group_boundaries.define_boundaries(
                main_dict=main_dict)

            table_boundaries_info = {table: [
                main_dict["group_start_line"],
                main_dict["group_start_index"],
                main_dict["group_end_line"],
                main_dict["group_end_index"]]
            }
            dict_updater.json_dict_updater(
                dict_name="header_tables_dict.json",
                dict_update=table_boundaries_info,
                main_dict=main_dict)
            # Only the first occurrence of each table is recorded.
            break