def paragraph_tag_cleanup(debug_dir: str, tag_dict: dict, line: str):
    """
    Write the paragraph tag(s) needed to start a new paragraph.

    The "paragraph" entry of "tag_registry.json" (read from debug_dir)
    decides what is written: when it equals "0" (closed) only the opening
    tag is emitted; otherwise the closing tag followed by the opening tag
    is emitted. The emitted string is passed to
    build_output_file.bof_processor and, when logger_debug is enabled for
    DEBUG, logged together with line.

    :param debug_dir: directory that contains "tag_registry.json".
    :param tag_dict: mapping with "paragraph-beg" and "paragraph-end" tags.
    :param line: value appended to the logged message.
    """
    registry_path = os.path.join(debug_dir, "tag_registry.json")
    with open(registry_path) as registry_handle:
        registry = json.load(registry_handle)

    paragraph_is_closed = registry["paragraph"] == "0"
    if paragraph_is_closed:
        content_update = tag_dict["paragraph-beg"]
    else:
        content_update = tag_dict["paragraph-end"] + tag_dict["paragraph-beg"]

    # NOTE(review): main_dict is not a parameter of this function; it is
    # resolved from an enclosing/module scope that is not visible here --
    # confirm it is defined where this function lives.
    build_output_file.bof_processor(update_output=content_update,
                                    main_dict=main_dict)

    try:
        if logger_debug.isEnabledFor(logging.DEBUG):
            # Emitted through .error() although guarded by the DEBUG check.
            logger_debug.error(str(content_update + f"{line}"))
    except AttributeError:
        logging.exception("Check setLevel for logger_debug.")
def ti_processor(main_dict: dict, cw_text: str) -> None:
    """
    Write a placeholder tag for a control word that cannot be processed.

    The template is the first item of the "missing" entry of the active
    tag set (main_dict["tag_set"]) in "xml_tags.json", which is read from
    main_dict["dicts_dir"]. In the template, "zzz" is replaced with
    main_dict["line_to_parse"] and "aaa" with cw_text; the result is passed
    to build_output_file.bof_processor.

    :param main_dict: processing state; must provide "dicts_dir",
        "tag_set" and "line_to_parse".
    :param cw_text: text of the control word being reported.
    """
    tag_dict_file = os.path.join(main_dict["dicts_dir"], "xml_tags.json")
    # The file is only parsed, so open it read-only: "r+" would needlessly
    # require write permission and fail on read-only installations.
    with open(tag_dict_file, "r") as tag_dict_file_pre:
        tag_dict_options = json.load(tag_dict_file_pre)

    tag_dict = tag_dict_options[str(main_dict["tag_set"])]
    tag_template = tag_dict["missing"][0]
    tag = tag_template.replace("zzz", str(main_dict["line_to_parse"]))
    tag = tag.replace("aaa", cw_text)
    build_output_file.bof_processor(update_output=tag, main_dict=main_dict)
def tc_processor(main_dict: dict) -> dict:
    """
    Request the "close" setting for each structural tag, in a fixed order.

    For each of "par", "section", "body", "bodytext" and "wrapper", the
    open/close strings of the active tag set (main_dict["tag_set"]) are
    taken from "xml_tags.json" in main_dict["dicts_dir"] and passed to
    tag_check.tc_processor with tag_setting "close". Whatever output that
    call returns is forwarded to build_output_file.bof_processor.

    :param main_dict: processing state; must provide "dicts_dir" and
        "tag_set".
    :return: main_dict as returned by the last tag_check.tc_processor call.
    """
    xml_tags_file = os.path.join(main_dict["dicts_dir"], "xml_tags.json")
    # The file is only parsed, so open it read-only: "r+" would needlessly
    # require write permission and fail on read-only installations.
    with open(xml_tags_file, "r") as xml_tags_pre:
        xml_tags_dicts = json.load(xml_tags_pre)
    xml_tags = xml_tags_dicts[str(main_dict["tag_set"])]

    status_list = ("par", "section", "body", "bodytext", "wrapper")
    for tag in status_list:
        tag_open_str, tag_close_str = xml_tags[tag][0], xml_tags[tag][1]
        tag_info = {
            "name": tag,
            "tag_open_str": tag_open_str,
            "tag_close_str": tag_close_str,
            "tag_setting": "close",
            "tag_set": main_dict["tag_set"],
        }
        main_dict, update_output = tag_check.tc_processor(
            tag_info=tag_info, main_dict=main_dict)
        build_output_file.bof_processor(
            update_output=update_output, main_dict=main_dict)
    return main_dict
def gc_processor(main_dict: dict, collections_dict: dict) -> dict:
    """
    Process every element of main_dict["contents_list"] in order.

    * "{" and "}" are skipped.
    * An element matching main_dict["cw_regex"] is treated as a control
      word: its letters form the name and its digits the value. The name
      is looked up in collections_dict; unless the entry is "null" it is
      handed to control_word_to_build.cwtb_processor. On KeyError the word
      is registered through csv_modifier.csvm_processor and a placeholder
      is written by tag_insert_missing_cw.ti_processor.
    * Anything else is written unchanged through
      build_output_file.bof_processor.

    :param main_dict: processing state; must provide "contents_list" and
        "cw_regex" (and "tag_set" when a control word is dispatched).
    :param collections_dict: control-word name -> handler module name.
    :return: the (possibly replaced) main_dict.
    """
    # Temp setup for testing
    null_function = "null"

    for ele in main_dict["contents_list"]:
        if ele in ("{", "}"):
            continue

        if not re.search(main_dict["cw_regex"], ele):
            build_output_file.bof_processor(update_output=ele,
                                            main_dict=main_dict)
            continue

        cw_text = "".join(char for char in ele if char.isalpha())
        cw_value = "".join(char for char in ele if char.isdigit())

        # NOTE(review): the try also covers the cwtb_processor call, so a
        # KeyError raised inside it is handled as a missing control word.
        try:
            cw_func = collections_dict[cw_text]
            if cw_func != null_function:
                main_dict = control_word_to_build.cwtb_processor(
                    tag_info={
                        "func": cw_func,
                        "cw_text": cw_text,
                        "cw_value": cw_value,
                        "name": cw_text,
                        "tag_open": "",
                        "tag_close": "",
                        "tag_set": main_dict["tag_set"],
                    },
                    main_dict=main_dict)
        except KeyError:
            # Add missing control word to control_words_collections.csv
            # file.
            collections_dict = csv_modifier.csvm_processor(
                main_dict=main_dict, cw_text=cw_text,
                collections_dict=collections_dict)
            # Add control word that cannot be processed to XML build
            # file.
            tag_insert_missing_cw.ti_processor(main_dict=main_dict,
                                               cw_text=cw_text)

    return main_dict
def bt_processor(main_dict: dict) -> dict:
    """
    Test for backslash character as part of text.

    When main_dict["parse_text"] starts with one whitespace character
    followed by two backslashes, a single backslash is written through
    build_output_file.bof_processor, the first backslash is removed from
    the parse text, "parse_index" is set to 1 and the state is passed to
    adjust_process_text.apt_processor. Otherwise main_dict is returned
    untouched. A TypeError raised anywhere in that sequence is logged and
    swallowed.

    :param main_dict: processing state; must provide "parse_text" (and
        "line_to_parse" / "parse_index" for the error message).
    :return: the (possibly replaced) main_dict.
    """
    try:
        if re.search(r"^(\s\\\\)", main_dict["parse_text"]) is not None:
            backslash = "\\"
            build_output_file.bof_processor(
                update_output=backslash, main_dict=main_dict)
            remaining = main_dict["parse_text"].replace(backslash, "", 1)
            main_dict["parse_text"] = remaining
            main_dict["parse_index"] = 1
            main_dict = adjust_process_text.apt_processor(
                main_dict=main_dict)
    except TypeError:
        logging.exception(f"{main_dict['line_to_parse']}:"
                          f"{main_dict['parse_index']}--"
                          f"{main_dict['parse_text']}")
    return main_dict
def cwtb_processor(tag_info: dict, main_dict: dict) -> dict:
    """
    Run the handler module for one control word and write its output.

    The module named by tag_info["func"] is imported and its
    cw_func_processor is called to fill in tag_info. The result goes to
    tag_check.tc_processor, which returns the updated main_dict and the
    text to emit; non-empty text is written through
    build_output_file.bof_processor.

    :param tag_info: control-word description; must provide "func" and
        "name".
    :param main_dict: processing state; must provide "base_dir".
    :return: the updated main_dict, or the unchanged main_dict when the
        handler module name is invalid.
    """
    base_dir = main_dict["base_dir"]
    cws_dir = os.path.join(base_dir, "Library/control_words_symbols/")

    try:
        # NOTE(review): importlib only consults ``package`` for relative
        # module names and expects a package name there, not a directory
        # path -- confirm cws_dir is what is intended.
        tagging_mod = importlib.import_module(tag_info["func"],
                                              package=cws_dir)
    except ValueError:
        log.debug(msg=f"Module name: {tag_info['name']}")
        # No module was loaded, so there is no handler to run. Falling
        # through would hit an unbound ``tagging_mod`` and raise
        # UnboundLocalError; skip this control word instead.
        return main_dict

    tag_info = tagging_mod.cw_func_processor(tag_info=tag_info,
                                             main_dict=main_dict)

    # Check whether tag is already open or closed.
    results = tag_check.tc_processor(tag_info=tag_info, main_dict=main_dict)
    main_dict = results[0]
    update_output = results[1]

    if update_output != "":
        build_output_file.bof_processor(update_output=update_output,
                                        main_dict=main_dict)

    if main_dict is None:
        log.debug("Main_dict is none.")
        sys.exit(1)

    return main_dict
def ct_processor(main_dict: dict) -> dict:
    """
    Test for text.

    When main_dict["parse_text"] begins with a non-empty run of plain-text
    characters, that run is written through
    build_output_file.bof_processor and removed from the front of the
    parse text, "parse_index" is set to 1 and the state is passed to
    adjust_process_text.apt_processor. Otherwise main_dict is returned
    untouched. A TypeError raised anywhere in that sequence is logged and
    swallowed.

    :param main_dict: processing state; must provide "parse_text".
    :return: the (possibly replaced) main_dict.
    """
    try:
        test = re.search(
            r"^([a-zA-Z0-9\s?.!,;:_%<>=@\-\[\]–/()\'\"“”‘’]*)",
            main_dict["parse_text"])
        if test is not None and test[0] != "":
            text = test[0]
            build_output_file.bof_processor(update_output=text,
                                            main_dict=main_dict)
            # Drop only the matched prefix. str.replace(text, "") without
            # a count would also delete every later occurrence of the
            # same text from the remaining input.
            main_dict["parse_text"] = main_dict["parse_text"][test.end():]
            main_dict["parse_index"] = 1
            main_dict = adjust_process_text.apt_processor(
                main_dict=main_dict)
    except TypeError:
        # Report from the same top-level keys this function works with,
        # and read them defensively so the handler cannot raise itself
        # and mask the original error.
        state = main_dict if isinstance(main_dict, dict) else {}
        logging.exception("Check_text: %s:%s--%s",
                          state.get("line_to_parse"),
                          state.get("parse_index"),
                          state.get("parse_text"))
    return main_dict
def ofh_processor(main_dict: dict, config_settings_dict: dict) -> dict:
    """
    Write the XML header file text to the output.

    The header is read from the "input" directory under
    main_dict["base_dir"]: "defaultheader.xml" when
    config_settings_dict["output-file-header"] is 0, otherwise
    "tpresheader.xml". Its text is passed to
    build_output_file.bof_processor and main_dict is rebound to that
    call's return value (presumably the updated main_dict -- confirm).
    A missing header file is logged and main_dict is returned unchanged.

    :param main_dict: processing state; must provide "base_dir".
    :param config_settings_dict: must provide "output-file-header".
    :return: the result of bof_processor, or main_dict unchanged when the
        header file cannot be found.
    """
    header_file_dir = os.path.join(main_dict["base_dir"], "input")
    try:
        if config_settings_dict["output-file-header"] == 0:
            header_file_name = "defaultheader.xml"
        else:
            header_file_name = "tpresheader.xml"
        header_file = os.path.join(header_file_dir, header_file_name)
        # The header is only read, so open it read-only: "r+" would
        # needlessly require write permission.
        with open(header_file, "r") as header_file_pre:
            header_file_text = header_file_pre.read()
        main_dict = build_output_file.bof_processor(
            main_dict=main_dict, update_output=header_file_text)
    except FileNotFoundError as error:
        # The first argument is the format string and the rest are its
        # %-arguments; passing the description as an argument to a message
        # without placeholders makes logging fail while formatting.
        logging.exception(
            "An XML header file cannot be found or opened: %s", error)
    return main_dict
def oft_processor(main_dict: dict, config_settings_dict: dict) -> Any:
    """
    Insert the XML tags to start the document portion of the XML file
    (after the header).

    The tags come from "start_tags.json" in main_dict["dicts_dir"], which
    must define entries "1", "2" and "3". The entry selected by
    config_settings_dict["tag-set"] is written through
    build_output_file.bof_processor; on any KeyError the problem is logged
    and entry "1" is used instead.

    :param main_dict: processing state; must provide "dicts_dir".
    :param config_settings_dict: must provide "tag-set".
    :return: whatever build_output_file.bof_processor returns (presumably
        the updated main_dict -- confirm).
    """
    start_tags = os.path.join(main_dict["dicts_dir"], "start_tags.json")
    transition_tags = ""
    # The file is only parsed, so open it read-only: "r+" would needlessly
    # require write permission.
    with open(start_tags, "r") as start_tags_pre:
        start_tag_dict = json.load(start_tags_pre)

    try:
        test_dict = {"1": start_tag_dict["1"],
                     "2": start_tag_dict["2"],
                     "3": start_tag_dict["3"]}
        transition_tags = test_dict[config_settings_dict["tag-set"]]
    except KeyError as error:
        # The first argument is the format string; the key goes in as a
        # %-argument so that logging can format the record.
        logging.exception(
            "The tag-set number does not match an entry for transition "
            "tags: %s", error)
        transition_tags = start_tag_dict["1"]
    except FileNotFoundError as error:
        # NOTE(review): nothing inside this try opens a file (open() runs
        # above it), so this handler looks unreachable -- confirm whether
        # the open() call was meant to be covered.
        logging.exception(
            "The config_dict.json file is missing: %s", error)

    main_dict = build_output_file.bof_processor(
        main_dict=main_dict, update_output=transition_tags)
    return main_dict