def map_text(base_dir: str, merge_name: str, text_name: str, symbols_map_path: str, ignore_arcs: bool = True):
    """Apply a symbols map to an already-added text and persist the result.

    Loads the sentences and symbol converter of *text_name*, remaps their
    symbols using the map stored at *symbols_map_path*, prints the mapped
    sentences, saves sentences and converter back, then refreshes the accent
    template and the unknown-symbol check.

    Args:
        base_dir: Root directory of the workspace.
        merge_name: Name of the merged dataset.
        text_name: Name of the text inside the merged dataset.
        symbols_map_path: Path to the serialized SymbolsMap to apply.
        ignore_arcs: Forwarded to the mapping routine.
    """
    logger = getLogger(__name__)
    merge_dir = get_merged_dir(base_dir, merge_name, create=False)
    text_dir = get_text_dir(merge_dir, text_name, create=False)

    # Guard clause: nothing to map if the text was never added.
    if not os.path.isdir(text_dir):
        logger.error("Please add text first.")
        return

    symbol_ids, updated_sentences = sents_map(
        sentences=load_text_csv(text_dir),
        text_symbols=load_text_symbol_converter(text_dir),
        symbols_map=SymbolsMap.load(symbols_map_path),
        ignore_arcs=ignore_arcs,
        logger=logger,
    )
    formatted = updated_sentences.get_formatted(
        symbol_id_dict=symbol_ids,
        accent_id_dict=load_merged_accents_ids(merge_dir),
    )
    print("\n" + formatted)
    _save_text_csv(text_dir, updated_sentences)
    save_text_symbol_converter(text_dir, symbol_ids)
    _accent_template(base_dir, merge_name, text_name)
    _check_for_unknown_symbols(base_dir, merge_name, text_name)
def add_text(base_dir: str, merge_name: str, text_name: str, filepath: Optional[str], lang: Language, text: Optional[str] = None):
    """Add a new text (from a file or a literal string) to a merged dataset.

    Exactly one of *filepath* or *text* must supply the content. The inferred
    sentences and their symbol converter are printed, stored under a new text
    directory, and the accent template plus unknown-symbol check are refreshed.

    Args:
        base_dir: Root directory of the workspace.
        merge_name: Name of the merged dataset the text belongs to.
        text_name: Non-empty name for the new text.
        filepath: Path to a text file, or None to use *text* instead.
        lang: Language of the input text.
        text: Literal text content; required when *filepath* is None.

    Raises:
        ValueError: If *text_name* is empty/None, or neither *filepath* nor
            *text* is provided.
    """
    # Fix: the original used `assert` for input validation, which is silently
    # stripped under `python -O`; raise explicitly instead.
    if not text_name:
        raise ValueError("text_name must be a non-empty string.")
    logger = getLogger(__name__)
    merge_dir = get_merged_dir(base_dir, merge_name, create=False)
    if not os.path.isdir(merge_dir):
        logger.error("Please prepare data first.")
        return
    logger.info("Adding text...")
    # Fix: dropped the dead `text_input = ""` pre-assignment that was always
    # overwritten below.
    if filepath is None:
        if text is None:
            raise ValueError("Either filepath or text must be provided.")
        text_input = text
    else:
        text_input = read_text(filepath)
    symbol_ids, data = infer_add(
        text=text_input,
        lang=lang,
        logger=logger,
    )
    print("\n" + data.get_formatted(
        symbol_id_dict=symbol_ids,
        accent_id_dict=load_merged_accents_ids(merge_dir)))
    text_dir = get_text_dir(merge_dir, text_name, create=True)
    _save_text_csv(text_dir, data)
    save_text_symbol_converter(text_dir, symbol_ids)
    _accent_template(base_dir, merge_name, text_name)
    _check_for_unknown_symbols(base_dir, merge_name, text_name)
def ipa_convert_text(base_dir: str, merge_name: str, text_name: str, ignore_tones: bool = False, ignore_arcs: bool = True, consider_ipa_annotations: bool = False, mode: Optional[EngToIpaMode] = None):
    """Convert an already-added text to IPA and persist the result.

    Loads the sentences and symbol converter of *text_name*, converts them to
    IPA with the given options, prints the converted sentences, saves both
    back, then refreshes the accent template and the unknown-symbol check.

    Args:
        base_dir: Root directory of the workspace.
        merge_name: Name of the merged dataset.
        text_name: Name of the text inside the merged dataset.
        ignore_tones: Forwarded to the IPA conversion routine.
        ignore_arcs: Forwarded to the IPA conversion routine.
        consider_ipa_annotations: Forwarded to the IPA conversion routine.
        mode: Optional English-to-IPA conversion mode.
    """
    logger = getLogger(__name__)
    merge_dir = get_merged_dir(base_dir, merge_name, create=False)
    text_dir = get_text_dir(merge_dir, text_name, create=False)

    # Guard clause: conversion requires the text to exist already.
    if not os.path.isdir(text_dir):
        logger.error("Please add text first.")
        return

    logger.info("Converting text to IPA...")
    symbol_ids, updated_sentences = sents_convert_to_ipa(
        sentences=load_text_csv(text_dir),
        text_symbols=load_text_symbol_converter(text_dir),
        ignore_tones=ignore_tones,
        ignore_arcs=ignore_arcs,
        mode=mode,
        consider_ipa_annotations=consider_ipa_annotations,
        logger=logger,
    )
    formatted = updated_sentences.get_formatted(
        symbol_id_dict=symbol_ids,
        accent_id_dict=load_merged_accents_ids(merge_dir),
    )
    print("\n" + formatted)
    _save_text_csv(text_dir, updated_sentences)
    save_text_symbol_converter(text_dir, symbol_ids)
    _accent_template(base_dir, merge_name, text_name)
    _check_for_unknown_symbols(base_dir, merge_name, text_name)
def accent_apply(base_dir: str, merge_name: str, text_name: str):
    """Apply the accents from the accent template CSV to an added text.

    Loads the sentences of *text_name* together with the accented-symbol CSV,
    applies the accents, prints the updated sentences, saves them back, and
    re-runs the unknown-symbol check.

    Args:
        base_dir: Root directory of the workspace.
        merge_name: Name of the merged dataset.
        text_name: Name of the text inside the merged dataset.
    """
    logger = getLogger(__name__)
    merge_dir = get_merged_dir(base_dir, merge_name, create=False)
    text_dir = get_text_dir(merge_dir, text_name, create=False)

    # Guard clause: accents can only be applied to an existing text.
    if not os.path.isdir(text_dir):
        logger.error("Please add text first.")
        return

    logger.info("Applying accents...")
    accent_ids = load_merged_accents_ids(merge_dir)
    updated_sentences = sents_accent_apply(
        sentences=load_text_csv(text_dir),
        accented_symbols=_load_accents_csv(text_dir),
        accent_ids=accent_ids,
    )
    formatted = updated_sentences.get_formatted(
        symbol_id_dict=load_text_symbol_converter(text_dir),
        accent_id_dict=accent_ids,
    )
    print("\n" + formatted)
    _save_text_csv(text_dir, updated_sentences)
    _check_for_unknown_symbols(base_dir, merge_name, text_name)
def get_infer_sentences(base_dir: str, merge_name: str, text_name: str) -> InferSentenceList:
    """Build the inference sentence list for an added text.

    Args:
        base_dir: Root directory of the workspace.
        merge_name: Name of the merged dataset.
        text_name: Name of the text inside the merged dataset.

    Returns:
        The InferSentenceList assembled from the text's sentences, the merged
        accent ids, and the text's symbol converter.

    Raises:
        ValueError: If the text directory does not exist.
    """
    merge_dir = get_merged_dir(base_dir, merge_name, create=False)
    text_dir = get_text_dir(merge_dir, text_name, create=False)
    if not os.path.isdir(text_dir):
        # Fix: the original did `print(...); assert False`, which is stripped
        # under `python -O` and would fall through into load_text_csv on a
        # missing directory. Raise an explicit error instead.
        raise ValueError(f"The text '{text_name}' doesn't exist.")
    result = InferSentenceList.from_sentences(
        sentences=load_text_csv(text_dir),
        accents=load_merged_accents_ids(merge_dir),
        symbols=load_text_symbol_converter(text_dir),
    )
    return result
def _accent_template(base_dir: str, merge_name: str, text_name: str):
    """Regenerate the accent template CSV for an added text.

    Builds the accented-symbol list from the text's sentences, its symbol
    converter, and the merged accent ids, then writes it to the accents CSV.

    Args:
        base_dir: Root directory of the workspace.
        merge_name: Name of the merged dataset.
        text_name: Name of the text inside the merged dataset.
    """
    merge_dir = get_merged_dir(base_dir, merge_name, create=False)
    text_dir = get_text_dir(merge_dir, text_name, create=False)

    # Guard clause: the template is derived from an existing text.
    if not os.path.isdir(text_dir):
        print("Please add text first.")
        return

    print("Updating accent template...")
    accented_symbol_list = sents_accent_template(
        sentences=load_text_csv(text_dir),
        text_symbols=load_text_symbol_converter(text_dir),
        accent_ids=load_merged_accents_ids(merge_dir),
    )
    _save_accents_csv(text_dir, accented_symbol_list)
def normalize_text(base_dir: str, merge_name: str, text_name: str):
    """Normalize an already-added text and persist the result.

    Loads the sentences and symbol converter of *text_name*, normalizes them,
    prints the normalized sentences, saves sentences and converter back, then
    refreshes the accent template and the unknown-symbol check.

    Args:
        base_dir: Root directory of the workspace.
        merge_name: Name of the merged dataset.
        text_name: Name of the text inside the merged dataset.
    """
    logger = getLogger(__name__)
    merge_dir = get_merged_dir(base_dir, merge_name, create=False)
    text_dir = get_text_dir(merge_dir, text_name, create=False)

    # Guard clause: normalization requires the text to exist already.
    if not os.path.isdir(text_dir):
        logger.error("Please add text first.")
        return

    logger.info("Normalizing text...")
    symbol_ids, updated_sentences = sents_normalize(
        sentences=load_text_csv(text_dir),
        text_symbols=load_text_symbol_converter(text_dir),
        logger=logger,
    )
    formatted = updated_sentences.get_formatted(
        symbol_id_dict=symbol_ids,
        accent_id_dict=load_merged_accents_ids(merge_dir),
    )
    print("\n" + formatted)
    _save_text_csv(text_dir, updated_sentences)
    save_text_symbol_converter(text_dir, symbol_ids)
    _accent_template(base_dir, merge_name, text_name)
    _check_for_unknown_symbols(base_dir, merge_name, text_name)