def create_or_update_weights_map_main(base_dir: str, merge_name: str, weights_merge_name: str, template_map: Optional[str] = None):
  """Create or refresh the weights map from one merged dataset's symbols onto
  another's, then persist both the map and its symbol listing.

  Any previously saved map and an optional template map seed the update so
  existing entries are kept.
  """
  dest_dir = get_merged_dir(base_dir, merge_name)
  assert os.path.isdir(dest_dir)
  source_dir = get_merged_dir(base_dir, weights_merge_name)
  assert os.path.isdir(source_dir)
  logger = getLogger(__name__)
  logger.info(f"Creating/updating weights map for {weights_merge_name}...")
  loaded_template = SymbolsMap.load(template_map) if template_map is not None else None
  previous_map = (load_weights_map(dest_dir, weights_merge_name)
                  if weights_map_exists(dest_dir, weights_merge_name) else None)
  weights_map, symbols = create_or_update_weights_map(
    orig=load_merged_symbol_converter(source_dir).get_all_symbols(),
    dest=load_merged_symbol_converter(dest_dir).get_all_symbols(),
    existing_map=previous_map,
    template_map=loaded_template,
  )
  save_weights_map(dest_dir, weights_merge_name, weights_map)
  save_weights_symbols(dest_dir, weights_merge_name, symbols)
def add_text(base_dir: str, merge_name: str, text_name: str, filepath: Optional[str], lang: Language, text: Optional[str] = None):
  """Add a named text (read from *filepath*, or taken from *text* when
  *filepath* is None) to the merged dataset, infer its symbols, print the
  formatted result, and persist text, symbol converter, and accent template.

  Logs an error and does nothing if the merged dataset does not exist yet.
  """
  assert text_name is not None and text_name != ""
  logger = getLogger(__name__)
  merge_dir = get_merged_dir(base_dir, merge_name, create=False)
  if not os.path.isdir(merge_dir):
    logger.error("Please prepare data first.")
    return
  logger.info("Adding text...")
  # Fix: the original initialized text_input to "" and then overwrote it on
  # every path — the dead assignment is removed.
  if filepath is None:
    # Exactly one of filepath/text must be supplied.
    assert text is not None
    text_input = text
  else:
    text_input = read_text(filepath)
  symbol_ids, data = infer_add(
    text=text_input,
    lang=lang,
    logger=logger,
  )
  print("\n" + data.get_formatted(symbol_id_dict=symbol_ids, accent_id_dict=load_merged_accents_ids(merge_dir)))
  text_dir = get_text_dir(merge_dir, text_name, create=True)
  _save_text_csv(text_dir, data)
  save_text_symbol_converter(text_dir, symbol_ids)
  _accent_template(base_dir, merge_name, text_name)
  _check_for_unknown_symbols(base_dir, merge_name, text_name)
def map_text(base_dir: str, merge_name: str, text_name: str, symbols_map_path: str, ignore_arcs: bool = True):
  """Apply the symbols map at *symbols_map_path* to an already-added text,
  print the mapped result, and persist the updated text, symbol converter,
  and accent template.

  Logs an error and does nothing if the text has not been added yet.
  """
  logger = getLogger(__name__)
  merge_dir = get_merged_dir(base_dir, merge_name, create=False)
  text_dir = get_text_dir(merge_dir, text_name, create=False)
  if not os.path.isdir(text_dir):
    logger.error("Please add text first.")
    return
  symbol_ids, mapped_sentences = sents_map(
    sentences=load_text_csv(text_dir),
    text_symbols=load_text_symbol_converter(text_dir),
    symbols_map=SymbolsMap.load(symbols_map_path),
    ignore_arcs=ignore_arcs,
    logger=logger,
  )
  formatted = mapped_sentences.get_formatted(
    symbol_id_dict=symbol_ids,
    accent_id_dict=load_merged_accents_ids(merge_dir))
  print("\n" + formatted)
  _save_text_csv(text_dir, mapped_sentences)
  save_text_symbol_converter(text_dir, symbol_ids)
  _accent_template(base_dir, merge_name, text_name)
  _check_for_unknown_symbols(base_dir, merge_name, text_name)
def ipa_convert_text(base_dir: str, merge_name: str, text_name: str, ignore_tones: bool = False, ignore_arcs: bool = True, consider_ipa_annotations: bool = False, mode: Optional[EngToIpaMode] = None):
  """Convert an already-added text to IPA, print the converted result, and
  persist the updated text, symbol converter, and accent template.

  Logs an error and does nothing if the text has not been added yet.
  """
  logger = getLogger(__name__)
  merge_dir = get_merged_dir(base_dir, merge_name, create=False)
  text_dir = get_text_dir(merge_dir, text_name, create=False)
  if not os.path.isdir(text_dir):
    logger.error("Please add text first.")
    return
  logger.info("Converting text to IPA...")
  symbol_ids, converted_sentences = sents_convert_to_ipa(
    sentences=load_text_csv(text_dir),
    text_symbols=load_text_symbol_converter(text_dir),
    ignore_tones=ignore_tones,
    ignore_arcs=ignore_arcs,
    mode=mode,
    consider_ipa_annotations=consider_ipa_annotations,
    logger=logger,
  )
  formatted = converted_sentences.get_formatted(
    symbol_id_dict=symbol_ids,
    accent_id_dict=load_merged_accents_ids(merge_dir))
  print("\n" + formatted)
  _save_text_csv(text_dir, converted_sentences)
  save_text_symbol_converter(text_dir, symbol_ids)
  _accent_template(base_dir, merge_name, text_name)
  _check_for_unknown_symbols(base_dir, merge_name, text_name)
def create_or_update_inference_map_main(base_dir: str, merge_name: str, template_map: Optional[str] = None):
  """Create or refresh the inference map for the merged dataset and persist
  both the map and its symbol listing.

  Any previously saved map and an optional template map seed the update so
  existing entries are kept.
  """
  logger = getLogger(__name__)
  logger.info("Creating/updating inference map...")
  merge_dir = get_merged_dir(base_dir, merge_name)
  assert os.path.isdir(merge_dir)
  dest_symbols = get_all_symbols(merge_dir)
  loaded_template = None if template_map is None else SymbolsMap.load(template_map)
  previous_map = load_infer_map(merge_dir) if infer_map_exists(merge_dir) else None
  infer_map, symbols = create_or_update_inference_map(
    orig=load_merged_symbol_converter(merge_dir).get_all_symbols(),
    dest=dest_symbols,
    existing_map=previous_map,
    template_map=loaded_template,
  )
  save_infer_map(merge_dir, infer_map)
  save_infer_symbols(merge_dir, symbols)
def map_to_prep_symbols(base_dir: str, merge_name: str, text_name: str, ignore_arcs: bool = True):
  """Map an added text onto the prepared dataset's symbols using the stored
  inference map (which must already exist)."""
  merge_dir = get_merged_dir(base_dir, merge_name, create=False)
  assert os.path.isdir(merge_dir)
  assert infer_map_exists(merge_dir)
  inference_map_path = get_infer_map_path(merge_dir)
  map_text(base_dir, merge_name, text_name, inference_map_path, ignore_arcs)
def get_infer_sentences(base_dir: str, merge_name: str, text_name: str) -> InferSentenceList:
  """Load the sentences of an added text as an InferSentenceList.

  Raises:
    ValueError: if the text directory does not exist. (The original used
    `print` + `assert False`, but asserts are stripped under `python -O`,
    which would let execution continue against a missing directory.)
  """
  merge_dir = get_merged_dir(base_dir, merge_name, create=False)
  text_dir = get_text_dir(merge_dir, text_name, create=False)
  if not os.path.isdir(text_dir):
    msg = f"The text '{text_name}' doesn't exist."
    print(msg)  # keep the original stdout notice
    raise ValueError(msg)
  result = InferSentenceList.from_sentences(
    sentences=load_text_csv(text_dir),
    accents=load_merged_accents_ids(merge_dir),
    symbols=load_text_symbol_converter(text_dir))
  return result
def _accent_template(base_dir: str, merge_name: str, text_name: str):
  """Rebuild and save the accent template CSV for an added text.

  Prints a notice and does nothing if the text has not been added yet.
  """
  merge_dir = get_merged_dir(base_dir, merge_name, create=False)
  text_dir = get_text_dir(merge_dir, text_name, create=False)
  if not os.path.isdir(text_dir):
    print("Please add text first.")
    return
  print("Updating accent template...")
  template = sents_accent_template(
    sentences=load_text_csv(text_dir),
    text_symbols=load_text_symbol_converter(text_dir),
    accent_ids=load_merged_accents_ids(merge_dir),
  )
  _save_accents_csv(text_dir, template)
def _check_for_unknown_symbols(base_dir: str, merge_name: str, text_name: str):
  """Check an added text against the prepared dataset's symbol set and log
  whether it can be synthesized as-is or needs an inference map applied."""
  sentences = get_infer_sentences(base_dir, merge_name, text_name)
  merge_dir = get_merged_dir(base_dir, merge_name, create=False)
  logger = getLogger(__name__)
  has_unknown = sentences.replace_unknown_symbols(
    model_symbols=load_merged_symbol_converter(merge_dir),
    logger=logger)
  if has_unknown:
    logger.info(
      "Some symbols are not in the prepared dataset symbolset. You need to create an inference map and then apply it to the symbols."
    )
  else:
    logger.info(
      "All symbols are in the prepared dataset symbolset. You can now synthesize this text."
    )
def accent_apply(base_dir: str, merge_name: str, text_name: str):
  """Apply the (possibly hand-edited) accent template to an added text,
  print the result, and persist the updated sentences.

  Logs an error and does nothing if the text has not been added yet.
  """
  logger = getLogger(__name__)
  merge_dir = get_merged_dir(base_dir, merge_name, create=False)
  text_dir = get_text_dir(merge_dir, text_name, create=False)
  if not os.path.isdir(text_dir):
    logger.error("Please add text first.")
    return
  logger.info("Applying accents...")
  accented_sentences = sents_accent_apply(
    sentences=load_text_csv(text_dir),
    accented_symbols=_load_accents_csv(text_dir),
    accent_ids=load_merged_accents_ids(merge_dir),
  )
  formatted = accented_sentences.get_formatted(
    symbol_id_dict=load_text_symbol_converter(text_dir),
    accent_id_dict=load_merged_accents_ids(merge_dir))
  print("\n" + formatted)
  _save_text_csv(text_dir, accented_sentences)
  _check_for_unknown_symbols(base_dir, merge_name, text_name)
def normalize_text(base_dir: str, merge_name: str, text_name: str):
  """Normalize an added text, print the result, and persist the updated
  text, symbol converter, and accent template.

  Logs an error and does nothing if the text has not been added yet.
  """
  logger = getLogger(__name__)
  merge_dir = get_merged_dir(base_dir, merge_name, create=False)
  text_dir = get_text_dir(merge_dir, text_name, create=False)
  if not os.path.isdir(text_dir):
    logger.error("Please add text first.")
    return
  logger.info("Normalizing text...")
  symbol_ids, normalized_sentences = sents_normalize(
    sentences=load_text_csv(text_dir),
    text_symbols=load_text_symbol_converter(text_dir),
    logger=logger,
  )
  formatted = normalized_sentences.get_formatted(
    symbol_id_dict=symbol_ids,
    accent_id_dict=load_merged_accents_ids(merge_dir))
  print("\n" + formatted)
  _save_text_csv(text_dir, normalized_sentences)
  save_text_symbol_converter(text_dir, symbol_ids)
  _accent_template(base_dir, merge_name, text_name)
  _check_for_unknown_symbols(base_dir, merge_name, text_name)