Пример #1
0
def create_or_update_weights_map_main(base_dir: str,
                                      merge_name: str,
                                      weights_merge_name: str,
                                      template_map: Optional[str] = None):
    """Create or update the weights map stored in the merge directory.

    The symbols of the merge ``weights_merge_name`` are passed as ``orig``
    and the symbols of the merge ``merge_name`` as ``dest`` to
    ``create_or_update_weights_map``. The resulting map and symbols are
    saved into the directory of ``merge_name`` under the key
    ``weights_merge_name``.

    Args:
        base_dir: Base directory handed to ``get_merged_dir``.
        merge_name: Name of the merge that receives the weights map.
        weights_merge_name: Name of the merge whose symbols are the origin.
        template_map: Optional path loaded via ``SymbolsMap.load``.

    Raises:
        AssertionError: If one of the two merge directories does not exist
            (only while assertions are enabled).
    """
    target_dir = get_merged_dir(base_dir, merge_name)
    assert os.path.isdir(target_dir)
    source_dir = get_merged_dir(base_dir, weights_merge_name)
    assert os.path.isdir(source_dir)

    log = getLogger(__name__)
    log.info(f"Creating/updating weights map for {weights_merge_name}...")

    # Both optional inputs default to None when they are not available.
    loaded_template = (SymbolsMap.load(template_map)
                       if template_map is not None else None)
    previous_map = (load_weights_map(target_dir, weights_merge_name)
                    if weights_map_exists(target_dir, weights_merge_name)
                    else None)

    source_symbols = load_merged_symbol_converter(source_dir).get_all_symbols()
    target_symbols = load_merged_symbol_converter(target_dir).get_all_symbols()
    new_map, new_symbols = create_or_update_weights_map(
        orig=source_symbols,
        dest=target_symbols,
        existing_map=previous_map,
        template_map=loaded_template,
    )

    save_weights_map(target_dir, weights_merge_name, new_map)
    save_weights_symbols(target_dir, weights_merge_name, new_symbols)
Пример #2
0
def add_text(base_dir: str,
             merge_name: str,
             text_name: str,
             filepath: Optional[str],
             lang: Language,
             text: Optional[str] = None):
    """Add a new text to a merge and store its prepared representation.

    The text is read from ``filepath`` when given, otherwise ``text`` is
    used. The result of ``infer_add`` is printed, saved into the text
    directory, and afterwards the accent template and the unknown-symbol
    check are run.

    Args:
        base_dir: Base directory handed to ``get_merged_dir``.
        merge_name: Name of the merge the text belongs to.
        text_name: Non-empty name under which the text is stored.
        filepath: Path of a file to read the text from, or ``None``.
        lang: Language passed on to ``infer_add``.
        text: Text used when ``filepath`` is ``None``.
    """
    assert text_name is not None and text_name != ""
    log = getLogger(__name__)
    merge_dir = get_merged_dir(base_dir, merge_name, create=False)
    if not os.path.isdir(merge_dir):
        log.error("Please prepare data first.")
        return

    log.info("Adding text...")
    if filepath is not None:
        raw_text = read_text(filepath)
    else:
        # Without a file the caller has to provide the text directly.
        assert text is not None
        raw_text = text

    symbol_ids, sentences = infer_add(
        text=raw_text,
        lang=lang,
        logger=log,
    )

    accent_ids = load_merged_accents_ids(merge_dir)
    formatted = sentences.get_formatted(symbol_id_dict=symbol_ids,
                                        accent_id_dict=accent_ids)
    print("\n" + formatted)

    text_dir = get_text_dir(merge_dir, text_name, create=True)
    _save_text_csv(text_dir, sentences)
    save_text_symbol_converter(text_dir, symbol_ids)
    _accent_template(base_dir, merge_name, text_name)
    _check_for_unknown_symbols(base_dir, merge_name, text_name)
Пример #3
0
def map_text(base_dir: str,
             merge_name: str,
             text_name: str,
             symbols_map_path: str,
             ignore_arcs: bool = True):
    """Apply a symbols map to a previously added text.

    The stored sentences are mapped with ``sents_map`` using the map loaded
    from ``symbols_map_path``. The result is printed and written back to
    the text directory, then the accent template and the unknown-symbol
    check are refreshed. An error is logged if the text does not exist.

    Args:
        base_dir: Base directory handed to ``get_merged_dir``.
        merge_name: Name of the merge the text belongs to.
        text_name: Name of the text to map.
        symbols_map_path: Path loaded via ``SymbolsMap.load``.
        ignore_arcs: Forwarded to ``sents_map``.
    """
    log = getLogger(__name__)
    merge_dir = get_merged_dir(base_dir, merge_name, create=False)
    text_dir = get_text_dir(merge_dir, text_name, create=False)
    if not os.path.isdir(text_dir):
        log.error("Please add text first.")
        return

    stored_sentences = load_text_csv(text_dir)
    stored_symbols = load_text_symbol_converter(text_dir)
    mapping = SymbolsMap.load(symbols_map_path)
    symbol_ids, mapped_sentences = sents_map(
        sentences=stored_sentences,
        text_symbols=stored_symbols,
        symbols_map=mapping,
        ignore_arcs=ignore_arcs,
        logger=log,
    )

    accent_ids = load_merged_accents_ids(merge_dir)
    formatted = mapped_sentences.get_formatted(symbol_id_dict=symbol_ids,
                                               accent_id_dict=accent_ids)
    print("\n" + formatted)

    _save_text_csv(text_dir, mapped_sentences)
    save_text_symbol_converter(text_dir, symbol_ids)
    _accent_template(base_dir, merge_name, text_name)
    _check_for_unknown_symbols(base_dir, merge_name, text_name)
Пример #4
0
def ipa_convert_text(base_dir: str,
                     merge_name: str,
                     text_name: str,
                     ignore_tones: bool = False,
                     ignore_arcs: bool = True,
                     consider_ipa_annotations: bool = False,
                     mode: Optional[EngToIpaMode] = None):
    """Convert a previously added text to IPA.

    The stored sentences are converted with ``sents_convert_to_ipa``. The
    result is printed and written back to the text directory, then the
    accent template and the unknown-symbol check are refreshed. An error is
    logged if the text does not exist.

    Args:
        base_dir: Base directory handed to ``get_merged_dir``.
        merge_name: Name of the merge the text belongs to.
        text_name: Name of the text to convert.
        ignore_tones: Forwarded to ``sents_convert_to_ipa``.
        ignore_arcs: Forwarded to ``sents_convert_to_ipa``.
        consider_ipa_annotations: Forwarded to ``sents_convert_to_ipa``.
        mode: Forwarded to ``sents_convert_to_ipa``.
    """
    log = getLogger(__name__)
    merge_dir = get_merged_dir(base_dir, merge_name, create=False)
    text_dir = get_text_dir(merge_dir, text_name, create=False)
    if not os.path.isdir(text_dir):
        log.error("Please add text first.")
        return

    log.info("Converting text to IPA...")
    stored_sentences = load_text_csv(text_dir)
    stored_symbols = load_text_symbol_converter(text_dir)
    symbol_ids, ipa_sentences = sents_convert_to_ipa(
        sentences=stored_sentences,
        text_symbols=stored_symbols,
        ignore_tones=ignore_tones,
        ignore_arcs=ignore_arcs,
        mode=mode,
        consider_ipa_annotations=consider_ipa_annotations,
        logger=log,
    )

    accent_ids = load_merged_accents_ids(merge_dir)
    formatted = ipa_sentences.get_formatted(symbol_id_dict=symbol_ids,
                                            accent_id_dict=accent_ids)
    print("\n" + formatted)

    _save_text_csv(text_dir, ipa_sentences)
    save_text_symbol_converter(text_dir, symbol_ids)
    _accent_template(base_dir, merge_name, text_name)
    _check_for_unknown_symbols(base_dir, merge_name, text_name)
Пример #5
0
def create_or_update_inference_map_main(base_dir: str,
                                        merge_name: str,
                                        template_map: Optional[str] = None):
    """Create or update the inference map stored in the merge directory.

    The symbols of the merged symbol converter are passed as ``orig`` and
    the result of ``get_all_symbols(merge_dir)`` as ``dest`` to
    ``create_or_update_inference_map``. The resulting map and symbols are
    saved into the merge directory.

    Args:
        base_dir: Base directory handed to ``get_merged_dir``.
        merge_name: Name of the merge to work on.
        template_map: Optional path loaded via ``SymbolsMap.load``.

    Raises:
        AssertionError: If the merge directory does not exist (only while
            assertions are enabled).
    """
    log = getLogger(__name__)
    log.info("Creating/updating inference map...")
    merge_dir = get_merged_dir(base_dir, merge_name)
    assert os.path.isdir(merge_dir)

    destination_symbols = get_all_symbols(merge_dir)

    # Both optional inputs default to None when they are not available.
    loaded_template = (SymbolsMap.load(template_map)
                       if template_map is not None else None)
    previous_map = (load_infer_map(merge_dir)
                    if infer_map_exists(merge_dir) else None)

    origin_symbols = load_merged_symbol_converter(merge_dir).get_all_symbols()
    new_map, new_symbols = create_or_update_inference_map(
        orig=origin_symbols,
        dest=destination_symbols,
        existing_map=previous_map,
        template_map=loaded_template,
    )

    save_infer_map(merge_dir, new_map)
    save_infer_symbols(merge_dir, new_symbols)
Пример #6
0
def map_to_prep_symbols(base_dir: str,
                        merge_name: str,
                        text_name: str,
                        ignore_arcs: bool = True):
    """Map a text using the inference map of its merge.

    Delegates to ``map_text`` with the path returned by
    ``get_infer_map_path``.

    Args:
        base_dir: Base directory handed to ``get_merged_dir``.
        merge_name: Name of the merge the text belongs to.
        text_name: Name of the text to map.
        ignore_arcs: Forwarded to ``map_text``.

    Raises:
        AssertionError: If the merge directory or the inference map is
            missing (only while assertions are enabled).
    """
    merge_dir = get_merged_dir(base_dir, merge_name, create=False)
    assert os.path.isdir(merge_dir)
    assert infer_map_exists(merge_dir)

    map_text(
        base_dir=base_dir,
        merge_name=merge_name,
        text_name=text_name,
        symbols_map_path=get_infer_map_path(merge_dir),
        ignore_arcs=ignore_arcs,
    )
Пример #7
0
def get_infer_sentences(base_dir: str, merge_name: str,
                        text_name: str) -> InferSentenceList:
    """Load the sentences of a text in the form used for inference.

    Args:
        base_dir: Base directory handed to ``get_merged_dir``.
        merge_name: Name of the merge the text belongs to.
        text_name: Name of the text to load.

    Returns:
        The ``InferSentenceList`` built from the stored sentences, the
        merged accent ids and the text's symbol converter.

    Raises:
        AssertionError: If the text directory does not exist.
    """
    merge_dir = get_merged_dir(base_dir, merge_name, create=False)
    text_dir = get_text_dir(merge_dir, text_name, create=False)
    if not os.path.isdir(text_dir):
        message = f"The text '{text_name}' doesn't exist."
        print(message)
        # Raise explicitly instead of `assert False`: assert statements are
        # removed when Python runs with -O, which would let execution
        # continue and try to load from the missing directory.
        raise AssertionError(message)

    return InferSentenceList.from_sentences(
        sentences=load_text_csv(text_dir),
        accents=load_merged_accents_ids(merge_dir),
        symbols=load_text_symbol_converter(text_dir))
Пример #8
0
def _accent_template(base_dir: str, merge_name: str, text_name: str):
    """Regenerate the accent template of a text and save it as CSV.

    Prints a hint and does nothing else when the text directory does not
    exist.

    Args:
        base_dir: Base directory handed to ``get_merged_dir``.
        merge_name: Name of the merge the text belongs to.
        text_name: Name of the text whose template is updated.
    """
    merge_dir = get_merged_dir(base_dir, merge_name, create=False)
    text_dir = get_text_dir(merge_dir, text_name, create=False)
    if not os.path.isdir(text_dir):
        print("Please add text first.")
        return

    print("Updating accent template...")
    stored_sentences = load_text_csv(text_dir)
    stored_symbols = load_text_symbol_converter(text_dir)
    accent_ids = load_merged_accents_ids(merge_dir)
    template = sents_accent_template(
        sentences=stored_sentences,
        text_symbols=stored_symbols,
        accent_ids=accent_ids,
    )
    _save_accents_csv(text_dir, template)
Пример #9
0
def _check_for_unknown_symbols(base_dir: str, merge_name: str, text_name: str):
    """Log whether a text contains symbols missing from the merged symbol set.

    The check is done via ``replace_unknown_symbols`` on the inference
    sentences of the text; the outcome is reported as an info log message.

    Args:
        base_dir: Base directory handed to ``get_merged_dir``.
        merge_name: Name of the merge the text belongs to.
        text_name: Name of the text to check.
    """
    sentences = get_infer_sentences(base_dir, merge_name, text_name)

    merge_dir = get_merged_dir(base_dir, merge_name, create=False)
    log = getLogger(__name__)
    model_symbols = load_merged_symbol_converter(merge_dir)
    has_unknown = sentences.replace_unknown_symbols(
        model_symbols=model_symbols, logger=log)

    message = (
        "Some symbols are not in the prepared dataset symbolset. You need to create an inference map and then apply it to the symbols."
        if has_unknown else
        "All symbols are in the prepared dataset symbolset. You can now synthesize this text."
    )
    log.info(message)
Пример #10
0
def accent_apply(base_dir: str, merge_name: str, text_name: str):
    """Apply the stored accent assignments to a text.

    The sentences are updated with ``sents_accent_apply`` using the accents
    CSV of the text. The result is printed and saved, then the
    unknown-symbol check is run. An error is logged if the text does not
    exist.

    Args:
        base_dir: Base directory handed to ``get_merged_dir``.
        merge_name: Name of the merge the text belongs to.
        text_name: Name of the text to update.
    """
    log = getLogger(__name__)
    merge_dir = get_merged_dir(base_dir, merge_name, create=False)
    text_dir = get_text_dir(merge_dir, text_name, create=False)
    if not os.path.isdir(text_dir):
        log.error("Please add text first.")
        return

    log.info("Applying accents...")
    stored_sentences = load_text_csv(text_dir)
    accented_symbols = _load_accents_csv(text_dir)
    accent_ids = load_merged_accents_ids(merge_dir)
    accented_sentences = sents_accent_apply(
        sentences=stored_sentences,
        accented_symbols=accented_symbols,
        accent_ids=accent_ids,
    )

    # The accent ids are loaded a second time for display, as before.
    display_symbols = load_text_symbol_converter(text_dir)
    display_accents = load_merged_accents_ids(merge_dir)
    formatted = accented_sentences.get_formatted(
        symbol_id_dict=display_symbols,
        accent_id_dict=display_accents)
    print("\n" + formatted)

    _save_text_csv(text_dir, accented_sentences)
    _check_for_unknown_symbols(base_dir, merge_name, text_name)
Пример #11
0
def normalize_text(base_dir: str, merge_name: str, text_name: str):
    """Normalize a previously added text.

    The stored sentences are processed with ``sents_normalize``. The result
    is printed and written back to the text directory, then the accent
    template and the unknown-symbol check are refreshed. An error is logged
    if the text does not exist.

    Args:
        base_dir: Base directory handed to ``get_merged_dir``.
        merge_name: Name of the merge the text belongs to.
        text_name: Name of the text to normalize.
    """
    log = getLogger(__name__)
    merge_dir = get_merged_dir(base_dir, merge_name, create=False)
    text_dir = get_text_dir(merge_dir, text_name, create=False)
    if not os.path.isdir(text_dir):
        log.error("Please add text first.")
        return

    log.info("Normalizing text...")
    stored_sentences = load_text_csv(text_dir)
    stored_symbols = load_text_symbol_converter(text_dir)
    symbol_ids, normalized_sentences = sents_normalize(
        sentences=stored_sentences,
        text_symbols=stored_symbols,
        logger=log,
    )

    accent_ids = load_merged_accents_ids(merge_dir)
    formatted = normalized_sentences.get_formatted(
        symbol_id_dict=symbol_ids,
        accent_id_dict=accent_ids)
    print("\n" + formatted)

    _save_text_csv(text_dir, normalized_sentences)
    save_text_symbol_converter(text_dir, symbol_ids)
    _accent_template(base_dir, merge_name, text_name)
    _check_for_unknown_symbols(base_dir, merge_name, text_name)