示例#1
0
def parse(dir_path: str, logger: Logger = getLogger()) -> PreDataList:
    """Parse a corpus laid out as one folder per speaker into a ``PreDataList``.

    Speaker metadata (name, gender, accent) is taken from a fixed slice of
    ``README.md`` inside ``dir_path``. Every sub-folder of ``dir_path`` whose
    name matches a speaker from that table is scanned for its ``wav``,
    ``textgrid`` and ``transcript`` files; one ``PreData`` entry is created
    per wav/transcript pair. Sub-folders of unknown speakers are skipped.

    Args:
        dir_path: Root directory of the corpus.
        logger: Logger used for progress and error messages.

    Returns:
        All parsed entries, sorted with ``sort_arctic``.

    Raises:
        FileNotFoundError: If ``dir_path`` does not exist.
        AssertionError: If a speaker folder does not contain the same number
            of wav, textgrid and transcript files.
    """
    if not os.path.exists(dir_path):
        message = f"Directory not found: {dir_path}!"
        # logger.exception() is only meaningful inside an ``except`` block;
        # outside of one it appends a bogus "NoneType: None" traceback.
        logger.error(message)
        raise FileNotFoundError(message)

    readme_path = os.path.join(dir_path, "README.md")
    # NOTE(review): the hard-coded slice presumably selects the rows of the
    # speaker table in README.md - confirm against the corpus version in use.
    speaker_rows = read_lines(readme_path)[34:58]
    speakers_dict = {}
    for speaker_details in speaker_rows:
        # The first and last character are dropped before splitting on "|"
        # (presumably the outer pipes of a markdown table row).
        name, gender, accent, _, _ = speaker_details[1:-1].split("|")
        speakers_dict[name] = gender, accent

    speaker_folders = get_subfolders(dir_path)
    lang = Language.ENG

    entries = PreDataList()

    logger.info("Parsing files...")
    for speaker_folder in tqdm(speaker_folders):
        speaker_name = get_basename(speaker_folder)
        if speaker_name not in speakers_dict:
            logger.info(f"Skipping {speaker_name}")
            continue

        wavs = get_filepaths(os.path.join(speaker_folder, "wav"))
        # The "annotation" folder is deliberately not read: only 150 files
        # exist there and they do not contain good IPA.
        textgrids = get_filepaths(os.path.join(speaker_folder, "textgrid"))
        transcripts = get_filepaths(os.path.join(speaker_folder, "transcript"))

        assert len(wavs) == len(textgrids) == len(transcripts)

        speaker_gender, speaker_accent = speakers_dict[speaker_name]
        accent_name = f"{speaker_accent}-{speaker_name}"
        gender = Gender.MALE if speaker_gender == "M" else Gender.FEMALE

        # Pairing relies on get_filepaths returning both lists in matching
        # order; the lengths were checked above.
        for wav, transcript in zip(wavs, transcripts):
            # A full stop is appended to every transcript.
            text_en = f"{read_text(transcript)}."
            symbols = text_to_symbols(text_en, lang)

            entry = PreData(name=get_basename(wav),
                            speaker_name=speaker_name,
                            text=text_en,
                            wav_path=wav,
                            symbols=symbols,
                            accents=[accent_name] * len(symbols),
                            gender=gender,
                            lang=lang)

            entries.append(entry)

    entries.sort(key=sort_arctic, reverse=False)
    logger.info(
        f"Parsed {len(entries)} entries from {len(speakers_dict)} speakers.")

    return entries
示例#2
0
def parse(dir_path: str, logger: Logger = getLogger()) -> PreDataList:
    """Parse a corpus laid out as dataset/speaker/chapter folders.

    Speaker metadata (id, gender, name) is read from ``SPEAKERS.txt`` inside
    ``dir_path``. Every chapter folder is scanned for ``.wav`` files and the
    matching ``.normalized.txt`` files; one ``PreData`` entry is created per
    pair.

    Args:
        dir_path: Root directory of the corpus.
        logger: Logger used for progress and error messages.

    Returns:
        All parsed entries, sorted with ``sort_libri``.

    Raises:
        FileNotFoundError: If ``dir_path`` does not exist.
        KeyError: If a speaker folder name is not listed in ``SPEAKERS.txt``.
        AssertionError: If a chapter folder has a different number of wav and
            text files, or a wav/text pair does not share the same basename.
    """
    if not os.path.exists(dir_path):
        message = f"Directory not found: {dir_path}"
        # Report through the supplied logger (not print) and give the raised
        # exception a message so callers can see which path was missing.
        logger.error(message)
        raise FileNotFoundError(message)

    speakers_path = os.path.join(dir_path, "SPEAKERS.txt")
    # NOTE(review): the first 12 lines are skipped, presumably a comment
    # header preceding the speaker table - confirm against the corpus file.
    speakers = read_lines(speakers_path)[12:]
    speakers_dict = {}
    for speaker_details in speakers:
        # maxsplit=4 keeps a " | " occurring inside the trailing name field
        # from breaking the five-way unpacking.
        s_id, gender, _, _, name = speaker_details.split(" | ", 4)
        speakers_dict[s_id.strip()] = name.strip(), gender.strip()

    lang = Language.ENG

    entries = PreDataList()

    logger.info("Parsing files...")
    for dataset_folder in tqdm(get_subfolders(dir_path)):
        logger.info(f"Parsing {get_basename(dataset_folder)}...")

        for speaker_folder in tqdm(get_subfolders(dataset_folder)):
            speaker_id = get_basename(speaker_folder)
            speaker_name, speaker_gender = speakers_dict[speaker_id]
            # The speaker name doubles as the accent label.
            accent_name = speaker_name
            gender = Gender.MALE if speaker_gender == "M" else Gender.FEMALE

            for chapter_folder in get_subfolders(speaker_folder):
                files = get_filepaths(chapter_folder)
                wavs = [x for x in files if x.endswith(".wav")]
                texts = [x for x in files if x.endswith(".normalized.txt")]
                assert len(wavs) == len(texts)

                for wav_file, text_file in zip(wavs, texts):
                    # Guard against mis-paired files: after removing the
                    # ".normalized" suffix both basenames must be equal.
                    assert get_basename(wav_file) == get_basename(
                        text_file)[:-len(".normalized")]
                    text_en = read_text(text_file)
                    symbols = text_to_symbols(text_en, lang)

                    entry = PreData(name=get_basename(wav_file),
                                    speaker_name=speaker_name,
                                    text=text_en,
                                    wav_path=wav_file,
                                    symbols=symbols,
                                    accents=[accent_name] * len(symbols),
                                    gender=gender,
                                    lang=lang)

                    entries.append(entry)

    entries.sort(key=sort_libri, reverse=False)
    logger.info(
        f"Parsed {len(entries)} entries from {len(speakers_dict)} speakers.")

    return entries
示例#3
0
 def load_from_file(cls, filepath: str):
     """Build an instance from the JSON mapping stored at ``filepath``.

     If the first value of the mapping is a list, the file is treated as the
     "v2" layout: each key is converted to ``int`` and paired with element 1
     of its value, the pairs are ordered by that integer, the original file
     is copied next to itself as ``<basename>.v2.json``, and the converted
     instance is saved back to ``filepath``. Otherwise the mapping is handed
     to ``cls.from_raw`` unchanged.

     Raises:
         AssertionError: If the loaded mapping is empty.
     """
     content = OrderedDict(parse_json(filepath).items())
     stored_values = list(content.values())
     assert len(stored_values) > 0

     # Current layout: nothing to migrate.
     if not isinstance(stored_values[0], list):
         return cls.from_raw(content)

     # v2 layout: rebuild the mapping, ordered by the numeric id.
     pairs = sorted(
         ((data[1], int(symbol_id)) for symbol_id, data in content.items()),
         key=lambda pair: pair[1],
     )
     converted = OrderedDict(pairs)

     # Keep a copy of the old file before overwriting it.
     backup_name = f"{get_basename(filepath)}.v2.json"
     copyfile(filepath, os.path.join(os.path.dirname(filepath), backup_name))

     instance = cls.from_raw(converted)
     instance.save(filepath)
     return instance
示例#4
0
  def test_get_basename_of_dir_w_slash(self):
    """A path ending in a slash is expected to yield an empty basename."""
    basename = get_basename("/a/b/c/test/")
    self.assertEqual("", basename)
示例#5
0
  def test_get_basename_of_filename(self):
    """For a bare filename only the last extension is expected to be removed."""
    basename = get_basename("test.wav.xyz")
    self.assertEqual("test.wav", basename)
示例#6
0
  def test_get_basename_full_path(self):
    """For a full path the directories and the last extension are expected to be removed."""
    basename = get_basename("/a/b/c/test.wav.xyz")
    self.assertEqual("test.wav", basename)
示例#7
0
def get_infer_dir(train_dir: str, wav_path: str, iteration: int):
  """Return a freshly named inference sub-directory for one wav input.

  The sub-directory name combines the current local time, the basename of
  ``wav_path`` and ``iteration``. It is resolved below the inference root of
  ``train_dir`` via ``get_subdir(..., create=True)``.
  """
  wav_name = get_basename(wav_path)
  timestamp = format(datetime.datetime.now(), "%Y-%m-%d,%H-%M-%S")
  subdir_name = f"{timestamp},wav={wav_name},it={iteration}"
  inference_root = get_inference_root_dir(train_dir)
  return get_subdir(inference_root, subdir_name, create=True)