Пример #1
0
def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
    """Preprocess every VCTK utterance under ``in_dir`` in a process pool.

    Utterances whose speaker is named in ``hparams.not_for_train_speaker``
    (a comma-separated list of speaker names) are skipped.

    Args:
        in_dir: Root directory handed to the ``vctk`` data sources.
        out_dir: Output directory forwarded to ``_process_utterance``.
        num_workers: Number of worker processes.
        tqdm: Optional wrapper applied to the list of futures (e.g. a
            progress bar); defaults to the identity function.

    Returns:
        A list with the result of ``_process_utterance`` for every utterance
        that was not skipped, in submission order.

    Raises:
        KeyError: If a non-empty name in ``hparams.not_for_train_speaker``
            is not one of ``vctk.available_speakers``.
    """
    speakers = vctk.available_speakers

    td = vctk.TranscriptionDataSource(in_dir, speakers=speakers)
    transcriptions = td.collect_files()
    # ``labels`` is read only after collect_files(), as in the original code.
    speaker_ids = td.labels
    wav_paths = vctk.WavFileDataSource(in_dir,
                                       speakers=speakers).collect_files()

    # Pair each speaker name with a unique label value.
    # NOTE(review): this assumes the sorted unique labels line up with the
    # order of ``vctk.available_speakers`` -- confirm against nnmnkwii.
    speaker_to_speaker_id = dict(zip(speakers, np.unique(speaker_ids)))

    # Accept both "a, b" and "a,b", and tolerate an empty setting: splitting
    # an empty string yields [""], which previously raised KeyError.
    ignored_names = [
        name.strip() for name in hparams.not_for_train_speaker.split(",")
    ]
    ignore_speaker = {
        speaker_to_speaker_id[name] for name in ignored_names if name
    }

    # The context manager guarantees the pool is shut down, even when a
    # worker raises while results are being collected.
    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        futures = []
        # The 1-based index counts skipped utterances as well, so the
        # numbering of the remaining ones is unchanged by the exclusion list.
        for index, (speaker_id, text, wav_path) in enumerate(
                zip(speaker_ids, transcriptions, wav_paths), start=1):
            if speaker_id in ignore_speaker:
                continue
            futures.append(
                executor.submit(
                    partial(_process_utterance, out_dir, index, speaker_id,
                            wav_path, text)))
        return [future.result() for future in tqdm(futures)]
Пример #2
0
    def __init__(self):
        """Load VCTK transcriptions and wav paths for a persisted speaker selection.

        Speaker names are characters 1..3 of every entry found under
        ``<hparams.VCTK_dataset_path>/wav48``. A list of integer indices is
        either read from ``hparams.shuffle_speaker_file`` or, when that file
        does not exist, drawn at random (truncated to
        ``hparams.shuffle_speaker_num`` entries) and written there, one index
        per line. The speaker list is then passed through ``self.shuffle``
        together with those indices.
        """
        wav_root = os.path.join(hparams.VCTK_dataset_path, "wav48")
        self.speaker_list = [str(entry[1:4]) for entry in os.listdir(wav_root)]

        # Shuffle Speakers
        index_file = hparams.shuffle_speaker_file
        if os.path.exists(index_file):
            # Reuse the indices stored by an earlier run.
            with open(index_file, "r") as handle:
                chosen = [int(line) for line in handle]

            self.speaker_list = self.shuffle(self.speaker_list, chosen)
        else:
            # First run: draw a random selection, apply it, then persist it.
            chosen = list(range(len(self.speaker_list)))
            random.shuffle(chosen)
            chosen = chosen[:hparams.shuffle_speaker_num]
            self.speaker_list = self.shuffle(self.speaker_list, chosen)

            with open(index_file, "w") as handle:
                handle.writelines(str(idx) + "\n" for idx in chosen)

        transcript_source = vctk.TranscriptionDataSource(
            hparams.VCTK_dataset_path, speakers=self.speaker_list)
        self.text = transcript_source.collect_files()

        wav_source = vctk.WavFileDataSource(
            hparams.VCTK_dataset_path, speakers=self.speaker_list)
        self.wav_paths = wav_source.collect_files()
Пример #3
0
    def __init__(self, dataset_path=hparams.dataset_path):
        """Collect VCTK transcriptions and wav paths for all speakers on disk.

        Speaker names are characters 1..3 of every entry found under
        ``<hparams.VCTK_dataset_path>/wav48``.

        Args:
            dataset_path: Directory stored on the instance; ``train.txt``
                inside it becomes ``self.text_path``.
        """
        vctk_root = hparams.VCTK_dataset_path
        speaker_names = [
            str(entry[1:4])
            for entry in os.listdir(os.path.join(vctk_root, "wav48"))
        ]
        self.speaker_list = speaker_names

        transcript_source = vctk.TranscriptionDataSource(
            vctk_root, speakers=speaker_names)
        all_texts = transcript_source.collect_files()
        wav_source = vctk.WavFileDataSource(vctk_root, speakers=speaker_names)
        all_wavs = wav_source.collect_files()

        self.dataset_path = dataset_path
        self.text_path = os.path.join(self.dataset_path, "train.txt")
        self.text = all_texts
        self.wav_paths = all_wavs
Пример #4
0
def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
    """Run ``_process_utterance`` over every VCTK utterance in a process pool.

    Args:
        in_dir: Root directory handed to the ``vctk`` data sources.
        out_dir: Output directory forwarded to ``_process_utterance``.
        num_workers: Number of worker processes.
        tqdm: Optional wrapper applied to the list of futures (e.g. a
            progress bar); defaults to the identity function.

    Returns:
        The results of ``_process_utterance``, one per utterance, in
        submission order.
    """
    pool = ProcessPoolExecutor(max_workers=num_workers)

    all_speakers = vctk.available_speakers

    transcript_source = vctk.TranscriptionDataSource(
        in_dir, speakers=all_speakers)
    texts = transcript_source.collect_files()
    labels = transcript_source.labels
    wav_source = vctk.WavFileDataSource(in_dir, speakers=all_speakers)
    wavs = wav_source.collect_files()

    # Utterances are numbered from 1 in the order the data sources list them.
    jobs = [
        pool.submit(
            partial(_process_utterance, out_dir, number, label, wav, txt))
        for number, (label, txt, wav) in enumerate(
            zip(labels, texts, wavs), start=1)
    ]
    return [job.result() for job in tqdm(jobs)]
        help='the log level (DEBUG, INFO, WARNING, ERROR, or CRITICAL)')
    parser.add_argument('data_root', type=str, help='Data root')

    args = parser.parse_args()

    log_level = args.log.upper()
    logging.getLogger().setLevel(log_level)
    disfluencies = set(['uh', 'um'])

    data_root = args.data_root

    # Do for all speakers
    speakers = vctk.available_speakers

    # Collect all transcripts/wav files
    td = vctk.TranscriptionDataSource(data_root, speakers=speakers)
    transcriptions = td.collect_files()
    wav_paths = vctk.WavFileDataSource(data_root,
                                       speakers=speakers).collect_files()

    # Save dir
    save_dir = join(data_root, "lab")
    if not exists(save_dir):
        os.makedirs(save_dir)

    resources = gentle.Resources()

    for idx in tqdm(range(len(wav_paths))):
        transcript = transcriptions[idx]
        audiofile = wav_paths[idx]
        lab_path = audiofile.replace("wav48/", "lab/").replace(".wav", ".lab")
Пример #6
0
import os
from nnmnkwii.datasets import vctk
import hparams as hp

# speakers = vctk.available_speakers
# print(speakers)
# print(len(speakers))

# Speaker names are characters 1..3 of every entry under
# <hp.vctk_processed>/wav48.
speakers = [
    str(entry[1:4])
    for entry in os.listdir(os.path.join(hp.vctk_processed, "wav48"))
]

td = vctk.TranscriptionDataSource(hp.vctk_processed, speakers=speakers)
transcriptions = td.collect_files()
wav_source = vctk.WavFileDataSource(hp.vctk_processed, speakers=speakers)
wav_paths = wav_source.collect_files()

# Print the transcription and the wav path found at the same position.
for listing in (transcriptions, wav_paths):
    print(listing[32306])
Пример #7
0
def prepare_txt_dict():
    """Run g2p over all VCTK transcriptions and write a merged word dictionary.

    Steps, in order:

    1. Collect transcriptions and wav paths from ``hp.vctk_path``.
    2. Derive one target path ``processed/<wav file stem>.txt`` per wav file.
    3. Remove every character that is neither alphabetic nor a space from
       each transcription (via ``delete_alpha_str``) and pass the result to
       ``g2p.g2p``.
    4. Submit each ``(target path, g2p output)`` pair to ``prepare_txt`` in a
       process pool and merge the returned mappings into one dict.
    5. Write the merged dict to ``words_dict.txt``, one line per key: the
       key, four spaces, then each item followed by a single space.

    Progress messages are printed to stdout along the way.
    """
    speakers = vctk.available_speakers
    td = vctk.TranscriptionDataSource(hp.vctk_path, speakers=speakers)
    transcriptions = td.collect_files()
    wav_paths = vctk.WavFileDataSource(hp.vctk_path,
                                       speakers=speakers).collect_files()

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs("processed", exist_ok=True)

    save_name_list = [
        os.path.join(
            "processed",
            os.path.splitext(os.path.basename(wav_path))[0] + ".txt")
        for wav_path in wav_paths
    ]
    print("Get Name Done.")

    lists_P = []

    with g2p.Session():
        for i, text in enumerate(transcriptions):
            # Positions of characters that are neither letters nor spaces.
            list_not_alpha = [
                pos for pos, char in enumerate(text)
                if (not char.isalpha()) and (char != ' ')
            ]

            # Each deletion shifts the remaining positions left by one, so
            # subtract the number of characters already removed.
            for removed, pos in enumerate(list_not_alpha):
                text = delete_alpha_str(text, pos - removed)

            lists_P.append(g2p.g2p(text))

            if i % 100 == 0:
                print(i)

    print("Get P Done.")

    words_dict = {}

    # Create the pool only when it is needed and let the context manager
    # shut it down, even if a worker raises.
    with ProcessPoolExecutor(max_workers=cpu_count()) as executor:
        futures = [
            executor.submit(partial(prepare_txt, save_name_list[ind], list_P))
            for ind, list_P in enumerate(lists_P)
        ]

        print("Prepare Done.")

        for future in futures:
            words_dict.update(future.result())

    with open("words_dict.txt", "w") as f:
        for key, items in words_dict.items():
            # Every item, including the last, is followed by one space; the
            # trailing space is kept so the file format stays unchanged.
            temp_str_P = "".join(P + " " for P in items)
            f.write(key + "    " + temp_str_P + "\n")