Exemplo n.º 1
0
def test(params):
    """Evaluate a trained G2P model on a test set and write a report.

    The encodings are loaded from ``params.model + '.encodings'`` and the
    network from ``params.model + '-bestAcc.network'``. Every entry of the
    dataset read from ``params.input`` is transcribed and one line per entry
    is written to ``params.output``::

        word<TAB>gold phonemes<TAB>predicted phonemes[***]

    Each phoneme is followed by a single space; ``***`` marks a prediction
    that differs from the gold transcription. Progress (in steps of 5) and
    the final word-level accuracy are written to stdout.

    Args:
        params: object exposing the ``model``, ``input`` and ``output``
            attributes read above.
    """
    from io_modules.encodings import Encodings
    from io_modules.dataset import LTSDataset

    encodings = Encodings()
    encodings.load(params.model + '.encodings')
    testset = LTSDataset(params.input)
    from models.g2p import G2P
    model = G2P(encodings)
    model.load(params.model + '-bestAcc.network')

    total = len(testset.entries)
    correct = 0
    last_proc = 0
    # The context manager guarantees the report is closed (and flushed) even
    # if transcription fails part-way through the test set.
    with open(params.output, 'w') as f:
        for index, entry in enumerate(testset.entries, 1):
            curr_proc = int(index * 100 / total)
            if curr_proc % 5 == 0:
                # Catch up on every 5-step milestone passed since the last
                # report; small datasets can jump several steps at once.
                while last_proc < curr_proc:
                    last_proc += 5
                    sys.stdout.write(str(last_proc) + ' ')
                    sys.stdout.flush()

            p_transcription = model.transcribe(entry.word)
            if p_transcription == entry.transcription:
                correct += 1
                marker = ''
            else:
                marker = '***'

            gold = ''.join(phon + ' ' for phon in entry.transcription)
            predicted = ''.join(phon + ' ' for phon in p_transcription)
            f.write(entry.word + '\t' + gold + '\t' + predicted + marker +
                    '\n')

    sys.stdout.write('done\n')
    # An empty test set has no meaningful accuracy; report 0.0 instead of
    # failing with ZeroDivisionError after the report has been written.
    accuracy = float(correct) / total if total else 0.0
    sys.stdout.write('Word level accuracy is ' + str(accuracy) + '\n')
Exemplo n.º 2
0
def convert(params):
    """Overwrite each matching file with the phonemes of its first line.

    The G2P model is loaded from ``params.model + '.encodings'`` and
    ``params.model + '-bestAcc.network'``. Every regular file directly inside
    ``params.input`` whose name contains ``'txt'`` is processed: its first
    line is split on single spaces, each piece is transcribed, and the file
    is rewritten with the resulting phonemes, each followed by one space.

    Args:
        params: object exposing the ``model`` and ``input`` attributes.
    """
    from io_modules.encodings import Encodings
    encodings = Encodings()
    encodings.load(params.model + '.encodings')
    from models.g2p import G2P
    model = G2P(encodings)
    model.load(params.model + '-bestAcc.network')
    from os import listdir
    from os.path import isfile, join

    candidates = []
    for name in listdir(params.input):
        if isfile(join(params.input, name)) and 'txt' in name:
            candidates.append(name)

    for name in candidates:
        print(name)
        path = join(params.input, name)
        # Only the first line is read; the rest of the file is discarded
        # when the file is reopened for writing below.
        with open(path, 'r') as reader:
            first_line = reader.readline()
        with open(path, 'w') as writer:
            phones = [
                phone
                for word in first_line.split(' ')
                for phone in model.transcribe(word)
            ]
            writer.writelines(phone + ' ' for phone in phones)
Exemplo n.º 3
0
def train(params):
    """Train a G2P model from the datasets named in ``params``.

    Loads the training and development sets, builds the encodings from the
    training set, prints dataset and encoding sizes to stdout, and hands the
    model over to ``G2PTrainer.start_training``.

    Args:
        params: object exposing ``train_file``, ``dev_file``, ``model_base``,
            ``batch_size`` and ``patience``.
    """
    from io_modules.encodings import Encodings
    from io_modules.dataset import LTSDataset

    def _say(*parts):
        # Concatenate the pieces (stringifying counts) into one stdout write.
        sys.stdout.write(''.join(str(part) for part in parts))

    _say('Loading datasets...\n')
    trainset = LTSDataset(params.train_file)
    devset = LTSDataset(params.dev_file)
    _say('Trainset has ', len(trainset.entries), ' entries\n')
    _say('Devset has ', len(devset.entries), ' entries\n')

    encodings = Encodings()
    encodings.update_encodings(trainset)
    _say('Found ', len(encodings.char2int), ' characters\n')
    _say('Found ', len(encodings.phoneme2int), ' phonemes\n')

    from trainers.g2p import G2PTrainer
    from models.g2p import G2P

    model = G2P(encodings)
    G2PTrainer().start_training(
        model,
        encodings,
        trainset,
        devset,
        params.model_base,
        params.batch_size,
        params.patience,
    )
Exemplo n.º 4
0
    elif not params.txt_file:
        print("Input file is mandatory")
    elif not params.output_file:
        print("Output file is mandatory")

    memory = int(params.memory)
    # for compatibility we have to add this parameter
    params.learning_rate = 0.0001
    dynet_config.set(mem=memory, random_seed=9)
    if params.gpu:
        dynet_config.set_gpu()

    if params.g2p is not None:
        from models.g2p import G2P
        from io_modules.encodings import Encodings

        g2p_encodings = Encodings()
        g2p_encodings.load(params.g2p + '.encodings')
        g2p = G2P(g2p_encodings)
        g2p.load(params.g2p + '-bestAcc.network')
        if exists(params.g2p + '.lexicon'):
            g2p.load_lexicon(params.g2p + '.lexicon')
    else:
        g2p = None

    synthesize(params.speaker,
               params.txt_file,
               params.output_file,
               params,
               g2p=g2p)
Exemplo n.º 5
0
    def phase_1_prepare_corpus(params):
        """Convert raw train/dev folders into ``data/processed/{train,dev}``.

        An utterance is kept when both ``<base>.txt`` and ``<base>.wav``
        exist in its folder. For each kept utterance the following files are
        written to the output folder of its split:

        * ``.lab``: copied when present in the source folder, otherwise
          produced by ``create_lab_file`` (using the optional G2P model);
        * ``.txt``: copied from the source folder;
        * ``.png``: spectrogram rendered by ``render_spectrogram``;
        * ``.orig.wav``: the audio returned by ``DatasetIO.read_wave``;
        * ``.mgc``: the mel spectrogram, stored with ``array2file``.

        When ``params.prefix`` is not None, output files are named
        ``<prefix>_<NNNNN>`` using one counter that runs across the train
        split and then the dev split; otherwise the source base name is kept.
        This function does not create the output folders.

        Args:
            params: object exposing ``train_folder``, ``dev_folder`` (may be
                None), ``g2p`` (model path prefix or None), ``prefix``,
                ``speaker``, ``target_sample_rate`` and ``mgc_order``.
        """
        from os import listdir
        from os.path import isfile, join
        from os.path import exists

        def _list_files(folder):
            # Names of the regular files located directly inside ``folder``.
            return [f for f in listdir(folder) if isfile(join(folder, f))]

        def _valid_base_names(folder, file_names):
            # Unique base names, in first-seen order, that have both a .txt
            # and a .wav file in ``folder``.
            seen = set()
            base_names = []
            for file_name in file_names:
                # Drops the last four characters, i.e. a three-letter
                # extension and its dot.
                base_name = file_name[:-4]
                if base_name in seen:
                    continue
                if exists(join(folder, base_name + '.txt')) and exists(
                        join(folder, base_name + '.wav')):
                    seen.add(base_name)
                    base_names.append(base_name)
            return base_names

        train_files_tmp = _list_files(params.train_folder)
        if params.dev_folder is not None:
            dev_files_tmp = _list_files(params.dev_folder)
        else:
            dev_files_tmp = []

        if params.g2p is not None:
            from models.g2p import G2P
            from io_modules.encodings import Encodings
            g2p_encodings = Encodings()
            g2p_encodings.load(params.g2p + '.encodings')
            g2p = G2P(g2p_encodings)
            g2p.load(params.g2p + '-bestAcc.network')
            if exists(params.g2p + '.lexicon'):
                g2p.load_lexicon(params.g2p + '.lexicon')
        else:
            g2p = None

        sys.stdout.write("Scanning training files...")
        sys.stdout.flush()
        train_files = _valid_base_names(params.train_folder, train_files_tmp)
        sys.stdout.write(" found " + str(len(train_files)) +
                         " valid training files\n")
        sys.stdout.write("Scanning development files...")
        sys.stdout.flush()
        # With no dev folder the candidate list is empty, so the folder
        # argument (None) is never used.
        dev_files = _valid_base_names(params.dev_folder, dev_files_tmp)
        sys.stdout.write(" found " + str(len(dev_files)) +
                         " valid development files\n")

        from io_modules.dataset import DatasetIO
        from io_modules.vocoder import MelVocoder
        from shutil import copyfile
        dio = DatasetIO()
        vocoder = MelVocoder()

        def _process_split(base_folder, out_folder, base_names, total_files):
            # Convert every utterance of one split into ``out_folder`` and
            # return the updated running file counter.
            for index, base_name in enumerate(base_names):
                total_files += 1
                sys.stdout.write("\r\tprocessing file " + str(index + 1) +
                                 "/" + str(len(base_names)))
                sys.stdout.flush()
                txt_name = base_name + '.txt'
                wav_name = base_name + '.wav'
                lab_name = base_name + '.lab'

                # Stem shared by all output files of this utterance.
                if params.prefix is None:
                    tgt_stem = base_name
                else:
                    tgt_stem = params.prefix + "_{:05d}".format(total_files)

                # LAB - copy or create
                tgt_lab_path = join(out_folder, tgt_stem + '.lab')
                if exists(join(base_folder, lab_name)):
                    copyfile(join(base_folder, lab_name), tgt_lab_path)
                else:
                    create_lab_file(join(base_folder, txt_name),
                                    tgt_lab_path,
                                    speaker_name=params.speaker,
                                    g2p=g2p)
                # TXT
                copyfile(join(base_folder, txt_name),
                         join(out_folder, tgt_stem + '.txt'))
                # WAVE
                data, sample_rate = dio.read_wave(
                    join(base_folder, wav_name),
                    sample_rate=params.target_sample_rate)
                mgc = vocoder.melspectrogram(
                    data,
                    sample_rate=params.target_sample_rate,
                    num_mels=params.mgc_order)
                # SPECT
                render_spectrogram(mgc, join(out_folder, tgt_stem + '.png'))
                dio.write_wave(join(out_folder, tgt_stem + '.orig.wav'),
                               data, sample_rate)
                array2file(mgc, join(out_folder, tgt_stem + '.mgc'))

            sys.stdout.write('\n')
            return total_files

        # The counter is threaded through both calls so it keeps running
        # from the train split into the dev split.
        total_files = _process_split(params.train_folder,
                                     'data/processed/train', train_files, 0)
        total_files = _process_split(params.dev_folder,
                                     'data/processed/dev', dev_files,
                                     total_files)