示例#1
0
文件: module.py 项目: lxw566666/hlp
def evaluate(model: tf.keras.Model,
             data_path: str,
             max_len: int,
             vocab_size: int,
             max_train_data_size: int,
             batch_size: int,
             buffer_size: int,
             tokenized_type: str = "phoneme"):
    """
    Evaluate the model by scoring predicted mels against reference mels.

    Each sentence of the loaded dataset is passed through
    ``model.inference`` one sample at a time. The post-net mel output is
    compared with the reference mel via ``spec_distance``; the per-sample
    score and the average over all samples are printed.

    :param model: model exposing ``inference(input_ids)``
    :param data_path: text data path (forwarded to ``_dataset.load_data``
        as ``train_data_path``)
    :param max_len: maximum text sequence length
    :param vocab_size: vocabulary size
    :param max_train_data_size: maximum amount of data to load
    :param batch_size: Dataset batch size
    :param buffer_size: Dataset loading buffer size
    :param tokenized_type: tokenization mode, default phoneme; one of
        phoneme / word / char
    :return: None
    """
    dataset, _, steps_per_epoch, _ = _dataset.load_data(
        train_data_path=data_path, max_len=max_len, vocab_size=vocab_size,
        batch_size=batch_size, buffer_size=buffer_size,
        tokenized_type=tokenized_type,
        max_train_data_size=max_train_data_size)

    sample_count = 0
    score_sum = 0
    # The stop-token target is part of each batch but is not needed here.
    for mel, _stop_token, sentence in dataset.take(steps_per_epoch):
        for i in range(sentence.shape[0]):
            # Inference runs on a single sample, so restore the batch axis.
            new_input_ids = tf.expand_dims(sentence[i], axis=0)
            _, mel_outputs_postnet, _, _ = model.inference(new_input_ids)

            # Swap the last two axes of the reference mel before scoring;
            # presumably this matches the layout of the model output --
            # TODO confirm against spec_distance.
            reference_mel = tf.expand_dims(mel[i], axis=0)
            reference_mel = tf.transpose(reference_mel, [0, 2, 1])

            score = spec_distance(mel_outputs_postnet, reference_mel)
            score_sum += score
            sample_count += 1
            print('第{}个样本的欧式距离为:{}'.format(sample_count, score))

    # Guard against an empty dataset: averaging over zero samples would
    # raise ZeroDivisionError.
    if sample_count == 0:
        print("没有可评估的样本")
        return
    print("样本平均欧式距离为:", score_sum / sample_count)
示例#2
0
文件: module.py 项目: lxw566666/hlp
def generate(model: tf.keras.Model,
             max_db: int,
             ref_db: int,
             sr: int,
             max_len: int,
             wave_save_dir: str,
             n_fft: int,
             n_mels: int,
             pre_emphasis: float,
             n_iter: int,
             hop_length: int,
             cmu_dict_path: str,
             win_length: int,
             dict_path: str = "",
             tokenized_type: str = "phoneme"):
    """
    Interactively synthesize speech from text typed on the console.

    Repeatedly prompts for a sentence until the user enters ``ESC``. Each
    sentence is converted to input ids, passed through ``model.inference``,
    turned into a waveform with ``melspectrogram2wav``, written to
    ``wave_save_dir`` as ``generated<N>.wav`` and played back.

    :param model: model exposing ``inference(input_ids)``
    :param max_db: peak decibel value
    :param ref_db: reference decibel value
    :param sr: sample rate; also used as the rate of the written wav file
    :param max_len: maximum sentence sequence length
    :param wave_save_dir: directory where synthesized audio is saved
        (created if missing)
    :param n_fft: FFT window size
    :param n_mels: number of mel bands
    :param pre_emphasis: pre-emphasis coefficient
    :param n_iter: iteration count forwarded to ``melspectrogram2wav``
        (presumably Griffin-Lim iterations -- TODO confirm)
    :param hop_length: frame shift
    :param cmu_dict_path: phoneme dictionary path
    :param win_length: window length applied to each frame, which is then
        zero-padded to match n_fft
    :param dict_path: path to the tokenizer JSON dictionary; only read when
        ``tokenized_type`` is not "phoneme"
    :param tokenized_type: tokenization mode
    :return: None
    """
    os.makedirs(wave_save_dir, exist_ok=True)

    # Loaded lazily on first use so the dictionary file is parsed once
    # instead of on every prompt.
    tokenizer = None

    i = 0
    # Read text from the console until the user asks to stop.
    while True:
        # The counter advances per prompt, so the first file is generated1.
        i = i + 1
        print()
        seq = input("请输入您要合成的话,输入ESC结束:")
        if seq == 'ESC':
            break

        sequences_list = [
            text_to_phonemes(text=seq, cmu_dict_path=cmu_dict_path)
        ]
        if tokenized_type == "phoneme":
            input_ids = text_to_sequence_phoneme(texts=sequences_list,
                                                 max_len=max_len)
        else:
            if tokenizer is None:
                with open(dict_path, 'r', encoding="utf-8") as dict_file:
                    json_string = dict_file.read().strip()
                tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(
                    json_string)
            input_ids = tokenizer.texts_to_sequences(sequences_list)
            # pad_sequences takes ``maxlen``; the previous ``max_len``
            # keyword raised TypeError.
            input_ids = tf.keras.preprocessing.sequence.pad_sequences(
                input_ids, maxlen=max_len, padding="post")
        input_ids = tf.convert_to_tensor(input_ids)

        # Predict; only the post-net mel output is used for synthesis.
        _, mel_outputs_postnet, _, _ = model.inference(input_ids)

        # Convert the predicted mel spectrogram to a waveform.
        wav = melspectrogram2wav(mel_outputs_postnet[0].numpy(), max_db,
                                 ref_db, sr, n_fft, n_mels, pre_emphasis,
                                 n_iter, hop_length, win_length)
        # os.path.join keeps the path valid on non-Windows platforms too.
        name = os.path.join(wave_save_dir, 'generated' + str(i) + '.wav')
        wave.write(name, rate=sr, data=wav)
        playsound(name)
        print("已合成,路径:{}".format(name))
    print("合成结束")