示例#1
0
    def __init_model(self):
        """Prepare the processor, a SavedModel-backed Tacotron2, and MB-MelGAN.

        Steps, in order:

        1. Load the text processor from ``config.baker_mapper_pretrained_path``.
        2. Create Tacotron2 from ``config.tacotron2_baker`` with
           ``is_build=False``, run one inference call on a sample sentence,
           then load the weights from ``config.tacotron2_pretrained_path``.
        3. Save the model to ``./test_saved`` with ``inference`` as its
           signature and replace ``self.tacotron2`` with the reloaded export.
        4. Load the MB-MelGAN vocoder from its config and checkpoint.

        Sets ``self.processor``, ``self.tacotron2`` and ``self.mb_melgan``.
        The processor is loaded once; it is needed before the export to turn
        the sample sentence into ids.
        """
        export_dir = "./test_saved"
        sample_text = "你好, 很高兴认识你"

        self.processor = AutoProcessor.from_pretrained(
            pretrained_path=config.baker_mapper_pretrained_path)
        input_ids = self.processor.text_to_sequence(sample_text, inference=True)

        tacotron2_config = AutoConfig.from_pretrained(config.tacotron2_baker)
        self.tacotron2 = TFAutoModel.from_pretrained(
            config=tacotron2_config,
            pretrained_path=None,
            # don't build model if you want to save it to pb. (TF related bug)
            is_build=False,
            name="tacotron2")
        self.tacotron2.setup_window(win_front=5, win_back=5)

        # One inference call before load_weights; its outputs are not needed.
        # NOTE(review): presumably this creates the model variables because
        # the model was created with is_build=False — confirm.
        self.tacotron2.inference(
            tf.expand_dims(tf.convert_to_tensor(input_ids, dtype=tf.int32), 0),
            tf.convert_to_tensor([len(input_ids)], tf.int32),
            tf.convert_to_tensor([0], dtype=tf.int32))
        self.tacotron2.load_weights(config.tacotron2_pretrained_path)

        # Export, then continue with the reloaded SavedModel object.
        tf.saved_model.save(self.tacotron2,
                            export_dir,
                            signatures=self.tacotron2.inference)
        self.tacotron2 = tf.saved_model.load(export_dir)

        mb_melgan_config = AutoConfig.from_pretrained(
            config.multiband_melgan_baker)
        self.mb_melgan = TFAutoModel.from_pretrained(
            config=mb_melgan_config,
            pretrained_path=config.multiband_melgan_pretrained_path,
            name="mb_melgan")
示例#2
0
    def __init__(self):
        """Create the OpenCC converter and load the TTS models and processor.

        The model directory is read from the ``TTS_MODEL_DIR`` environment
        variable (``KeyError`` if unset). When that directory does not exist,
        ``model_files.zip`` is fetched with ``download_blob`` into its parent
        directory and extracted there.

        Sets ``self.converter``, ``self.text2mel_model``,
        ``self.vocoder_model`` and ``self.processor``.
        """
        self.converter = opencc.OpenCC('tw2s.json')

        model_root = os.environ['TTS_MODEL_DIR']
        if not os.path.exists(model_root):
            # Model files are missing: download the archive next to the
            # expected directory and unpack it in place.
            download_dir = os.path.dirname(model_root)
            archive_path = os.path.join(download_dir, 'model_files.zip')
            download_blob('dailybrief', 'models/model_files.zip', archive_path)
            with zipfile.ZipFile(archive_path, 'r') as archive:
                archive.extractall(download_dir)

        def in_model_root(relative_path):
            # Resolve a file below the model directory.
            return os.path.join(model_root, relative_path)

        text2mel_config = AutoConfig.from_pretrained(
            in_model_root('config/tacotron2.baker.v1.yaml'))
        vocoder_config = AutoConfig.from_pretrained(
            in_model_root('config/multiband_melgan.baker.v1.yaml'))

        self.text2mel_model = TFAutoModel.from_pretrained(
            config=text2mel_config,
            pretrained_path=in_model_root('models/tacotron-model-100000.h5'),
            name='tacotron2',
        )
        self.vocoder_model = TFAutoModel.from_pretrained(
            config=vocoder_config,
            pretrained_path=in_model_root('models/generator-920000.h5'),
            name='mb_melgan',
        )
        self.processor = AutoProcessor.from_pretrained(
            pretrained_path=in_model_root('models/baker_mapper.json'))
示例#3
0
def init_speech():
    """Import the speech libraries, load the models, and run one inference.

    The imports happen inside the function and are published as module
    globals (``sf``, ``tf``, ``TFAutoModel``, ``AutoProcessor``), as are the
    loaded ``fastspeech2``, ``mb_melgan`` and ``processor`` objects.

    Finishes by calling ``inference("Hello sir")`` and
    ``debug("Speech", "init")``; both are defined outside this function.
    """
    global sf, tf, TFAutoModel, AutoProcessor
    global fastspeech2, mb_melgan, processor

    import soundfile as sf
    import tensorflow as tf
    from tensorflow_tts.inference import AutoProcessor, TFAutoModel

    text2mel_repo = "tensorspeech/tts-fastspeech2-ljspeech-en"
    vocoder_repo = "tensorspeech/tts-mb_melgan-ljspeech-en"

    fastspeech2 = TFAutoModel.from_pretrained(text2mel_repo)
    mb_melgan = TFAutoModel.from_pretrained(vocoder_repo)
    # The processor is loaded from the same identifier as the text2mel model.
    processor = AutoProcessor.from_pretrained(text2mel_repo)

    # NOTE(review): presumably a warm-up call — confirm against inference().
    inference("Hello sir")
    debug("Speech", "init")
示例#4
0
    def _converter_model(self):
        """Convert Tacotron2 to TFLite, open it in an interpreter, load MB-MelGAN.

        Builds ``TFTacotron2`` from the ``tacotron2_params`` section of the YAML
        file at ``config.tacotron2_baker``, loads its weights, converts the
        ``inference_tflite`` function and writes the result to
        ``tacotron2.tflite`` in the current working directory. The written
        file is then opened with ``tf.lite.Interpreter``.

        Sets ``self.tacotron2``, ``self.interpreter``, ``self.input_details``,
        ``self.output_details``, ``self.mb_melgan`` and ``self.processor``.
        Prints the size of the converted model.
        """
        tflite_path = 'tacotron2.tflite'

        # NOTE(review): yaml.Loader can construct arbitrary Python objects;
        # acceptable only while the config file is trusted.
        with open(config.tacotron2_baker) as config_file:
            raw_config = yaml.load(config_file, Loader=yaml.Loader)
        tacotron2_params = Tacotron2Config(**raw_config["tacotron2_params"])

        self.tacotron2 = TFTacotron2(
            config=tacotron2_params,
            training=False,
            name="tacotron2",
            enable_tflite_convertible=True,
        )
        self.tacotron2.setup_window(win_front=5, win_back=5)
        self.tacotron2.setup_maximum_iterations(1000)  # be careful
        self.tacotron2._build()
        self.tacotron2.load_weights(config.tacotron2_pretrained_path)

        concrete_fn = self.tacotron2.inference_tflite.get_concrete_function()
        converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_fn])
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        converter.target_spec.supported_ops = [
            tf.lite.OpsSet.TFLITE_BUILTINS,
            tf.lite.OpsSet.SELECT_TF_OPS,
        ]
        tflite_bytes = converter.convert()
        with open(tflite_path, 'wb') as out_file:
            out_file.write(tflite_bytes)

        size_mb = len(tflite_bytes) / 1024 / 1024.0
        print('Model size is %f MBs.' % size_mb)

        # Inference goes through the TFLite interpreter from here on.
        self.interpreter = tf.lite.Interpreter(model_path=tflite_path)
        self.interpreter.allocate_tensors()
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()

        vocoder_config = AutoConfig.from_pretrained(config.multiband_melgan_baker)
        self.mb_melgan = TFAutoModel.from_pretrained(
            config=vocoder_config,
            pretrained_path=config.multiband_melgan_pretrained_path,
            name="mb_melgan",
        )

        self.processor = AutoProcessor.from_pretrained(
            pretrained_path=config.baker_mapper_pretrained_path)
示例#5
0
    def __init__(self):
        """Load the backend selected by the module-level ``Text2SpeechModel``.

        * ``"dc_tts"``: creates ``Graph(mode="synthesize")`` as ``self.g``.
        * ``"RTVC"``: loads the encoder, synthesizer and vocoder checkpoints
          below ``root_file_path`` and stores the embedding of a reference
          recording as the single element of ``self.embeds``.
        * ``"AudioSynth"``: loads Tacotron2, MelGAN-STFT and the processor
          below ``root_file_path``, then calls ``do_synthesis`` once.

        Any other value sets no attributes.
        """
        if Text2SpeechModel == "dc_tts":
            self.g = Graph(mode="synthesize")
            print("Text2Speech Tensorflow Graph loaded")
            return

        if Text2SpeechModel == "RTVC":
            rtvc_root = os.path.join(root_file_path, "RTVC")
            encoder_ckpt = os.path.join(
                rtvc_root, "encoder/saved_models/pretrained.pt")
            synthesizer_dir = os.path.join(
                rtvc_root, "synthesizer/saved_models/logs-pretrained")
            vocoder_ckpt = os.path.join(
                rtvc_root, "vocoder/saved_models/pretrained/pretrained.pt")

            encoder.load_model(encoder_ckpt)
            self.synthesizer = Synthesizer(
                os.path.join(synthesizer_dir, "taco_pretrained"),
                low_mem=False)
            vocoder.load_model(vocoder_ckpt)

            # Built from root_file_path with its last "/"-separated component
            # dropped, re-rooted at "/".
            reference_wav = os.path.join(
                "/", *root_file_path.split("/")[:-1],
                "REF/refaudioRTVC/ref.wav")

            # NOTE(review): this first result is overwritten two statements
            # later; the call is kept so behaviour is unchanged.
            speaker_wav = encoder.preprocess_wav(reference_wav)
            raw_wav, sample_rate = librosa.load(reference_wav)
            speaker_wav = encoder.preprocess_wav(raw_wav, sample_rate)
            self.embeds = [encoder.embed_utterance(speaker_wav)]
            return

        if Text2SpeechModel == "AudioSynth":

            def under_root(relative_path):
                # Resolve a file below root_file_path.
                return os.path.join(root_file_path, relative_path)

            text2mel_config = AutoConfig.from_pretrained(under_root(
                'AudioSynth/TensorFlowTTS/examples/tacotron2/conf/tacotron2.v1.yaml'))
            self.tacotron2 = TFAutoModel.from_pretrained(
                config=text2mel_config,
                pretrained_path=under_root("AudioSynth/tacotron2-120k.h5"),
                training=False,
                name="tacotron2")

            vocoder_config = AutoConfig.from_pretrained(under_root(
                'AudioSynth/TensorFlowTTS/examples/melgan.stft/conf/melgan.stft.v1.yaml'))
            self.melgan_stft = TFAutoModel.from_pretrained(
                config=vocoder_config,
                pretrained_path=under_root("AudioSynth/melgan.stft-2M.h5"),
                name="melgan_stft")

            self.processor = AutoProcessor.from_pretrained(
                pretrained_path=under_root("AudioSynth/ljspeech_mapper.json"))

            # One synthesis call at construction time; results are unused.
            # NOTE(review): presumably a warm-up — confirm.
            _mels, _alignments, _audios = do_synthesis(
                "Hello, how can I help you today?", self.tacotron2,
                self.melgan_stft, "TACOTRON", "MELGAN-STFT", self.processor)
示例#6
0
 def __init__(self):
     """Load the text-to-mel model, the MB-MelGAN vocoder and the processor.

     All three come from ``tensorspeech/...-kss-ko`` pretrained identifiers and
     are stored as ``self.tts_model``, ``self.mb_melgan`` and
     ``self.processor``.
     """
     text2mel_repo = "tensorspeech/tts-fastspeech2-kss-ko"
     vocoder_repo = "tensorspeech/tts-mb_melgan-kss-ko"

     # Text-to-mel model (the original note allows fastspeech2 or tacotron2).
     self.tts_model = TFAutoModel.from_pretrained(text2mel_repo)
     self.mb_melgan = TFAutoModel.from_pretrained(vocoder_repo)
     # The processor is loaded from the same identifier as the text2mel model.
     self.processor = AutoProcessor.from_pretrained(text2mel_repo)
示例#7
0
    def __init_model(self):
        """Load Tacotron2, MB-MelGAN and the text processor from ``config``.

        Sets ``self.tacotron2`` (with an attention window of 5 in front and 5
        behind), ``self.mb_melgan`` and ``self.processor``.
        """
        text2mel_config = AutoConfig.from_pretrained(config.tacotron2_baker)
        self.tacotron2 = TFAutoModel.from_pretrained(
            config=text2mel_config,
            pretrained_path=config.tacotron2_pretrained_path,
            training=False,
            name="tacotron2",
        )
        self.tacotron2.setup_window(win_front=5, win_back=5)

        vocoder_config = AutoConfig.from_pretrained(config.multiband_melgan_baker)
        self.mb_melgan = TFAutoModel.from_pretrained(
            config=vocoder_config,
            pretrained_path=config.multiband_melgan_pretrained_path,
            name="mb_melgan",
        )

        mapper_path = config.baker_mapper_pretrained_path
        self.processor = AutoProcessor.from_pretrained(pretrained_path=mapper_path)
示例#8
0
    def __init__(self):
        """Open a GPU-limited TF session, then load the models and processor.

        Loads Tacotron2, FastSpeech2 and MB-MelGAN (each from a YAML config
        plus an ``.h5`` checkpoint) and the processor from
        ``test/files/kss_mapper.json``. Every path is resolved relative to the
        directory containing this module.

        Sets ``self.tacotron2``, ``self.fastspeech2``, ``self.mb_melgan`` and
        ``self.processor``.
        """
        # Limit this process to the GPU memory fraction given below.
        # NOTE(review): the original comment said 1/3, but the value is 0.8.
        memory_cap = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=0.8)
        session_config = tf.compat.v1.ConfigProto(gpu_options=memory_cap)
        # Elastic (on-demand) memory allocation, currently disabled:
        # session_config.gpu_options.allow_growth = True
        # Bound to a local name only; the session is not stored on the instance.
        session = tf.compat.v1.Session(config=session_config)

        module_path = os.path.dirname(os.path.abspath(__file__))

        def module_file(relative_path):
            # Resolve a path relative to this module's directory.
            return os.path.join(module_path, relative_path)

        # Tacotron2: configuration and trained checkpoint.
        taco_cfg = AutoConfig.from_pretrained(
            module_file('examples/tacotron2/conf/tacotron2.song44k.v5.yaml'))
        self.tacotron2 = TFAutoModel.from_pretrained(
            config=taco_cfg,
            pretrained_path=module_file(
                "examples/tacotron2/exp/train.tacotron2.song44k.v5/checkpoints/model-68000.h5"),
            name="tacotron2")

        # FastSpeech2: configuration and trained checkpoint.
        fs2_cfg = AutoConfig.from_pretrained(
            module_file('examples/fastspeech2/conf/fastspeech2.song44k.v5.1.yaml'))
        self.fastspeech2 = TFAutoModel.from_pretrained(
            config=fs2_cfg,
            pretrained_path=module_file(
                "examples/fastspeech2/exp/train.fastspeech2.song44k.v5.1/checkpoints/model-600000.h5"),
            name="fastspeech2")

        # MB-MelGAN: configuration and trained generator checkpoint.
        vocoder_cfg = AutoConfig.from_pretrained(
            module_file('examples/multiband_melgan/conf/multiband_melgan.ko.song44k.v5.1.yaml'))
        self.mb_melgan = TFAutoModel.from_pretrained(
            config=vocoder_cfg,
            pretrained_path=module_file(
                "examples/multiband_melgan/exp/train.multiband_melgan.ko.song44k.v5.1/checkpoints/generator-1740000.h5"),
            name="mb_melgan")

        # Processor: loads the mapper from each character to its number.
        self.processor = AutoProcessor.from_pretrained(
            pretrained_path=module_file("test/files/kss_mapper.json"))
示例#9
0
    def __init__(self):
        """Open a GPU-limited TF session, then load the models and processor.

        Loads Tacotron2, FastSpeech2 and MB-MelGAN (each from a YAML config
        plus an ``.h5`` checkpoint) and the processor from
        ``test/files/kss_mapper.json``. Every path is resolved relative to the
        directory containing this module.

        Sets ``self.tacotron2``, ``self.fastspeech2``, ``self.mb_melgan`` and
        ``self.processor``.
        """
        # Limit this process to the GPU memory fraction given below.
        # NOTE(review): the original comment said 1/3, but the value is 0.8.
        memory_cap = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=0.8)
        session_config = tf.compat.v1.ConfigProto(gpu_options=memory_cap)
        # Elastic (on-demand) memory allocation, currently disabled:
        # session_config.gpu_options.allow_growth = True
        # Bound to a local name only; the session is not stored on the instance.
        session = tf.compat.v1.Session(config=session_config)

        module_path = os.path.dirname(os.path.abspath(__file__))

        def module_file(relative_path):
            # Resolve a path relative to this module's directory.
            return os.path.join(module_path, relative_path)

        # Tacotron2: configuration and trained checkpoint.
        taco_cfg = AutoConfig.from_pretrained(
            module_file('./examples/tacotron2/conf/tacotron2.song8k.v3.yaml'))
        self.tacotron2 = TFAutoModel.from_pretrained(
            config=taco_cfg,
            pretrained_path=module_file(
                "./examples/tacotron2/exp/train.tacotron2.song8k.v3/checkpoints/model-68000.h5"),
            name="tacotron2",
        )

        # FastSpeech2: configuration and trained checkpoint.
        fs2_cfg = AutoConfig.from_pretrained(
            module_file('./examples/fastspeech2/conf/fastspeech2.song8k.v3.yaml'))
        self.fastspeech2 = TFAutoModel.from_pretrained(
            config=fs2_cfg,
            # Previous checkpoint:
            # "./examples/fastspeech2/exp/train.fastspeech2.song8k.v1.1/checkpoints/model-200000.h5"
            pretrained_path=module_file(
                "./examples/fastspeech2/exp/train.fastspeech2.song8k.v3/checkpoints/model-200000.h5"),
            name="fastspeech2",
        )

        # Disabled FastSpeech (v1) path, kept for reference:
        # fastspeech1_config = AutoConfig.from_pretrained(os.path.join(module_path,'examples/fastspeech/conf/fastspeech.v3_song44k_v51.yaml'))
        # self.fastspeech1 = TFAutoModel.from_pretrained(
        #     config=fastspeech1_config,
        #     pretrained_path=os.path.join(module_path,"examples/fastspeech/exp/train.fastspeech.song.v41/checkpoints/model-200000.h5"),
        #     name="fastspeech1"
        # )
        # resizing positional embedding
        # self.fastspeech1._build()
        # self.fastspeech1.save_weights("./resize.h5")
        # self.fastspeech1.resize_positional_embeddings(8000)
        # self.fastspeech1.load_weights("./resize.h5", by_name=True, skip_mismatch=True)

        # MB-MelGAN: configuration and trained generator checkpoint.
        vocoder_cfg = AutoConfig.from_pretrained(
            module_file('./examples/multiband_melgan/conf/multiband_melgan.ko.8k.v3.yaml'))
        self.mb_melgan = TFAutoModel.from_pretrained(
            config=vocoder_cfg,
            pretrained_path=module_file(
                "./examples/multiband_melgan/exp/train.multiband_melgan.ko.song8k.v3/checkpoints/generator-1000000.h5"),
            name="mb_melgan",
        )

        # Processor: loads the mapper from each character to its number.
        self.processor = AutoProcessor.from_pretrained(
            pretrained_path=module_file("test/files/kss_mapper.json"))
示例#10
0
def infer(input_text):
  """Run the Tacotron2 TFLite interpreter on ``input_text``.

  The text is passed through ``tts_pause.add_pause`` and converted to ids by a
  processor loaded from ``config_lp.baker_mapper_pretrained_path``. The first
  interpreter input is resized to ``[1, len(input_ids)]`` before tensors are
  allocated, then each input tensor is filled from ``prepare_input`` and the
  interpreter is invoked.

  Relies on the module-level ``interpreter_tacotron``,
  ``input_details_tacotron`` and ``output_details_tacotron``.

  Args:
    input_text: Text to synthesize.

  Returns:
    A 2-tuple holding copies of the interpreter's first and second output
    tensors (labelled decoder output and mel output in the original code).
  """
  # NOTE(review): the processor is re-loaded from disk on every call.
  processor = AutoProcessor.from_pretrained(pretrained_path=config_lp.baker_mapper_pretrained_path)
  input_text = tts_pause.add_pause(input_text)
  input_ids = processor.text_to_sequence(input_text, inference=True)

  # The sequence length varies per call, so resize before allocating tensors.
  interpreter_tacotron.resize_tensor_input(input_details_tacotron[0]['index'], [1, len(input_ids)])
  interpreter_tacotron.allocate_tensors()

  # prepare_input yields one entry per interpreter input, in the same order.
  input_data = prepare_input(input_ids)
  for i, detail in enumerate(input_details_tacotron):
    print(detail)
    interpreter_tacotron.set_tensor(detail['index'], input_data[i])

  interpreter_tacotron.invoke()

  # The function `get_tensor()` returns a copy of the tensor data.
  # Use `tensor()` in order to get a pointer to the tensor.
  decoder_output = interpreter_tacotron.get_tensor(output_details_tacotron[0]['index'])
  mel_output = interpreter_tacotron.get_tensor(output_details_tacotron[1]['index'])
  return (decoder_output, mel_output)
示例#11
0
# fastspeech2_config = AutoConfig.from_pretrained('examples/fastspeech2/conf/fastspeech2.baker.v2.yaml')
# fastspeech2 = TFAutoModel.from_pretrained(
#     config=fastspeech2_config,
#     pretrained_path="trained/fastspeech2-200k.h5",
#     name="fastspeech2"
# )

# MB-MelGAN vocoder: built from the YAML config and loaded with the checkpoint
# named below. Both paths are relative to the current working directory.
mb_melgan_config = AutoConfig.from_pretrained(
    'examples/multiband_melgan/conf/multiband_melgan.baker.v1.yaml')
mb_melgan = TFAutoModel.from_pretrained(
    config=mb_melgan_config,
    pretrained_path="trained/mb.melgan_word-480k.h5",
    name="mb_melgan")

# Module-level text processor; do_synthesis() below reads it as a global to
# turn input text into an id sequence.
processor = AutoProcessor.from_pretrained(
    pretrained_path="trained/baker_mapper_word.json")  # BakerProcessor


def do_synthesis(input_text, text2mel_model, vocoder_model, text2mel_name,
                 vocoder_name):
    input_ids = processor.text_to_sequence(input_text, inference=True)

    # text2mel part
    if text2mel_name == "TACOTRON":
        _, mel_outputs, stop_token_prediction, alignment_history = text2mel_model.inference(
            tf.expand_dims(tf.convert_to_tensor(input_ids, dtype=tf.int32), 0),
            tf.convert_to_tensor([len(input_ids)], tf.int32),
            tf.convert_to_tensor([0], dtype=tf.int32))
    elif text2mel_name == "FASTSPEECH2":
        mel_before, mel_outputs, duration_outputs, _, _ = text2mel_model.inference(
            tf.expand_dims(tf.convert_to_tensor(input_ids, dtype=tf.int32), 0),
示例#12
0
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

# The Original Code is Copyright (C) 2020 Voxell Technologies.
# All rights reserved.

import soundfile as sf
import tensorflow as tf
from tensorflow_tts.inference import AutoConfig
from tensorflow_tts.inference import TFAutoModel
from tensorflow_tts.inference import AutoProcessor

# Text processor loaded from the mapper file one directory above the working
# directory.
processor = AutoProcessor.from_pretrained("../ljspeech_mapper.json")

# Initialize the tacotron2 model from its YAML config. No checkpoint is passed
# here (pretrained_path=None); the weights are loaded explicitly further down.
config = AutoConfig.from_pretrained("../tacotron2/conf/tacotron2.v1.yaml")
tacotron2 = TFAutoModel.from_pretrained(
  config=config, 
  pretrained_path=None,
  is_build=True,
  name="tacotron2"
)

# Inference settings applied before the weights are loaded.
tacotron2.setup_window(win_front=6, win_back=6)
tacotron2.setup_maximum_iterations(3000)

tacotron2.load_weights("../tacotron2/checkpoints/model-120000.h5")
# Disabled SavedModel export of the loaded model:
# tf.saved_model.save(tacotron2, "../tacotron2/inference", signatures=tacotron2.inference)
        '--path_fs',
        default=
        "examples/fastspeech2_libritts/outdir_libri/checkpoints/model-855000.h5"
    )
    parser.add_argument('--path_mb',
                        default="checks/mb_melgan_or/mb.melgan-940k.h5")

    args = parser.parse_args()

    fastspeech2_config = AutoConfig.from_pretrained(
        'examples/fastspeech2/conf/fastspeech2.v1.yaml')
    fastspeech2 = TFAutoModel.from_pretrained(
        config=fastspeech2_config,
        pretrained_path=args.
        path_fs,  #"examples/fastspeech2_libritts/outdir_libri/checkpoints/model-855000.h5",
        #training=False,
        name="fastspeech2")

    mb_melgan_config = AutoConfig.from_pretrained(
        'examples/multiband_melgan/conf/multiband_melgan.v1.yaml')
    mb_melgan = TFAutoModel.from_pretrained(
        config=mb_melgan_config,
        pretrained_path=args.
        path_mb,  #"checks/mb_melgan_or/mb.melgan-940k.h5",
        name="mb_melgan")

    processor = AutoProcessor.from_pretrained(
        pretrained_path="dump_ljspeech/ljspeech_mapper.json")

    app.run(host='0.0.0.0', port=5454)
示例#14
0
 def __init_model(self):
     """Load the text processor from ``config.baker_mapper_pretrained_path``.

     Sets ``self.processor``.
     """
     mapper_path = config.baker_mapper_pretrained_path
     self.processor = AutoProcessor.from_pretrained(pretrained_path=mapper_path)
示例#15
0
import sys

def main(argv):
    """Print the id sequence a mapper file produces for a piece of text.

    Args:
        argv: Full argument vector: program name, mapper JSON path, text.

    Returns:
        0 after printing the space-separated ids to stdout, or 1 when the
        argument count is wrong (the usage line is written to stderr so it
        cannot be mistaken for an id sequence).
    """
    if len(argv) != 3:
        program = argv[0] if argv else sys.argv[0]
        print("usage: python3 {} mapper.json text(hanzi)".format(program),
              file=sys.stderr)
        return 1

    # Imported only once the arguments are known to be usable, so the usage
    # path does not require tensorflow_tts.
    from tensorflow_tts.inference import AutoProcessor

    mapper_json, input_text = argv[1], argv[2]
    processor = AutoProcessor.from_pretrained(pretrained_path=mapper_json)
    input_ids = processor.text_to_sequence(input_text, inference=True)
    print(" ".join(str(i) for i in input_ids))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
示例#16
0
"""

# Tacotron2 text-to-mel model: config from the TensorFlowTTS examples tree,
# checkpoint from the working directory.
tacotron2_config = AutoConfig.from_pretrained(
    'TensorFlowTTS/examples/tacotron2/conf/tacotron2.baker.v1.yaml')
tacotron2 = TFAutoModel.from_pretrained(config=tacotron2_config,
                                        pretrained_path="tacotron2-100k.h5",
                                        training=False,
                                        name="tacotron2")
# MB-MelGAN vocoder: same layout, config plus checkpoint.
mb_melgan_config = AutoConfig.from_pretrained(
    'TensorFlowTTS/examples/multiband_melgan/conf/multiband_melgan.baker.v1.yaml'
)
mb_melgan = TFAutoModel.from_pretrained(config=mb_melgan_config,
                                        pretrained_path="mb.melgan-920k.h5",
                                        name="mb_melgan")

# Module-level text processor; do_synthesis() below reads it as a global to
# turn input text into an id sequence.
processor = AutoProcessor.from_pretrained(
    pretrained_path="./baker_mapper.json")


def do_synthesis(input_text, text2mel_model, vocoder_model, text2mel_name,
                 vocoder_name):
    input_ids = processor.text_to_sequence(input_text, inference=True)

    # text2mel part
    if text2mel_name == "TACOTRON":
        _, mel_outputs, stop_token_prediction, alignment_history = text2mel_model.inference(
            tf.expand_dims(tf.convert_to_tensor(input_ids, dtype=tf.int32), 0),
            tf.convert_to_tensor([len(input_ids)], tf.int32),
            tf.convert_to_tensor([0], dtype=tf.int32))
    elif text2mel_name == "FASTSPEECH2":
        mel_before, mel_outputs, duration_outputs, _, _ = text2mel_model.inference(
            tf.expand_dims(tf.convert_to_tensor(input_ids, dtype=tf.int32), 0),
示例#17
0
def test_auto_processor(mapper_path):
    """Check that a processor can be built from ``mapper_path`` without raising.

    Args:
        mapper_path: Path handed to ``AutoProcessor.from_pretrained`` as
            ``pretrained_path``.
    """
    AutoProcessor.from_pretrained(pretrained_path=mapper_path)
示例#18
0
    name="fastspeech2"
)

# MB-MelGAN vocoder: built from the YAML config and loaded with the checkpoint
# named below. Both paths are relative to the current working directory.
mb_melgan_config = AutoConfig.from_pretrained('examples/multiband_melgan/conf/multiband_melgan.baker.v1.yaml')
mb_melgan = TFAutoModel.from_pretrained(
    config=mb_melgan_config,
    # Alternative checkpoints tried earlier:
    # pretrained_path="trained/mb.melgan.char-800k.h5",  # "trained/mb.melgan-1M.h5"
    pretrained_path="trained/mb.melgan_word_428k.h5",  # "trained/mb.melgan-1M.h5"
    # is_build=False,  # don't build model if you want to save it to pb. (TF related bug)
    name="mb_melgan"
)

# Module-level text processor, read as a global by save_tacotron2_pb() below.
# The word-level mapper is active; the character-level one is disabled.
# processor = AutoProcessor.from_pretrained("trained/baker_mapper_char.json")
processor = AutoProcessor.from_pretrained("trained/baker_mapper_word.json")


# save tacotron2 to pb
def save_tacotron2_pb():
    """Run one Tacotron2 inference call, then load the trained checkpoint.

    Uses the module-level ``processor`` and ``tacotron2``. A fixed sample
    sentence is converted to ids and pushed through ``tacotron2.inference``
    for speaker id 0; the outputs are not used. The checkpoint is loaded
    afterwards.

    NOTE(review): despite the name, no export step appears in the visible
    body — confirm whether the function continues elsewhere.
    """
    sample_ids = processor.text_to_sequence("i love you so much.")

    tacotron2.setup_window(win_front=3, win_back=3)
    tacotron2.setup_maximum_iterations(3000)

    # A batch of one: ids, their length, and speaker id 0.
    id_batch = tf.expand_dims(tf.convert_to_tensor(sample_ids, dtype=tf.int32), 0)
    id_lengths = tf.convert_to_tensor([len(sample_ids)], tf.int32)
    speaker = tf.convert_to_tensor([0], dtype=tf.int32)
    _decoder_output, _mel_outputs, _stop_tokens, _alignments = tacotron2.inference(
        input_ids=id_batch,
        input_lengths=id_lengths,
        speaker_ids=speaker,
    )

    tacotron2.load_weights("examples/tacotron2/exp/baker.mix.ali/checkpoints/model-22000.h5")
示例#19
0
def test_auto_processor(mapper_path):
    """Round-trip a processor: load it, save it, and load the saved copy.

    Args:
        mapper_path: Path handed to ``AutoProcessor.from_pretrained`` as
            ``pretrained_path`` for the initial load.

    The processor is saved to ``./test_saved`` and then loaded again from
    ``./test_saved/processor.json``. Passing means no call raised.
    """
    save_dir = "./test_saved"
    original = AutoProcessor.from_pretrained(pretrained_path=mapper_path)
    original.save_pretrained(save_dir)
    AutoProcessor.from_pretrained("./test_saved/processor.json")