Exemplo n.º 1
0
def synthesize(args, hparams, taco_checkpoint, wave_checkpoint, sentences):
	"""Run the end-to-end evaluation: Tacotron synthesis followed by WaveNet synthesis.

	Args:
		args: parsed command-line namespace; `name` and `model` are read for the banner.
		hparams: hyperparameters forwarded to both synthesis stages.
		taco_checkpoint: checkpoint forwarded to `tacotron_synthesize`.
		wave_checkpoint: checkpoint forwarded to `wavenet_synthesize`.
		sentences: input texts forwarded to `tacotron_synthesize`.
	"""
	# Prefer the run name for the banner, fall back to the model name.
	run_label = args.name or args.model
	banner = 'Running End-to-End TTS Evaluation. Model: {}'.format(run_label)
	log(banner)

	# Stage 1: text to mel-spectrograms.
	log('Synthesizing mel-spectrograms from text..')
	# The returned value is bound but never read below; the second stage only receives `args`.
	tacotron_output = tacotron_synthesize(args, hparams, taco_checkpoint, sentences)

	# Stage 2: mel-spectrograms to audio.
	log('Synthesizing audio from mel-spectrograms.. (This may take a while)')
	wavenet_synthesize(args, hparams, wave_checkpoint)

	log('Tacotron-2 TTS synthesis complete!')
Exemplo n.º 2
0
def synthesize(args, hparams, gst_checkpoint, wave_checkpoint, sentences, reference_mel):
	"""Run the end-to-end evaluation: GST synthesis followed by WaveNet synthesis.

	Args:
		args: parsed command-line namespace; `name` is read for the banner.
		hparams: hyperparameters forwarded to `wavenet_synthesize` only.
		gst_checkpoint: checkpoint forwarded to `gst_synthesize`.
		wave_checkpoint: checkpoint forwarded to `wavenet_synthesize`.
		sentences: input texts forwarded to `gst_synthesize`.
		reference_mel: reference forwarded to `gst_synthesize`.
	"""
	banner = 'Running End-to-End TTS Evaluation. Model: {}'.format(args.name)
	log(banner)

	# Stage 1: text to mel-spectrograms (note: hparams is not passed to this stage).
	log('Synthesizing mel-spectrograms from text..')
	# The returned value is bound but never read below; the second stage only receives `args`.
	gst_output = gst_synthesize(args, gst_checkpoint, sentences, reference_mel)

	# Stage 2: mel-spectrograms to audio.
	log('Synthesizing audio from mel-spectrograms.. (This may take a while)')
	wavenet_synthesize(args, hparams, wave_checkpoint)

	log('Tacotron-2 TTS synthesis complete!')
Exemplo n.º 3
0
def synthesize(args, hparams, taco_checkpoint, wave_checkpoint, sentences):
	"""Run the end-to-end evaluation: Tacotron synthesis, graph reset, WaveNet synthesis.

	Args:
		args: parsed command-line namespace; `name` and `model` are read for the banner.
		hparams: hyperparameters forwarded to both synthesis stages.
		taco_checkpoint: checkpoint forwarded to `tacotron_synthesize`.
		wave_checkpoint: checkpoint forwarded to `wavenet_synthesize`.
		sentences: input texts forwarded to `tacotron_synthesize`.
	"""
	# Prefer the run name for the banner, fall back to the model name.
	run_label = args.name or args.model
	banner = 'Running End-to-End TTS Evaluation. Model: {}'.format(run_label)
	log(banner)

	# Stage 1: text to mel-spectrograms.
	log('Synthesizing mel-spectrograms from text..')
	# The returned value is bound but never read below; the second stage only receives `args`.
	tacotron_output = tacotron_synthesize(args, hparams, taco_checkpoint, sentences)

	# Drop the Tacotron model from the default graph before the vocoder runs.
	tf.reset_default_graph()

	# Stage 2: mel-spectrograms to audio.
	log('Synthesizing audio from mel-spectrograms.. (This may take a while)')
	wavenet_synthesize(args, hparams, wave_checkpoint)

	log('Tacotron-2 TTS synthesis complete!')
Exemplo n.º 4
0
def main():
    """Parse command-line options, validate them and run the requested synthesis.

    Dispatches on ``--model``: 'Tacotron' runs `tacotron_synthesize`, 'WaveNet'
    runs `wavenet_synthesize`, and 'Tacotron-2' runs the combined `synthesize`.

    Raises:
        ValueError: if ``--model``, ``--mode`` or ``--GTA`` has an unsupported
            value, or if 'Tacotron-2' is requested in 'synthesis' mode.
        RuntimeError: if 'live' mode is requested for the WaveNet vocoder.
    """
    accepted_modes = ['eval', 'synthesis', 'live']
    parser = argparse.ArgumentParser()
    parser.add_argument('--checkpoint', default='pretrained/', help='Path to model checkpoint')
    parser.add_argument('--hparams', default='',
                        help='Hyperparameter overrides as a comma-separated list of name=value pairs')
    parser.add_argument('--name', default='tacotron2_female_golden_v2_female_v2_first_run_all_checkpoint',
                        help='Name of logging directory if the two models were trained together.')
    parser.add_argument('--tacotron_name', help='Name of logging directory of Tacotron. If trained separately')
    parser.add_argument('--wavenet_name', help='Name of logging directory of WaveNet. If trained separately')
    parser.add_argument('--model', default='Tacotron')
    parser.add_argument('--input_dir', default='training_data/', help='folder to contain inputs sentences/targets')
    parser.add_argument('--mels_dir', default='tacotron_output/eval/',
                        help='folder to contain mels to synthesize audio from using the Wavenet')
    parser.add_argument('--output_dir', default='output/', help='folder to contain synthesized mel spectrograms')
    parser.add_argument('--mode', default='eval', help='mode of run: can be one of {}'.format(accepted_modes))
    parser.add_argument('--GTA', default='True',
                        help='Ground truth aligned synthesis, defaults to True, only considered in synthesis mode')
    parser.add_argument('--text_list', default='pinyin.corpus',
                        help='Text file contains list of texts to be synthesized. Valid if mode=eval')
    parser.add_argument('--speaker_id', default=None,
                        help='Defines the speakers ids to use when running standalone Wavenet on a folder of mels. this variable must be a comma-separated list of ids')
    args = parser.parse_args()

    accepted_models = ['Tacotron', 'WaveNet', 'Tacotron-2']

    if args.model not in accepted_models:
        raise ValueError('please enter a valid model to synthesize with: {}'.format(accepted_models))

    if args.mode not in accepted_modes:
        raise ValueError('accepted modes are: {}, found {}'.format(accepted_modes, args.mode))

    # The model name must be spelled exactly as in accepted_models ('WaveNet').
    # Comparing against 'Wavenet' could never match after the membership check
    # above, which left this guard unreachable.
    if args.mode == 'live' and args.model == 'WaveNet':
        raise RuntimeError(
            'Wavenet vocoder cannot be tested live due to its slow generation. Live only works with Tacotron!')

    # GTA is kept as the strings 'True'/'False', not converted to a bool here.
    if args.GTA not in ('True', 'False'):
        raise ValueError('GTA option must be either True or False')

    if args.model == 'Tacotron-2':
        if args.mode == 'live':
            warn('Requested a live evaluation with Tacotron-2, Wavenet will not be used!')
        if args.mode == 'synthesis':
            raise ValueError(
                'I don\'t recommend running WaveNet on entire dataset.. The world might end before the synthesis :) (only eval allowed)')

    taco_checkpoint, wave_checkpoint, hparams = prepare_run(args)
    sentences = get_sentences(args)

    if args.model == 'Tacotron':
        _ = tacotron_synthesize(args, hparams, taco_checkpoint, sentences)
    elif args.model == 'WaveNet':
        wavenet_synthesize(args, hparams, wave_checkpoint)
    elif args.model == 'Tacotron-2':
        synthesize(args, hparams, taco_checkpoint, wave_checkpoint, sentences)
    else:
        # Defensive: not reachable while the membership check above is in place.
        raise ValueError('Model provided {} unknown! {}'.format(args.model, accepted_models))
Exemplo n.º 5
0
def synthesize(args, hparams, taco_checkpoint, wave_checkpoint, sentences):
	"""Run the end-to-end evaluation: Tacotron synthesis, graph reset, WaveNet synthesis.

	Args:
		args: parsed command-line namespace; `name` and `model` are read for the banner.
		hparams: hyperparameters forwarded to both synthesis stages.
		taco_checkpoint: checkpoint forwarded to `tacotron_synthesize`.
		wave_checkpoint: checkpoint forwarded to `wavenet_synthesize`.
		sentences: input texts forwarded to `tacotron_synthesize`.
	"""
	# Prefer the run name for the banner, fall back to the model name.
	run_label = args.name or args.model
	banner = 'Running End-to-End TTS Evaluation. Model: {}'.format(run_label)
	log(banner)

	# Stage 1: text to mel-spectrograms.
	log('Synthesizing mel-spectrograms from text..')
	# The returned value is bound but never read below; the second stage only receives `args`.
	tacotron_output = tacotron_synthesize(args, hparams, taco_checkpoint, sentences)

	# Drop the Tacotron model from the default graph, then pause half a second so the
	# previous graph can close and does not emit errors while WaveNet is synthesizing.
	tf.reset_default_graph()
	sleep(0.5)

	# Stage 2: mel-spectrograms to audio.
	log('Synthesizing audio from mel-spectrograms.. (This may take a while)')
	wavenet_synthesize(args, hparams, wave_checkpoint)

	log('Tacotron-2 TTS synthesis complete!')
Exemplo n.º 6
0
def main():
	"""Parse command-line options, validate them and run the requested synthesis.

	Dispatches on ``--model``: 'Tacotron' runs `tacotron_synthesize`, 'WaveNet'
	runs `wavenet_synthesize`, and 'Both' / 'Tacotron-2' run the combined `synthesize`.

	Raises:
		ValueError: if ``--model``, ``--mode`` or ``--GTA`` has an unsupported
			value, or if 'Both' / 'Tacotron-2' is requested in 'synthesis' mode.
		RuntimeError: if 'live' mode is requested for the WaveNet vocoder.
	"""
	accepted_modes = ['eval', 'synthesis', 'live']
	parser = argparse.ArgumentParser()
	parser.add_argument('--checkpoint', default='pretrained/', help='Path to model checkpoint')
	parser.add_argument('--hparams', default='',
		help='Hyperparameter overrides as a comma-separated list of name=value pairs')
	parser.add_argument('--name', help='Name of logging directory if the two models were trained together.')
	parser.add_argument('--tacotron_name', help='Name of logging directory of Tacotron. If trained separately')
	parser.add_argument('--wavenet_name', help='Name of logging directory of WaveNet. If trained separately')
	parser.add_argument('--model', default='Tacotron-2')
	parser.add_argument('--input_dir', default='training_data/', help='folder to contain inputs sentences/targets')
	parser.add_argument('--mels_dir', default='tacotron_output/eval/', help='folder to contain mels to synthesize audio from using the Wavenet')
	parser.add_argument('--output_dir', default='output/', help='folder to contain synthesized mel spectrograms')
	parser.add_argument('--mode', default='eval', help='mode of run: can be one of {}'.format(accepted_modes))
	parser.add_argument('--GTA', default='True', help='Ground truth aligned synthesis, defaults to True, only considered in synthesis mode')
	parser.add_argument('--text_list', default='', help='Text file contains list of texts to be synthesized. Valid if mode=eval')
	args = parser.parse_args()

	accepted_models = ['Tacotron', 'WaveNet', 'Both', 'Tacotron-2']

	if args.model not in accepted_models:
		raise ValueError('please enter a valid model to synthesize with: {}'.format(accepted_models))

	if args.mode not in accepted_modes:
		raise ValueError('accepted modes are: {}, found {}'.format(accepted_modes, args.mode))

	# The model name must be spelled exactly as in accepted_models ('WaveNet').
	# Comparing against 'Wavenet' could never match after the membership check
	# above, which left this guard unreachable.
	if args.mode == 'live' and args.model == 'WaveNet':
		raise RuntimeError('Wavenet vocoder cannot be tested live due to its slow generation. Live only works with Tacotron!')

	# GTA is kept as the strings 'True'/'False', not converted to a bool here.
	if args.GTA not in ('True', 'False'):
		raise ValueError('GTA option must be either True or False')

	if args.model in ('Both', 'Tacotron-2'):
		if args.mode == 'live':
			warn('Requested a live evaluation with Tacotron-2, Wavenet will not be used!')
		if args.mode == 'synthesis':
			raise ValueError('I don\'t recommend running WaveNet on entire dataset.. The world might end before the synthesis :) (only eval allowed)')

	taco_checkpoint, wave_checkpoint, hparams = prepare_run(args)
	sentences = get_sentences(args)

	if args.model == 'Tacotron':
		_ = tacotron_synthesize(args, hparams, taco_checkpoint, sentences)
	elif args.model == 'WaveNet':
		wavenet_synthesize(args, hparams, wave_checkpoint)
	elif args.model in ('Both', 'Tacotron-2'):
		synthesize(args, hparams, taco_checkpoint, wave_checkpoint, sentences)
	else:
		# Defensive: not reachable while the membership check above is in place.
		raise ValueError('Model provided {} unknown! {}'.format(args.model, accepted_models))
Exemplo n.º 7
0
def synthesize(args, hparams, taco_checkpoint, wave_checkpoint, sentences):
	"""Run the end-to-end evaluation and log how long each stage took.

	Runs `tacotron_synthesize`, resets the default TensorFlow graph, runs
	`wavenet_synthesize`, then logs the wall-clock seconds spent in each stage
	together with the total number of characters in `sentences`.

	Args:
		args: parsed command-line namespace; `name` and `model` are read for the banner.
		hparams: hyperparameters forwarded to both synthesis stages.
		taco_checkpoint: checkpoint forwarded to `tacotron_synthesize`.
		wave_checkpoint: checkpoint forwarded to `wavenet_synthesize`.
		sentences: input texts; each element must support `len()`.
	"""
	log('Running End-to-End TTS Evaluation. Model: {}'.format(args.name or args.model))
	log('Synthesizing mel-spectrograms from text..')

	# Stage 1 (timed): text to mel-spectrograms. The return value is not needed here.
	start = time()
	tacotron_synthesize(args, hparams, taco_checkpoint, sentences)
	text_to_mel_sec = time() - start

	#Delete Tacotron model from graph
	tf.reset_default_graph()
	#Sleep 1/2 second to let previous graph close and avoid error messages while Wavenet is synthesizing
	sleep(0.5)
	log('Synthesizing audio from mel-spectrograms.. (This may take a while)')

	# Stage 2 (timed): mel-spectrograms to audio.
	start = time()
	wavenet_synthesize(args, hparams, wave_checkpoint)
	mel_to_wav_sec = time() - start

	texts_len = sum(len(sent) for sent in sentences)

	log('Tacotron-2 TTS synthesis complete!')

	# Emit the statistics as ONE message, like every other log call in this function.
	# Passing the three parts as separate positional arguments would hand the 2nd and
	# 3rd strings to whatever extra parameters `log` declares.
	# NOTE(review): assumes `log` takes the message as its first argument - confirm.
	log('{} text_to_mel seconds, {} mel_to_wav seconds, {} characters'.format(
		text_to_mel_sec, mel_to_wav_sec, texts_len))
Exemplo n.º 8
0
	if args.model == 'Tacotron-2':
		if args.mode == 'live':
			warn('Requested a live evaluation with Tacotron-2, Wavenet will not be used!')
		if args.mode == 'synthesis':
			raise ValueError('I don\'t recommend running WaveNet on entire dataset.. The world might end before the synthesis :) (only eval allowed)')

	taco_checkpoint, wave_checkpoint, hparams = prepare_run(args)
	sentences = get_sentences(args, hparams.chinese_dict)

	#preprocess args.speaker_id
	if args.speaker_id is not None:
		args.speaker_id = args.speaker_id.split(',')
		if not len(args.speaker_id) == len(sentences):
			args.speaker_id = [args.speaker_id[0]] * len(sentences)
		args.speaker_id = ','.join(args.speaker_id)



	if args.model == 'Tacotron':
		_ = tacotron_synthesize(args, hparams, taco_checkpoint, sentences)
	elif args.model == 'WaveNet':
		wavenet_synthesize(args, hparams, wave_checkpoint)
	elif args.model == 'Tacotron-2':
		synthesize(args, hparams, taco_checkpoint, wave_checkpoint, sentences)
	else:
		raise ValueError('Model provided {} unknown! {}'.format(args.model, accepted_models))


# Run the command-line entry point only when this file is executed as a script.
if __name__ == '__main__':
	main()
Exemplo n.º 9
0
def main():
    """Parse command-line options, validate them and run the requested synthesis.

    Dispatches on ``--model``: 'Tacotron' runs `tacotron_synthesize`, 'WaveNet'
    runs `wavenet_synthesize`, 'Tacotron-2' runs the combined `synthesize`, and
    'Inference' runs `inference`.

    Raises:
        ValueError: if ``--model``, ``--mode`` or ``--GTA`` has an unsupported
            value, or if 'Tacotron-2' is requested in 'synthesis' mode.
        RuntimeError: if 'live' mode is requested for the WaveNet vocoder, or
            if ``--reference_mel`` is given but is not an existing file.
    """
    accepted_modes = ['eval', 'synthesis', 'live']
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--taco_checkpoint',
        default=
        '/groups/ming/tacotron2/Blizzard-2012/logs-confidence=30/taco_pretrained/tacotron_model.ckpt-40000',
        help='Path to model checkpoint')
    parser.add_argument(
        '--wave_checkpoint',
        default=
        '/groups/ming/tacotron2/Blizzard-2012/logs-Wavenet/taco_pretrained/',
        help='Path to model checkpoint')
    parser.add_argument(
        '--hparams',
        default='',
        help=
        'Hyperparameter overrides as a comma-separated list of name=value pairs'
    )
    parser.add_argument('--model', default='Tacotron-2')
    parser.add_argument('--input_dir',
                        default='/groups/ming/tacotron2/Blizzard-2012/data/',
                        help='folder to contain inputs sentences/targets')
    parser.add_argument(
        '--mels_dir',
        default='/groups/ming/tacotron2/Blizzard-2012/tacotron_output/eval/',
        help='folder to contain mels to synthesize audio from using the Wavenet'
    )
    parser.add_argument(
        '--output_dir',
        default='/groups/ming/tacotron2/Blizzard-2012/tacotron_output/',
        help='folder to contain synthesized mel spectrograms')
    parser.add_argument(
        '--mode',
        default='eval',
        help='mode of run: can be one of {}'.format(accepted_modes))
    parser.add_argument(
        '--GTA',
        default='True',
        help=
        'Ground truth aligned synthesis, defaults to True, only considered in synthesis mode'
    )
    parser.add_argument(
        '--modify_vae_dim',
        default=None,
        help=
        'The model will synthesize spectrogram with the specified dimensions of the VAE code modified. This variable must be a comma-separated list of dimensions. If None, synthesis will be based on the code generated by the VAE encoder without modification. The modification will be based on the mean and variance generated by the VAE encoder, while if in eval mode and reference_mel is not specified, the mean and variance of a unit Gaussian distribution will be considered for the modification. Considered only when hparams.use_vae=True and GTA=False.'
    )
    parser.add_argument(
        '--reference_mel',
        default=None,
        help=
        'The mel spectrogram file to be referenced. Valid if hparams.use_vae=True and GTA=False'
    )
    parser.add_argument(
        '--text_list',
        default='',
        help=
        'Text file contains list of texts to be synthesized. Valid if mode=eval'
    )
    parser.add_argument(
        '--speaker_id',
        default=None,
        help=
        'Defines the speakers ids to use when running standalone Wavenet on a folder of mels. this variable must be a comma-separated list of ids'
    )
    args = parser.parse_args()

    accepted_models = ['Tacotron', 'WaveNet', 'Tacotron-2', 'Inference']

    if args.model not in accepted_models:
        raise ValueError(
            'please enter a valid model to synthesize with: {}'.format(
                accepted_models))

    if args.mode not in accepted_modes:
        raise ValueError('accepted modes are: {}, found {}'.format(
            accepted_modes, args.mode))

    # The model name must be spelled exactly as in accepted_models ('WaveNet').
    # Comparing against 'Wavenet' could never match after the membership check
    # above, which left this guard unreachable.
    if args.mode == 'live' and args.model == 'WaveNet':
        raise RuntimeError(
            'Wavenet vocoder cannot be tested live due to its slow generation. Live only works with Tacotron!'
        )

    # GTA is kept as the strings 'True'/'False', not converted to a bool here.
    if args.GTA not in ('True', 'False'):
        raise ValueError('GTA option must be either True or False')

    if args.model == 'Tacotron-2':
        if args.mode == 'live':
            warn(
                'Requested a live evaluation with Tacotron-2, Wavenet will not be used!'
            )
        if args.mode == 'synthesis':
            raise ValueError(
                'I don\'t recommend running WaveNet on entire dataset.. The world might end before the synthesis :) (only eval allowed)'
            )

    # Fail early on a missing reference file, before any checkpoint is prepared.
    if args.reference_mel is not None and not os.path.isfile(
            args.reference_mel):
        raise RuntimeError(
            'The reference mel-spectrogram file doesn\'t exist.')

    taco_checkpoint, wave_checkpoint, hparams = prepare_run(args)
    sentences = get_sentences(args)

    if args.model == 'Tacotron':
        _ = tacotron_synthesize(args, hparams, taco_checkpoint, sentences)
    elif args.model == 'WaveNet':
        wavenet_synthesize(args, hparams, wave_checkpoint)
    elif args.model == 'Tacotron-2':
        synthesize(args, hparams, taco_checkpoint, wave_checkpoint, sentences)
    elif args.model == 'Inference':
        inference(args, hparams, taco_checkpoint)
    else:
        # Defensive: not reachable while the membership check above is in place.
        raise ValueError('Model provided {} unknown! {}'.format(
            args.model, accepted_models))