def main():
    """Parse command-line options and run Tacotron synthesis.

    Raises:
        ValueError: if ``--mode`` is not one of the accepted modes, or if
            ``--GTA`` is not the literal string ``'True'`` or ``'False'``.
    """
    accepted_modes = ['eval', 'synthesis', 'live']

    # (flag, keyword arguments) pairs; registered with the parser in this order.
    option_specs = [
        ('--checkpoint', dict(default='pretrained/', help='Path to model checkpoint')),
        ('--hparams', dict(default='', help='Hyperparameter overrides as a comma-separated list of name=value pairs')),
        ('--name', dict(help='Name of logging directory if the two models were trained together.')),
        ('--tacotron_name', dict(help='Name of logging directory of Tacotron. If trained separately')),
        ('--wavenet_name', dict(help='Name of logging directory of WaveNet. If trained separately')),
        ('--model', dict(default='Tacotron')),
        ('--input_dir', dict(default='training_data/', help='folder to contain inputs sentences/targets')),
        ('--mels_dir', dict(default='tacotron_output/eval/', help='folder to contain mels to synthesize audio from using the Wavenet')),
        ('--output_dir', dict(default='output/', help='folder to contain synthesized mel spectrograms')),
        ('--mode', dict(default='eval', help='mode of run: can be one of {}'.format(accepted_modes))),
        ('--GTA', dict(default='True', help='Ground truth aligned synthesis, defaults to True, only considered in synthesis mode')),
        ('--text_list', dict(default='synthesis_text_BZNSYP.txt', help='Text file contains list of texts to be synthesized. Valid if mode=eval')),
        ('--speaker_id', dict(default=None, help='Defines the speakers ids to use when running standalone Wavenet on a folder of mels. this variable must be a comma-separated list of ids')),
    ]

    parser = argparse.ArgumentParser()
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
    args = parser.parse_args()

    mode_is_valid = args.mode in accepted_modes
    if not mode_is_valid:
        raise ValueError('accepted modes are: {}, found {}'.format(accepted_modes, args.mode))

    # GTA is kept as a string flag, so only these two spellings are accepted.
    gta_is_valid = args.GTA in ('True', 'False')
    if not gta_is_valid:
        raise ValueError('GTA option must be either True or False')

    # The WaveNet checkpoint is returned by prepare_run but not used here.
    taco_checkpoint, _wave_checkpoint, hparams = prepare_run(args)
    sentences, speaker_id_lists = get_sentences_and_speakerIdLists(args)

    tacotron_synthesize(args, hparams, taco_checkpoint, sentences, speaker_id_lists)
Exemplo n.º 2
0
def train(args, log_dir, hparams):
    """Run the Tacotron -> GTA synthesis -> WaveRNN training sequence.

    Three progress flags are read from ``<log_dir>/state_log`` via
    ``read_seq`` and written back with ``save_seq`` after each stage, so a
    stage whose flag is already set is skipped.

    Args:
        args: run options; ``args.mode`` is overwritten during the GTA stage
            (left as ``'eval'`` afterwards).
        log_dir: directory holding the state file and ``taco_pretrained``.
        hparams: hyperparameters forwarded to every stage.

    Raises:
        RuntimeError: if ``tacotron_train`` returns ``None``.
    """
    state_file = os.path.join(log_dir, 'state_log')
    (taco_state, GTA_state, wave_state) = read_seq(state_file)

    if not taco_state:
        log('\n#############################################################\n'
            )
        log('Tacotron Train\n')
        log('###########################################################\n')
        checkpoint = tacotron_train(args, log_dir, hparams)

        tf.reset_default_graph()

        # Sleep 1/2 second to let previous graph close and avoid error messages while synthesis
        sleep(0.5)

        if checkpoint is None:
            # A bare string is not raisable (it would surface as a TypeError
            # and hide this message), so raise a real exception.
            raise RuntimeError(
                'Error occured while training Tacotron, Exiting!')

        taco_state = 1
        save_seq(state_file, [taco_state, GTA_state, wave_state])
    else:
        # Tacotron stage already done: reuse the checkpoint directory.
        checkpoint = os.path.join(log_dir, 'taco_pretrained')

    if not GTA_state:
        log('\n#############################################################\n'
            )
        log('Tacotron GTA Synthesis\n')
        log('###########################################################\n')
        # First pass in 'synthesis' mode, second pass in 'eval' mode.
        args.mode = 'synthesis'
        tacotron_synthesize(args, hparams, checkpoint)

        args.mode = 'eval'
        tacotron_synthesize(args, hparams, checkpoint, get_sentences(args))

        tf.reset_default_graph()

        # Sleep 1/2 second to let previous graph close and avoid error messages while Wavenet is training
        sleep(0.5)

        GTA_state = 1
        save_seq(state_file, [taco_state, GTA_state, wave_state])

    if not wave_state:
        log('\n#############################################################\n'
            )
        log('WaveRNN Train\n')
        log('###########################################################\n')
        wavernn_preprocess(args, hparams)

        wavernn_train(args, log_dir, hparams)

        wave_state = 1
        save_seq(state_file, [taco_state, GTA_state, wave_state])

    if wave_state and GTA_state and taco_state:
        log('TRAINING IS ALREADY COMPLETE!!')
Exemplo n.º 3
0
def main():
    """Parse command-line options and dispatch to the requested synthesizer.

    Only ``--model Tacotron`` actually runs; ``Wavenet`` is accepted by the
    validation but raises ``NotImplementedError``.

    Raises:
        ValueError: if ``--model`` or ``--mode`` is not an accepted value.
        NotImplementedError: if ``--model Wavenet`` is requested.
    """
    accepted_modes = ['eval', 'synthesis']
    parser = argparse.ArgumentParser()
    parser.add_argument('--checkpoint',
                        default='logs-Tacotron/pretrained/',
                        help='Path to model checkpoint')
    parser.add_argument(
        '--hparams',
        default='',
        help=
        'Hyperparameter overrides as a comma-separated list of name=value pairs'
    )
    parser.add_argument('--reference_audio', required=True)
    parser.add_argument('--model', default='Tacotron')
    parser.add_argument('--input_dir',
                        default='training_data/',
                        help='folder to contain inputs sentences/targets')
    parser.add_argument('--output_dir',
                        default='output/',
                        help='folder to contain synthesized mel spectrograms')
    parser.add_argument(
        '--mode',
        default='synthesis',
        help='mode of run: can be one of {}'.format(accepted_modes))
    # The help text previously claimed "defaults to True" although the
    # default is False. NOTE(review): no type/action is set, so any value
    # given on the command line arrives as a str; only the default is a bool.
    parser.add_argument(
        '--GTA',
        default=False,
        help=
        'Ground truth aligned synthesis, defaults to False, only considered in synthesis mode'
    )
    parser.add_argument(
        '--text_list',
        default='',
        help=
        'Text file contains list of texts to be synthesized. Valid if mode=eval'
    )
    args = parser.parse_args()

    accepted_models = ['Tacotron', 'Wavenet']

    if args.model not in accepted_models:
        raise ValueError(
            'please enter a valid model to train: {}'.format(accepted_models))

    if args.mode not in accepted_modes:
        raise ValueError('accepted modes are: {}, found {}'.format(
            accepted_modes, args.mode))

    if args.model == 'Tacotron':
        tacotron_synthesize(args)
    elif args.model == 'Wavenet':
        raise NotImplementedError(
            'Wavenet is still a work in progress, thank you for your patience!'
        )
Exemplo n.º 4
0
def synthesize(args, hparams, taco_checkpoint, wave_checkpoint, sentences):
    """Run Tacotron on ``sentences`` and then WaveRNN synthesis.

    The default TensorFlow graph is reset between the two stages.
    """
    headline = 'Running End-to-End TTS Evaluation. Model: {}'.format(args.model)
    log(headline)

    # Stage 1: Tacotron.
    log('Synthesizing mel-spectrograms from text..')
    tacotron_synthesize(args, hparams, taco_checkpoint, sentences)

    # Drop the Tacotron graph, then wait half a second so the previous graph
    # can close before the vocoder starts (avoids error messages).
    tf.reset_default_graph()
    sleep(0.5)

    # Stage 2: WaveRNN.
    log('Synthesizing audio from mel-spectrograms.. (This may take a while)')
    wavernn_synthesize(args, hparams, wave_checkpoint)

    log('Tacotron-2 TTS synthesis complete!')
Exemplo n.º 5
0
def synthesize(args, hparams, taco_checkpoint, wave_checkpoint, sentences):
    """Run Tacotron on ``sentences`` and then WaveNet synthesis."""
    # Prefer the run name for the headline; fall back to the model name.
    run_label = args.name or args.model
    log('Running End-to-End TTS Evaluation. Model: {}'.format(run_label))

    log('Synthesizing mel-spectrograms from text..')
    # The value returned by tacotron_synthesize is not used by this function.
    tacotron_synthesize(args, hparams, taco_checkpoint, sentences)

    log('Synthesizing audio from mel-spectrograms.. (This may take a while)')
    wavenet_synthesize(args, hparams, wave_checkpoint)

    log('Tacotron-2 TTS synthesis complete!')
Exemplo n.º 6
0
def synthesize(args, hparams, taco_checkpoint, wave_checkpoint, sentences):
    """Chain the two synthesis stages: Tacotron first, WaveNet second."""
    shown_model = args.name or args.model  # run name wins over model name
    log('Running End-to-End TTS Evaluation. Model: {}'.format(shown_model))

    # Stage 1 (its return value is not needed here).
    log('Synthesizing mel-spectrograms from text..')
    tacotron_synthesize(args, hparams, taco_checkpoint, sentences)

    # Stage 2.
    log('Synthesizing audio from mel-spectrograms.. (This may take a while)')
    wavenet_synthesize(args, hparams, wave_checkpoint)

    log('Tacotron-2 TTS synthesis complete!')
Exemplo n.º 7
0
def train(args, log_dir, hparams):
    """Run the Tacotron -> GTA synthesis -> Wavenet training sequence.

    Progress flags and the GTA ``input_path`` are read from
    ``<log_dir>/state_log`` via ``read_seq`` and saved with ``save_seq``
    after each stage, so stages whose flag is already set are skipped.

    Args:
        args: run options; ``args.output_dir`` is used to rebuild the GTA map
            path when the GTA stage is skipped.
        log_dir: directory holding the state file and ``taco_pretrained/``.
        hparams: hyperparameters forwarded to every stage.

    Raises:
        RuntimeError: if Tacotron or Wavenet training returns ``None``, or if
            ``input_path`` ends up empty or ``None``.
    """
    state_file = os.path.join(log_dir, 'state_log')
    # Get training states
    (taco_state, GTA_state, wave_state), input_path = read_seq(state_file)

    if not taco_state:
        logging.debug(
            '\n#############################################################\n'
        )
        logging.debug('Tacotron Train\n')
        logging.debug(
            '###########################################################\n')
        checkpoint = tacotron_train(args, log_dir, hparams)
        tf.reset_default_graph()
        # Sleep 1/2 second to let previous graph close and avoid error messages while synthesis
        sleep(0.5)
        if checkpoint is None:
            # A bare string cannot be raised (it would become a TypeError
            # that hides this message); use a real exception type.
            raise RuntimeError(
                'Error occured while training Tacotron, Exiting!')
        taco_state = 1
        save_seq(state_file, [taco_state, GTA_state, wave_state], input_path)
    else:
        checkpoint = os.path.join(log_dir, 'taco_pretrained/')

    if not GTA_state:
        logging.debug(
            '\n#############################################################\n'
        )
        logging.debug('Tacotron GTA Synthesis\n')
        logging.debug(
            '###########################################################\n')
        input_path = tacotron_synthesize(args, hparams, checkpoint)
        tf.reset_default_graph()
        # Sleep 1/2 second to let previous graph close and avoid error messages while Wavenet is training
        sleep(0.5)
        GTA_state = 1
        save_seq(state_file, [taco_state, GTA_state, wave_state], input_path)
    else:
        # GTA stage skipped: rebuild the map path from the output directory.
        input_path = os.path.join('tacotron_' + args.output_dir, 'gta',
                                  'map.txt')

    if input_path == '' or input_path is None:
        raise RuntimeError(
            'input_path has an unpleasant value -> {}'.format(input_path))

    if not wave_state:
        logging.debug(
            '\n#############################################################\n'
        )
        logging.debug('Wavenet Train\n')
        logging.debug(
            '###########################################################\n')
        checkpoint = wavenet_train(args, log_dir, hparams, input_path)
        if checkpoint is None:
            raise RuntimeError(
                'Error occured while training Wavenet, Exiting!')
        wave_state = 1
        save_seq(state_file, [taco_state, GTA_state, wave_state], input_path)

    if wave_state and GTA_state and taco_state:
        logging.debug('TRAINING IS ALREADY COMPLETE!!')
Exemplo n.º 8
0
def synthesize(sentences, output_dir):
    """Synthesize audio for ``sentences``: Tacotron mels, then WaveRNN audio.

    Tacotron runs with a fixed, hard-coded option set. The mel paths are then
    read from the second ``|``-separated column of ``<mels_dir>/map.txt`` and
    each mel is passed to ``model.generate``, which is given the target path
    ``<output_dir>/<index>_generated.wav``.

    Args:
        sentences: texts forwarded to ``tacotron_synthesize``.
        output_dir: directory for the generated wav files (created through
            ``fu.ensure_dir``).

    Raises:
        FileNotFoundError: if ``MODEL_PATH`` does not exist.
    """
    # Tacotron first
    # NOTE(review): the namedtuple *class* is used as a plain attribute bag
    # here (it is never instantiated); attributes beyond the declared fields
    # are simply set on the class object.
    args = namedtuple(
        "tacoargs", "mode model checkpoint output_dir mels_dir hparams name".split())
    args.mode = "eval"
    args.model = "Tacotron-2"
    args.checkpoint = "pretrained/"
    args.output_dir = "output"
    args.mels_dir = "tacotron_output/eval"
    args.base_dir = ''
    args.input_dir = 'training_data/'
    args.hparams = ''
    args.name = "Tacotron-2"
    args.log_dir = None
    taco_checkpoint, _, hparams = prepare_run(args)
    taco_checkpoint = os.path.join("tacotron2", taco_checkpoint)
    tacotron_synthesize(args, hparams, taco_checkpoint, sentences)

    # now WaveRNN
    if not os.path.exists(MODEL_PATH):
        raise FileNotFoundError(MODEL_PATH)

    model = Model(rnn_dims=512, fc_dims=512, bits=bits, pad=2,
                  upsample_factors=(5, 5, 11), feat_dims=80,
                  compute_dims=128, res_out_dims=128, res_blocks=10).to(device)

    print("Loading WaveRNN model from " + MODEL_PATH)
    model.load_state_dict(torch.load(MODEL_PATH))

    map_path = os.path.join(args.mels_dir, 'map.txt')
    # Context manager closes the file even if reading fails. Lines are
    # stripped so a trailing newline never ends up inside the mel path when
    # it is the last column, and blank lines are skipped instead of raising
    # IndexError.
    with open(map_path) as map_file:
        mels_paths = [
            line.strip().split('|')[1] for line in map_file if line.strip()
        ]
    test_mels = [np.load(mel_path).T for mel_path in mels_paths]

    fu.ensure_dir(output_dir)

    for i, mel in enumerate(test_mels):
        print('\nGenerating: %i/%i' % (i+1, len(test_mels)))
        model.generate(mel, output_dir + f'/{i}_generated.wav')
Exemplo n.º 9
0
def synthesize(args, hparams, taco_checkpoint, wave_checkpoint, sentences):
    """Run Tacotron on ``sentences``, reset the graph and log the next stage.

    As visible here, the function stops after logging the start of the
    mel-to-audio stage; ``wave_checkpoint`` is not used.
    """
    # Headline uses the run name when set, otherwise the model name.
    label = args.name or args.model
    log('Running End-to-End TTS Evaluation. Model: {}'.format(label))

    log('Synthesizing mel-spectrograms from text..')
    # The value returned by tacotron_synthesize is not used below.
    tacotron_synthesize(args, hparams, taco_checkpoint, sentences)

    # Delete Tacotron model from graph
    tf.reset_default_graph()

    log('Synthesizing audio from mel-spectrograms.. (This may take a while)')
Exemplo n.º 10
0
def main():
    """Parse command-line options and run the selected synthesis pipeline.

    ``--model`` selects Tacotron only, WaveNet only, or the combined
    ``Tacotron-2`` pipeline.

    Raises:
        ValueError: for an unknown ``--model`` or ``--mode``, an invalid
            ``--GTA`` value, or ``Tacotron-2`` combined with ``synthesis``
            mode.
        RuntimeError: if ``live`` mode is requested with the WaveNet model.
    """
    accepted_modes = ['eval', 'synthesis', 'live']
    parser = argparse.ArgumentParser()
    parser.add_argument('--checkpoint', default='pretrained/', help='Path to model checkpoint')
    parser.add_argument('--hparams', default='',
                        help='Hyperparameter overrides as a comma-separated list of name=value pairs')
    parser.add_argument('--name', default='tacotron2_female_golden_v2_female_v2_first_run_all_checkpoint',help='Name of logging directory if the two models were trained together.')
    parser.add_argument('--tacotron_name', help='Name of logging directory of Tacotron. If trained separately')
    parser.add_argument('--wavenet_name', help='Name of logging directory of WaveNet. If trained separately')
    parser.add_argument('--model', default='Tacotron')
    parser.add_argument('--input_dir', default='training_data/', help='folder to contain inputs sentences/targets')
    parser.add_argument('--mels_dir', default='tacotron_output/eval/',
                        help='folder to contain mels to synthesize audio from using the Wavenet')
    parser.add_argument('--output_dir', default='output/', help='folder to contain synthesized mel spectrograms')
    parser.add_argument('--mode', default='eval', help='mode of run: can be one of {}'.format(accepted_modes))
    parser.add_argument('--GTA', default='True',
                        help='Ground truth aligned synthesis, defaults to True, only considered in synthesis mode')
    parser.add_argument('--text_list', default='pinyin.corpus',
                        help='Text file contains list of texts to be synthesized. Valid if mode=eval')
    parser.add_argument('--speaker_id', default=None,
                        help='Defines the speakers ids to use when running standalone Wavenet on a folder of mels. this variable must be a comma-separated list of ids')
    args = parser.parse_args()

    accepted_models = ['Tacotron', 'WaveNet', 'Tacotron-2']

    if args.model not in accepted_models:
        raise ValueError('please enter a valid model to synthesize with: {}'.format(accepted_models))

    if args.mode not in accepted_modes:
        raise ValueError('accepted modes are: {}, found {}'.format(accepted_modes, args.mode))

    # Must use the same spelling as accepted_models ('WaveNet'); the former
    # 'Wavenet' comparison could never match a validated model name, so this
    # guard never fired.
    if args.mode == 'live' and args.model == 'WaveNet':
        raise RuntimeError(
            'Wavenet vocoder cannot be tested live due to its slow generation. Live only works with Tacotron!')

    if args.GTA not in ('True', 'False'):
        raise ValueError('GTA option must be either True or False')

    if args.model == 'Tacotron-2':
        if args.mode == 'live':
            warn('Requested a live evaluation with Tacotron-2, Wavenet will not be used!')
        if args.mode == 'synthesis':
            raise ValueError(
                'I don\'t recommend running WaveNet on entire dataset.. The world might end before the synthesis :) (only eval allowed)')

    taco_checkpoint, wave_checkpoint, hparams = prepare_run(args)
    sentences = get_sentences(args)

    if args.model == 'Tacotron':
        _ = tacotron_synthesize(args, hparams, taco_checkpoint, sentences)
    elif args.model == 'WaveNet':
        wavenet_synthesize(args, hparams, wave_checkpoint)
    elif args.model == 'Tacotron-2':
        synthesize(args, hparams, taco_checkpoint, wave_checkpoint, sentences)
    else:
        # Defensive: the model name was already validated above.
        raise ValueError('Model provided {} unknown! {}'.format(args.model, accepted_models))
Exemplo n.º 11
0
def main():
    """Parse command-line options and run the selected synthesis pipeline.

    ``--model`` selects Tacotron only, WaveRNN only, or the combined
    ``Tacotron-2`` pipeline.

    Raises:
        ValueError: for an unknown ``--model`` or ``--mode``, or
            ``Tacotron-2`` combined with ``synthesis`` mode.
        RuntimeError: if ``live`` mode is requested with the WaveRNN model.
    """
    accepted_modes = ['eval', 'synthesis', 'live']
    parser = argparse.ArgumentParser()
    parser.add_argument('--base_dir', default='')
    parser.add_argument('--hparams', default='', help='Hyperparameter overrides as a comma-separated list of name=value pairs')
    parser.add_argument('--model', default='Tacotron-2')
    parser.add_argument('--mode', default='eval', help='mode of run: can be one of {}'.format(accepted_modes))
    # store_true means the flag is False unless given; the help text used to
    # claim "defaults to True".
    parser.add_argument('--GTA', action='store_true', help='Ground truth aligned synthesis, defaults to False, only considered in synthesis mode')
    parser.add_argument('--text_list', default='', help='Text file contains list of texts to be synthesized. Valid if mode=eval')
    parser.add_argument('--speaker_id', default=None, help='Defines the speakers ids to use when running standalone Wavenet on a folder of mels. this variable must be a comma-separated list of ids')
    parser.add_argument('--use_cuda', action='store_true')
    args = parser.parse_args()

    accepted_models = ['Tacotron', 'WaveRNN', 'Tacotron-2']

    if args.model not in accepted_models:
        raise ValueError('please enter a valid model to synthesize with: {}'.format(accepted_models))

    if args.mode not in accepted_modes:
        raise ValueError('accepted modes are: {}, found {}'.format(accepted_modes, args.mode))

    if args.mode == 'live' and args.model == 'WaveRNN':
        raise RuntimeError('WaveRNN vocoder cannot be tested live due to its slow generation. Live only works with Tacotron!')

    if args.model == 'Tacotron-2':
        if args.mode == 'live':
            warn('Requested a live evaluation with Tacotron-2, WaveRNN will not be used!')
        if args.mode == 'synthesis':
            raise ValueError('I don\'t recommend running WaveRNN on entire dataset.. The world might end before the synthesis :) (only eval allowed)')

    taco_checkpoint, wave_checkpoint, hparams = prepare_run(args)
    sentences = get_sentences(args)

    if args.model == 'Tacotron':
        tacotron_synthesize(args, hparams, taco_checkpoint, sentences)
    elif args.model == 'WaveRNN':
        wavernn_synthesize(args, hparams, wave_checkpoint)
    elif args.model == 'Tacotron-2':
        synthesize(args, hparams, taco_checkpoint, wave_checkpoint, sentences)
    else:
        # Defensive: the model name was already validated above.
        raise ValueError('Model provided {} unknown! {}'.format(args.model, accepted_models))
Exemplo n.º 12
0
def main():
    """Parse command-line options and run the selected synthesis pipeline.

    ``--model`` selects Tacotron only, WaveNet only, or the combined pipeline
    (``Both`` and ``Tacotron-2`` are treated the same).

    Raises:
        ValueError: for an unknown ``--model`` or ``--mode``, an invalid
            ``--GTA`` value, or the combined pipeline in ``synthesis`` mode.
        RuntimeError: if ``live`` mode is requested with the WaveNet model.
    """
    accepted_modes = ['eval', 'synthesis', 'live']
    parser = argparse.ArgumentParser()
    parser.add_argument('--checkpoint', default='pretrained/', help='Path to model checkpoint')
    parser.add_argument('--hparams', default='',
        help='Hyperparameter overrides as a comma-separated list of name=value pairs')
    parser.add_argument('--name', help='Name of logging directory if the two models were trained together.')
    parser.add_argument('--tacotron_name', help='Name of logging directory of Tacotron. If trained separately')
    parser.add_argument('--wavenet_name', help='Name of logging directory of WaveNet. If trained separately')
    parser.add_argument('--model', default='Tacotron-2')
    parser.add_argument('--input_dir', default='training_data/', help='folder to contain inputs sentences/targets')
    parser.add_argument('--mels_dir', default='tacotron_output/eval/', help='folder to contain mels to synthesize audio from using the Wavenet')
    parser.add_argument('--output_dir', default='output/', help='folder to contain synthesized mel spectrograms')
    parser.add_argument('--mode', default='eval', help='mode of run: can be one of {}'.format(accepted_modes))
    parser.add_argument('--GTA', default='True', help='Ground truth aligned synthesis, defaults to True, only considered in synthesis mode')
    parser.add_argument('--text_list', default='', help='Text file contains list of texts to be synthesized. Valid if mode=eval')
    args = parser.parse_args()

    accepted_models = ['Tacotron', 'WaveNet', 'Both', 'Tacotron-2']

    if args.model not in accepted_models:
        raise ValueError('please enter a valid model to synthesize with: {}'.format(accepted_models))

    if args.mode not in accepted_modes:
        raise ValueError('accepted modes are: {}, found {}'.format(accepted_modes, args.mode))

    # Must use the same spelling as accepted_models ('WaveNet'); the former
    # 'Wavenet' comparison could never match a validated model name, so this
    # guard never fired.
    if args.mode == 'live' and args.model == 'WaveNet':
        raise RuntimeError('Wavenet vocoder cannot be tested live due to its slow generation. Live only works with Tacotron!')

    if args.GTA not in ('True', 'False'):
        raise ValueError('GTA option must be either True or False')

    if args.model in ('Both', 'Tacotron-2'):
        if args.mode == 'live':
            warn('Requested a live evaluation with Tacotron-2, Wavenet will not be used!')
        if args.mode == 'synthesis':
            raise ValueError('I don\'t recommend running WaveNet on entire dataset.. The world might end before the synthesis :) (only eval allowed)')

    taco_checkpoint, wave_checkpoint, hparams = prepare_run(args)
    sentences = get_sentences(args)

    if args.model == 'Tacotron':
        _ = tacotron_synthesize(args, hparams, taco_checkpoint, sentences)
    elif args.model == 'WaveNet':
        wavenet_synthesize(args, hparams, wave_checkpoint)
    elif args.model in ('Both', 'Tacotron-2'):
        synthesize(args, hparams, taco_checkpoint, wave_checkpoint, sentences)
    else:
        # Defensive: the model name was already validated above.
        raise ValueError('Model provided {} unknown! {}'.format(args.model, accepted_models))
Exemplo n.º 13
0
def train(args, log_dir, hparams):
    """Train Tacotron, run GTA synthesis, then train Wavenet on its output.

    Args:
        args: run options forwarded to every stage.
        log_dir: logging directory forwarded to both training stages.
        hparams: hyperparameters forwarded to every stage.

    Raises:
        RuntimeError: if ``tacotron_train`` returns ``None``.
    """
    log('\n#############################################################\n')
    log('Tacotron Train\n')
    log('###########################################################\n')
    checkpoint = tacotron_train(args, log_dir, hparams)
    tf.reset_default_graph()
    if checkpoint is None:
        # A bare string cannot be raised (it would become a TypeError that
        # hides this message); use a real exception type.
        raise RuntimeError('Error occured while training Tacotron, Exiting!')
    log('\n#############################################################\n')
    log('Tacotron GTA Synthesis\n')
    log('###########################################################\n')
    # The value returned by GTA synthesis is handed to Wavenet training.
    input_path = tacotron_synthesize(args, hparams, checkpoint)
    log('\n#############################################################\n')
    log('Wavenet Train\n')
    log('###########################################################\n')
    wavenet_train(args, log_dir, hparams, input_path)
Exemplo n.º 14
0
def train(args, log_dir, hparams):
    """Run the three training stages in order, with no resume support.

    Stages: Tacotron training, Tacotron GTA synthesis, Wavenet training.

    Args:
        args: run options forwarded to every stage.
        log_dir: logging directory forwarded to both training stages.
        hparams: hyperparameters forwarded to every stage.

    Raises:
        RuntimeError: if ``tacotron_train`` returns ``None``.
    """
    log('\n#############################################################\n')
    log('Tacotron Train\n')
    log('###########################################################\n')
    checkpoint = tacotron_train(args, log_dir, hparams)
    tf.reset_default_graph()
    if checkpoint is None:
        # Raising a plain string is itself an error in Python 3 (TypeError),
        # which would mask the intended message.
        raise RuntimeError('Error occured while training Tacotron, Exiting!')
    log('\n#############################################################\n')
    log('Tacotron GTA Synthesis\n')
    log('###########################################################\n')
    # Whatever GTA synthesis returns becomes the input of Wavenet training.
    input_path = tacotron_synthesize(args, hparams, checkpoint)
    log('\n#############################################################\n')
    log('Wavenet Train\n')
    log('###########################################################\n')
    wavenet_train(args, log_dir, hparams, input_path)
Exemplo n.º 15
0
def train(args, log_dir, hparams):
    """Run Tacotron training and GTA synthesis, resuming from saved state.

    Progress flags and the GTA ``input_path`` are read from
    ``<log_dir>/state_log`` via ``read_seq`` and saved with ``save_seq``
    after each stage, so a stage whose flag is already set is skipped.

    Args:
        args: run options; ``args.output_dir`` is used to rebuild the GTA map
            path when the GTA stage is skipped.
        log_dir: directory holding the state file and ``taco_pretrained/``.
        hparams: hyperparameters forwarded to every stage.

    Raises:
        RuntimeError: if ``tacotron_train`` returns ``None``.
    """
    state_file = os.path.join(log_dir, 'state_log')
    #Get training states
    (taco_state, GTA_state, wave_state), input_path = read_seq(state_file)
    print('taco_state, GTA_state, wave_state:', taco_state, GTA_state,
          wave_state)
    if not taco_state:
        log('\n#############################################################\n'
            )
        log('Tacotron Train\n')
        log('###########################################################\n')
        checkpoint = tacotron_train(args, log_dir, hparams)
        tf.reset_default_graph()
        #Sleep 1/2 second to let previous graph close and avoid error messages while synthesis
        sleep(0.5)
        if checkpoint is None:
            # A bare string cannot be raised (it would become a TypeError
            # that hides this message); use a real exception type.
            raise RuntimeError(
                'Error occured while training Tacotron, Exiting!')
        taco_state = 1
        save_seq(state_file, [taco_state, GTA_state, wave_state], input_path)
    else:
        checkpoint = os.path.join(log_dir, 'taco_pretrained/')
    log('tacotron_train done!!')

    if not GTA_state:
        log('\n#############################################################\n'
            )
        log('Tacotron GTA Synthesis\n')
        log('###########################################################\n')
        input_path = tacotron_synthesize(args, hparams, checkpoint)
        tf.reset_default_graph()
        #Sleep 1/2 second to let previous graph close and avoid error messages while Wavenet is training
        sleep(0.5)
        GTA_state = 1
        save_seq(state_file, [taco_state, GTA_state, wave_state], input_path)
    else:
        # GTA stage skipped: rebuild the map path under the log directory.
        input_path = os.path.join(log_dir, 'tacotron_' + args.output_dir,
                                  'gta', 'map.txt')
    log('Tacotron GTA Synthesis done')
Exemplo n.º 16
0
def main(websen=None, weight=''):
    """Run Tacotron synthesis using only the parser's default options.

    The parser is invoked with an empty argument list, so the process
    command line is ignored and every option takes its default.

    Args:
        websen: forwarded to ``get_sentences`` as its second argument.
        weight: forwarded to ``prepare_run`` as its second argument.

    Raises:
        ValueError: if the model, mode or GTA value fails validation.
    """
    accepted_modes = ['eval', 'synthesis', 'live']

    # (flag, keyword arguments) pairs; registered with the parser in this order.
    option_specs = [
        ('--checkpoint', dict(default='pretrained/', help='Path to model checkpoint')),
        ('--hparams', dict(default='', help='Hyperparameter overrides as a comma-separated list of name=value pairs')),
        ('--name', dict(help='Name of logging directory if the two models were trained together.')),
        ('--tacotron_name', dict(help='Name of logging directory of Tacotron. If trained separately')),
        ('--wavenet_name', dict(help='Name of logging directory of WaveNet. If trained separately')),
        ('--model', dict(default='Tacotron')),
        ('--input_dir', dict(default='training_data/', help='folder to contain inputs sentences/targets')),
        ('--mels_dir', dict(default='tacotron_output/eval/', help='folder to contain mels to synthesize audio from using the Wavenet')),
        ('--output_dir', dict(default='output/', help='folder to contain synthesized mel spectrograms')),
        ('--mode', dict(default='eval', help='mode of run: can be one of {}'.format(accepted_modes))),
        ('--GTA', dict(default='True', help='Ground truth aligned synthesis, defaults to True, only considered in synthesis mode')),
        ('--text_list', dict(default='web', help='Text file contains list of texts to be synthesized. Valid if mode=eval')),
        ('--speaker_id', dict(default=None, help='Defines the speakers ids to use when running standalone Wavenet on a folder of mels. this variable must be a comma-separated list of ids')),
    ]

    parser = argparse.ArgumentParser()
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
    # Deliberately parse an empty list rather than sys.argv.
    args = parser.parse_args(args=[])

    accepted_models = ['Tacotron']

    model_is_valid = args.model in accepted_models
    if not model_is_valid:
        raise ValueError('please enter a valid model to synthesize with: {}'.format(accepted_models))

    mode_is_valid = args.mode in accepted_modes
    if not mode_is_valid:
        raise ValueError('accepted modes are: {}, found {}'.format(accepted_modes, args.mode))

    gta_is_valid = args.GTA in ('True', 'False')
    if not gta_is_valid:
        raise ValueError('GTA option must be either True or False')

    taco_checkpoint, hparams = prepare_run(args, weight)
    sentences, speaker_labels, language_labels = get_sentences(args, websen)
    print(sentences)

    # Guard clause: anything other than Tacotron is rejected.
    if args.model != 'Tacotron':
        raise ValueError('Model provided {} unknown! {}'.format(args.model, accepted_models))
    tacotron_synthesize(args, hparams, taco_checkpoint, sentences, speaker_labels, language_labels)
Exemplo n.º 17
0
def synthesize(args, hparams, taco_checkpoint, wave_checkpoint, sentences):
    """Run Tacotron then WaveNet synthesis and log how long each stage took.

    The elapsed time of each stage and the total length of ``sentences``
    are passed to ``log`` at the end.
    """
    label = args.name or args.model
    log('Running End-to-End TTS Evaluation. Model: {}'.format(label))
    log('Synthesizing mel-spectrograms from text..')

    # Stage 1: text -> mel, timed.
    mel_started = time()
    tacotron_synthesize(args, hparams, taco_checkpoint, sentences)
    mel_elapsed = time() - mel_started

    # Delete the Tacotron model from the graph, then sleep half a second to
    # let the previous graph close and avoid error messages while Wavenet is
    # synthesizing.
    tf.reset_default_graph()
    sleep(0.5)
    log('Synthesizing audio from mel-spectrograms.. (This may take a while)')

    # Stage 2: mel -> wav, timed.
    wav_started = time()
    wavenet_synthesize(args, hparams, wave_checkpoint)
    wav_elapsed = time() - wav_started

    char_count = sum(len(sentence) for sentence in sentences)

    log('Tacotron-2 TTS synthesis complete!')

    # NOTE(review): log() receives three positional arguments here; confirm
    # that its signature treats the 2nd and 3rd as message parts.
    mel_report = "{} text_to_mel seconds".format(mel_elapsed)
    wav_report = "{} mel_to_wav seconds".format(wav_elapsed)
    size_report = "{} characters".format(char_count)
    log(mel_report, wav_report, size_report)
Exemplo n.º 18
0
def main(text):
    """Synthesize speech for ``text``: Tacotron mels, normalise, then vocode.

    Steps:
      1. Convert ``text`` with ``pinyin(text, style=Style.TONE3)`` and join
         the first reading of each item into one space-separated sentence.
      2. Run ``tacotron_synthesize`` into ``taco_output/org``.
      3. Normalise that directory into ``taco_output/norm`` using the scaler
         loaded from ``wavenet_emmm/meanvar.joblib``.
      4. Call ``ws.wav_syn`` on each ``mel-sentence_<i>.npy`` file.

    Args:
        text: input text to synthesize.
    """
    from multiprocessing import cpu_count

    syllables = [item[0] for item in pinyin(text, style=Style.TONE3)]
    sentences = [' ' + ' '.join(syllables)]
    print(sentences)
    taco_checkpoint_path = os.path.join('taco_model',
                                        'tacotron_model.ckpt-100000')
    wave_checkpoint_path = os.path.join('wave_model', 'checkpoint_latest.pth')
    taco_output_dir = 'taco_output'
    wave_output_dir = 'wave_output'
    org_dir = os.path.join(taco_output_dir, 'org')
    norm_dir = os.path.join(taco_output_dir, 'norm')

    _ = tacotron_synthesize(hparams, taco_checkpoint_path, org_dir, sentences)

    # The normalisation works on the whole directory and does not depend on
    # the sentence index, so it runs once instead of once per sentence.
    scaler = joblib.load(os.path.join('wavenet_emmm', 'meanvar.joblib'))
    inverse = None
    # Half the cores, but never zero (single-core hosts would otherwise ask
    # for 0 workers). NOTE(review): assumes the callee needs >= 1 worker.
    num_workers = max(1, cpu_count() // 2)
    os.makedirs(norm_dir, exist_ok=True)
    preprocess_normalize.apply_normalization_dir2dir(
        org_dir, norm_dir, scaler, inverse, num_workers)

    for i in range(len(sentences)):
        ws.wav_syn(
            wave_checkpoint_path, wave_output_dir,
            os.path.join(norm_dir, 'mel-sentence_{}.npy'.format(i)))
Exemplo n.º 19
0
def main():
    """Parse command-line options and run Tacotron synthesis from PPG/lf0 input.

    One speaker id is chosen for the whole run: derived from the reference
    audio file name for the LibriTTS dataset, taken from ``--speaker_id`` for
    datasets whose name starts with ``multi``, and ``0`` when
    ``hparams.use_multispeaker`` is false.

    Raises:
        ValueError: if ``--model`` is not accepted, if the LibriTTS dataset is
            used without ``--reference_audio``, or if a multispeaker dataset
            is neither ``LibriTTS`` nor ``multi*``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--hparams',
        default='',
        help=
        'Hyperparameter overrides as a comma-separated list of name=value pairs'
    )
    # NOTE(review): required=True means the default below is never used.
    parser.add_argument('--output-model-path',
                        dest='output_model_path',
                        required=True,
                        type=str,
                        default=os.path.dirname(os.path.realpath(__file__)),
                        help='Philly model output path.')

    parser.add_argument(
        '--name',
        help=
        'Name of logging directory if the two models were trained together.')
    parser.add_argument(
        '--tacotron_name',
        help='Name of logging directory of Tacotron. If trained separately')
    parser.add_argument('--model', default='Tacotron')
    parser.add_argument('--reference_audio')
    # NOTE(review): no type is set, so a value from the command line is a str
    # while the default is the int 0.
    parser.add_argument('--speaker_id', default=0)
    parser.add_argument('--output_dir',
                        default='output/',
                        help='folder to contain synthesized mel spectrograms')
    parser.add_argument('--ppg_path',
                        default='',
                        help='Dir which contains test ppgs.')
    parser.add_argument('--lf0_path',
                        default='',
                        help='Dir which contains lf0.')
    args = parser.parse_args()

    accepted_models = ['Tacotron']

    if args.model not in accepted_models:
        raise ValueError(
            'please enter a valid model to synthesize with: {}'.format(
                accepted_models))

    taco_checkpoint, hparams = prepare_run(args)

    ppgs = get_ppgs(args)
    Lf0s = get_Lf0s(args)

    # File name of the reference audio, or None when none was supplied.
    audio_name = None
    if args.reference_audio:
        audio_name = args.reference_audio.split('/')[-1]

    if hparams.use_multispeaker:
        if hparams.dataset == "LibriTTS":
            # The speaker key is the part of the file name before the first
            # '_'; without a reference audio there is nothing to parse
            # (previously a NameError).
            if audio_name is None:
                raise ValueError(
                    '--reference_audio is required to derive the speaker id '
                    'for the LibriTTS dataset')
            speaker_id = speakers.speaker2Id[int(
                audio_name[0:audio_name.find('_')])]
        elif hparams.dataset.startswith("multi"):
            speaker_id = args.speaker_id
        else:
            # Previously left speaker_id unbound (UnboundLocalError below).
            raise ValueError(
                'cannot determine speaker id for multispeaker dataset '
                '{!r}'.format(hparams.dataset))
    else:
        speaker_id = 0
    speaker_ids = [speaker_id] * len(ppgs)

    if args.model == 'Tacotron':
        print(args.reference_audio)
        _ = tacotron_synthesize(args, hparams, taco_checkpoint, ppgs,
                                speaker_ids, Lf0s)
    else:
        # Defensive: the model name was already validated above.
        raise ValueError('Model provided {} unknown! {}'.format(
            args.model, accepted_models))
Exemplo n.º 20
0
def main():
    """Parse command-line options, validate them, and run the chosen model.

    Options are read from ``sys.argv`` via ``argparse``. After validation the
    checkpoints and hyperparameters are obtained from ``prepare_run(args)``,
    the input sentences from ``get_sentences(args)``, and control is handed
    to one of ``tacotron_synthesize``, ``wavenet_synthesize``, ``synthesize``
    or ``inference`` depending on ``--model``.

    Raises:
        ValueError: if ``--model`` or ``--mode`` is not an accepted value, if
            ``--GTA`` is not the string ``'True'`` or ``'False'``, or if
            ``--model Tacotron-2`` is combined with ``--mode synthesis``.
        RuntimeError: if ``--mode live`` is combined with ``--model WaveNet``,
            or if ``--reference_mel`` is given but is not an existing file.
    """
    accepted_modes = ['eval', 'synthesis', 'live']
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--taco_checkpoint',
        default=
        '/groups/ming/tacotron2/Blizzard-2012/logs-confidence=30/taco_pretrained/tacotron_model.ckpt-40000',
        help='Path to model checkpoint')
    parser.add_argument(
        '--wave_checkpoint',
        default=
        '/groups/ming/tacotron2/Blizzard-2012/logs-Wavenet/taco_pretrained/',
        help='Path to model checkpoint')
    parser.add_argument(
        '--hparams',
        default='',
        help=
        'Hyperparameter overrides as a comma-separated list of name=value pairs'
    )
    parser.add_argument('--model', default='Tacotron-2')
    parser.add_argument('--input_dir',
                        default='/groups/ming/tacotron2/Blizzard-2012/data/',
                        help='folder to contain inputs sentences/targets')
    parser.add_argument(
        '--mels_dir',
        default='/groups/ming/tacotron2/Blizzard-2012/tacotron_output/eval/',
        help='folder to contain mels to synthesize audio from using the Wavenet'
    )
    parser.add_argument(
        '--output_dir',
        default='/groups/ming/tacotron2/Blizzard-2012/tacotron_output/',
        help='folder to contain synthesized mel spectrograms')
    parser.add_argument(
        '--mode',
        default='eval',
        help='mode of run: can be one of {}'.format(accepted_modes))
    parser.add_argument(
        '--GTA',
        default='True',
        help=
        'Ground truth aligned synthesis, defaults to True, only considered in synthesis mode'
    )
    parser.add_argument(
        '--modify_vae_dim',
        default=None,
        help=
        'The model will synthesize spectrogram with the specified dimensions of the VAE code modified. This variable must be a comma-separated list of dimensions. If None, synthesis will be based on the code generated by the VAE encoder without modification. The modification will be based on the mean and variance generated by the VAE encoder, while if in eval mode and reference_mel is not specified, the mean and variance of a unit Gaussian distribution will be considered for the modification. Considered only when hparams.use_vae=True and GTA=False.'
    )
    parser.add_argument(
        '--reference_mel',
        default=None,
        help=
        'The mel spectrogram file to be referenced. Valid if hparams.use_vae=True and GTA=False'
    )
    parser.add_argument(
        '--text_list',
        default='',
        help=
        'Text file contains list of texts to be synthesized. Valid if mode=eval'
    )
    parser.add_argument(
        '--speaker_id',
        default=None,
        help=
        'Defines the speakers ids to use when running standalone Wavenet on a folder of mels. this variable must be a comma-separated list of ids'
    )
    args = parser.parse_args()

    accepted_models = ['Tacotron', 'WaveNet', 'Tacotron-2', 'Inference']

    # All validation happens before prepare_run() so that a bad command line
    # fails before any checkpoint or hyperparameter handling starts.
    if args.model not in accepted_models:
        raise ValueError(
            'please enter a valid model to synthesize with: {}'.format(
                accepted_models))

    if args.mode not in accepted_modes:
        raise ValueError('accepted modes are: {}, found {}'.format(
            accepted_modes, args.mode))

    # The model name must be spelled exactly as in accepted_models
    # ('WaveNet'); any other casing can never match a validated value and
    # would leave this guard permanently disabled.
    if args.mode == 'live' and args.model == 'WaveNet':
        raise RuntimeError(
            'Wavenet vocoder cannot be tested live due to its slow generation. Live only works with Tacotron!'
        )

    # --GTA is kept as a string flag, so only the two literal spellings pass.
    if args.GTA not in ('True', 'False'):
        raise ValueError('GTA option must be either True or False')

    if args.model == 'Tacotron-2':
        if args.mode == 'live':
            warn(
                'Requested a live evaluation with Tacotron-2, Wavenet will not be used!'
            )
        if args.mode == 'synthesis':
            raise ValueError(
                'I don\'t recommend running WaveNet on entire dataset.. The world might end before the synthesis :) (only eval allowed)'
            )

    if args.reference_mel is not None and not os.path.isfile(
            args.reference_mel):
        raise RuntimeError(
            'The reference mel-spectrogram file doesn\'t exist.')

    taco_checkpoint, wave_checkpoint, hparams = prepare_run(args)
    sentences = get_sentences(args)

    if args.model == 'Tacotron':
        _ = tacotron_synthesize(args, hparams, taco_checkpoint, sentences)
    elif args.model == 'WaveNet':
        wavenet_synthesize(args, hparams, wave_checkpoint)
    elif args.model == 'Tacotron-2':
        synthesize(args, hparams, taco_checkpoint, wave_checkpoint, sentences)
    elif args.model == 'Inference':
        inference(args, hparams, taco_checkpoint)
    else:
        # Defensive only: the accepted_models check above already rejects
        # every other value.
        raise ValueError('Model provided {} unknown! {}'.format(
            args.model, accepted_models))
Exemplo n.º 21
0
def synthesizeBytex(logId, tex):
    """Synthesize the given text and return the synthesizer's result.

    ``tex`` is converted with ``get_sentencesByTex`` and the outcome is
    passed, together with ``logId``, to ``tacotron_synthesize``; whatever
    that call returns is handed back unchanged.
    NOTE(review): presumably the result is the path of the generated wav
    file -- confirm against ``tacotron_synthesize``.
    """
    parsed_sentences = get_sentencesByTex(tex)
    return tacotron_synthesize(logId, parsed_sentences)
Exemplo n.º 22
0
	if args.model == 'Tacotron-2':
		if args.mode == 'live':
			warn('Requested a live evaluation with Tacotron-2, Wavenet will not be used!')
		if args.mode == 'synthesis':
			raise ValueError('I don\'t recommend running WaveNet on entire dataset.. The world might end before the synthesis :) (only eval allowed)')

	taco_checkpoint, wave_checkpoint, hparams = prepare_run(args)
	sentences = get_sentences(args, hparams.chinese_dict)

	#preprocess args.speaker_id
	if args.speaker_id is not None:
		args.speaker_id = args.speaker_id.split(',')
		if not len(args.speaker_id) == len(sentences):
			args.speaker_id = [args.speaker_id[0]] * len(sentences)
		args.speaker_id = ','.join(args.speaker_id)



	if args.model == 'Tacotron':
		_ = tacotron_synthesize(args, hparams, taco_checkpoint, sentences)
	elif args.model == 'WaveNet':
		wavenet_synthesize(args, hparams, wave_checkpoint)
	elif args.model == 'Tacotron-2':
		synthesize(args, hparams, taco_checkpoint, wave_checkpoint, sentences)
	else:
		raise ValueError('Model provided {} unknown! {}'.format(args.model, accepted_models))


# Invoke the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
	main()
Exemplo n.º 23
0
def main():
    accepted_modes = ['eval', 'synthesis', 'alignment']

    parser = argparse.ArgumentParser()
    parser.add_argument('--checkpoint',
                        required=True,
                        help='Path to model checkpoint')
    parser.add_argument(
        '--hparams',
        default='',
        help=
        'Hyperparameter overrides as a comma-separated list of name=value pairs'
    )
    parser.add_argument('--variant', default='tacotron')
    parser.add_argument('--model', default='tacotron')
    parser.add_argument(
        '--gta',
        default='True',
        help=
        'Ground truth aligned synthesis, defaults to True, only considered in synthesis mode'
    )
    parser.add_argument(
        '--eal',
        default='False',
        help=
        'Explicit alignment locking, defaults to False, only considered in synthesis mode'
    )
    parser.add_argument(
        '--online',
        default=False,
        action='store_true',
        help='should be on if the model was trained by online eal')
    parser.add_argument(
        '--mode',
        default='eval',
        help='Mode of synthesis run, can be one of {}'.format(accepted_modes))
    parser.add_argument(
        '--text_list',
        default='',
        help=
        'Text file contains list of texts to be synthesized. Valid if mode=eval'
    )
    parser.add_argument('--output_dir',
                        default='output',
                        help='Folder to contain synthesized PML features')
    parser.add_argument('--training_dir',
                        default='training',
                        help='Folder that contains training data')
    parser.add_argument('--base_dir', default=os.path.expanduser('~/tacotron'))
    parser.add_argument('--batch_size',
                        default=100,
                        help='Number of PML vocoder feature trajectories to '
                        'synthesize at once')
    parser.add_argument(
        '--dataset',
        default='train',
        help='Data set to use, can be either train, validation or test.')
    parser.add_argument(
        '--checkpoint_eal',
        default=None,
        help='Path to model checkpoint, for pml features instead of alignments'
    )
    args = parser.parse_args()

    # cover off accidentally typing in training
    if args.dataset == 'training':
        args.dataset = 'train'

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    hparams.parse(args.hparams)

    accepted_models = ['tacotron', 'wavenet']

    if args.model not in accepted_models:
        raise ValueError('Accepted models are: {}, you entered: {}'.format(
            accepted_models, args.model))

    if args.mode not in accepted_modes:
        raise ValueError('Accepted modes are: {}, you entered: {}'.format(
            accepted_modes, args.model))

    if args.gta not in ('True', 'False'):
        raise ValueError(
            'Ground truth alignment option must be either True or False')

    sentences = get_sentences(args, hparams)
    #     import pdb
    #     pdb.set_trace()

    if args.model == 'tacotron':
        _ = tacotron_synthesize(args, hparams, args.checkpoint, sentences,
                                args.checkpoint_eal)