import tensorflow as tf

# Repo-local helpers. These module paths follow the usual Tacotron-2 /
# Real-Time-Voice-Cloning layout and are an assumption; adjust to your checkout.
from synthesizer.infolog import log
from synthesizer.models import create_model


# Constructor of the synthesizer wrapper class (the class definition itself is
# omitted in this excerpt).
def __init__(self, checkpoint_path, hparams, gta=False, model_name="Tacotron", seed=None):
    log("Constructing model: %s" % model_name)

    # Initialize TensorFlow's random seed for deterministic operation, if provided
    if seed is not None:
        tf.compat.v1.set_random_seed(seed)

    # Force the batch size to be known in order to use attention masking in batch synthesis
    inputs = tf.compat.v1.placeholder(tf.int32, (None, None), name="inputs")
    input_lengths = tf.compat.v1.placeholder(tf.int32, (None,), name="input_lengths")
    speaker_embeddings = tf.compat.v1.placeholder(tf.float32, (None, hparams.speaker_embedding_size),
                                                  name="speaker_embeddings")
    targets = tf.compat.v1.placeholder(tf.float32, (None, None, hparams.num_mels), name="mel_targets")
    split_infos = tf.compat.v1.placeholder(tf.int32, shape=(hparams.tacotron_num_gpus, None),
                                           name="split_infos")

    with tf.compat.v1.variable_scope("Tacotron_model") as scope:
        self.model = create_model(model_name, hparams)
        if gta:
            # Ground-truth-aligned synthesis: feed the target mels so the decoder is teacher-forced
            self.model.initialize(inputs, input_lengths, speaker_embeddings, targets, gta=gta,
                                  split_infos=split_infos)
        else:
            self.model.initialize(inputs, input_lengths, speaker_embeddings,
                                  split_infos=split_infos)
        self.mel_outputs = self.model.tower_mel_outputs
        self.linear_outputs = self.model.tower_linear_outputs \
            if (hparams.predict_linear and not gta) else None
        self.alignments = self.model.tower_alignments
        self.stop_token_prediction = self.model.tower_stop_token_prediction
        self.targets = targets

    self.gta = gta
    self._hparams = hparams
    # Pad input sequences with the <pad_token> 0 (_)
    self._pad = 0
    # Explicitly set the padding to a value that doesn't originally exist in the spectrogram,
    # to avoid any possible conflicts without affecting the output range of the model too much
    if hparams.symmetric_mels:
        self._target_pad = -hparams.max_abs_value
    else:
        self._target_pad = 0.

    self.inputs = inputs
    self.input_lengths = input_lengths
    self.speaker_embeddings = speaker_embeddings
    self.targets = targets
    self.split_infos = split_infos

    log("Loading checkpoint: %s" % checkpoint_path)
    # Let the GPUs allocate memory as needed instead of grabbing it all upfront
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True

    self.session = tf.compat.v1.Session(config=config)
    self.session.run(tf.compat.v1.global_variables_initializer())

    saver = tf.compat.v1.train.Saver()
    saver.restore(self.session, checkpoint_path)
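
# A minimal usage sketch for the constructor above: synth is an instance of the
# wrapper class, and the input arrays are assumed to be prepared by the caller.
# Only the placeholder and output attribute names come from the code itself.
def _example_synthesize(synth, text_sequences, sequence_lengths, embeddings, split_infos):
    feed_dict = {
        synth.inputs: text_sequences,           # int32, [batch, max_text_len]
        synth.input_lengths: sequence_lengths,  # int32, [batch]
        synth.speaker_embeddings: embeddings,   # float32, [batch, speaker_embedding_size]
        synth.split_infos: split_infos,         # int32, [tacotron_num_gpus, None]
    }
    # mel_outputs is a per-GPU ("tower") list of [batch, frames, num_mels] outputs
    return synth.session.run(synth.mel_outputs, feed_dict=feed_dict)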
def model_test_mode(args, feeder, hparams, global_step):
    # Reuse the variables created by the training graph (tf.compat.v1 API for
    # consistency with the synthesizer code above)
    with tf.compat.v1.variable_scope("Tacotron_model", reuse=tf.compat.v1.AUTO_REUSE) as scope:
        model = create_model("Tacotron", hparams)
        model.initialize(feeder.eval_inputs, feeder.eval_input_lengths,
                         feeder.eval_speaker_embeddings, feeder.eval_mel_targets,
                         feeder.eval_token_targets, targets_lengths=feeder.eval_targets_lengths,
                         global_step=global_step, is_training=False, is_evaluating=True,
                         split_infos=feeder.eval_split_infos)
        model.add_loss()
    return model
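
# Hedged sketch of what reuse=AUTO_REUSE buys here: variables requested under
# the same scope and name resolve to one shared object, so the eval graph reads
# the exact weights the training graph updates. Assumes TF1-style graph mode
# (e.g. after tf.compat.v1.disable_eager_execution()); the names are illustrative.
def _shared_variable_demo():
    with tf.compat.v1.variable_scope("Tacotron_model", reuse=tf.compat.v1.AUTO_REUSE):
        w1 = tf.compat.v1.get_variable("demo_w", shape=[3])
    with tf.compat.v1.variable_scope("Tacotron_model", reuse=tf.compat.v1.AUTO_REUSE):
        w2 = tf.compat.v1.get_variable("demo_w", shape=[3])
    assert w1 is w2  # the same underlying variable, not a copy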
def model_train_mode(args, feeder, hparams, global_step):
    with tf.compat.v1.variable_scope("Tacotron_model", reuse=tf.compat.v1.AUTO_REUSE) as scope:
        model = create_model("Tacotron", hparams)
        model.initialize(feeder.inputs, feeder.input_lengths, feeder.speaker_embeddings,
                         feeder.mel_targets, feeder.token_targets,
                         targets_lengths=feeder.targets_lengths, global_step=global_step,
                         is_training=True, split_infos=feeder.split_infos)
        model.add_loss()
        model.add_optimizer(global_step)
        # add_train_stats is defined alongside these helpers in the training script
        stats = add_train_stats(model, hparams)
    return model, stats
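
# Hedged sketch of how the two builders are typically wired together in a
# training loop. The feeder.start_threads call, the fixed step count, and the
# model.loss / model.optimize handles follow the Tacotron-2 training script and
# are assumptions here; adapt them to the handles your model actually exposes.
def _example_train(args, feeder, hparams, log_dir, n_steps=1000):
    global_step = tf.compat.v1.Variable(0, name="global_step", trainable=False)
    model, stats = model_train_mode(args, feeder, hparams, global_step)
    eval_model = model_test_mode(args, feeder, hparams, global_step)

    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        feeder.start_threads(sess)  # start the queue threads that feed both graphs
        writer = tf.compat.v1.summary.FileWriter(log_dir, sess.graph)
        for _ in range(n_steps):
            step, loss, _ = sess.run([global_step, model.loss, model.optimize])
            if step % 100 == 0:
                # stats is the merged summary op returned by add_train_stats
                writer.add_summary(sess.run(stats), step)
            if step % 500 == 0:
                # the eval graph shares weights with the train graph (AUTO_REUSE above)
                eval_loss = sess.run(eval_model.loss)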