def load(self,
             checkpoint_path,
             num_speakers=2,
             checkpoint_step=None,
             inference_prenet_dropout=True,
             model_name='tacotron'):
        """Build the inference graph and restore its variables from a checkpoint.

        Args:
            checkpoint_path: Either a directory, in which case
                ``get_most_recent_checkpoint`` chooses the checkpoint to
                restore, or a checkpoint path whose parent directory is used
                as the hparams location.
            num_speakers: Stored on the instance and forwarded to
                ``model.initialize``.
            checkpoint_step: Forwarded to ``get_most_recent_checkpoint``; only
                consulted when ``checkpoint_path`` is a directory.
            inference_prenet_dropout: Written to
                ``hparams.inference_prenet_dropout`` before the model is built.
            model_name: Only used in the progress message.
        """
        self.num_speakers = num_speakers

        # Resolve where hparams live and which checkpoint will be restored.
        if os.path.isdir(checkpoint_path):
            hparams_dir = checkpoint_path
            checkpoint_path = get_most_recent_checkpoint(
                checkpoint_path, checkpoint_step)
        else:
            hparams_dir = os.path.dirname(checkpoint_path)

        print('Constructing model: %s' % model_name)

        token_ids = tf.placeholder(
            tf.int32, shape=[None, None], name='inputs')
        token_lengths = tf.placeholder(
            tf.int32, shape=[None], name='input_lengths')

        # speaker_id is optional at feed time: when omitted it falls back to
        # a vector of zeros with one entry per row of `inputs`.
        default_speakers = tf.zeros([tf.shape(token_ids)[0]], dtype=tf.int32)
        speaker_id = tf.placeholder_with_default(
            default_speakers, shape=[None], name='speaker_id')

        # hparams must be fully populated before the model is created.
        load_hparams(hparams, hparams_dir)
        hparams.inference_prenet_dropout = inference_prenet_dropout

        with tf.variable_scope('model'):
            self.model = create_model(hparams)
            self.model.initialize(
                inputs=token_ids,
                input_lengths=token_lengths,
                num_speakers=self.num_speakers,
                speaker_id=speaker_id,
                is_training=False,
            )
            self.wav_output = inv_spectrogram_tensorflow(
                self.model.linear_outputs, hparams)

        print('Loading checkpoint: %s' % checkpoint_path)

        session_options = tf.ConfigProto(
            allow_soft_placement=True,
            intra_op_parallelism_threads=1,
            inter_op_parallelism_threads=2,
        )
        session_options.gpu_options.allow_growth = True
        self.sess = tf.Session(config=session_options)

        # Initialize every variable first, then overwrite with checkpoint values.
        self.sess.run(tf.global_variables_initializer())
        tf.train.Saver().restore(self.sess, checkpoint_path)
Exemplo n.º 2
0
    def load(self, checkpoint_path: str, model_name: str = 'tacotron') -> None:
        """Prepare ``self.session`` and ``self.wav_output`` from a model file.

        The file name selects how the model is loaded:

        * a path ending in ``.pb`` is read as a serialized ``GraphDef`` and
          imported into the default graph;
        * any other path is treated as a checkpoint: the model is built from
          ``hparams`` under the ``model`` variable scope and its variables
          are restored with ``tf.train.Saver``.

        Either way ``self.inputs``, ``self.input_lengths``,
        ``self.wav_output`` and ``self.session`` are set. ``self.model`` is
        only set when loading from a checkpoint.

        Args:
            checkpoint_path: Path to a ``.pb`` graph file or to a checkpoint.
                Also stored as ``self.model_filename``.
            model_name: Printed in the progress message and, for checkpoints,
                passed to ``create_model``.
        """
        print('Constructing model: %s' % model_name)
        self.model_filename = checkpoint_path

        if checkpoint_path.endswith('.pb'):
            # Read the whole file first so the handle is closed before the
            # graph is parsed and imported.
            with open(checkpoint_path, 'rb') as f:
                serialized_graph = f.read()
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(graph_def, name='')

            graph = tf.get_default_graph()
            # The tensor names below can be chosen freely at training time.
            self.inputs = graph.get_tensor_by_name("inputs:0")
            self.input_lengths = graph.get_tensor_by_name("input_lengths:0")
            self.wav_output = graph.get_tensor_by_name("model/griffinlim/Squeeze:0")

            print('Loading pb: %s' % checkpoint_path)
            self._start_session()
        else:
            # Expose the placeholders on the instance as well, so both
            # loading paths leave the same feed attributes behind.
            self.inputs = tf.placeholder(tf.int32, [None, None], 'inputs')
            self.input_lengths = tf.placeholder(tf.int32, [None], 'input_lengths')
            with tf.variable_scope('model'):
                self.model = create_model(model_name, hparams)
                self.model.initialize(self.inputs, self.input_lengths)
                self.wav_output = audio.inv_spectrogram_tensorflow(self.model.linear_outputs)

            print('Loading checkpoint: %s' % checkpoint_path)
            self._start_session()
            # Restore after initialization so checkpoint values win.
            tf.train.Saver().restore(self.session, checkpoint_path)

    def _start_session(self) -> None:
        """Create ``self.session`` and run the global variable initializer."""
        # Same GPU setting as before: per-process memory fraction of 0.1.
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1)
        self.session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        self.session.run(tf.global_variables_initializer())