def train(self, epoch, data, spklist, aux_data=None):
    """Train the model for one epoch.

    Args:
        epoch: The current epoch index.
        data: The training data directory.
        spklist: A file that maps speaker names to indices.
        aux_data: The auxiliary data directory.
    """
    self.network.train()
    curr_step = 0
    data_loader = KaldiDataRandomQueue(data, spklist,
                                       num_parallel=self.params.num_parallel_datasets,
                                       max_qsize=self.params.max_queue_size,
                                       num_speakers=self.params.num_speakers_per_batch,
                                       num_segments=self.params.num_segments_per_speaker,
                                       min_len=self.params.min_segment_len,
                                       max_len=self.params.max_segment_len,
                                       shuffle=True)
    data_loader.start()
    sum_loss, sum_samples = 0, 0
    for step in range(curr_step % self.params.num_steps_per_epoch, self.params.num_steps_per_epoch):
        features, labels = data_loader.fetch()
        sum_samples += len(features)
        features, labels = self.transform(features, labels)
        out, _ = self.network(features)
        torch.cuda.empty_cache()
        loss = self.loss_network(out, labels)
        sum_loss += loss.item() * len(features)
        if step % self.params.show_training_process == 0:
            with open(os.path.join(self.model_log, "iter_loss_log"), 'a') as iter_f:
                iter_f.write("Time:{}, Epoch:{}, Iter:{}, Loss:{}\n".format(
                    time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
                    epoch, step, sum_loss / sum_samples))
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        curr_step += 1
    with open(os.path.join(self.model_log, "epoch_loss_log"), 'a') as epoch_f:
        epoch_f.write("Time:{}, Epoch:{}, Loss:{}\n".format(
            time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
            epoch, sum_loss / sum_samples))
    self.save(epoch=epoch, model=self.network, optimizer=self.optimizer)
    data_loader.stop()
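
The transform call in the loop above is not part of this excerpt. A plausible minimal version, assuming KaldiDataRandomQueue yields NumPy arrays and that the class keeps its training device in self.device (both assumptions), would simply move the batch onto that device:

    def transform(self, features, labels):
        # Hypothetical helper, not shown in the excerpt: convert the NumPy
        # batch into tensors on the training device (self.device is assumed).
        features = torch.from_numpy(features).float().to(self.device)
        labels = torch.from_numpy(labels).long().to(self.device)
        return features, labels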
Example 2
    def train_tune_lr(self, data, spklist, tune_period=100, aux_data=None):
        """Tune the learning rate.

        According to: https://www.kdnuggets.com/2017/11/estimating-optimal-learning-rate-deep-neural-network.html

        Args:
            data: The training data directory.
            spklist: A file that maps speaker names to indices.
            tune_period: The number of steps to run at each learning rate.
            aux_data: The auxiliary data directory.
        """
        # initialize all variables
        self.sess.run(tf.global_variables_initializer())

        # We need to load the model sometimes, since we may try to find the learning rate for fine-tuning.
        if os.path.isfile(os.path.join(self.model, "checkpoint")):
            self.load()

        data_loader = KaldiDataRandomQueue(
            data,
            spklist,
            num_parallel=self.params.num_parallel_datasets,
            max_qsize=self.params.max_queue_size,
            num_speakers=self.params.num_speakers_per_batch,
            num_segments=self.params.num_segments_per_speaker,
            min_len=self.params.min_segment_len,
            max_len=self.params.max_segment_len,
            shuffle=True)
        data_loader.start()

        # The learning rate normally varies from 1e-5 to 1
        # Some common values:
        # 1. factor = 1.15
        #    tune_period = 200
        #    tune_times = 100
        init_learning_rate = 1e-5
        factor = 1.15
        tune_times = 100

        fp_lr = open(os.path.join(self.model, "learning_rate_tuning"), "w")
        for step in range(tune_period * tune_times):
            lr = init_learning_rate * (factor**(step // tune_period))
            try:
                if step % tune_period == 0:
                    train_ops = [self.train_ops, self.train_op, self.train_summary]
                    start_time = time.time()
                    features, labels = data_loader.fetch()
                    train_val = self.sess.run(train_ops,
                                              feed_dict={self.train_features: features,
                                                         self.train_labels: labels,
                                                         self.global_step: 0,
                                                         self.learning_rate: lr})
                    end_time = time.time()
                    tf.logging.info(
                        "Epoch: step: %2d, time: %.4f s/step, lr: %f, raw loss: %f, total loss: %f"
                        % (step, end_time - start_time, lr,
                           train_val[0]["raw_loss"], train_val[0]["loss"]))
                    fp_lr.write("%d %f %f\n" % (step, lr, train_val[0]["loss"]))
                    self.summary_writer.add_summary(train_val[-1], step)
                else:
                    features, labels = data_loader.fetch()
                    _ = self.sess.run(self.train_op,
                                      feed_dict={self.train_features: features,
                                                 self.train_labels: labels,
                                                 self.global_step: 0,
                                                 self.learning_rate: lr})
            except DataOutOfRange:
                tf.logging.info("Finished reading features.")
                break
        data_loader.stop()
        fp_lr.close()
        return
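
To choose a learning rate from the sweep above, the recipe in the linked article is to plot the recorded loss against the learning rate and pick a value in the region where the loss falls fastest, before it diverges. A minimal sketch that reads the three-column "step lr loss" file written by train_tune_lr; the slope heuristic itself is an assumption, not part of the original code:

import numpy as np

def pick_learning_rate(tuning_file):
    # Assumed file format (see train_tune_lr above): one "step lr loss"
    # row per tuning period, with lr increasing by a constant factor.
    data = np.loadtxt(tuning_file)
    lrs, losses = data[:, 1], data[:, 2]
    # Heuristic: take the lr where the loss decreases fastest with respect
    # to log(lr); in practice, a slightly smaller value is usually safer.
    slope = np.diff(losses) / np.diff(np.log(lrs))
    return lrs[np.argmin(slope)]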
Example 3
    def train(self, data, spklist, learning_rate, aux_data=None):
        """Train the model.

        Args:
            data: The training data directory.
            spklist: A file that maps speaker names to indices.
            learning_rate: The learning rate is passed by the main program. The main program can easily tune the
                           learning rate according to the validation accuracy or anything else.
            aux_data: The auxiliary data (maybe useful in a child class).
        """
        # initialize all variables
        self.sess.run(tf.global_variables_initializer())

        # curr_step is the real step the training at.
        curr_step = 0

        # Load the model if we have
        if os.path.isfile(os.path.join(self.model, "checkpoint")):
            curr_step = self.load()

        # The data loader
        data_loader = KaldiDataRandomQueue(
            data,
            spklist,
            num_parallel=self.params.num_parallel_datasets,
            max_qsize=self.params.max_queue_size,
            num_speakers=self.params.num_speakers_per_batch,
            num_segments=self.params.num_segments_per_speaker,
            min_len=self.params.min_segment_len,
            max_len=self.params.max_segment_len,
            shuffle=True)
        data_loader.start()

        epoch = int(curr_step / self.params.num_steps_per_epoch)
        for step in range(curr_step % self.params.num_steps_per_epoch,
                          self.params.num_steps_per_epoch):
            try:
                if step % self.params.save_summary_steps == 0 or step % self.params.show_training_progress == 0:
                    train_ops = [self.train_ops, self.train_op]
                    if step % self.params.save_summary_steps == 0:
                        train_ops.append(self.train_summary)
                    start_time = time.time()
                    features, labels = data_loader.fetch()
                    train_val = self.sess.run(train_ops,
                                              feed_dict={self.train_features: features,
                                                         self.train_labels: labels,
                                                         self.global_step: curr_step,
                                                         self.learning_rate: learning_rate})
                    end_time = time.time()
                    tf.logging.info(
                        "Epoch: [%2d] step: [%2d/%2d] time: %.4f s/step, raw loss: %f, total loss: %f"
                        % (epoch, step, self.params.num_steps_per_epoch,
                           end_time - start_time, train_val[0]["raw_loss"],
                           train_val[0]["loss"]))
                    if step % self.params.save_summary_steps == 0:
                        self.summary_writer.add_summary(
                            train_val[-1], curr_step)
                else:
                    # Only compute optimizer.
                    features, labels = data_loader.fetch()
                    _ = self.sess.run(self.train_op,
                                      feed_dict={
                                          self.train_features: features,
                                          self.train_labels: labels,
                                          self.global_step: curr_step,
                                          self.learning_rate: learning_rate
                                      })

                if step % self.params.save_checkpoints_steps == 0 and curr_step != 0:
                    self.save(curr_step)
                curr_step += 1
            except DataOutOfRange:
                tf.logging.info("Finished reading features.")
                break

        data_loader.stop()
        self.save(curr_step)

        return
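
The docstring notes that the main program owns the learning-rate schedule. A hypothetical driver loop along those lines; trainer, trainer.valid, and the halving rule are illustrative assumptions, not part of the excerpted class:

# Hypothetical outer loop: train one epoch at a time and back off the
# learning rate when validation stops improving.
learning_rate = 0.01
best_valid_loss = float("inf")
for epoch in range(num_epochs):
    trainer.train(data_dir, spklist, learning_rate)
    valid_loss = trainer.valid(valid_data_dir, valid_spklist)  # assumed validation hook
    if valid_loss >= best_valid_loss:
        learning_rate /= 2.0  # reduce lr when the validation loss plateaus
    best_valid_loss = min(best_valid_loss, valid_loss)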
Example 4
    def train(self, data, spklist, learning_rate, aux_data=None):
        """Train the model.

        Args:
            data: The training data directory.
            spklist: A file that maps speaker names to indices.
            learning_rate: The learning rate is passed by the main program. The main program can easily tune the
                           learning rate according to the validation accuracy or anything else.
            aux_data: The auxiliary data (maybe useful in a child class).
        """
        # initialize all variables
        # The block below (kept commented out, as in the source) applies a
        # semi-orthogonal constraint to a factorized TDNN kernel, in the style
        # of Povey et al.'s factorized TDNN.
        # graph = tf.get_default_graph()
        # kernel_six = graph.get_tensor_by_name('tdnn_svd6/tdnn6.5_dense/kernel:0')
        # def get_semi_orthogonal(mat):
        #     print(mat.shape)
        #     M = tf.transpose(mat)
        #     I = tf.Variable(np.identity(M.shape[0]), dtype=tf.float32)
        #     for _ in range(10):
        #         P = tf.matmul(M, M, transpose_b=True)
        #         alpha2 = tf.divide(tf.trace(tf.matmul(P, P, transpose_b=True)), tf.trace(P))
        #         M = M - (1 / (2.0 * alpha2)) * tf.matmul(tf.subtract(P, alpha2 * I), M)
        #     P = tf.matmul(M, M, transpose_b=True)
        #     alpha2 = tf.divide(tf.trace(tf.matmul(P, P, transpose_b=True)), tf.trace(P))
        #     M = M / alpha2
        #     return tf.transpose(M)

        # semi = get_semi_orthogonal(kernel_six)
        # semi_op = tf.assign(kernel_six, semi)

        self.sess.run(tf.global_variables_initializer())

        # curr_step is the real step the training at.
        curr_step = 0

        # Load the model if we have
        if os.path.isfile(os.path.join(self.model, "checkpoint")):
            curr_step = self.load()

        # The data loader
        data_loader = KaldiDataRandomQueue(data, spklist,
                                           num_parallel=self.params.num_parallel_datasets,
                                           max_qsize=self.params.max_queue_size,
                                           num_speakers=self.params.num_speakers_per_batch,
                                           num_segments=self.params.num_segments_per_speaker,
                                           min_len=self.params.min_segment_len,
                                           max_len=self.params.max_segment_len,
                                           shuffle=True)
        epoch = int(curr_step / self.params.num_steps_per_epoch)
        data_loader.start()
        for step in range(curr_step % self.params.num_steps_per_epoch, self.params.num_steps_per_epoch):
            try:
        #         if step % 4 == 0:
        #             # apply the semi-orthogonal constraint every 4 steps
        #             self.sess.run(semi_op)
                if step % self.params.save_summary_steps == 0 or step % self.params.show_training_progress == 0:
                    train_ops = [self.train_ops, self.train_op]
                    if step % self.params.save_summary_steps == 0:
                        train_ops.append(self.train_summary)
                    start_time = time.time()
                    features, labels = data_loader.fetch()
                    train_val = self.sess.run(train_ops, feed_dict={self.train_features: features,
                                                                    self.train_labels: labels,
                                                                    self.global_step: curr_step,
                                                                    self.learning_rate: learning_rate})
                    end_time = time.time()
                    tf.logging.info(
                        "Epoch: [%2d] step: [%2d/%2d] time: %.4f s/step, raw loss: %f, total loss: %f"
                        % (epoch, step, self.params.num_steps_per_epoch, end_time - start_time,
                           train_val[0]["raw_loss"], train_val[0]["loss"]))
                    if step % self.params.save_summary_steps == 0:
                        self.summary_writer.add_summary(train_val[-1], curr_step)
                else:
                    # Only compute optimizer.
                    features, labels = data_loader.fetch()
                    _ = self.sess.run(self.train_op, feed_dict={self.train_features: features,
                                                                self.train_labels: labels,
                                                                self.global_step: curr_step,
                                                                self.learning_rate: learning_rate})

                if step % self.params.save_checkpoints_steps == 0 and curr_step != 0:
                    self.save(curr_step)
                curr_step += 1
            except DataOutOfRange:
                tf.logging.info("Finished reading features.")
                break

        data_loader.stop()
        self.save(curr_step)

        return
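
The commented-out block in Example 4 sketches the semi-orthogonal update used in factorized TDNNs: repeatedly nudge M so that M @ M.T approaches a scaled identity. A standalone NumPy version of the same iteration, for reference; the shapes and iteration count are illustrative assumptions:

import numpy as np

def semi_orthogonal_step(M):
    # One update pushing M (rows <= cols) toward a semi-orthogonal matrix,
    # i.e. M @ M.T close to a scaled identity, mirroring the commented-out
    # TensorFlow code above.
    P = M @ M.T
    alpha2 = np.trace(P @ P.T) / np.trace(P)  # estimate of the squared scale
    return M - (1.0 / (2.0 * alpha2)) * (P - alpha2 * np.eye(M.shape[0])) @ M

M = np.random.randn(64, 256)
for _ in range(10):
    M = semi_orthogonal_step(M)
P = M @ M.T
# Max deviation of the normalized Gram matrix from the identity; ~0 when
# M is (approximately) semi-orthogonal.
print(np.abs(P / (np.trace(P) / 64) - np.eye(64)).max())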