Example #1
    def train_with_dataset_api(self,
                               X,
                               X_length,
                               Y,
                               init_dataset_train,
                               init_dataset_valid,
                               batch_size,
                               n_epoch,
                               learning_rate,
                               reg_lambda=0.,
                               patience=10,
                               verbose_interval=5,
                               save_dir_path=None,
                               **kwargs):
        """Train a sequence model via the tf.data Dataset API with early stopping.

        `X`, `X_length` and `Y` are iterator output tensors; `init_dataset_train`
        and `init_dataset_valid` are the corresponding iterator initializer ops.
        Each epoch drains the train iterator until `tf.errors.OutOfRangeError`,
        evaluates both splits, checkpoints on validation-loss improvement,
        restores the best checkpoint at the end, and saves a final model.

        NOTE(review): `batch_size` is never used here — batching appears to be
        done inside the dataset pipeline; confirm against callers.

        Returns:
            self, to allow call chaining.
        """

        try:
            # Resolve the checkpoint directory: explicit argument wins over the
            # instance attribute; otherwise generate a dated temp directory.
            if self.save_dir_path is None and save_dir_path is None:
                self.save_dir_path = "./tmp/{}".format(generate_id_with_date())

            if save_dir_path:
                self.save_dir_path = save_dir_path

            os.makedirs(self.save_dir_path)
        except Exception as e:
            # Best-effort: the directory may already exist, so only report.
            print("*" * 30)
            print("Make directory with save_dir_path is failed")
            print("Maybe, there is directory already or error because of \"{}\"".format(str(e)))

        print("-" * 30)
        print("train start")
        patience_origin = patience

        # Grab one batch up front to use as a fixed prediction sample for the
        # periodic progress printouts below.
        self.sess.run(init_dataset_train)
        (sample_x,
         sample_x_len,
         sample_y) = self.sess.run([X, X_length, Y])

        # Large sentinel so the first validation loss always improves on it.
        if self.min_loss is None:
            self.min_loss = 999999999.

        for epoch_i in range(n_epoch):
            # Re-initialize the train iterator at the start of every epoch.
            self.sess.run(init_dataset_train)
    
            batch_i = 0 
            while True:
                try:
                    (batch_x,
                     batch_x_len,
                     batch_y) = self.sess.run([X, X_length, Y])
                            
                    self.sess.run(self.updates,
                                  feed_dict={self.X: batch_x,
                                             self.Y: batch_y,
                                             self.X_length: batch_x_len,
                                             self.learning_rate: learning_rate,
                                             self.reg_lambda: reg_lambda,
                                             self.is_training: True})
                    batch_i += 1
                except tf.errors.OutOfRangeError:
                    # Iterator exhausted: the epoch is complete.
                    break

            (train_loss,
             _) = self.evaluate_with_da(X,
                                        X_length,
                                        Y,
                                        init_dataset_train)
            (valid_loss,
             _) = self.evaluate_with_da(X,
                                        X_length,
                                        Y,
                                        init_dataset_valid)

            self.report_dict['train_loss'].append(train_loss)
            self.report_dict['valid_loss'].append(valid_loss)

            # NOTE(review): verbose_interval == 0 would raise ZeroDivisionError
            # here; callers are expected to pass a positive interval.
            flag_print = epoch_i % verbose_interval == 0
            flag_better = valid_loss < self.min_loss
            if flag_print or flag_better:
                # "*" banner marks an improvement; "-" marks a routine report.
                print("*"*30) if flag_better else print("-"*30)
                print_metric(epoch_i=epoch_i,
                             train_loss=train_loss,
                             valid_loss=valid_loss,
                             min_loss=self.min_loss)

                # Show a one-sample decode so training quality is eyeballable.
                pred = self.sess.run(self.decoder_prediction,
                                     feed_dict={self.X: sample_x[:1],
                                                self.X_length: sample_x_len[:1],
                                                self.is_training: False})
                print("-" * 30)
                print("sample")
                print("orig : {}".format(sample_x[:1]))
                print("pred : {}".format(pred))

            if flag_better:
                # +1 compensates for the unconditional decrement below, so a
                # fresh improvement restores the full patience budget.
                patience = patience_origin + 1
                self.min_loss = valid_loss
                meta = {
                            'min_loss': self.min_loss,
                        }
                self.meta.update(meta)
                self.save_path = "{}/{}".format(self.save_dir_path, self.model_name)
                self.best_ckpt_path = self.save(self.save_path)
            patience -= 1
            if patience <= 0:
                break

        # Roll back to the best-performing checkpoint before the final report.
        self.load(self.best_ckpt_path)

        (train_loss,
         _) = self.evaluate_with_da(X,
                                    X_length,
                                    Y,
                                    init_dataset_train)
        (valid_loss,
         _) = self.evaluate_with_da(X,
                                    X_length,
                                    Y,
                                    init_dataset_valid)

        pred = self.sess.run(self.decoder_prediction,
                             feed_dict={self.X: sample_x[:10],
                                        self.X_length: sample_x_len[:10],
                                        self.is_training: False})

        self.meta['report_dict'] = self.report_dict

        # Persist a separately-named final model alongside the best checkpoint.
        date_time_prefix = get_date_time_prefix()
        self.final_model_path = "{}/{}_final_{}".format(
            self.save_dir_path, date_time_prefix, self.model_name) 
        self.save(self.final_model_path)
        print("*"*30)
        print("final trained performance")
        print("-" * 30)
        print_metric(epoch_i=epoch_i,
                     train_loss=train_loss,
                     valid_loss=valid_loss,
                     min_loss=self.min_loss)
        print("-" * 30)
        print("sample")
        print("orig : {}".format(sample_x[:10]))
        print("pred : {}".format(pred))
        print("final_model_path: {}".format(self.final_model_path))
        print("train done")
        print("*"*30)

        return self
Example #2
    def train(self,
              X_train,
              Y_train,
              X_valid,
              Y_valid,
              batch_size,
              n_epoch,
              learning_rate,
              reg_lambda=0.,
              patience=10,
              verbose_interval=1,
              save_dir_path=None,
              **kwargs):
        """Train the model with shuffled mini-batches and early stopping.

        Each epoch shuffles `X_train`, runs `self.updates` on every full batch
        (a trailing partial batch is dropped), evaluates both splits, and
        checkpoints whenever validation loss improves.  The best checkpoint is
        restored at the end and a final model is saved.

        Fixes over the previous revision:
        - `verbose_interval=0` no longer raises ZeroDivisionError (it now
          disables periodic printing, matching the sibling `train` methods).
        - `n_epoch=0` no longer raises NameError in the final report.

        Returns:
            self, to allow call chaining.
        """

        try:
            # Resolve the checkpoint directory: explicit argument wins over the
            # instance attribute; otherwise generate a dated temp directory.
            if self.save_dir_path is None and save_dir_path is None:
                self.save_dir_path = "./tmp/{}".format(generate_id_with_date())

            if save_dir_path:
                self.save_dir_path = save_dir_path

            os.makedirs(self.save_dir_path)
        except Exception as e:
            # Best-effort: the directory may already exist, so only report.
            print("*" * 30)
            print("Make directory with save_dir_path is failed")
            print("Maybe, there is directory already or error because of \"{}\"".format(str(e)))

        print("-" * 30)
        print("train start")
        patience_origin = patience
        # Large sentinel so the first validation loss always improves on it.
        if self.min_loss is None:
            self.min_loss = 999999999.

        X_length_train = get_X_length(X_train)
        X_length_valid = get_X_length(X_valid)

        epoch_i = 0  # keep defined for the final report even when n_epoch == 0
        for epoch_i in range(n_epoch):
            rand_idx_list = np.random.permutation(range(len(X_train)))
            n_batch = len(rand_idx_list) // batch_size
            for batch_i in range(n_batch):
                rand_idx = rand_idx_list[batch_i *
                                         batch_size: (batch_i + 1) * batch_size]
                batch_x = X_train[rand_idx]
                batch_y = Y_train[rand_idx]
                batch_x_len = X_length_train[rand_idx]

                self.sess.run(self.updates,
                              feed_dict={self.X: batch_x,
                                         self.Y: batch_y,
                                         self.X_length: batch_x_len,
                                         self.learning_rate: learning_rate,
                                         self.reg_lambda: reg_lambda,
                                         self.is_training: True})

            train_loss = self.evaluate(
                X_train,  Y_train, X_length=X_length_train, batch_size=batch_size)
            valid_loss = self.evaluate(
                X_valid,  Y_valid, X_length=X_length_valid, batch_size=batch_size)

            self.report_dict['train_loss'].append(train_loss)
            self.report_dict['valid_loss'].append(valid_loss)

            # Guard against verbose_interval == 0 (division by zero); a falsy
            # interval simply disables the periodic printout.
            flag_print = bool(verbose_interval) and epoch_i % verbose_interval == 0
            flag_better = valid_loss < self.min_loss
            if flag_print or flag_better:
                # "*" banner marks an improvement; "-" marks a routine report.
                print("*"*30) if flag_better else print("-"*30)
                print_metric(epoch_i=epoch_i,
                             train_loss=train_loss,
                             valid_loss=valid_loss,
                             min_loss=self.min_loss)

                # Show a one-sample decode so training quality is eyeballable.
                pred = self.sess.run(self.decoder_prediction,
                                     feed_dict={self.X: X_train[:1],
                                                self.X_length: X_length_train[:1],
                                                self.is_training: False})
                print("-" * 30)
                print("sample")
                print("orig : {}".format(X_train[:1]))
                print("pred : {}".format(pred))

            if flag_better:
                # +1 compensates for the unconditional decrement below, so a
                # fresh improvement restores the full patience budget.
                patience = patience_origin + 1
                self.min_loss = valid_loss
                meta = {
                            'min_loss': self.min_loss,
                        }
                self.meta.update(meta)
                self.save_path = "{}/{}".format(self.save_dir_path, self.model_name)
                self.best_ckpt_path = self.save(self.save_path)
            patience -= 1
            if patience <= 0:
                break

        # Roll back to the best-performing checkpoint before the final report.
        self.load(self.best_ckpt_path)

        train_loss = self.evaluate(
            X_train,  Y_train, X_length=X_length_train, batch_size=batch_size)
        valid_loss = self.evaluate(
            X_valid,  Y_valid, X_length=X_length_valid, batch_size=batch_size)

        pred = self.sess.run(self.decoder_prediction,
                             feed_dict={self.X: X_train[:10],
                                        self.X_length: X_length_train[:10],
                                        self.is_training: False})

        self.meta['report_dict'] = self.report_dict

        # Persist a separately-named final model alongside the best checkpoint.
        date_time_prefix = get_date_time_prefix()
        self.final_model_path = "{}/{}_final_{}".format(
            self.save_dir_path, date_time_prefix, self.model_name)
        self.save(self.final_model_path)
        print("*"*30)
        print("final trained performance")
        print("-" * 30)
        print_metric(epoch_i=epoch_i,
                     train_loss=train_loss,
                     valid_loss=valid_loss,
                     min_loss=self.min_loss)
        print("-" * 30)
        print("sample")
        print("orig : {}".format(X_train[:10]))
        print("pred : {}".format(pred))
        print("final_model_path: {}".format(self.final_model_path))
        print("train done")
        print("*"*30)

        return self
Example #3
    def train_with_dataset_api(self,
                               X,
                               Y,
                               init_dataset_train,
                               init_dataset_valid,
                               n_epoch,
                               learning_rate,
                               reg_lambda,
                               dropout_keep_prob,
                               patience,
                               mode=MODE_TRAIN_GLOBAL,
                               flag_preprocess=False,
                               verbose_interval=1,
                               save_dir_path=None):
        """Train a classifier via the tf.data Dataset API with early stopping.

        `X` and `Y` are iterator output tensors; `init_dataset_train` and
        `init_dataset_valid` are the iterator initializer ops.  `mode` selects
        the train op from `self.update_dict` (e.g. classifier-only vs global).
        Accuracy-based early stopping: the model is checkpointed whenever
        validation accuracy improves, and the best checkpoint is restored
        before the final save.

        Fix over the previous revision: the improvement banner printed
        "epoh_i"; it now prints "epoch_i".

        Returns:
            self, to allow call chaining.
        """

        try:
            # Resolve the checkpoint directory: explicit argument wins over the
            # instance attribute; otherwise generate a dated temp directory.
            if self.save_dir_path is None and save_dir_path is None:
                self.save_dir_path = "./tmp/{}".format(generate_id_with_date())

            if save_dir_path:
                self.save_dir_path = save_dir_path

            os.makedirs(self.save_dir_path)
        except Exception as e:
            # Best-effort: the directory may already exist, so only report.
            print("*" * 30)
            print("Make directory with save_dir_path is failed")
            print(
                "Maybe, there is directory already or error because of \"{}\"".
                format(str(e)))

        # Initialize early-stopping state.  Unlike the sibling train methods,
        # min_loss/best_accuracy are reset unconditionally here, so resumed
        # training always restarts the comparison baseline.
        patience_origin = patience
        self.min_loss = 999999999.
        self.best_accuracy = 0.

        # NOTE(review): feeding self.X when flag_preprocess is True (and
        # self.X_preprocessed otherwise) looks inverted — presumably the graph
        # performs preprocessing internally in that case; confirm against the
        # model definition.
        if flag_preprocess:
            X_tensor = self.X
        else:
            X_tensor = self.X_preprocessed

        train_start_time = time.time()
        epoch_tqdm = tqdm(range(n_epoch))
        for epoch_i in epoch_tqdm:
            # Re-initialize the train iterator at the start of every epoch.
            self.sess.run(init_dataset_train)
            batch_i = 0
            while True:
                try:
                    batch_start_time = time.time()
                    X_batch, Y_batch = self.sess.run([X, Y])
                    # TODO: remove most descriminative parts here

                    self.sess.run(self.update_dict[mode],
                                  feed_dict={
                                      X_tensor: X_batch,
                                      self.Y: Y_batch,
                                      self.learning_rate: learning_rate,
                                      self.reg_lambda: reg_lambda,
                                      self.dropout_keep_prob:
                                      dropout_keep_prob,
                                      self.is_training: True
                                  })

                    curr_time = time.time()
                    batch_time = curr_time - batch_start_time

                    # Surface per-batch timing on the tqdm bar.
                    epoch_tqdm.set_description(
                        "epoch {}, batch {} takes: {:0.2f} sec".format(
                            epoch_i, batch_i, batch_time))
                    batch_i += 1
                except tf.errors.OutOfRangeError:
                    # Iterator exhausted: the epoch is complete.
                    break

            train_accuracy, train_loss, _, _ = self.check_accuracy_and_loss(
                X, Y, init_dataset_train, flag_preprocess=flag_preprocess)
            valid_accuracy, valid_loss, _, _ = self.check_accuracy_and_loss(
                X, Y, init_dataset_valid, flag_preprocess=flag_preprocess)

            self.report_dict['valid_loss'].append(valid_loss)
            self.report_dict['train_loss'].append(train_loss)
            self.report_dict['valid_accuracy'].append(valid_accuracy)
            self.report_dict['train_accuracy'].append(train_accuracy)

            if verbose_interval:
                if epoch_i % verbose_interval == 0:
                    print("-" * 30)
                    print("epoch_i : {}".format(epoch_i))
                    print("train loss: {}, train accuracy: {}".format(
                        train_loss, train_accuracy))
                    print("valid loss: {}, valid accuracy: {}".format(
                        valid_loss, valid_accuracy))
                    print(
                        "best valid loss: {}, best valid accuracy : {}".format(
                            self.min_loss, self.best_accuracy))

            if valid_accuracy > self.best_accuracy:
                # Improvement: restore the full patience budget and checkpoint.
                patience = patience_origin
                self.min_loss = valid_loss
                self.best_accuracy = valid_accuracy

                meta = {
                    'input_shape': self.input_shape,
                    'output_dim': self.output_dim,
                    'min_loss': self.min_loss,
                    'best_accuracy': self.best_accuracy,
                    'flag_preprocess': self.flag_preprocess,
                }
                self.meta.update(meta)
                self.best_ckpt_path = "{}/{}".format(self.save_dir_path,
                                                     self.model_name)
                self.best_ckpt_path = self.save(self.best_ckpt_path)

                print("*" * 30)
                print("epoch_i : {}".format(epoch_i))  # was misspelled "epoh_i"
                print("train loss: {}, train accuracy: {}".format(
                    train_loss, train_accuracy))
                print("valid loss: {}, valid accuracy: {}".format(
                    valid_loss, valid_accuracy))
                print("best valid loss: {}, best valid accuracy : {}".format(
                    self.min_loss, self.best_accuracy))
                print("save current model : {}".format(self.best_ckpt_path))
            else:
                patience -= 1
            if patience <= 0:
                break

        print("train takes : {} sec".format(time.time() - train_start_time))
        # Roll back to the best-performing checkpoint before the final report.
        self.load(self.best_ckpt_path)

        train_accuracy, train_loss, _, _ = self.check_accuracy_and_loss(
            X, Y, init_dataset_train, flag_preprocess=flag_preprocess)
        valid_accuracy, valid_loss, _, _ = self.check_accuracy_and_loss(
            X, Y, init_dataset_valid, flag_preprocess=flag_preprocess)

        self.meta['report_dict'] = self.report_dict

        # Persist a separately-named final model alongside the best checkpoint.
        date_time_prefix = get_date_time_prefix()
        self.final_model_path = "{}/{}_final_{}".format(
            self.save_dir_path, date_time_prefix, self.model_name)

        self.save(self.final_model_path)
        print("*" * 30)
        print("final trained performance")
        print("train loss: {}, train accuracy: {}".format(
            train_loss, train_accuracy))
        print("valid loss: {}, valid accuracy: {}".format(
            valid_loss, valid_accuracy))
        print("best valid loss: {}, best valid accuracy : {}".format(
            self.min_loss, self.best_accuracy))
        print("final_model_path: {}".format(self.final_model_path))
        print("train done")
        print("*" * 30)

        return self
Example #4
    def train(self,
              X_train,
              Y_train,
              X_valid,
              Y_valid,
              batch_size,
              n_epoch,
              learning_rate,
              reg_lambda=0.,
              patience=100,
              verbose_interval=20,
              save_dir_path=None,
              **kwargs):
        """Train the classifier with shuffled mini-batches and early stopping.

        Optionally preprocesses `X_train` (when `self.flag_preprocess` is set);
        evaluation on the training split always uses the original, raw
        `X_train` so `self.evaluate` can apply its own preprocessing.  The
        model is checkpointed on validation-loss improvement, the best
        checkpoint is restored at the end, and a final model is saved.

        Fixes over the previous revision:
        - the improvement banner printed "epoh_i"; it now prints "epoch_i".
        - `n_epoch=0` no longer raises NameError via undefined `epoch_i`.

        Returns:
            self.sess — the live TensorFlow session (historical interface,
            kept for caller compatibility).
        """

        try:
            # Resolve the checkpoint directory: explicit argument wins over the
            # instance attribute; otherwise generate a dated temp directory.
            if self.save_dir_path is None and save_dir_path is None:
                self.save_dir_path = "./tmp/{}".format(generate_id_with_date())

            if save_dir_path:
                self.save_dir_path = save_dir_path

            os.makedirs(self.save_dir_path)
        except Exception as e:
            # Best-effort: the directory may already exist, so only report.
            print("*" * 30)
            print("Make directory with save_dir_path is failed")
            print(
                "Maybe, there is directory already or error because of \"{}\"".
                format(str(e)))

        # Keep the raw inputs for evaluation; train on the preprocessed copy.
        X_train_org = X_train
        if self.flag_preprocess:
            print("-" * 30)
            print("preprocess start")
            self.prepare_preprocess(X_train)
            X_train = self.preprocess(X_train)
            print("preprocess done")

        print("-" * 30)
        print("train start")
        patience_origin = patience
        # Large sentinel so the first validation loss always improves on it.
        if self.min_loss is None:
            self.min_loss = 999999999.

        epoch_i = 0  # keep defined for the final report even when n_epoch == 0
        for epoch_i in range(n_epoch):
            rand_idx_list = np.random.permutation(range(len(X_train)))
            n_batch = len(rand_idx_list) // batch_size
            for batch_i in range(n_batch):
                rand_idx = rand_idx_list[batch_i * batch_size:(batch_i + 1) *
                                         batch_size]
                batch_x = X_train[rand_idx]
                batch_y = Y_train[rand_idx]

                self.sess.run(self.updates,
                              feed_dict={
                                  self.X: batch_x,
                                  self.Y: batch_y,
                                  self.learning_rate: learning_rate,
                                  self.reg_lambda: reg_lambda,
                                  self.is_training: True
                              })

            _, valid_accuracy, valid_loss = self.evaluate(
                X_valid, Y_valid, batch_size)
            _, train_accuracy, train_loss = self.evaluate(
                X_train_org, Y_train, batch_size)

            self.report_dict['valid_loss'].append(valid_loss)
            self.report_dict['train_loss'].append(train_loss)
            self.report_dict['valid_accuracy'].append(valid_accuracy)
            self.report_dict['train_accuracy'].append(train_accuracy)

            if verbose_interval:
                if epoch_i % verbose_interval == 0:
                    print("-" * 30)
                    print("epoch_i : {}".format(epoch_i))
                    print("train loss: {}, train accuracy: {}".format(
                        train_loss, train_accuracy))
                    print("valid loss: {}, valid accuracy: {}".format(
                        valid_loss, valid_accuracy))
                    # NOTE(review): assumes self.best_accuracy is initialized
                    # elsewhere (e.g. __init__) — confirm.
                    print(
                        "best valid loss: {}, best valid accuracy : {}".format(
                            self.min_loss, self.best_accuracy))

            if valid_loss < self.min_loss:
                # +1 compensates for the unconditional decrement below, so a
                # fresh improvement restores the full patience budget.
                patience = patience_origin + 1

                self.min_loss = valid_loss
                self.best_accuracy = valid_accuracy

                meta = {
                    'input_dim': self.input_dim,
                    'output_dim': self.output_dim,
                    'min_loss': self.min_loss,
                    'best_accuracy': self.best_accuracy,
                    'mean': self.mean,
                    'std': self.std,
                    'flag_preprocess': self.flag_preprocess,
                }
                self.meta.update(meta)
                self.save_path = "{}/{}".format(self.save_dir_path,
                                                self.model_name)
                self.best_ckpt_path = self.save(self.save_path)

                print("*" * 30)
                print("epoch_i : {}".format(epoch_i))  # was misspelled "epoh_i"
                print("train loss: {}, train accuracy: {}".format(
                    train_loss, train_accuracy))
                print("valid loss: {}, valid accuracy: {}".format(
                    valid_loss, valid_accuracy))
                print("best valid loss: {}, best valid accuracy : {}".format(
                    self.min_loss, self.best_accuracy))
                print("save current model : {}".format(self.best_ckpt_path))

            patience -= 1
            if patience <= 0:
                break

        # Roll back to the best-performing checkpoint before the final report.
        self.load(self.best_ckpt_path)
        _, valid_accuracy, valid_loss = self.evaluate(X_valid, Y_valid,
                                                      batch_size)
        _, train_accuracy, train_loss = self.evaluate(X_train_org, Y_train,
                                                      batch_size)
        self.meta['report_dict'] = self.report_dict

        # Persist a separately-named final model alongside the best checkpoint.
        date_time_prefix = get_date_time_prefix()
        self.final_model_path = "{}/{}_final_{}".format(
            self.save_dir_path, date_time_prefix, self.model_name)
        self.save(self.final_model_path)
        print("*" * 30)
        print("final trained performance")
        print("train loss: {}, train accuracy: {}".format(
            train_loss, train_accuracy))
        print("valid loss: {}, valid accuracy: {}".format(
            valid_loss, valid_accuracy))
        print("best valid loss: {}, best valid accuracy : {}".format(
            self.min_loss, self.best_accuracy))
        print("final_model_path: {}".format(self.final_model_path))
        print("train done")
        print("*" * 30)

        return self.sess
Example #5
    def train(self,
              X_train,
              Y_train,
              X_valid,
              Y_valid,
              save_dir_path='./tmp',
              **kwargs):
        """Run one full pass of incremental closed-form training and save.

        Streams `X_train`/`Y_train` through `self.batch_size`-sized shuffled
        batches, feeding each batch through the hidden-layer op and the
        P/beta update ops (first batch bootstraps P0 and beta).  Evaluates
        both splits once, records the results as best-so-far, and saves the
        final model.

        Returns:
            self.sess — the live TensorFlow session.
        """

        # Resolve the save directory; an already-existing directory is fine.
        try:
            if self.save_dir_path is None and save_dir_path is None:
                self.save_dir_path = "./tmp/{}".format(generate_id_with_date())

            if save_dir_path:
                self.save_dir_path = save_dir_path

            os.makedirs(self.save_dir_path)
        except Exception as e:
            print("*" * 30)
            print("Make directory with save_dir_path is failed")
            print(
                "Maybe, there is directory already or error because of \"{}\"".
                format(str(e)))

        # Train on the (optionally) preprocessed data, but keep the raw copy
        # for evaluation so self.evaluate can preprocess on its own.
        raw_X_train = X_train
        if self.flag_preprocess:
            self.prepare_preprocess(X_train)
            X_train = self.preprocess(X_train)

        if self.min_loss is None:
            self.min_loss = 999999999

        shuffled_indices = np.random.permutation(range(len(X_train)))
        num_batches = len(shuffled_indices) // self.batch_size
        for b in range(num_batches):
            start = b * self.batch_size
            selection = shuffled_indices[start:start + self.batch_size]
            x_chunk = X_train[selection]
            y_chunk = Y_train[selection]

            # Project the batch through the hidden layer first.
            self.sess.run(self.set_H, feed_dict={self.X_batch: x_chunk})

            if self.flag_init:
                # Incremental step: rotate previous state, then refresh P and beta.
                self.sess.run(self.swap_P)
                self.sess.run(self.swap_beta)
                self.sess.run(self.update_P)
                self.sess.run(self.update_beta, {self.T_batch: y_chunk})
            else:
                # Very first batch bootstraps P0 and beta.
                self.sess.run(self.init_P0)
                self.sess.run(self.init_beta,
                              feed_dict={self.T_batch: y_chunk})
                self.flag_init = True

        _, valid_accuracy, valid_loss = self.evaluate(X_valid, Y_valid,
                                                      self.batch_size)
        _, train_accuracy, train_loss = self.evaluate(raw_X_train, Y_train,
                                                      self.batch_size)

        print("*" * 30)
        # Single-pass training: this run's metrics are the best-so-far.
        self.min_loss = valid_loss
        self.best_accuracy = valid_accuracy

        print("*" * 30)
        print("train loss: {}, train accuracy: {}".format(
            train_loss, train_accuracy))
        print("valid loss: {}, valid accuracy: {}".format(
            valid_loss, valid_accuracy))
        print("best valid loss: {}, best valid accuracy : {}".format(
            self.min_loss, self.best_accuracy))

        self.meta = {
            'input_dim': self.input_dim,
            'output_dim': self.output_dim,
            'min_loss': self.min_loss,
            'best_accuracy': self.best_accuracy,
            'mean': self.mean,
            'std': self.std,
            'flag_preprocess': self.flag_preprocess,
        }

        stamp = get_date_time_prefix()
        self.final_model_path = "{}/{}_final_{}".format(
            self.save_dir_path, stamp, self.model_name)
        self.save(self.final_model_path)
        print("final_model_path: {}".format(self.final_model_path))
        print("*" * 30)

        return self.sess