# Example no. 1
 def testModel(self):
     """Evaluate the trained model on the test split.

     Initializes the model, runs mini-batched evaluation over X_test,
     prints overall test accuracy, and computes the F1 score on the
     collected predictions. Returns None.
     """
     n_test_samples, max_length = self.data['X_test'].shape
     accuracy_test = []
     preds_test = []
     self.initModel()
     test_bar = ProgressBar('Testing', max=len(self.data['X_test']))
     for batch in minibatches_iter(self.data['X_test'],
                                   self.data['Y_test'],
                                   masks=self.data['mask_test'],
                                   char_inputs=self.data['C_test'],
                                   lexicons=self.lexicons['lexicons_test'],
                                   batch_size=self.batch_size):
         inputs, targets, masks, char_inputs, lexicons = batch
         test_bar.next(len(inputs))
         # Pass char_inputs as well: eval_fn/test_fn are compiled with five
         # data inputs (see the calls in trainingModel); omitting char_inputs
         # makes the compiled Theano function fail with an arity error.
         corrects = self.model.eval_fn(inputs, targets, masks, char_inputs,
                                       lexicons)
         _, preds = self.model.test_fn(inputs, targets, masks, char_inputs,
                                       lexicons)
         preds_test.append(preds)
         accuracy_test.append(corrects)
     # Batches may be padded, so truncate to the true sample count before
     # averaging.
     this_test_accuracy = np.concatenate(
         accuracy_test)[0:n_test_samples].sum() / float(n_test_samples)
     test_bar.finish()
     print("Test accuracy: " + str(this_test_accuracy * 100) + "%")
     compute_f1_score(self.data['Y_test'], preds_test)
    def trainingModel(self):
        """Train the model with periodic validation and test evaluation.

        Runs up to self.num_epochs epochs of mini-batch training. Every
        self.valid_freq training batches the model is evaluated on the dev
        set; whenever dev accuracy improves, the test set is evaluated and
        the best test-accuracy model is written to self.model_path.
        After each epoch (unless using adadelta) the learning rate is
        decayed and train_fn is recompiled. Returns None.
        """

        self.initModel()

        best_acc = 0                      # best test accuracy seen so far
        best_validation_accuracy = 0      # best dev accuracy seen so far
        stop_count = 0                    # early-stop counter (see NOTE below)
        lr = self.learning_rate
        patience = self.patience
        # Only the sample counts are used; max_length is discarded.
        n_dev_samples, max_length = self.data['X_dev'].shape
        n_test_samples, max_length = self.data['X_test'].shape

        for epoch in range(1, self.num_epochs + 1):
            print 'Epoch %d (learning rate=%.4f, decay rate=%.4f): ' % (
                epoch, lr, self.decay_rate)
            train_err = 0.0
            train_batches = 0
            train_bar = ProgressBar('Training', max=len(self.data['X_train']))
            for batch in minibatches_iter(
                    self.data['X_train'],
                    self.data['Y_train'],
                    masks=self.data['mask_train'],
                    char_inputs=self.data['C_train'],
                    lexicons=self.lexicons['lexicons_train'],
                    batch_size=self.batch_size,
                    shuffle=True):
                inputs, targets, masks, char_inputs, lexicons = batch
                err = self.model.train_fn(inputs, targets, masks, char_inputs,
                                          lexicons)
                train_err += err
                train_bar.next(len(inputs))

                # Periodic validation every valid_freq batches (skipping
                # batch 0 so at least one full window has accumulated).
                if train_batches > 0 and train_batches % self.valid_freq == 0:
                    accuracy_valid = []
                    for batch in minibatches_iter(
                            self.data['X_dev'],
                            self.data['Y_dev'],
                            masks=self.data['mask_dev'],
                            lexicons=self.lexicons['lexicons_dev'],
                            char_inputs=self.data['C_dev'],
                            batch_size=self.batch_size):
                        inputs, targets, masks, char_inputs, lexicons = batch
                        accuracy_valid.append(
                            self.model.eval_fn(inputs, targets, masks,
                                               char_inputs, lexicons))
                    # Truncate to the true dev sample count (batches may be
                    # padded) before averaging.
                    this_validation_accuracy = np.concatenate(accuracy_valid)[
                        0:n_dev_samples].sum() / float(n_dev_samples)

                    # New best dev accuracy: report, then score the test set.
                    if this_validation_accuracy > best_validation_accuracy:
                        print("\nTrain loss, " + str(
                            (train_err / self.valid_freq)) +
                              ", validation accuracy: " +
                              str(this_validation_accuracy * 100) + "%")
                        best_validation_accuracy = this_validation_accuracy
                        preds_test = []
                        accuracy_test = []
                        for batch in minibatches_iter(
                                self.data['X_test'],
                                self.data['Y_test'],
                                masks=self.data['mask_test'],
                                char_inputs=self.data['C_test'],
                                lexicons=self.lexicons['lexicons_test'],
                                batch_size=self.batch_size):
                            inputs, targets, masks, char_inputs, lexicons = batch
                            _, preds = self.model.test_fn(
                                inputs, targets, masks, char_inputs, lexicons)
                            preds_test.append(preds)
                            accuracy_test.append(
                                self.model.eval_fn(inputs, targets, masks,
                                                   char_inputs, lexicons))
                        this_test_accuracy = np.concatenate(accuracy_test)[
                            0:n_test_samples].sum() / float(n_test_samples)
                        # print "F1-score: " + str(compute_f1_score(self.data["Y_test"], preds_test, self.data['label_alphabet']) * 100)
                        print("Test accuracy: " +
                              str(this_test_accuracy * 100) + "%")
                        # Checkpoint only when test accuracy improves.
                        if best_acc < this_test_accuracy:
                            best_acc = this_test_accuracy
                            write_model_data(self.model.network,
                                             self.model_path + '/best_model')

                    # Reset the loss accumulator for the next window.
                    train_err = 0
                train_batches += 1

            train_bar.finish()

            # stop if dev acc decrease 3 time straightly.
            # NOTE(review): stop_count is never incremented anywhere in this
            # method, so this early-stop branch appears unreachable — confirm
            # whether the increment was lost or early stopping is intended.
            if stop_count == patience:
                break

            # re-compile a function with new learning rate for training
            # (adadelta adapts its own step size, so no decay is applied).
            if self.update_algo != 'adadelta':
                lr = self.learning_rate / (1.0 + epoch * self.decay_rate)
                updates = utils.create_updates(self.model.loss_train,
                                               self.model.params,
                                               self.update_algo,
                                               lr,
                                               momentum=self.momentum)
                self.model.train_fn = theano.function(
                    [
                        self.model.input_var, self.model.target_var,
                        self.model.mask_var, self.model.char_input_var,
                        self.model.lex_var
                    ],
                    outputs=self.model.loss_train,
                    updates=updates,
                    allow_input_downcast=True)

            print("Epoch " + str(epoch) + " finished.")
        print("The final best acc: " + str(best_acc * 100) + "%")

        # Optionally append the best accuracy to the cross-validation log.
        if self.output_predict:
            f = codecs.open('./results/10-fold.txt', 'a+', 'utf-8')
            f.write(str(best_acc * 100) + '\n')
            f.close()