def testModel(self):
    """Evaluate the model on the test split and report the results.

    Calls ``self.initModel()``, runs ``eval_fn`` and ``test_fn`` on every
    test minibatch, prints the overall test accuracy, and finally passes
    the gold labels and the collected per-batch predictions to
    ``compute_f1_score``.
    """
    # The second dimension is not needed here; unpacking still requires
    # X_test to be two-dimensional, as the original code did.
    n_test_samples, _ = self.data['X_test'].shape
    accuracy_test = []
    preds_test = []

    self.initModel()

    test_bar = ProgressBar('Testing', max=len(self.data['X_test']))
    for batch in minibatches_iter(
            self.data['X_test'],
            self.data['Y_test'],
            masks=self.data['mask_test'],
            char_inputs=self.data['C_test'],
            lexicons=self.lexicons['lexicons_test'],
            batch_size=self.batch_size):
        inputs, targets, masks, char_inputs, lexicons = batch
        test_bar.next(len(inputs))
        # Pass char_inputs as well: trainingModel calls these same
        # functions with five arguments, and train_fn is compiled over
        # five input variables (including the character input).
        corrects = self.model.eval_fn(inputs, targets, masks, char_inputs,
                                      lexicons)
        _, preds = self.model.test_fn(inputs, targets, masks, char_inputs,
                                      lexicons)
        preds_test.append(preds)
        accuracy_test.append(corrects)
    test_bar.finish()

    # Keep only the first n_test_samples entries before averaging.
    # NOTE(review): presumably this trims padding from the last batch --
    # confirm against minibatches_iter.
    this_test_accuracy = np.concatenate(
        accuracy_test)[0:n_test_samples].sum() / float(n_test_samples)
    print("Test accuracy: " + str(this_test_accuracy * 100) + "%")
    compute_f1_score(self.data['Y_test'], preds_test)
def trainingModel(self):
    """Train the model, validating periodically and keeping the best one.

    For each epoch the training split is iterated in shuffled minibatches.
    Whenever the running batch counter is a positive multiple of
    ``self.valid_freq`` the dev split is evaluated; if dev accuracy
    improves on the best seen so far, the test split is evaluated too and,
    when test accuracy is a new best, the network is saved with
    ``write_model_data`` under ``self.model_path + '/best_model'``.

    Early stopping: ``stop_count`` counts consecutive validations that did
    not improve dev accuracy; training stops at the end of an epoch once
    it reaches ``self.patience``.

    Unless ``self.update_algo`` is ``'adadelta'``, the learning rate is
    decayed after each epoch and ``self.model.train_fn`` is re-compiled
    with the new rate.

    When ``self.output_predict`` is truthy, the best test accuracy is
    appended to ``./results/10-fold.txt``.
    """
    self.initModel()

    best_acc = 0
    best_validation_accuracy = 0
    stop_count = 0
    lr = self.learning_rate
    patience = self.patience
    # Only the sample counts are used; unpacking still requires 2-D arrays.
    n_dev_samples, _ = self.data['X_dev'].shape
    n_test_samples, _ = self.data['X_test'].shape

    for epoch in range(1, self.num_epochs + 1):
        # Single-argument print call: same output under Python 2 and 3.
        print('Epoch %d (learning rate=%.4f, decay rate=%.4f): ' % (
            epoch, lr, self.decay_rate))
        train_err = 0.0
        train_batches = 0
        train_bar = ProgressBar('Training', max=len(self.data['X_train']))
        for batch in minibatches_iter(
                self.data['X_train'],
                self.data['Y_train'],
                masks=self.data['mask_train'],
                char_inputs=self.data['C_train'],
                lexicons=self.lexicons['lexicons_train'],
                batch_size=self.batch_size,
                shuffle=True):
            inputs, targets, masks, char_inputs, lexicons = batch
            train_err += self.model.train_fn(inputs, targets, masks,
                                             char_inputs, lexicons)
            train_bar.next(len(inputs))

            if train_batches > 0 and train_batches % self.valid_freq == 0:
                this_validation_accuracy, _ = self._evaluateSplit(
                    'dev', n_dev_samples)

                if this_validation_accuracy > best_validation_accuracy:
                    # Improvement: reset the early-stopping counter.
                    stop_count = 0
                    print("\nTrain loss, " + str(
                        (train_err / self.valid_freq)) +
                        ", validation accuracy: " +
                        str(this_validation_accuracy * 100) + "%")
                    best_validation_accuracy = this_validation_accuracy

                    # Predictions are collected but not scored here;
                    # F1 reporting is currently disabled in this method.
                    this_test_accuracy, _ = self._evaluateSplit(
                        'test', n_test_samples, collect_preds=True)
                    print("Test accuracy: " +
                          str(this_test_accuracy * 100) + "%")
                    if best_acc < this_test_accuracy:
                        best_acc = this_test_accuracy
                        write_model_data(self.model.network,
                                         self.model_path + '/best_model')
                else:
                    # No improvement on dev: one step closer to stopping.
                    stop_count += 1

                # Restart the running loss for the next validation window.
                train_err = 0.0
            train_batches += 1

        train_bar.finish()

        # Stop once dev accuracy has failed to improve `patience` times in
        # a row. `>=` because several validations can occur per epoch.
        if stop_count >= patience:
            break

        # Re-compile the training function with the decayed learning rate.
        if self.update_algo != 'adadelta':
            lr = self.learning_rate / (1.0 + epoch * self.decay_rate)
            updates = utils.create_updates(
                self.model.loss_train,
                self.model.params,
                self.update_algo,
                lr,
                momentum=self.momentum)
            self.model.train_fn = theano.function(
                [
                    self.model.input_var, self.model.target_var,
                    self.model.mask_var, self.model.char_input_var,
                    self.model.lex_var
                ],
                outputs=self.model.loss_train,
                updates=updates,
                allow_input_downcast=True)

        print("Epoch " + str(epoch) + " finished.")

    print("The final best acc: " + str(best_acc * 100) + "%")
    if self.output_predict:
        # Context manager closes the file even if the write raises.
        with codecs.open('./results/10-fold.txt', 'a+', 'utf-8') as f:
            f.write(str(best_acc * 100) + '\n')


def _evaluateSplit(self, split, n_samples, collect_preds=False):
    """Compute accuracy of the current model on one data split.

    Args:
        split: Suffix of the data keys to use, e.g. ``'dev'`` or
            ``'test'`` (reads ``X_<split>``, ``Y_<split>``,
            ``mask_<split>``, ``C_<split>`` and ``lexicons_<split>``).
        n_samples: Number of leading entries of the concatenated
            ``eval_fn`` outputs to keep; also the divisor.
        collect_preds: When true, also call ``test_fn`` on every batch
            and gather its second output.

    Returns:
        ``(accuracy, preds)`` where ``preds`` is the list of per-batch
        predictions (empty unless ``collect_preds`` is true).
    """
    corrects = []
    preds = []
    for batch in minibatches_iter(
            self.data['X_' + split],
            self.data['Y_' + split],
            masks=self.data['mask_' + split],
            char_inputs=self.data['C_' + split],
            lexicons=self.lexicons['lexicons_' + split],
            batch_size=self.batch_size):
        inputs, targets, masks, char_inputs, lexicons = batch
        if collect_preds:
            _, batch_preds = self.model.test_fn(inputs, targets, masks,
                                                char_inputs, lexicons)
            preds.append(batch_preds)
        corrects.append(
            self.model.eval_fn(inputs, targets, masks, char_inputs,
                               lexicons))
    accuracy = np.concatenate(corrects)[0:n_samples].sum() / float(n_samples)
    return accuracy, preds