def process_line(packed_line):
    """Compute the word-level error for one line of saved parameters.

    Args:
        packed_line: a 5-tuple ``(line, i, X_test, y_test, verbose)``.
            ``line`` is a whitespace-separated string whose first field is
            ignored and whose remaining fields are parsed as floats.
            ``i`` is only used as a label when printing. ``X_test`` and
            ``y_test`` are passed to the decoder and the scorer.
            ``verbose`` enables the progress print.
            (Presumably packed into one argument for a ``map``-style
            caller -- confirm against the call site.)

    Returns:
        ``1 - word_acc``, where ``word_acc`` is the first value returned by
        ``compute_word_char_accuracy_score``.
    """
    line, i, X_test, y_test, verbose = packed_line

    # Skip the first field; everything after it is the flat parameter vector.
    # The builtin ``float`` is used because the ``np.float`` alias was removed
    # in NumPy 1.24 (it was always just an alias for ``float``).
    params = np.array(line.split()[1:]).astype(float)

    w = matricize_W(params)
    t = matricize_Tij(params)
    predictions = decode_crf(X_test, w, t)

    # Only the word-level score is needed; the character-level one is dropped.
    word_acc, _ = compute_word_char_accuracy_score(predictions, y_test)

    if verbose:
        print(str(i) + ": ", word_acc)

    return 1. - word_acc
def save_word_error(result_file, output_file, X_test, y_test):
    """Evaluate every saved parameter line and write the word errors to a file.

    Each line of ``result_file`` is split on whitespace; the first field is
    ignored and the rest are parsed as a flat float parameter vector. The
    parameters are decoded against ``X_test`` and scored against ``y_test``,
    and ``1 - word_acc`` is written to ``output_file``, one value per line.

    Args:
        result_file: path of the text file to read parameter lines from.
        output_file: path of the text file to write the error values to
            (opened in ``'w'`` mode, so existing content is replaced).
        X_test: inputs passed to ``decode_crf``.
        y_test: targets passed to ``compute_word_char_accuracy_score``.
    """
    # Read everything up front so the input handle is closed before the
    # evaluation loop runs.
    with open(result_file, 'r') as infile:
        lines = infile.readlines()

    f_vals = []
    for i, line in enumerate(lines):
        print(i, ": ", end='')

        # The builtin ``float`` replaces the ``np.float`` alias, which was
        # removed in NumPy 1.24.
        params = np.array(line.split()[1:]).astype(float)

        w = matricize_W(params)
        t = matricize_Tij(params)
        predictions = decode_crf(X_test, w, t)

        # Only the word-level accuracy is recorded; the character-level
        # score is discarded.
        word_acc, _ = compute_word_char_accuracy_score(predictions, y_test)
        print(word_acc)

        f_vals.append(str(1 - word_acc) + "\n")

    # The output file is opened only after every line has been evaluated, so
    # a failure part-way through does not truncate an existing file.
    with open(output_file, 'w') as outfile:
        outfile.writelines(f_vals)
if sum(abs(parameters - previous_params)) <= self.epsilon or iteration == self.iterations: break return parameters if __name__ == '__main__': LAMBDA = 1e-2 X_train, y_train = prepare_dataset("train_sgd.txt") X_test, y_test = prepare_dataset("test_sgd.txt") filepath = "%s_%s.txt" % ('GIBBS', LAMBDA) callback = Callback(X_train, y_train, filepath, LAMBDA) gibbs = SamplingOptimizer(LAMBDA, callback_fn=callback.callback_fn_return_vals) opt_params = gibbs.train(X_train) W = matricize_W(opt_params) T = matricize_Tij(opt_params) y_preds = decode_crf(X_train, W, T) word_acc, char_acc = compute_word_char_accuracy_score(y_preds, y_train) print("Final train accuracy :", "Word =", word_acc, "Char =", char_acc) y_preds = decode_crf(X_test, W, T) word_acc, char_acc = compute_word_char_accuracy_score(y_preds, y_test) print("Final test accuracy :", "Word =", word_acc, "Char =", char_acc)
for lambd in LAMBDAS:
    # Per-lambda output path; it is handed to remove_file before training.
    filepath = FILENAME_FMT % (OPTIMIZATION_NAME, lambd)
    remove_file(filepath)

    # maintain a callback to measure loss and average gradient every epoch
    callback = Callback(X_train, y_train, filepath, lambd)

    # train using BFGS
    optimal_params = optimize(
        params, X_train, y_train, lambd, callback.callback_fn
    )

    # Unpack the flat parameter vector into the two arguments the decoder takes.
    w = matricize_W(optimal_params)
    t = matricize_Tij(optimal_params)

    # Report accuracies for the train split first, then the test split.
    for header, X_split, y_split in (
        ("Train accuracies", X_train, y_train),
        ("Test accuracies", X_test, y_test),
    ):
        y_pred = decode_crf(X_split, w, t)
        word_acc, char_acc = compute_word_char_accuracy_score(y_pred, y_split)
        print(header)
        print("Character accuracies :", char_acc)
        print("Word Accuracies :", word_acc)

    # Blank line between the reports of consecutive lambda values.
    print()