Пример #1
0
def process_line(packed_line):
    """Score one line of saved parameters and return its word error.

    Takes a single packed tuple (rather than separate arguments) so it can
    be handed to a one-argument mapper.

    Args:
        packed_line: A 5-tuple ``(line, i, X_test, y_test, verbose)``.
            ``line`` is a whitespace-separated string; its first token is
            skipped and the remaining tokens are parsed as floats to form
            the parameter vector. ``i`` is only used as a label when
            printing. ``X_test`` / ``y_test`` are passed to ``decode_crf``
            and ``compute_word_char_accuracy_score`` respectively.
            ``verbose`` enables printing of the word accuracy.

    Returns:
        ``1. - word_acc``, where ``word_acc`` is the first value returned
        by ``compute_word_char_accuracy_score``.
    """
    line, i, X_test, y_test, verbose = packed_line

    # The ``np.float`` alias was removed in NumPy 1.24; it was always just
    # the builtin ``float``, so the resulting dtype is unchanged.
    params = np.array(line.split()[1:]).astype(float)
    w = matricize_W(params)
    t = matricize_Tij(params)

    predictions = decode_crf(X_test, w, t)

    # Only the word-level accuracy is needed here.
    word_acc, _ = compute_word_char_accuracy_score(predictions, y_test)

    if verbose:
        print(str(i) + ": ", word_acc)

    return (1. - word_acc)
Пример #2
0
def save_word_error(result_file, output_file, X_test, y_test):
    """Compute the word error for every parameter line and write it out.

    Each line of ``result_file`` is split on whitespace; the first token is
    skipped and the remaining tokens are parsed as floats to form a
    parameter vector. The parameters are decoded against ``X_test`` and
    scored against ``y_test``. Progress (line index and word accuracy) is
    printed to stdout as each line is processed.

    Args:
        result_file: Path of the text file to read parameter lines from.
        output_file: Path of the text file to write; it receives one
            ``1 - word_acc`` value per input line, in input order.
        X_test: Test inputs passed to ``decode_crf``.
        y_test: Test targets passed to
            ``compute_word_char_accuracy_score``.
    """
    # Context managers guarantee the handles are closed even on error.
    with open(result_file, 'r') as results:
        lines = results.readlines()

    f_vals = []

    for i, line in enumerate(lines):
        split = line.split()
        print(i, ": ", end='')
        # The ``np.float`` alias was removed in NumPy 1.24; it was always
        # just the builtin ``float``, so the resulting dtype is unchanged.
        params = np.array(split[1:]).astype(float)
        w = matricize_W(params)
        t = matricize_Tij(params)

        predictions = decode_crf(X_test, w, t)

        # Only the word-level accuracy is recorded; char accuracy is unused.
        word_acc, _ = compute_word_char_accuracy_score(predictions, y_test)
        print(word_acc)
        f_vals.append(str(1 - word_acc) + "\n")

    with open(output_file, 'w') as output:
        output.writelines(f_vals)
            if sum(abs(parameters - previous_params)) <= self.epsilon or iteration == self.iterations:
                break

        return parameters


if __name__ == '__main__':
    # Script entry point: train with SamplingOptimizer on the training
    # split, then report word/char accuracy on both splits.

    LAMBDA = 1e-2

    X_train, y_train = prepare_dataset("train_sgd.txt")
    X_test, y_test = prepare_dataset("test_sgd.txt")

    # The same LAMBDA value is baked into the file name and passed to both
    # the callback and the optimizer.
    filepath = "%s_%s.txt" % ('GIBBS', LAMBDA)
    callback = Callback(X_train, y_train, filepath, LAMBDA)

    gibbs = SamplingOptimizer(
        LAMBDA,
        callback_fn=callback.callback_fn_return_vals,
    )
    opt_params = gibbs.train(X_train)

    # Reshape the learned parameter vector into the two matrices that
    # decode_crf expects.
    W, T = matricize_W(opt_params), matricize_Tij(opt_params)

    # Evaluate on train first, then test, with identical reporting.
    evaluations = (
        ("Final train accuracy :", X_train, y_train),
        ("Final test accuracy :", X_test, y_test),
    )
    for headline, X_eval, y_eval in evaluations:
        y_preds = decode_crf(X_eval, W, T)
        word_acc, char_acc = compute_word_char_accuracy_score(y_preds, y_eval)
        print(headline, "Word =", word_acc, "Char =", char_acc)
    # Repeat the full train / evaluate cycle for every value in LAMBDAS.
    for lambd in LAMBDAS:
        filepath = FILENAME_FMT % (OPTIMIZATION_NAME, lambd)
        remove_file(filepath)

        # maintain a callback to measure loss and average gradient every epoch
        callback = Callback(X_train, y_train, filepath, lambd)

        # train using BFGS
        optimal_params = optimize(
            params, X_train, y_train, lambd, callback.callback_fn)

        w, t = matricize_W(optimal_params), matricize_Tij(optimal_params)

        # Report accuracies on the training split first, then the test split.
        reports = (
            ("Train accuracies", X_train, y_train),
            ("Test accuracies", X_test, y_test),
        )
        for heading, X_eval, y_eval in reports:
            y_pred = decode_crf(X_eval, w, t)
            word_acc, char_acc = compute_word_char_accuracy_score(
                y_pred, y_eval)

            print(heading)
            print("Character accuracies :", char_acc)
            print("Word Accuracies :", word_acc)

        # Blank line separates the output of consecutive lambd values.
        print()