def logistic_regression(train_data, train_labels, test_data, test_labels):
    """Train a OneVersusRest logistic regression, print metrics, save predictions.

    The model is trained on ``train_data`` / ``train_labels``. Metrics are
    computed on the first ``len(test_labels)`` rows of ``test_data``; after
    that every row of ``test_data`` is predicted and the result is passed to
    ``save_predictions`` under this function's name.
    """
    print(f'{LogisticRegression.__name__}:')

    # Input width comes from the training data's second dimension.
    base_classifier = LogisticRegression(train_data.shape[1], eta=0.001, epochs=50)
    classifier = OneVersusRest(base_classifier)
    classifier.train(train_data, train_labels)

    # Score only on the leading rows that test_labels covers.
    labelled_count = len(test_labels)
    labelled_pred = classifier.predict(test_data[:labelled_count])

    accuracy_pct = 100 * accuracy(labelled_pred, test_labels)
    print('\nTest Accuracy: {:.02f}%\n'.format(accuracy_pct))

    # The matrix is computed here but printed after the other metrics;
    # the second returned value is not needed.
    mat, _ = confusion_matrix(labelled_pred, test_labels)

    metric_reports = (
        ('Precision:\n{}\n', precision),
        ('Recall:\n{}\n', recall),
        ('F1:\n{}\n', f1_score),
    )
    for template, metric in metric_reports:
        print(template.format(np.round(metric(labelled_pred, test_labels), 2)))

    print('Confusion Matrix:')
    print(mat)

    # Predict the whole of test_data and persist the result.
    print('Predicting 10k samples...')
    full_pred = classifier.predict(test_data)
    save_predictions(logistic_regression.__name__, full_pred)
    print('Saved 10k predictions.\n')
def linear_svm(train_data, train_labels, test_data, test_labels):
    """Train a OneVersusRest linear SVM, print metrics, save predictions.

    The model is trained on ``train_data`` / ``train_labels``. Metrics are
    computed on the first ``len(test_labels)`` rows of ``test_data``; after
    that every row of ``test_data`` is predicted and the result is passed to
    ``save_predictions`` under this function's name.
    """
    print(f'{LinearSVM.__name__}:')

    # NOTE(review): features=180 is fixed here rather than read from
    # train_data -- confirm it matches the data actually passed in.
    base_classifier = LinearSVM(alpha=0.01, features=180)
    classifier = OneVersusRest(base_classifier)
    classifier.train(train_data, train_labels)

    # Score only on the leading rows that test_labels covers.
    labelled_count = len(test_labels)
    labelled_pred = classifier.predict(test_data[:labelled_count])

    accuracy_pct = 100 * accuracy(labelled_pred, test_labels)
    print('\nTest Accuracy: {:.02f}%\n'.format(accuracy_pct))

    # The matrix is computed here but printed after the other metrics;
    # the second returned value is not needed.
    mat, _ = confusion_matrix(labelled_pred, test_labels)

    metric_reports = (
        ('Precision:\n{}\n', precision),
        ('Recall:\n{}\n', recall),
        ('F1:\n{}\n', f1_score),
    )
    for template, metric in metric_reports:
        print(template.format(np.round(metric(labelled_pred, test_labels), 2)))

    print('Confusion Matrix:')
    print(mat)

    # Predict the whole of test_data and persist the result.
    print('Predicting 10k samples...')
    full_pred = classifier.predict(test_data)
    save_predictions(linear_svm.__name__, full_pred)
    print('Saved 10k predictions.\n')
def nearest_neighbour(train_data, train_labels, test_data, test_labels):
    """Train a NearestNeighbour model, print metrics, save predictions.

    The model is trained on ``train_data`` / ``train_labels``. Metrics are
    computed on the first ``len(test_labels)`` rows of ``test_data``; after
    that every row of ``test_data`` is predicted and the result is passed to
    ``save_predictions`` under this function's name.
    """
    print(f'{NearestNeighbour.__name__}:')

    # First positional argument is 5 (presumably the neighbour count --
    # confirm against NearestNeighbour); distance function is `manhattan`.
    classifier = NearestNeighbour(5, dist=manhattan)
    classifier.train(train_data, train_labels)

    # Score only on the leading rows that test_labels covers.
    labelled_count = len(test_labels)
    labelled_pred = classifier.predict(test_data[:labelled_count])

    accuracy_pct = 100 * accuracy(labelled_pred, test_labels)
    print('\nTest Accuracy: {:.02f}%\n'.format(accuracy_pct))

    # The matrix is computed here but printed after the other metrics;
    # the second returned value is not needed.
    mat, _ = confusion_matrix(labelled_pred, test_labels)

    metric_reports = (
        ('Precision:\n{}\n', precision),
        ('Recall:\n{}\n', recall),
        ('F1:\n{}\n', f1_score),
    )
    for template, metric in metric_reports:
        print(template.format(np.round(metric(labelled_pred, test_labels), 2)))

    print('Confusion Matrix:')
    print(mat)

    # Predict the whole of test_data and persist the result.
    print('Predicting 10k samples...')
    full_pred = classifier.predict(test_data)
    save_predictions(nearest_neighbour.__name__, full_pred)
    print('Saved 10k predictions.\n')
Exemplo n.º 4
0
def epoch_pass(batch, model, loss_fn, device):
    """
    Run the model on one batch and score its output.

    :param batch: indexable by the keys "input_ids", "attention_mask" and
        "encoded_genres"; each entry is moved to ``device`` with ``.to``
    :param model: callable invoked as ``model(input_ids, attention_mask)``
    :param loss_fn: callable invoked as ``loss_fn(logits, targets)``
    :param str device: passed to ``.to`` and to ``binary_labeling``
    :return: tuple ``(confusion_matrix_result, loss)``
    """
    token_ids = batch["input_ids"].to(device)
    mask = batch["attention_mask"].to(device)
    expected = batch["encoded_genres"].to(device)

    logits = model(token_ids, mask)

    # Logits are turned into labels with a fixed 0.5 threshold for the
    # confusion matrix; the loss is computed on the raw logits.
    labels = binary_labeling(logits, threshold=0.5, device=device)

    matrix = confusion_matrix(labels, expected, is_torch=True)
    loss = loss_fn(logits, expected)
    return matrix, loss
Exemplo n.º 5
0
def main():
    """Evaluate the pre-trained taggers and write competition predictions.

    Steps, in order:

    1. Seed NumPy's RNG with the module-level ``seed`` and load the datasets.
    2. Load the two trained models.
    3. Predict on ``test1`` with ``model1`` and print the overall accuracy,
       the first ten entries of the per-tag accuracy mapping, and the
       matching sub-block of the confusion matrix.
    4. Predict on both competition sets and write them with ``save_wtag``.
    5. Best effort: if tagged competition files can be loaded, print each
       model's accuracy against them. A failure here is reported and
       skipped; it does not abort the run.
    """
    np.random.seed(seed)
    train1_dataset, test1_dataset, comp1_dataset, train2_dataset, comp2_dataset = load_datasets()

    # # retrain models
    # feature_vector1, feature_vector2 = load_feature_vectors(train1_dataset, train2_dataset)
    # model1 = retrain_model1(train1_dataset, feature_vector1)
    # model2 = retrain_model2(train2_dataset, feature_vector2)

    # load models
    model1, model2 = load_trained_models(train1_dataset, train2_dataset)

    test1_pred_tags, test1_true_tags = model1.predict(test1_dataset.sentences, beam=1, tqdm_bar=False)
    test1_accuracy = model1.score_func(test1_pred_tags, test1_true_tags)
    test1_confusion_matrix, test1_tags_accuracy = metrics.confusion_matrix(train1_dataset.tags, test1_pred_tags, test1_true_tags)

    # First ten tags in the mapping's iteration order (presumably sorted
    # worst-first by metrics.confusion_matrix -- confirm there).
    worst10_tags = list(test1_tags_accuracy.keys())[:10]
    worst10_test1_confusion_matrix = test1_confusion_matrix.loc[worst10_tags, worst10_tags]
    worst10_test1_tags_accuracy = list(test1_tags_accuracy.items())[:10]
    print('test1_accuracy:', test1_accuracy)
    print('worst10_test1_tags_accuracy:', worst10_test1_tags_accuracy)
    print('worst10_test1_confusion_matrix:\n', worst10_test1_confusion_matrix)

    # predict() returns a pair; only the predicted tags (index 0) are used
    # for the competition sets.
    comp1_pred_tags = model1.predict(comp1_dataset.sentences, beam=5, tqdm_bar=False)[0]
    save_wtag(comp1_dataset, comp1_pred_tags, 1)

    comp2_pred_tags = model2.predict(comp2_dataset.sentences, beam=1, tqdm_bar=False)[0]
    save_wtag(comp2_dataset, comp2_pred_tags, 2)

    # insert path to comp1 and comp2 true tagged files (in a .wtag format) to perform accuracy evaluation
    comp1_tagged_path = 'comp1_tagged.wtag'
    comp2_tagged_path = 'comp2_tagged.wtag'

    evaluations = (
        (1, model1, comp1_pred_tags, comp1_tagged_path),
        (2, model2, comp2_pred_tags, comp2_tagged_path),
    )
    for index, model, pred_tags, tagged_path in evaluations:
        # Evaluation is optional: the tagged files may not exist. Catch
        # Exception (not a bare except) so KeyboardInterrupt/SystemExit
        # still propagate, and say why the step was skipped.
        try:
            true_dataset = preprocess.Dataset(tagged_path)
            true_tags = [sentence[1] for sentence in true_dataset.sentences]
            comp_accuracy = model.score_func(pred_tags, true_tags)
        except Exception as err:
            print(f'Skipping comp{index} accuracy evaluation ({tagged_path}): {err!r}')
        else:
            print(f'comp{index}_accuracy={comp_accuracy}')
def neural_net(train_data, train_labels, test_data, test_labels):
    """Train a four-layer NeuralNetwork, print metrics, save predictions.

    The model is trained on ``train_data`` / ``train_labels``. Metrics are
    computed on the first ``len(test_labels)`` rows of ``test_data``; after
    that every row of ``test_data`` is predicted and the result is passed to
    ``save_predictions`` under this function's name.
    """
    print(f'{NeuralNetwork.__name__}:')

    # Layer sizes 784 -> 100 -> 20 -> 10; tanh everywhere except the
    # sigmoid on the last layer.
    layers = [
        FlatDenseLayer((784, ), activation=tanh),
        FlatDenseLayer((100, ), activation=tanh),
        FlatDenseLayer((20, ), activation=tanh),
        FlatDenseLayer((10, ), activation=sigmoid),
    ]
    network = NeuralNetwork(layers, eta=0.01, batch_size=64, epochs=250)
    network.train(train_data, train_labels)

    # predict() returns a pair; only the second element (the predictions)
    # is used. Score only on the leading rows that test_labels covers.
    labelled_count = len(test_labels)
    _, labelled_pred = network.predict(test_data[:labelled_count])

    accuracy_pct = 100 * accuracy(labelled_pred, test_labels)
    print('\nTest Accuracy: {:.02f}%\n'.format(accuracy_pct))

    # The matrix is computed here but printed after the other metrics;
    # the second returned value is not needed.
    mat, _ = confusion_matrix(labelled_pred, test_labels)

    metric_reports = (
        ('Precision:\n{}\n', precision),
        ('Recall:\n{}\n', recall),
        ('F1:\n{}\n', f1_score),
    )
    for template, metric in metric_reports:
        print(template.format(np.round(metric(labelled_pred, test_labels), 2)))

    print('Confusion Matrix:')
    print(mat)

    # Predict the whole of test_data, echo the prediction count, and
    # persist the result.
    print('Predicting 10k samples...')
    _, full_pred = network.predict(test_data)
    print(len(full_pred))
    save_predictions(neural_net.__name__, full_pred)
    print('Saved 10k predictions.\n')
Exemplo n.º 7
0
    column = extra%32
    base+= (row+column*32)
    predictions[base] = preds[i]




# Script section: report error metrics for the predicted character labels
# before any error correction is applied.
print("Done predicting character labels using CNN.")

# Compute character error rate and word error rate before error correction
print("PRE-ERROR CORRECTION")
print("Computing character error rate (CER)...")
cer = char_err_rate(predictions, kjv)


# Confusion matrix of the predictions against `kjv`, printed and then drawn
# as a heatmap. Both axes are labelled with the sorted keys of
# kjv.char_to_int.
# NOTE(review): assumes the matrix rows/columns follow that same sorted
# order -- confirm against confusion_matrix.
CM = confusion_matrix(predictions, kjv)
print(CM)
ax = sns.heatmap(CM,
                 cmap="jet",
                 xticklabels=sorted(kjv.char_to_int.keys()),
                 yticklabels=sorted(kjv.char_to_int.keys()))
ax.set_title("Confusion Matrix")
plt.xlabel("Character 2")
plt.ylabel("Character 1")
plt.show()


# The CER computed above is only printed here, after the plot has been shown.
# char_err_rate's result is scaled by 100 for display as a percentage.
print("Character error rate (CER): %.3f%%" % (cer * 100.0))

print("Computing word error rate (WER)...")
wer = word_err_rate(predictions, kjv)