Exemplo n.º 1
0
Arquivo: main.py Projeto: yshen4/pymal
def problem5(T=10, L=0.2):
    """Fit three linear classifiers on the toy data set and plot each result.

    Loads ``toy_data.tsv`` through ``utils.load_toy_data``, trains the
    perceptron, average perceptron and Pegasos learners from ``p1`` on it,
    and passes every fitted parameter set to ``plot_toy_results``.

    Args:
        T: Forwarded as the third argument of all three learners
            (presumably the number of passes over the data -- confirm in p1).
        L: Forwarded as the last argument of ``p1.pegasos`` only
            (presumably the regularisation strength -- confirm in p1).
    """
    toy_features, toy_labels = utils.load_toy_data('toy_data.tsv')

    # List items are evaluated in order, so all three learners finish
    # training before the first plot call is made.
    fitted = [
        ('Perceptron', p1.perceptron(toy_features, toy_labels, T)),
        ('Average Perceptron', p1.average_perceptron(toy_features, toy_labels, T)),
        ('Pegasos', p1.pegasos(toy_features, toy_labels, T, L)),
    ]

    for algo_name, thetas in fitted:
        plot_toy_results(algo_name, thetas, toy_features, toy_labels)
Exemplo n.º 2
0
    def test_algorithm_compare(self):
        """Train the Problem 5 learners on the toy data and report each one.

        For the perceptron, average perceptron and Pegasos learners from
        ``p1`` this prints the learned ``theta`` and ``theta_0`` and then
        plots the result with ``utils.plot_toy_data``. No assertions are
        made; the method only produces output.
        """
        toy_features, toy_labels = utils.load_toy_data('toy_data.tsv')

        T = 100
        L = 0.2

        def plot_toy_results(algo_name, thetas):
            """Print both entries of ``thetas`` and plot them over the toy data."""
            print('theta for', algo_name, 'is', ', '.join(map(str, thetas[0])))
            print('theta_0 for', algo_name, 'is', str(thetas[1]))
            utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)

        # List items are evaluated in order, so every learner is trained
        # before the first result is printed or plotted.
        trained = [
            ('Perceptron', p1.perceptron(toy_features, toy_labels, T)),
            ('Average Perceptron', p1.average_perceptron(toy_features, toy_labels, T)),
            ('Pegasos', p1.pegasos(toy_features, toy_labels, T, L)),
        ]

        for algo_name, thetas in trained:
            plot_toy_results(algo_name, thetas)
Exemplo n.º 3
0
# Build the word dictionary from the training texts only.
dictionary = p1.bag_of_words(train_texts)

# Encode the train, validation and test texts against that one dictionary,
# in that order.
train_bow_features, val_bow_features, test_bow_features = [
    p1.extract_bow_feature_vectors(split_texts, dictionary)
    for split_texts in (train_texts, val_texts, test_texts)
]
#
#-------------------------------------------------------------------------------
# Section 1.7
#-------------------------------------------------------------------------------
# Load the toy data set; the raw loader result stays available as toy_data.
toy_data = utils.load_toy_data('toy_data.tsv')
toy_features, toy_labels = toy_data

# T goes to every learner below; L goes only to the passive-aggressive one.
T, L = 5, 10

thetas_perceptron = p1.perceptron(
    toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(
    toy_features, toy_labels, T)
thetas_avg_pa = p1.average_passive_aggressive(
    toy_features, toy_labels, T, L)


def plot_toy_results(algo_name, thetas):
    """Plot ``thetas`` against the module-level toy features and labels.

    ``algo_name`` is passed through as the first argument of
    ``utils.plot_toy_data``.
    """
    plot_args = (algo_name, toy_features, toy_labels, thetas)
    utils.plot_toy_data(*plot_args)


# Plot every fitted model, in the same order the models were trained.
for algo_name, thetas in (
    ('Perceptron', thetas_perceptron),
    ('Average Perceptron', thetas_avg_perceptron),
    ('Average Passive-Aggressive', thetas_avg_pa),
):
    plot_toy_results(algo_name, thetas)
#-------------------------------------------------------------------------------
#
#
#-------------------------------------------------------------------------------
# Section 2.9.b
# Phase 1: final features for the validation and test texts, built with the
# full `dictionary`.
# NOTE(review): train_final_features is read below but is not assigned
# anywhere in the visible part of this script before that point -- confirm it
# is defined earlier in the file.
val_final_features   = p1.extract_final_features(val_texts, dictionary)
test_final_features  = p1.extract_final_features(test_texts, dictionary)

# Disabled tuning sweep over T for the average perceptron, kept for reference.
#data = (train_final_features, train_labels, val_final_features, val_labels)
#
## values of T and lambda to try
#Ts = [1, 5, 10, 15, 25, 50, 100]
#
#avg_pct_tune_results = utils.tune_avg_perceptron(Ts, *data)
#utils.plot_tune_results('Avg Perceptron', 'T', Ts, *avg_pct_tune_results)

# Print the average-perceptron accuracy result with T=15: first fitting on the
# training split and scoring on validation, ...
print(p1.average_perceptron_accuracy(train_final_features,val_final_features,train_labels,val_labels,T=15))
# ... then with the validation split in the "train" argument positions and the
# test split in the "validation" positions.
# NOTE(review): this fits on validation data rather than training data --
# confirm that is intended and not a slip for train_final_features/train_labels.
print(p1.average_perceptron_accuracy(val_final_features,test_final_features,val_labels,test_labels,T=15))

## Improve 2
# Phase 2: take the first element of the average-perceptron result trained on
# the bag-of-words features (T=15) and use it to derive a reduced dictionary.
# The meaning of the 0.25 argument is defined by p1.bag_of_elite_words.
theta = p1.average_perceptron(train_bow_features, train_labels, T=15)[0]
new_dictionary = p1.bag_of_elite_words(dictionary, theta, 0.25)

# Rebuild all three feature sets against the reduced dictionary; this rebinds
# the phase-1 val/test feature variables.
train_final_features = p1.extract_final_features(train_texts, new_dictionary)
val_final_features   = p1.extract_final_features(val_texts, new_dictionary)
test_final_features  = p1.extract_final_features(test_texts, new_dictionary)

# Disabled tuning sweep over T for the rebuilt features, kept for reference.
#data = (train_final_features, train_labels, val_final_features, val_labels)
#
## values of T and lambda to try
#Ts = [1, 5, 10, 15, 25, 50, 100]
#
#avg_pct_tune_results = utils.tune_avg_perceptron(Ts, *data)
#utils.plot_tune_results('Avg Perceptron', 'T', Ts, *avg_pct_tune_results)

# Print the train -> validation accuracy result for the rebuilt features (T=100).
print(p1.average_perceptron_accuracy(train_final_features,val_final_features,train_labels,val_labels,T=100))
Exemplo n.º 5
0
import project1 as p1
from project1 import perceptron, average_perceptron, pegasos
import utils
import numpy as np
import numpy.testing as npt
import re

# Load the toy data set; the raw loader result stays available as toy_data.
toy_data = utils.load_toy_data('toy_data.tsv')
toy_features, toy_labels = toy_data
T, L = 10, 0.2

# Train, print and plot one learner at a time. Only Pegasos receives the
# extra L argument.
for title, train, extra_args in (
    ("Perceptron", perceptron, ()),
    ("Average Perceptron", average_perceptron, ()),
    ("Pegasos", pegasos, (L,)),
):
    thetas = train(toy_features, toy_labels, T, *extra_args)
    print(thetas)
    utils.plot_toy_data(title, toy_features, toy_labels, thetas)
Exemplo n.º 6
0
# The word dictionary is derived from the training texts alone.
dictionary = p1.bag_of_words(train_texts)

# Encode each split (train, validation, test) with the same dictionary.
train_bow_features, val_bow_features, test_bow_features = [
    p1.extract_bow_feature_vectors(texts, dictionary)
    for texts in (train_texts, val_texts, test_texts)
]
#
#-------------------------------------------------------------------------------
# Section 1.7
#-------------------------------------------------------------------------------
# Keep the raw loader result as toy_data and unpack its two parts.
toy_data = utils.load_toy_data('toy_data.tsv')
toy_features, toy_labels = toy_data

# T is passed to all three learners; L only to the passive-aggressive one.
T, L = 5, 10

thetas_perceptron = p1.perceptron(
    toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(
    toy_features, toy_labels, T)
thetas_avg_pa = p1.average_passive_aggressive(
    toy_features, toy_labels, T, L)

def plot_toy_results(algo_name, thetas):
    """Hand ``thetas`` and the module-level toy data to ``utils.plot_toy_data``.

    ``algo_name`` is forwarded unchanged as the first argument.
    """
    call_args = (algo_name, toy_features, toy_labels, thetas)
    utils.plot_toy_data(*call_args)

# One plot per fitted model, in training order.
for algo_name, thetas in (
    ('Perceptron', thetas_perceptron),
    ('Average Perceptron', thetas_avg_perceptron),
    ('Average Passive-Aggressive', thetas_avg_pa),
):
    plot_toy_results(algo_name, thetas)
#-------------------------------------------------------------------------------
#
#
#-------------------------------------------------------------------------------
# Section 2.9.b
#-------------------------------------------------------------------------------
# T = 5
Exemplo n.º 7
0
# Fit the average perceptron on the training bag-of-words features (T=25),
# evaluate against the test split, and print whatever the helper returns.
x = p1.average_perceptron_accuracy(
    train_bow_features,
    test_bow_features,
    train_labels,
    test_labels,
    T=25,
)
print(x)


#-------------------------------------------------------------------------------
# pass
#-------------------------------------------------------------------------------
#
#-------------------------------------------------------------------------------
# Section 2.11b
#
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------
# Learn the weights on the *training* split, not the test split: the classifier
# being explained is the average perceptron (T=25) fitted on the training data,
# and fitting on test data would describe a different model. Index 0 keeps the
# weights only and drops the bias.
best_theta = p1.average_perceptron(train_bow_features, train_labels, T=25)[0]

# List the vocabulary ordered by its dictionary value (ties broken by word),
# presumably so that wordlist[i] names the feature weighted by best_theta[i].
wordlist = [
    word
    for word, _ in sorted(dictionary.items(), key=lambda item: (item[1], item[0]))
]

sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
print("Most Explanatory Word Features")
# Show only the first 80 entries of the ranking.
print(sorted_word_features[:80])
#-------------------------------------------------------------------------------
#
#-------------------------------------------------------------------------------
# Section 3.12
#
# After constructing a final feature representation, use code similar to that in
# sections 2.9b and 2.10 to assess its performance on the validation set.
# You may use your best classifier from before as a baseline.
# When you are satisfied with your features, evaluate their accuracy on the test
# set using the same procedure as in section 2.11a.
#-------------------------------------------------------------------------------
Exemplo n.º 8
0
Arquivo: main.py Projeto: wesenu/6.036
#-------------------------------------------------------------------------------
# dictionary = p1.bag_of_words(train_texts)
#
# train_final_features = p1.extract_final_features(train_texts, dictionary)
# val_final_features   = p1.extract_final_features(val_texts, dictionary)
# test_final_features  = p1.extract_final_features(test_texts, dictionary)
#-------------------------------------------------------------------------------
#
#-------------------------------------------------------------------------------
# Section 3.13
#
# Modify the code below to extract your best features from the submission data
# and then classify it using your most accurate classifier.
#-------------------------------------------------------------------------------
# Pull the raw review text out of every submission sample.
submit_texts = [
    review['text']
    for review in utils.load_data('reviews_submit.tsv')
]

# 1. Extract your preferred features from the train and submit data.
# NOTE(review): the dictionary is built from the submission texts alone --
# confirm that leaving train_texts out of the vocabulary is intended.
dictionary = p1.bag_of_words(submit_texts)
train_final_features, submit_final_features = [
    p1.extract_final_features(texts, dictionary)
    for texts in (train_texts, submit_texts)
]

# 2. Train your most accurate classifier (average perceptron, T=25).
final_thetas = p1.average_perceptron(
    train_final_features, train_labels, T=25)

# 3. Classify and write out the submit predictions.
submit_predictions = p1.classify(submit_final_features, *final_thetas)
utils.write_predictions('reviews_submit.tsv', submit_predictions)
#-------------------------------------------------------------------------------