def testAlgorithm(algorithmToTest, name):
    """Train with ``algorithmToTest``, print its training accuracy, and time it.

    Reads the module-level ``feature_matrix`` and ``labels``; both must be
    defined before this function is called.

    Args:
        algorithmToTest: Callable invoked as
            ``algorithmToTest(feature_matrix, labels)``; it must return a
            two-item ``(theta, theta_0)`` result.
        name: Text placed at the start of the printed accuracy line.

    Returns:
        A ``(theta, theta_0, elapsed)`` tuple, where ``elapsed`` is the
        difference between timer readings taken immediately before and
        after the training call.
    """
    # time.clock() was removed in Python 3.8. Use its replacement when the
    # interpreter has one and fall back to the original clock otherwise, so
    # older interpreters keep measuring exactly what they measured before.
    timer = getattr(time, "perf_counter", None) or time.clock

    start = timer()
    average_theta, theta_0 = algorithmToTest(feature_matrix, labels)
    stop = timer()

    label_output = p1.perceptron_classify(feature_matrix, theta_0, average_theta)
    total = len(label_output)
    # labels is indexed by position (not zipped) so a labels vector shorter
    # than the predictions still fails loudly instead of being truncated.
    correct = sum(
        1 for i, predicted in enumerate(label_output) if predicted == labels[i]
    )
    # An empty prediction set would otherwise raise ZeroDivisionError.
    percentage_correct = 100.0 * correct / total if total else 0.0
    print(name + " gets " + str(percentage_correct) + "% correct ("
          + str(correct) + " out of " + str(total) + ").")
    return average_theta, theta_0, stop - start
def example(algorithm):
    """Train one named algorithm on the training tweets and print its accuracy.

    The model is evaluated on the same data it was trained on.

    Args:
        algorithm: ``'averager'``, ``'perceptron'`` or ``'passive'``; selects
            ``p1.averager``, ``p1.perceptron_algorithm`` or
            ``p1.passive_aggressive`` respectively.

    Raises:
        ValueError: If ``algorithm`` is not one of the supported names.
    """
    # Supported names mapped to the p1 attribute that performs the training.
    # Attribute names (not the functions themselves) are stored so that only
    # the selected trainer is ever looked up on p1.
    trainer_names = {
        'averager': 'averager',
        'perceptron': 'perceptron_algorithm',
        'passive': 'passive_aggressive',
    }
    # Reject unknown names up front. Previously they matched no branch and
    # crashed later with an unbound-variable error, after the data files had
    # already been loaded.
    if algorithm not in trainer_names:
        raise ValueError(
            "unknown algorithm %r; expected one of: %s"
            % (algorithm, ", ".join(sorted(trainer_names)))
        )

    dictionary = p1.extract_dictionary('train-tweet.txt')
    labels = p1.read_vector_file('train-answer.txt')
    feature_matrix = p1.extract_feature_vectors('train-tweet.txt', dictionary)

    train = getattr(p1, trainer_names[algorithm])
    average_theta, average_theta_0 = train(feature_matrix, labels)
    label_output = p1.perceptron_classify(feature_matrix, average_theta_0, average_theta)

    total = len(label_output)
    # labels is indexed by position (not zipped) so a labels vector shorter
    # than the predictions still fails loudly instead of being truncated.
    correct = sum(
        1 for i, predicted in enumerate(label_output) if predicted == labels[i]
    )
    # An empty prediction set would otherwise raise ZeroDivisionError.
    percentage_correct = 100.0 * correct / total if total else 0.0
    print(algorithm + " gets " + str(percentage_correct) + "% correct ("
          + str(correct) + " out of " + str(total) + ").")
for word in flist: for entry in dict: if word == entry[0]: feature_matrix[pos, dict.index(entry)] = math.floor(math.log(1+entry[1])) pos = pos + 1 f.close() return feature_matrix dictionary = extract_dictionary('train-tweet.txt') labels = p1.read_vector_file('train-answer.txt') feature_matrix = extract_feature_vectors('train-tweet.txt', dictionary) theta = p1.perceptron(feature_matrix, labels) theta_0 = theta[len(theta)-1] theta = np.delete(theta, len(theta)-1) label_output = p1.perceptron_classify(feature_matrix, theta_0, theta) correct = 0 for i in xrange(0, len(label_output)): if(label_output[i] == labels[i]): correct = correct + 1 percentage_correct = 100.0 * correct / len(label_output) print("Augmented perceptron gets " + str(percentage_correct) + "% correct (" + str(correct) + " out of " + str(len(label_output)) + ").") test = p1.cross_validation_perceptron(feature_matrix, labels) print test #returned 508
import numpy as np
import project1_code as p1

# Load the dictionary, labels and feature matrix for the training tweets.
dictionary = p1.extract_dictionary('train-tweet.txt')
labels = p1.read_vector_file('train-answer.txt')
feature_matrix = p1.extract_feature_vectors('train-tweet.txt', dictionary)

# Start from a zero vector with one entry per feature-matrix column.
(nsamples, nfeatures) = feature_matrix.shape
initial_theta = np.zeros([nfeatures])
# NOTE(review): the meaning of the third argument (0) is defined by p1.pa,
# which is not visible here -- confirm against that function.
theta_vector = p1.pa(feature_matrix, initial_theta, 0, labels)

# Classify the training data itself, passing 0 as the second argument.
label_output = p1.perceptron_classify(feature_matrix, 0, theta_vector)

# Tally predictions that match the known labels. enumerate() replaces the
# Python-2-only xrange(); labels is still indexed by position so a labels
# vector shorter than the predictions fails loudly.
total = len(label_output)
correct = sum(
    1 for i, predicted in enumerate(label_output) if predicted == labels[i]
)
# An empty prediction set would otherwise raise ZeroDivisionError.
percentage_correct = 100.0 * correct / total if total else 0.0
print("Passive Aggressive gets " + str(percentage_correct) + "% correct ("
      + str(correct) + " out of " + str(total) + ").")
import numpy as np
import project1_code as p1

# Load the dictionary, labels and feature matrix for the training tweets.
dictionary = p1.extract_dictionary('train-tweet.txt')
labels = p1.read_vector_file('train-answer.txt')
feature_matrix = p1.extract_feature_vectors('train-tweet.txt', dictionary)
# Not read by any statement visible here; kept because later code may use it.
feature_matrix_real = p1.extract_feature_vectors('sample_from_tweepy.txt', dictionary)

# --- Averager, classified with 0 passed in place of the offset ---
average_without_offset_theta = p1.averager(feature_matrix, labels)
# The last entry of the returned vector is split off into theta_0 and removed
# from the vector; this first theta_0 is not passed to the classifier below.
last_index = len(average_without_offset_theta) - 1
theta_0 = average_without_offset_theta[last_index]
average_without_offset_theta = np.delete(average_without_offset_theta, last_index)
label_output = p1.perceptron_classify(feature_matrix, 0, average_without_offset_theta)

# Tally predictions that match the known labels. enumerate() replaces the
# Python-2-only xrange(); labels is still indexed by position so a labels
# vector shorter than the predictions fails loudly.
total = len(label_output)
correct = sum(
    1 for i, predicted in enumerate(label_output) if predicted == labels[i]
)
# An empty prediction set would otherwise raise ZeroDivisionError.
percentage_correct = 100.0 * correct / total if total else 0.0
print("Averager without offset gets " + str(percentage_correct) + "% correct ("
      + str(correct) + " out of " + str(total) + ").")

# --- Averager, classified with the split-off last entry as the offset ---
average_theta = p1.averager(feature_matrix, labels)
last_index = len(average_theta) - 1
theta_0 = average_theta[last_index]
average_theta = np.delete(average_theta, last_index)
label_output = p1.perceptron_classify(feature_matrix, theta_0, average_theta)
# Reset the tally for the second evaluation.
correct = 0
import numpy as np
import project1_code as p1

######################
# INITIALIZE
######################
# Adjective set, dictionary and training labels are read from data files.
adjectives = p1.extract_set('adjectives.txt')
dictionary = p1.extract_dictionary('train-tweet.txt')
train_labels = p1.read_vector_file('train-answer.txt')

# Train and test features are built from the same dictionary and adjective
# set; their shapes are printed further down.
train_feature_matrix = p1.extract_feature_vectors_with_keywords(
    'train-tweet.txt', dictionary, adjectives)
test_feature_matrix = p1.extract_feature_vectors_with_keywords(
    'test-tweet.txt', dictionary, adjectives)

######################
# TRAIN
######################
# NOTE(review): the meaning of the third argument (1000) is defined by
# p1.train_passive_agressive, which is not visible here -- confirm.
pa_theta, pa_theta_0 = p1.train_passive_agressive(train_feature_matrix, train_labels, 1000)

######################
# CLASSIFY
######################
label_output = p1.perceptron_classify(test_feature_matrix, pa_theta_0, pa_theta)

# Single-argument print(...) is valid as both a Python 2 statement and a
# Python 3 call, and matches the print(...) form used elsewhere in the file.
print(train_feature_matrix.shape)
print(test_feature_matrix.shape)

# Write the predicted test labels to the output file.
p1.write_label_answer(label_output, 'tweet_labels.txt')
import numpy as np
import project1_code as p1

#################
# Load the dictionary, labels and feature matrix for the training tweets.
dictionary = p1.extract_dictionary('train-tweet.txt')
labels = p1.read_vector_file('train-answer.txt')
feature_matrix = p1.extract_feature_vectors('train-tweet.txt', dictionary)

# Using Averager with offset: the trainer's two-item result is unpacked into
# the weight vector and the offset, and both are passed to the classifier.
average_theta, average_theta_0 = p1.averager(feature_matrix, labels)
label_output = p1.perceptron_classify(feature_matrix, average_theta_0, average_theta)

# Tally predictions that match the known labels. enumerate() replaces the
# Python-2-only xrange(); labels is still indexed by position so a labels
# vector shorter than the predictions fails loudly.
total = len(label_output)
correct = sum(
    1 for i, predicted in enumerate(label_output) if predicted == labels[i]
)
# An empty prediction set would otherwise raise ZeroDivisionError.
percentage_correct = 100.0 * correct / total if total else 0.0
print("Averager gets " + str(percentage_correct) + "% correct ("
      + str(correct) + " out of " + str(total) + ").")


def example(algorithm):
    dictionary = p1.extract_dictionary('train-tweet.txt')
    labels = p1.read_vector_file('train-answer.txt')
    feature_matrix = p1.extract_feature_vectors('train-tweet.txt', dictionary)
    if algorithm== 'averager':
        average_theta,average_theta_0 = p1.averager(feature_matrix, labels)
    elif algorithm== 'perceptron':
        average_theta,average_theta_0 = p1.perceptron_algorithm(feature_matrix, labels)
import numpy as np
import project1_code as p1

# Training inputs: dictionary, labels and feature matrix from the training files.
dictionary = p1.extract_dictionary('train-tweet.txt')
labels = p1.read_vector_file('train-answer.txt')
feature_matrix = p1.extract_feature_vectors('train-tweet.txt', dictionary)

# One zero-initialised entry per feature-matrix column.
nsamples, nfeatures = feature_matrix.shape
initial_theta = np.zeros([nfeatures])
# NOTE(review): the meaning of the third argument (-1) is defined by
# p1.perceptron, which is not visible here -- confirm against that function.
theta_vector = p1.perceptron(feature_matrix, initial_theta, -1, labels)

# Build test features with the training dictionary, classify them with 0 as
# the second argument, and write the predicted labels to the output file.
test_matrix = p1.extract_feature_vectors('test-tweet.txt', dictionary)
label_output = p1.perceptron_classify(test_matrix, 0, theta_vector)
p1.write_label_answer(label_output, 'tweet_labels.txt')
import numpy as np
import project1_code as p1

# Load the dictionary, labels and feature matrix for the training tweets.
dictionary = p1.extract_dictionary('train-tweet.txt')
labels = p1.read_vector_file('train-answer.txt')
feature_matrix = p1.extract_feature_vectors('train-tweet.txt', dictionary)

# Train the averager and classify the training data itself, passing 0 as the
# second argument to the classifier.
average_theta = p1.averager(feature_matrix, labels)
label_output = p1.perceptron_classify(feature_matrix, 0, average_theta)

# Tally predictions that match the known labels. enumerate() replaces the
# Python-2-only xrange(); labels is still indexed by position so a labels
# vector shorter than the predictions fails loudly.
total = len(label_output)
correct = sum(
    1 for i, predicted in enumerate(label_output) if predicted == labels[i]
)
# An empty prediction set would otherwise raise ZeroDivisionError.
percentage_correct = 100.0 * correct / total if total else 0.0
print("Averager gets " + str(percentage_correct) + "% correct ("
      + str(correct) + " out of " + str(total) + ").")

# Plot the examples with the same 0 / average_theta pair used to classify.
p1.plot_2d_examples(feature_matrix, labels, 0, average_theta)
import numpy as np
import project1_code as p1

# Load the dictionary, labels and feature matrix for the training tweets.
dictionary = p1.extract_dictionary('train-tweet.txt')
labels = p1.read_vector_file('train-answer.txt')
feature_matrix = p1.extract_feature_vectors('train-tweet.txt', dictionary)

# Train the averager and classify the training data itself, passing 0 as the
# second argument to the classifier.
average_theta = p1.averager(feature_matrix, labels)
label_output = p1.perceptron_classify(feature_matrix, 0, average_theta)

# Tally predictions that match the known labels. enumerate() replaces the
# Python-2-only xrange(); labels is still indexed by position so a labels
# vector shorter than the predictions fails loudly.
total = len(label_output)
correct = sum(
    1 for i, predicted in enumerate(label_output) if predicted == labels[i]
)
# An empty prediction set would otherwise raise ZeroDivisionError.
percentage_correct = 100.0 * correct / total if total else 0.0
print("Averager gets " + str(percentage_correct) + "% correct ("
      + str(correct) + " out of " + str(total) + ").")

# Plot the examples with the same 0 / average_theta pair used to classify.
p1.plot_2d_examples(feature_matrix, labels, 0, average_theta)
import numpy as np
import project1_code as p1

######################
# INITIALIZE
######################
# Adjective set, dictionary and training labels are read from data files.
adjectives = p1.extract_set('adjectives.txt')
dictionary = p1.extract_dictionary('train-tweet.txt')
train_labels = p1.read_vector_file('train-answer.txt')

# Train and test features are built from the same dictionary and adjective
# set; their shapes are printed further down.
train_feature_matrix = p1.extract_feature_vectors_with_keywords('train-tweet.txt', dictionary, adjectives)
test_feature_matrix = p1.extract_feature_vectors_with_keywords('test-tweet.txt', dictionary, adjectives)

######################
# TRAIN
######################
# NOTE(review): the meaning of the third argument (1000) is defined by
# p1.train_passive_agressive, which is not visible here -- confirm.
pa_theta, pa_theta_0 = p1.train_passive_agressive(train_feature_matrix, train_labels, 1000)

######################
# CLASSIFY
######################
label_output = p1.perceptron_classify(test_feature_matrix, pa_theta_0, pa_theta)

# Single-argument print(...) is valid as both a Python 2 statement and a
# Python 3 call, and matches the print(...) form used elsewhere in the file.
print(train_feature_matrix.shape)
print(test_feature_matrix.shape)

# Write the predicted test labels to the output file.
p1.write_label_answer(label_output, 'tweet_labels.txt')