示例#1
0
def testAlgorithm(algorithmToTest, name):
    """Train with ``algorithmToTest``, print its training-set accuracy and time it.

    Relies on the module-level ``feature_matrix``, ``labels`` and ``p1`` names.

    Args:
        algorithmToTest: Callable invoked as
            ``algorithmToTest(feature_matrix, labels)``; its result is
            unpacked as ``(theta, theta_0)``.
        name: Text prepended to the printed accuracy line.

    Returns:
        Tuple ``(theta, theta_0, elapsed)`` where ``elapsed`` is the timer
        difference measured around the training call only.
    """
    # time.clock was removed in Python 3.8. Keep using it where it exists so
    # existing timings stay comparable; otherwise fall back to perf_counter.
    timer = getattr(time, "clock", None) or time.perf_counter

    start = timer()
    average_theta, theta_0 = algorithmToTest(feature_matrix, labels)
    stop = timer()

    # Classification is deliberately outside the timed region.
    label_output = p1.perceptron_classify(feature_matrix, theta_0, average_theta)

    # Count the positions where the prediction agrees with the known label.
    total = len(label_output)
    correct = sum(
        1 for i, predicted in enumerate(label_output) if predicted == labels[i]
    )

    percentage_correct = 100.0 * correct / total
    print(name + " gets " + str(percentage_correct) + "% correct ("
          + str(correct) + " out of " + str(total) + ").")
    return average_theta, theta_0, stop - start
def example(algorithm):
    """Train the selected classifier on the training tweets and print its accuracy.

    The accuracy is measured on the same feature matrix that was used for
    training, by comparing the predicted labels against the training labels.

    Args:
        algorithm: One of ``'averager'``, ``'perceptron'`` or ``'passive'``;
            selects which ``p1`` trainer is run.

    Raises:
        ValueError: If ``algorithm`` is not one of the supported names. The
            check happens before any file is read.
    """
    # Public algorithm name -> attribute name of the trainer on p1. The
    # attribute is only looked up after validation, so an unsupported name is
    # rejected up front instead of surfacing later as an UnboundLocalError.
    trainer_names = {
        'averager': 'averager',
        'perceptron': 'perceptron_algorithm',
        'passive': 'passive_aggressive',
    }
    if algorithm not in trainer_names:
        raise ValueError(
            "unknown algorithm %r; expected one of: %s"
            % (algorithm, ", ".join(sorted(trainer_names)))
        )

    dictionary = p1.extract_dictionary('train-tweet.txt')
    labels = p1.read_vector_file('train-answer.txt')
    feature_matrix = p1.extract_feature_vectors('train-tweet.txt', dictionary)

    train = getattr(p1, trainer_names[algorithm])
    average_theta, average_theta_0 = train(feature_matrix, labels)

    label_output = p1.perceptron_classify(feature_matrix, average_theta_0, average_theta)

    # Count the positions where the prediction agrees with the known label.
    total = len(label_output)
    correct = sum(
        1 for i, predicted in enumerate(label_output) if predicted == labels[i]
    )

    percentage_correct = 100.0 * correct / total
    print(algorithm + " gets " + str(percentage_correct) + "% correct ("
          + str(correct) + " out of " + str(total) + ").")
    		for word in flist: 
    			for entry in dict:
    				if word == entry[0]:
    					feature_matrix[pos, dict.index(entry)] = math.floor(math.log(1+entry[1]))
    		pos = pos + 1
            
    f.close()
    
    return feature_matrix

# Load the training data. The dictionary and the feature vectors come from the
# module-level extract_dictionary / extract_feature_vectors, not from p1.
dictionary = extract_dictionary('train-tweet.txt')
labels = p1.read_vector_file('train-answer.txt')
feature_matrix = extract_feature_vectors('train-tweet.txt', dictionary)

# Train, then split the final entry of the returned vector off as the offset.
theta = p1.perceptron(feature_matrix, labels)
last_index = len(theta)-1
theta_0 = theta[last_index]
theta = np.delete(theta, last_index)
label_output = p1.perceptron_classify(feature_matrix, theta_0, theta)

# Count predictions that agree with the training labels.
total = len(label_output)
correct = sum(
    1 for i, predicted in enumerate(label_output) if predicted == labels[i]
)

percentage_correct = 100.0 * correct / total
print("Augmented perceptron gets " + str(percentage_correct) + "% correct ("
      + str(correct) + " out of " + str(total) + ").")

# Print whatever the cross-validation helper returns.
test = p1.cross_validation_perceptron(feature_matrix, labels)
print(test)

#returned 508
示例#4
0
import numpy as np
import project1_code as p1

# Load the training tweets and their labels.
dictionary = p1.extract_dictionary('train-tweet.txt')
labels = p1.read_vector_file('train-answer.txt')
feature_matrix = p1.extract_feature_vectors('train-tweet.txt', dictionary)

# Start from an all-zero weight vector with one entry per feature column.
nsamples, nfeatures = feature_matrix.shape
initial_theta = np.zeros(nfeatures)

theta_vector = p1.pa(feature_matrix, initial_theta, 0, labels)
# Classify the training data with a zero offset; the accuracy count below
# reads label_output.
label_output = p1.perceptron_classify(feature_matrix, 0, theta_vector)

# Count predictions that agree with the training labels.
total = len(label_output)
correct = sum(
    1 for i, predicted in enumerate(label_output) if predicted == labels[i]
)

percentage_correct = 100.0 * correct / total
print("Passive Aggressive gets " + str(percentage_correct) + "% correct ("
      + str(correct) + " out of " + str(total) + ").")
import numpy as np
import project1_code as p1

# Load the training data plus a second feature matrix built from
# 'sample_from_tweepy.txt' with the same dictionary.
dictionary = p1.extract_dictionary('train-tweet.txt')
labels = p1.read_vector_file('train-answer.txt')
feature_matrix = p1.extract_feature_vectors('train-tweet.txt', dictionary)
feature_matrix_real = p1.extract_feature_vectors('sample_from_tweepy.txt', dictionary)


# --- Run 1: averager, classified with the offset forced to 0 ---
average_without_offset_theta = p1.averager(feature_matrix, labels)
last_index = len(average_without_offset_theta)-1
theta_0 = average_without_offset_theta[last_index]
average_without_offset_theta = np.delete(average_without_offset_theta, last_index)

# theta_0 is extracted above but intentionally not passed here.
label_output = p1.perceptron_classify(feature_matrix, 0, average_without_offset_theta)

# Count predictions that agree with the training labels.
total = len(label_output)
correct = sum(
    1 for i, predicted in enumerate(label_output) if predicted == labels[i]
)

percentage_correct = 100.0 * correct / total
print("Averager without offset gets " + str(percentage_correct) + "% correct ("
      + str(correct) + " out of " + str(total) + ").")


# --- Run 2: averager again, this time classified with its own offset ---
average_theta = p1.averager(feature_matrix, labels)
last_index = len(average_theta)-1
theta_0 = average_theta[last_index]
average_theta = np.delete(average_theta, last_index)

label_output = p1.perceptron_classify(feature_matrix, theta_0, average_theta)

# Reset the match counter for the second run.
correct = 0
示例#6
0
import numpy as np
import project1_code as p1

######################
# INITIALIZE
######################
adjectives = p1.extract_set('adjectives.txt')
dictionary = p1.extract_dictionary('train-tweet.txt')
train_labels = p1.read_vector_file('train-answer.txt')
# Both matrices are built from the same dictionary and adjective set.
train_feature_matrix = p1.extract_feature_vectors_with_keywords('train-tweet.txt',
                                                                dictionary,
                                                                adjectives)
test_feature_matrix = p1.extract_feature_vectors_with_keywords('test-tweet.txt',
                                                               dictionary,
                                                               adjectives)

# ---- Train ----
# NOTE(review): the third argument (1000) is presumably an iteration/epoch
# count; confirm against p1.train_passive_agressive.
pa_theta, pa_theta_0 = p1.train_passive_agressive(
    train_feature_matrix, train_labels, 1000)

# ---- Classify the test set ----
label_output = p1.perceptron_classify(
    test_feature_matrix, pa_theta_0, pa_theta)

# Report both matrix shapes, then write the predicted labels to disk.
print(train_feature_matrix.shape)
print(test_feature_matrix.shape)
p1.write_label_answer(label_output, 'tweet_labels.txt')
import numpy as np
import project1_code as p1  #################

# Load the training tweets and their labels.
dictionary = p1.extract_dictionary('train-tweet.txt')
labels = p1.read_vector_file('train-answer.txt')
feature_matrix = p1.extract_feature_vectors('train-tweet.txt', dictionary)

# Averager with offset: the trainer result is unpacked as (theta, theta_0).
average_theta, average_theta_0 = p1.averager(feature_matrix, labels)
label_output = p1.perceptron_classify(feature_matrix, average_theta_0, average_theta)

# Count predictions that agree with the training labels.
total = len(label_output)
correct = sum(
    1 for i, predicted in enumerate(label_output) if predicted == labels[i]
)

percentage_correct = 100.0 * correct / total
print("Averager gets " + str(percentage_correct) + "% correct ("
      + str(correct) + " out of " + str(total) + ").")



def example(algorithm):
    """Load the training tweets and train the classifier named by ``algorithm``.

    NOTE(review): this definition appears to be cut off after the
    'perceptron' branch. As written it returns None and the trained
    parameters are discarded.

    Args:
        algorithm: 'averager' or 'perceptron'; any other value trains nothing.
    """
    
    dictionary = p1.extract_dictionary('train-tweet.txt')
    labels = p1.read_vector_file('train-answer.txt')
    feature_matrix = p1.extract_feature_vectors('train-tweet.txt', dictionary)
    
    # Each trainer's result is unpacked as a (theta, theta_0) pair.
    if algorithm== 'averager':
        average_theta,average_theta_0 = p1.averager(feature_matrix, labels)
    elif algorithm== 'perceptron':
        average_theta,average_theta_0 = p1.perceptron_algorithm(feature_matrix, labels)
示例#8
0
import numpy as np
import project1_code as p1

# Load the training tweets and their labels.
dictionary = p1.extract_dictionary('train-tweet.txt')
labels = p1.read_vector_file('train-answer.txt')
feature_matrix = p1.extract_feature_vectors('train-tweet.txt', dictionary)

# Start from an all-zero weight vector with one entry per feature column.
nsamples, nfeatures = feature_matrix.shape
initial_theta = np.zeros(nfeatures)

# NOTE(review): the meaning of the third argument (-1) is not visible here;
# check p1.perceptron before changing it.
theta_vector = p1.perceptron(feature_matrix, initial_theta, -1, labels)

# Build test features with the training dictionary, classify them with a
# zero offset, and write the predicted labels to disk.
test_matrix = p1.extract_feature_vectors('test-tweet.txt', dictionary)
label_output = p1.perceptron_classify(test_matrix, 0, theta_vector)
p1.write_label_answer(label_output, 'tweet_labels.txt')
示例#9
0
import numpy as np
import project1_code as p1

# Load the training tweets and their labels.
dictionary = p1.extract_dictionary('train-tweet.txt')
labels = p1.read_vector_file('train-answer.txt')
feature_matrix = p1.extract_feature_vectors('train-tweet.txt', dictionary)

# Train the averager and classify the training data with a zero offset.
average_theta = p1.averager(feature_matrix, labels)
label_output = p1.perceptron_classify(feature_matrix, 0, average_theta)

# Count predictions that agree with the training labels.
total = len(label_output)
correct = sum(
    1 for i, predicted in enumerate(label_output) if predicted == labels[i]
)

percentage_correct = 100.0 * correct / total
print("Averager gets " + str(percentage_correct) + "% correct ("
      + str(correct) + " out of " + str(total) + ").")

# Hand the data and the trained parameters (offset 0) to the 2-D plot helper.
p1.plot_2d_examples(feature_matrix, labels, 0, average_theta)
示例#10
0
import numpy as np
import project1_code as p1

# Training inputs: dictionary, labels, and the feature matrix built from both.
dictionary = p1.extract_dictionary('train-tweet.txt')
labels = p1.read_vector_file('train-answer.txt')
feature_matrix = p1.extract_feature_vectors('train-tweet.txt', dictionary)

# Fit with the averager, then predict on the training data (offset fixed at 0).
average_theta = p1.averager(feature_matrix, labels)
label_output = p1.perceptron_classify(feature_matrix, 0, average_theta)

# Tally how many predictions equal the corresponding training label.
total = len(label_output)
correct = 0
for i, predicted in enumerate(label_output):
    if predicted == labels[i]:
        correct += 1

percentage_correct = 100.0 * correct / total
print(
    "Averager gets " + str(percentage_correct) + "% correct ("
    + str(correct) + " out of " + str(total) + ")."
)


# Plot the examples together with the trained parameters (offset 0).
p1.plot_2d_examples(feature_matrix, labels, 0, average_theta)
示例#11
0
import numpy as np
import project1_code as p1

######################
# INITIALIZE
######################
adjectives = p1.extract_set('adjectives.txt')
dictionary = p1.extract_dictionary('train-tweet.txt')
train_labels = p1.read_vector_file('train-answer.txt')
# Train and test matrices share one dictionary and one adjective set.
train_feature_matrix = p1.extract_feature_vectors_with_keywords(
    'train-tweet.txt', dictionary, adjectives
)
test_feature_matrix = p1.extract_feature_vectors_with_keywords(
    'test-tweet.txt', dictionary, adjectives
)

# === Train ===
# NOTE(review): 1000 is presumably an iteration/epoch count; confirm against
# p1.train_passive_agressive.
pa_theta, pa_theta_0 = p1.train_passive_agressive(
    train_feature_matrix, train_labels, 1000
)

# === Classify ===
label_output = p1.perceptron_classify(
    test_feature_matrix, pa_theta_0, pa_theta
)

# Report both matrix shapes, then write the predicted labels to disk.
print(train_feature_matrix.shape)
print(test_feature_matrix.shape)
p1.write_label_answer(label_output, 'tweet_labels.txt')