Example #1
# Excerpt: appears to assume `from pylab import *`, `import time`, and that the data
# arrays x, t, x_eval, t_eval, n_list and knn_classify/euclidean_distance are defined above.
sk = input("Enter number of nearest neighbors to consider or 'S' to sweep values [S]: ") or 's'
if sk.lower() == 's':
    print("Sweeping k")
    k_list = [1, 3, 5, 7, 21, 101, 401]
else:
    k_list = [int(sk)]


validation_errors = zeros((len(k_list), len(n_list)))
for l, n in enumerate(n_list):
    for j, k in enumerate(k_list):
        xj = x[:n]
        tj = t[:n]
        start = time.time()
        for i, xi in enumerate(x_eval):
            ti = knn_classify(xj, tj, xi, k, euclidean_distance)
            if ti != t_eval[i]:
                validation_errors[j, l] += 1
        end = time.time()
        print("K = %d, N = %d, Validation Errors = %d, Time = %f" %
              (k, n, validation_errors[j, l], end - start))
    print(" ")

if len(k_list) > 1:
    plot(k_list, validation_errors[:,0].flatten()/len(x_eval)*100, 'x-', label="N = %d"%(n_list[0]))
    xlabel('Neighbors Considered')
    ylabel('Validation Errors %')
    legend(loc='best')
    xscale('log')
    grid(True,which="both",ls="-")
    savefig("results/Task_2.eps")
    savetxt("results/Task_2.csv", array(zip(k_list, validation_errors[:,0])), fmt='%i %i')
Example #2
x_test = x_test_f + x_test_m
t_test = np.hstack((np.ones(len(x_test_f)), np.zeros(len(x_test_m))))

# Keep the original color images around before downsampling
xto = x_train
xteo = x_test

# Resize to 32x32 and convert to grayscale
x_train = [rgb2gray(imresize(x, (32, 32))) for x in x_train]
x_validation = [rgb2gray(imresize(x, (32, 32))) for x in x_validation]
x_test = [rgb2gray(imresize(x, (32, 32))) for x in x_test]

# Candidate values of k: 1-9, then every fifth value up to N, then N itself.
krange = [i for j in (range(1, 10), range(11, len(x_train), 5), [len(x_train)])
          for i in j]
validation_errors = np.zeros(len(krange))
validation_distances = defaultdict(list)
for j, k in enumerate(krange):
    for i, xi in enumerate(x_validation):
        ti, _ = knn_classify(x_train, t_train, xi, k, euclidean_distance, validation_distances[i])

        if ti != t_validation[i]:
            validation_errors[j] += 1
    print "K = %d - Validation Errors = %d (%d%%)" %(k, validation_errors[j], validation_errors[j]/len(x_validation)*100)

best_ki = np.where(validation_errors == validation_errors.min())[0]
best_k = np.array(krange)[best_ki]
best_perf = validation_errors[best_ki]/len(x_validation)*100
np.savetxt("results/part_5/eval_performance.csv", np.array(list(zip(best_k, best_perf))), fmt='%i %i')
print("Best values for k: %s\n" % best_k)

test_errors = np.zeros(len(best_k))
test_distances = defaultdict(list)
trigger = 0
nl = 0
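
Unlike Example #1, this call passes a sixth argument, validation_distances[i], a per-query list, and unpacks two return values. Presumably the list caches sorted distances so that sweeping many values of k does not recompute them. A sketch of a variant consistent with that call (the cache layout and the returned neighbor indices are assumptions):

from collections import Counter

def knn_classify(x_train, t_train, x_query, k, distance, cache):
    # On the first call for this query, fill the cache with (distance, index)
    # pairs sorted by distance; later calls with a larger k reuse it.
    if not cache:
        cache.extend(sorted((distance(x_query, xi), i)
                            for i, xi in enumerate(x_train)))
    neighbors = [i for _, i in cache[:k]]
    votes = Counter(t_train[i] for i in neighbors)
    return votes.most_common(1)[0][0], neighbors
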
Example #3
from favorite_language_data import coord_language_pairs
from matplotlib import pyplot
from k_nearest_neighbors import knn_classify

for k in [1, 3, 5, 7]:
    correct_predictions = 0

    for city in coord_language_pairs:
        other_cities = [
            other_city for other_city in coord_language_pairs
            if other_city != city
        ]

        predicted_language = knn_classify(k, other_cities, city[0])
        actual_language = city[1]
        if predicted_language == actual_language:
            correct_predictions += 1
        print('coords, actual_language, predicted_language = %s, %s, %s' %
              (city[0], actual_language, predicted_language))

    print('k = %d: correctly predicted %d of %d cities' %
          (k, correct_predictions, len(coord_language_pairs)))

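Note the different argument order here: k first, then the labeled (coordinates, language) pairs, then the query coordinates. A sketch of a classifier matching that call (the tie-breaking rule, shrinking k until the vote is unambiguous, is an assumption):

import math
from collections import Counter

def knn_classify(k, labeled_points, query):
    # labeled_points are ((latitude, longitude), language) pairs.
    by_distance = sorted(labeled_points,
                         key=lambda pair: math.dist(pair[0], query))
    while k > 0:
        votes = Counter(label for _, label in by_distance[:k])
        winner, winner_count = votes.most_common(1)[0]
        if list(votes.values()).count(winner_count) == 1:
            return winner  # unique winner
        k -= 1             # tie: retry with fewer neighbors
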
plots = {"Java": ([], []), "Python": ([], []), "R": ([], [])}

markers = {"Java": "o", "Python": "s", "R": "^"}
colors = {"Java": "r", "Python": "b", "R": "g"}

for (latitude, longitude), language in coord_language_pairs:
    plots[language][0].append(latitude)
    plots[language][1].append(longitude)

for language, (x, y) in plots.items():
    pyplot.scatter(x,
                   y,
                   color=colors[language],
                   marker=markers[language],
                   label=language)

pyplot.legend(loc='best')
pyplot.show()

Example #4
from decision_tree_classification import dt_classify
from kernel_svm import ksvm_classify
from logistic_regression import logreg_classify
from naive_bayes import nb_classify
from k_nearest_neighbors import knn_classify
from random_forest_classification import rf_classify
from support_vector_machine import lsvm_classify

print('Decision Tree Model: ', dt_classify('breast_cancer.csv'))
print('Random Forest Model: ', rf_classify('breast_cancer.csv'))
print('K-NN Model: ', knn_classify('breast_cancer.csv'))
print('Linear SVM Model: ', lsvm_classify('breast_cancer.csv'))
print('Kernel SVM Model: ', ksvm_classify('breast_cancer.csv'))
print('Logistic Regression Model: ', logreg_classify('breast_cancer.csv'))
print('Naive Bayes Model: ', nb_classify('breast_cancer.csv'))
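
In this example each *_classify helper simply takes a CSV path and returns a result to print. As a rough illustration of what such a wrapper might do (the file layout, split, and scikit-learn estimator below are all assumptions):

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

def knn_classify(csv_path):
    # Hypothetical wrapper: treat the last column as the label.
    data = pd.read_csv(csv_path)
    X, y = data.iloc[:, :-1], data.iloc[:, -1]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=0)
    model = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
    return model.score(X_test, y_test)  # held-out accuracy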

Example #5
    "Enter number of nearest neighbors to consider or 'S' to sweep values [S]: "
) or 's'
if sk.lower() == 's':
    print("Sweeping k")
    k_list = [1, 3, 5, 7, 21, 101, 401]
else:
    k_list = [int(sk)]

validation_errors = zeros((len(k_list), len(n_list)))
for l, n in enumerate(n_list):
    for j, k in enumerate(k_list):
        xj = x[:n]
        tj = t[:n]
        start = time.time()
        for i, xi in enumerate(x_eval):
            ti = knn_classify(xj, tj, xi, k, euclidean_distance)
            if ti != t_eval[i]:
                validation_errors[j, l] += 1
        end = time.time()
        print("K = %d, N = %d, Validation Errors = %d, Time = %f" %
              (k, n, validation_errors[j, l], end - start))
    print(" ")

if len(k_list) > 1:
    plot(k_list,
         validation_errors[:, 0].flatten() / len(x_eval) * 100,
         'x-',
         label="N = %d" % (n_list[0]))
    xlabel('Neighbors Considered')
    ylabel('Validation Errors %')
    legend(loc='best')
Example #6
from decision_tree_classification import dt_classify
from kernel_svm import ksvm_classify
from logistic_regression import logreg_classify
from naive_bayes import nb_classify
from k_nearest_neighbors import knn_classify
from random_forest_classification import rf_classify
from support_vector_machine import lsvm_classify

logreg_classify('../Restaurant_Reviews.tsv')
print('********************************************')
knn_classify('../Restaurant_Reviews.tsv')
print('********************************************')
dt_classify('../Restaurant_Reviews.tsv')
print('********************************************')
ksvm_classify('../Restaurant_Reviews.tsv')
print('********************************************')
lsvm_classify('../Restaurant_Reviews.tsv')
print('********************************************')
rf_classify('../Restaurant_Reviews.tsv')


Example #7
                                             0, 20)

genders = ['Male', 'Female']
x_train = x_train_f + x_train_m
t_train = np.hstack((np.ones(len(x_train_f)), np.zeros(len(x_train_m))))

x_test = x_test_f + x_test_m
t_test = np.hstack((np.ones(len(x_test_f)), np.zeros(len(x_test_m))))

# Resize to 32x32 and convert to grayscale
x_train_bw = [rgb2gray(imresize(x, (32, 32))) for x in x_train]
x_test_bw = [rgb2gray(imresize(x, (32, 32))) for x in x_test]

# Classify pictures in test set
for xi in np.random.permutation(len(x_test_bw)):
    ti, nn = knn_classify(x_train_bw, t_train, x_test_bw[xi], 5)
    # Plot image with classification
    plt.subplot(121)
    plt.imshow(x_test[xi])
    plt.axis('off')
    plt.title(genders[int(ti)],
              color=('green' if ti == t_test[xi] else 'red'),
              weight='bold')

    # Plot nearest neighbors
    idx = [3, 4, 7, 8, 11]
    for i, nni in enumerate(nn):
        plt.subplot(3, 4, idx[i])
        plt.imshow(x_train[nni])
        plt.axis('off')
        plt.title(genders[int(t_train[nni])])
    plt.show()