示例#1
0
def cross_validation(X_train, y_train, num_folds, k_choices, m_choices):
    """Run num_folds-fold cross-validation of a k-NN classifier for every
    combination of distance metric in m_choices and k in k_choices.

    Returns a dict mapping distance metric -> {k: [accuracy for each fold]}.
    """
    # Size of one validation fold. BUG FIX: '/' yields a float in Python 3;
    # use integer division for a count of samples.
    num_test = X_train.shape[0] // num_folds

    # Split the training data into num_folds equal parts (numpy arrays so
    # that fancy indexing with a list of fold indices works below).
    X_train_folds = np.array(np.array_split(X_train, num_folds))
    y_train_folds = np.array(np.array_split(y_train, num_folds))
    # Results keyed first by distance metric, then by k.
    k_to_accuracies = {m: {} for m in m_choices}

    # Core cross-validation loop.
    for dist_m in m_choices:
        for n in range(num_folds):
            # Every fold except fold n forms the training split.
            train_fold_ids = [x for x in range(num_folds) if x != n]
            x_training_dat = np.concatenate(X_train_folds[train_fold_ids])
            y_training_dat = np.concatenate(y_train_folds[train_fold_ids])
            classifier_k = KNearestNeighbor()
            classifier_k.train(x_training_dat, y_training_dat)
            # One prediction pass evaluates every k at once.
            ks_y_cross_validation_pred = classifier_k.predict_labels_diffrent_Ks(
                X_train_folds[n], k_choices, dist_m)
            for k in range(len(k_choices)):
                num_correct = np.sum(
                    ks_y_cross_validation_pred[k] == y_train_folds[n])
                accuracy = float(num_correct) / num_test
                k_to_accuracies[dist_m].setdefault(k_choices[k],
                                                   []).append(accuracy)
                print("num_folds: %d / %d, dist_m: %s, k: %d, acc: %f" %
                      (n + 1, num_folds, dist_m, k_choices[k], accuracy))
    return k_to_accuracies
示例#2
0
def index():
    """Render the index page with k-NN predictions for four random MNIST
    test images, saving each image to static/<i>.png for the template."""
    mndata = MNIST('./data')

    # Load data to variables
    train_images, train_labels = mndata.load_training()
    test_images, test_labels = mndata.load_testing()

    # Pick four random test indices in [0, 9999].
    # BUG FIX: np.random.random_integers is deprecated and removed in recent
    # NumPy; randint's upper bound is exclusive, so 10000 matches the old
    # inclusive high=9999.
    exampleindeces = np.random.randint(0, 10000, size=4)

    # Assign test data to numpy arrays
    images = np.asarray(test_images)
    labels = np.asarray(test_labels)

    # Construct the KNN classifier
    classifier = KNearestNeighbor()

    # Load the classifier with train data
    classifier.train(np.asarray(train_images), np.asarray(train_labels))

    # Predict the labels with KNN (k=3) and round to integer class labels.
    predictions = np.rint(classifier.predict(images[exampleindeces], 3))
    # Save ground truth labels for checking if prediction was correct
    truths = labels[exampleindeces]

    # Save each selected image as static/1.png ... static/4.png.
    for i, idx in enumerate(exampleindeces, start=1):
        two_d = (np.reshape(images[idx], (28, 28)) * 255).astype(np.uint8)
        im = Image.fromarray(two_d, 'L')
        filename = "static/" + str(i) + ".png"
        im.save(filename)

    # Render the page
    return render_template('index.html', preds=predictions, truths=truths)
示例#3
0
 def __init__(self):
     """Load CIFAR-10, fit the k-NN model, and load the pretrained MLP/CNN."""
     cifar = data.data_utilities.get_CIFAR10_data(num_training=50000,
                                                  num_validation=0,
                                                  num_test=10000)
     knn_model = KNearestNeighbor()
     knn_model.train(cifar['X_train'].reshape((50000, 3072)),
                     cifar['y_train'])
     self.data = cifar
     self.knn = knn_model
     self.mlp = load_model("mlp_relu.h5")
     self.cnn = load_model("cnn_relu.h5")
     # Training mean flattened so it can be subtracted from a vectorized image.
     self.datamean = np.resize(cifar['mean'], (1, 3072))
示例#4
0
def run_test(best_k, best_m, X_train, y_train, X_test, y_test):
    """Train a k-NN classifier on the full training split and report its
    accuracy on the test set using the chosen k and distance metric."""
    model = KNearestNeighbor()
    model.train(X_train, y_train)

    predictions = model.predict(X_test, k=best_k, dist_m=best_m)

    total = X_test.shape[0]
    correct = np.sum(predictions == y_test)
    acc = float(correct) / total
    print('Got %d / %d correct => accuracy: %f' % (correct, total, acc))
    return correct, total, acc
示例#5
0
class ImageHandler:
    """Downloads an image from a URL and predicts its CIFAR-10 class with
    three pretrained models (k-NN, MLP, CNN)."""

    # Predictors initializer
    def __init__(self):
        self.data = data.data_utilities.get_CIFAR10_data(num_training=50000,
                                                         num_validation=0,
                                                         num_test=10000)
        self.knn = KNearestNeighbor()
        self.knn.train(self.data['X_train'].reshape((50000, 3072)),
                       self.data['y_train'])
        self.mlp = load_model("mlp_relu.h5")
        self.cnn = load_model("cnn_relu.h5")
        # Training mean flattened so it can be subtracted from a vectorized image.
        self.datamean = np.resize(self.data['mean'], (1, 3072))

    # Supposing url is a valid string url
    def predict_image(self, url):
        """Fetch the image at *url*, preprocess it, and return a dict with
        the predicted label from each model."""
        req = Request(
            url,
            headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'})
        # BUG FIX: context managers guarantee both the HTTP response and the
        # file handle are closed even if the download fails midway (the file
        # was previously opened before the request and never closed on error,
        # and the response was never closed at all).
        with urlopen(req) as res, \
                open("image_to_predict.jpg", "wb") as image_file:
            image_file.write(res.read())
        ready_image = self.prepare_image()

        # Predicting
        labels_dict = {}
        labels_dict["knn"] = self.knn.predict(X=ready_image, k=3)
        labels_dict["mlp"] = np.argmax(self.mlp.predict(ready_image))
        labels_dict["cnn"] = np.argmax(
            self.cnn.predict(np.reshape(ready_image, (1, 3, 32, 32))))
        return labels_dict

    # Resize and vectorize the image
    def prepare_image(self):
        """Load the downloaded image, resize it to 32x32, move channels
        first, flatten to (1, 3072), and subtract the training mean."""
        # 'with' closes the underlying image file handle.
        with Image.open("image_to_predict.jpg") as img:
            resized = img.resize((32, 32))
            # Transpose HWC -> CHW so the layout matches the training data.
            img_tensor = np.array(resized).transpose(2, 0, 1).copy()
        image_array = np.resize(img_tensor, (1, 3072)) - self.datamean
        return image_array
示例#6
0
        if i == 0:
            plt.title(cls)
# plt.show()
# plt.close()

# Subsample for speed: keep 5000 training and 500 test images.
num_training = 5000
# BUG FIX: was range(num_claesses), which kept only len(classes) samples
# instead of the intended 5000 (mirrors the test-set code below).
mask = range(num_training)
x_train = x_train[mask]
y_train = y_train[mask]
num_test = 500
mask = range(num_test)
x_test = x_test[mask]
y_test = y_test[mask]

x_train = np.reshape(x_train, (x_train.shape[0], -1))  # flatten each image into a row vector
x_test = np.reshape(x_test, (x_test.shape[0], -1))
print("x_train的shape:", x_train.shape)
print("x_test的shape:", x_test.shape)

# 3.2) Predict on the test set.
classifier = KNearestNeighbor()
classifier.train(x_train, y_train)
dists = classifier.compute_distances_no_loops(x_test)
print(dists)

y_test_pred = classifier.predict_labels(dists, k=1)
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('got %d / %d correct => accuracy: %f' %
      (num_correct, num_test, accuracy))
    #raise ValueError(t)
    cm = metrics.confusion_matrix(testt, t)
    a = metrics.accuracy(testt, t)
    p, r = metrics.precision_and_recall(testt, t)
    try:
        f = metrics.f1_measure(testt, t)
    except:
        f = 0
    print("Accuracy = %f\n" % a)
    print("Precision = %f, Recall = %f\n" % (p, r))
    print("F1 measure = %f\n" % f)
    print("Tomorrow's Forecast: %f\n" % decision[-1])
    print(sum(t) / len(t))

elif model == "knn":
    knn = KNearestNeighbor(10, distance_measure='euclidean', aggregator='mean')
    knn.fit(trainf, traint)

    labels = knn.predict(testf)
    binary_labels = metrics.make_binary(labels)

    cm = metrics.confusion_matrix(testt, binary_labels)
    a = metrics.accuracy(testt, binary_labels)
    p, r = metrics.precision_and_recall(testt, binary_labels)
    f = metrics.f1_measure(testt, binary_labels)
    print(binary_labels)
    print("Accuracy = %f\n" % a)
    print("Precision = %f, Recall = %f\n" % (p, r))
    print("F1 measure = %f\n" % f)
    print(sum(binary_labels) / len(binary_labels))

def accuracy(actual, predicted):
    """Return the fraction of positions where *actual* and *predicted* agree.

    Works for numpy arrays / pandas Series (as before) and is generalized to
    any pair of equal-length sequences, e.g. plain lists, which the previous
    vectorized comparison could not handle.
    """
    matches = sum(a == p for a, p in zip(actual, predicted))
    return matches / len(predicted)


# Load the categorical car-evaluation dataset; every column is read as a
# category and the CSV carries no header row, so names are assigned below.
data = pd.read_csv("car.csv", dtype="category", header=None)
data.columns = [
    "buying", "maint", "doors", "persons", "lug-boot", "safety", "accept"
]

# Hold out 25% of the rows for testing; 'accept' is the target column and
# all remaining columns are features.
X_train, X_test, y_train, y_test = train_test_split(data.iloc[:, :-1],
                                                    data['accept'],
                                                    test_size=0.25,
                                                    random_state=0)

from knn import KNearestNeighbor
# Fit a 1-nearest-neighbour model and print its test accuracy.
model = KNearestNeighbor(X_train, y_train)
predictions = model.fit_predict(X_test, k=1)
print(accuracy(y_test, predictions))

# model = MyNaiveBayes(smoothing=False)
# model.fit(X_train, y_train)
# predictions = model.predict(X_test)
# print(accuracy(y_test, predictions))

# model = MyNaiveBayes(smoothing=True)
# model.fit(X_train, y_train)
# predictions = model.predict(X_test)
# print(accuracy(y_test, predictions))
示例#9
0
num_claesses=len(classes)
samples_per_class=7

# Subsample for speed: 5000 training and 500 test images.
num_training = 5000
mask = range(num_training)  # indices 0..4999, step=1
x_train = x_train[mask]  # 5000*32*32*3
y_train = y_train[mask]
num_test = 500
mask = range(num_test)
x_test = x_test[mask]
# BUG FIX: was y_test = x_test[mask], which assigned image data to the
# labels; the labels must come from y_test itself.
y_test = y_test[mask]

x_train = np.reshape(x_train,(x_train.shape[0],-1))
x_test = np.reshape(x_test,(x_test.shape[0],-1))

classifier = KNearestNeighbor()
classifier.train(x_train,y_train)


# Compare accuracy.
#dists = classifier.compute_distance_two_loops(x_test)
dists = classifier.compute_distance_one_loops(x_test)
y_test_pred = classifier.predict_labels(dists,k=1)
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct)/num_test
print('get %d / %d correct =>accuracy : %f' % (num_correct ,num_test ,accuracy))

#dists_one = classifier.compute_distance_one_loops(x_test)
#difference = np.linalg.norm(dists-dists_one,ord='fro')  # matrix norm
#print('difference was : %f' % difference)
示例#10
0
# To speed things up, keep only 5000 training and 500 test images.
num_training = 5000
mask = range(num_training)
x_train = x_train[mask]
y_train = y_train[mask]
num_test = 500
mask = range(num_test)
x_test = x_test[mask]
y_test = y_test[mask]

# Data loading is done; flatten each image into a row vector so Euclidean
# distances can be computed.
x_train = np.reshape(x_train, (x_train.shape[0], -1))
x_test = np.reshape(x_test, (x_test.shape[0], -1))
print(x_train.shape, x_test.shape)

classifier = KNearestNeighbor()
classifier.train(x_train, y_train)
dists = classifier.compute_distances_two_loops(x_test)  # the other two distance methods work as well
y_test_pred = classifier.predict_labels(dists, k=1)
# Model evaluation: accuracy is used as the metric here.
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('got %d / %d correct => accuracy: %f' %
      (num_correct, num_test, accuracy))
# The three distance methods differ in run time; compare them below:
import time


def time_function(f, *args):
    """Call f with args and return the elapsed wall-clock time in seconds.

    BUG FIX: the original recorded the start time but never computed or
    returned the elapsed time, making the timer useless.
    """
    tic = time.time()
    f(*args)
    toc = time.time()
    return toc - tic
示例#11
0
from knn import KNearestNeighbor
import numpy as np
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier


def test_model(model):
    """Fit *model* on the training split and return its accuracy on the
    held-out test split."""
    model.fit(X_train, y_train)
    return accuracy_score(y_test, model.predict(X_test))


dt = load_iris()
# NOTE(review): train_size=0.33 trains on only a third of the data and tests
# on the remaining two thirds — confirm this split is intentional.
X_train, X_test, y_train, y_test = train_test_split(np.array(dt.data),
                                                    np.array(dt.target),
                                                    train_size=0.33)
# Compare the hand-rolled classifier against scikit-learn's reference
# implementation with the same k.
my_accuracy = test_model(KNearestNeighbor(k=3))
sklearn_accuracy = test_model(KNeighborsClassifier(n_neighbors=3))

print('My accuracy: {} \nsklearn accuracy: {}'.format(my_accuracy,
                                                      sklearn_accuracy))
示例#12
0
# Fetch the preprocessed samples and wrap them in a numpy array so that
# index arrays from KFold can slice them directly.
data = np.array(processor.get_processed_data())

# Shuffle data
np.random.shuffle(data)
print('Processed Data')

# KFold split and validation
kf = KFold(n_splits=10)

# Accuracy storing array
accuracies = []

# Run over each fold
for train_index, test_index in kf.split(data):
    print("TRAIN:", train_index, "TEST:", test_index)
    train, test = data[train_index], data[test_index]
    # Build classifier on the training fold.
    knn = KNearestNeighbor(5, train, processor.labels)
    # Predict every held-out sample and count label matches.
    hits = 0
    total = 0
    for sample in test:
        predicted = knn.predict_class(sample['data'])
        hits += int(predicted == sample['label'])
        total += 1
    # Fold accuracy = correct predictions / fold size.
    accuracy = hits / total
    print(accuracy)
    accuracies.append(accuracy)

# Print out how well it performed
print(accuracies)
import data.data_utilities
import time
import matplotlib.pyplot as plt

data_dict = data.data_utilities.get_CIFAR10_data()

# Preparing the data: flatten each image to a 3072-dim row vector.
Xtr = np.reshape(data_dict['X_train'],(data_dict['X_train'].shape[0],3072))
Ytr = data_dict['y_train']
Xte = np.reshape(data_dict['X_test'],(data_dict['X_test'].shape[0],3072))
Yte = data_dict['y_test']
Xval = np.reshape(data_dict['X_val'],(data_dict['X_val'].shape[0],3072))
Yval = data_dict['y_val']

# Instantiating the classifier
classifier = KNearestNeighbor()
classifier.train(Xtr, Ytr)

# Doing validation on k to find the best best_parameters

ks = [1,3,5,7,9,11,13,15]
k_accuracies = {}

# 'with' guarantees the log file is flushed and closed (it was never closed).
with open("knn_log.txt","w") as log:
    for k in ks:
        print("k : {0}".format(k))
        y_test_pred = classifier.predict(Xval, k=k)
        num_correct = np.sum(y_test_pred == Yval)
        # BUG FIX: accuracy must be normalized by the number of validation
        # samples (Xval), not by the training-set size (Xtr).
        k_accuracies[k] = [float(num_correct) / Xval.shape[0]]
        log.write("With k : {0} got accuracy : {1}\n\n".format(k,k_accuracies[k][0]))
示例#14
0
from knn import KNearestNeighbor

if __name__ == "__main__":
    # Two well-separated 2-D clusters: the first five points are class 0,
    # the last five are class 1.
    x = [[2, 2], [1, 2], [3, 4], [1, 1], [3, 3],
         [7, 2], [5, 2], [6, 1], [8, -0], [7, 3]]

    target = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]

    # Fit a 3-nearest-neighbour classifier on the toy data.
    model = KNearestNeighbor(3)
    model.train(x, target)

    # Classify two query points and show the predicted labels.
    print(model.predict([[8, 3], [1, 1]]))