def main():
    """Train a k-NN classifier (k=3) on the Iris CSV and print test accuracy."""
    col_names = ['sepal_length', 'sepal_width', 'petal_length',
                 'petal_width', 'species']
    iris = pd.read_csv('./iris.data', header=None, names=col_names)

    # Map the string class labels to integer codes for the classifier.
    iris_class = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
    iris['species_num'] = [iris_class[i] for i in iris.species]

    X = iris.drop(['species', 'species_num'], axis=1).to_numpy()
    y = iris.species_num.to_numpy()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    kr = Knn(3)
    kr.fit(X_train, y_train)
    p2 = kr.predict(X_test)

    # Accuracy = fraction of predictions matching the held-out labels.
    correct = sum(pred == actual for pred, actual in zip(p2, y_test))
    total = len(y_test)
    print("acc :", correct / total)
def test_minkowski_distance(self):
    """Verify the Minkowski distance (p=5) against precomputed values."""
    model = Knn(n_neighbors=3, p=5)
    model.fit(np.array(little_X), little_Y)
    distances = model._minkowski_distance(np.array([3, 4]))
    expected = [2.01234, 6.419382]
    assert np.allclose(distances, expected), "Minkowski Distance is not correct"
def test_k_5(self):
    """Compare our knn with sklearn's knn when k=5 and distance is Euclidean."""
    knn = KNeighborsClassifier(n_neighbors=5)
    knn.fit(X_train, y_train)
    prediction = knn.predict(X_test)

    knn2 = Knn(n_neighbors=5)
    knn2.fit(X_train, y_train)
    prediction2 = knn2.predict(X_test)

    # np.alltrue was deprecated and removed in NumPy 2.0; np.all is the
    # supported equivalent.
    assert np.all(prediction == prediction2), "Error testing knn with k=5"
def test_distance_weight_2(self):
    """Compare our knn with sklearn when k=5 and weights are inverse distance."""
    knn = KNeighborsClassifier(n_neighbors=5, weights='distance')
    knn.fit(X_train, y_train)
    prediction = knn.predict(X_test)

    knn2 = Knn(n_neighbors=5, weights='distance')
    knn2.fit(X_train, y_train)
    prediction2 = knn2.predict(X_test)

    # np.alltrue was deprecated and removed in NumPy 2.0; np.all is the
    # supported equivalent.
    assert np.all(prediction == prediction2
                  ), "Error testing knn with k=5 and weights=distance"
def test_k_5_distance_minkowski(self):
    """Compare our knn with sklearn's when k=5 and distance is Minkowski (p=3)."""
    knn = KNeighborsClassifier(n_neighbors=5, metric="minkowski", p=3)
    knn.fit(X_train, y_train)
    prediction = knn.predict(X_test)

    knn2 = Knn(n_neighbors=5, metric="minkowski", p=3)
    knn2.fit(X_train, y_train)
    prediction2 = knn2.predict(X_test)

    # np.alltrue was deprecated and removed in NumPy 2.0; np.all is the
    # supported equivalent.
    assert np.all(prediction == prediction2
                  ), "Error testing knn (minkowski) with k=5 and p=3"
def main():
    """Train k-NN on MNIST, report test accuracy, and plot 20 predictions."""
    X_train, y_train, X_test, y_test = load_mnist()

    knn = Knn()
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)

    # A prediction is correct exactly when the label difference is zero.
    correct = sum((y_test - y_pred) == 0)
    print('==> correct:', correct)
    print('==> total:', len(X_test))
    print('==> acc:', correct / len(X_test))

    # Plot the first 20 test images annotated with their predicted labels.
    fig = plt.subplots(nrows=4, ncols=5, sharex='all', sharey='all')[1].flatten()
    for i in range(20):
        img = X_test[i]
        fig[i].set_title(y_pred[i])
        fig[i].imshow(img, cmap='Greys', interpolation='nearest')
    fig[0].set_xticks([])
    fig[0].set_yticks([])
    plt.tight_layout()
    plt.show()
def main():
    """Train k-NN (k=3) on the Iris dataset and classify a new sample.

    Loads the iris features (4 measurements per flower) and labels; each
    4-element feature row maps to one species label.
    """
    iris = datasets.load_iris()
    iris_df = pd.DataFrame(iris['data'], columns=iris['feature_names'])
    X = iris_df.to_numpy()
    y = iris['target']
    iris_df['species'] = iris['target']

    plot_chart(iris_df)

    # Split the data into training and testing subsets.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                        random_state=1234)

    clf = Knn(k=3)
    clf.fit(X_train, y_train)  # fit model with features and corresponding labels
    predictions = clf.predict(X_test)

    print('Test samples shape: ' + str(X_test.shape))
    print(X_test)
    print('')
    print('Predictions shape: ' + str(predictions.shape))
    print(predictions)
    print('')
    calculate_accuracy(predictions, y_test)  # compare predictions with y_test

    # Classify a single previously-unseen sample.
    new_features = np.asarray([[6.2, 2.8, 5.7, 1.8]])
    predicted_label = clf.predict(np.asarray(new_features))
    print('')
    print('New Features: ' + str(new_features))
    print('Predicted label: ' + str(predicted_label))
    # Fixed typo in the output string: 'speices' -> 'species'.
    print('Predicted species: ' + str(species[int(predicted_label[0])]))
def main():
    """Run the k-NN classifier on MNIST and visualise sample predictions."""
    X_train, y_train, X_test, y_test = load_mnist()

    model = Knn()
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    # Count exact label matches between predictions and ground truth.
    correct = sum((y_test - predictions) == 0)
    print('==> correct:', correct)
    print('==> total:', len(X_test))
    print('==> acc:', correct / len(X_test))

    # Show the first 20 test digits, each titled with its predicted class.
    axes = plt.subplots(nrows=4, ncols=5, sharex='all', sharey='all')[1].flatten()
    for idx in range(20):
        axis = axes[idx]
        axis.set_title(predictions[idx])
        axis.imshow(X_test[idx], cmap='Greys', interpolation='nearest')
    axes[0].set_xticks([])
    axes[0].set_yticks([])
    plt.tight_layout()
    plt.show()
def get_accuracy(k, trainx, trainy, testx, testy):
    """Fit a k-NN model on the training split and return its test accuracy."""
    model = Knn(k)
    model.fit(trainx, trainy)
    predictions = model.predict(testx)
    return accuracy_score(predictions, testy)
def test_input_dimension(self):
    """Test that fit raises ValueError when X and y dimensions are inconsistent."""
    knn = Knn(n_neighbors=3)
    # y_test has a different number of samples than X_train, so fit must fail.
    with self.assertRaises(ValueError):
        knn.fit(X_train, y_test)
def test_manhattan_distance(self):
    """Verify the Manhattan distance against precomputed values."""
    model = Knn(n_neighbors=3)
    model.fit(np.array(little_X), little_Y)
    distances = model._manhattan_distance(np.array([5, 6]))
    assert (distances == [7, 7]).all(), "Manhattan Distance is not correct"
def test_euclidean_distance(self):
    """Verify the Euclidean distance against precomputed values."""
    model = Knn(n_neighbors=3)
    model.fit(np.array(little_X), little_Y)
    distances = model._euclidean_distance(np.array([5, 6]))
    assert (distances == [5, 5]).all(), "Euclidean Distance is not correct"
# Cross-validate k for the k-NN classifier over num_folds folds.
x_train_folds = []
y_train_folds = []
# Split the training indices into num_folds roughly equal folds.
indices = np.array_split(np.arange(num_training), indices_or_sections=num_folds)
for i in indices:
    x_train_folds.append(x_train[i])
    y_train_folds.append(y_train[i])

k_to_accuracies = {}
for k in k_choices:
    acc = []
    for i in range(num_folds):
        # All folds except fold i form the training set; fold i is held out.
        x = np.concatenate(x_train_folds[0:i] + x_train_folds[i + 1:], axis=0)
        y = np.concatenate(y_train_folds[0:i] + y_train_folds[i + 1:], axis=0)
        test_x = np.array(x_train_folds[i])
        test_y = np.array(y_train_folds[i])
        classifier = Knn()
        # Removed stray debug print(x) that dumped the full training array
        # on every fold/k combination.
        classifier.fit(np.array(x), np.array(y))
        y_pred = classifier.predict(k, 'M', test_x)
        accuracy = np.mean(y_pred == test_y)
        acc.append(accuracy)
    k_to_accuracies[k] = acc

for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k=%d,accuracy=%f' % (k, accuracy))
from knn import Knn # 导入数据 iris = datasets.load_iris() X = iris.data[:, :2] y = iris.target # Kmeans mdl_kmeans = Kmeans(k=3) mdl_kmeans.fit(X) # KNN n_neighbors = 15 mdl_knn = Knn(k=n_neighbors) mdl_knn.fit(X, y) # 模型库 mdls = [mdl_kmeans, mdl_knn] for mdl in mdls: # 绘制预测图 cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF']) cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF']) x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1 y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1 xx, yy = np.meshgrid(np.arange(x_min, x_max, .02), np.arange(y_min, y_max, .02)) Z = mdl.predict(np.c_[xx.ravel(), yy.ravel()]) Z = Z.reshape(xx.shape) plt.figure()