Exemplo n.º 1
0
def main():
    """Fit the custom KNN on the iris CSV file and print its test accuracy.

    Reads ``./iris.data`` (no header row), encodes the species names as
    integers, holds out 20% of the rows for testing, fits ``Knn(3)`` on the
    remaining rows and prints the fraction of test labels predicted correctly.
    """
    feature_columns = [
        'sepal_length', 'sepal_width', 'petal_length', 'petal_width'
    ]
    frame = pd.read_csv('./iris.data',
                        header=None,
                        names=feature_columns + ['species'])

    # Integer code for every species name found in the file.
    species_to_id = {
        'Iris-setosa': 0,
        'Iris-versicolor': 1,
        'Iris-virginica': 2,
    }
    frame['species_num'] = [species_to_id[name] for name in frame['species']]

    X = frame[feature_columns].to_numpy()
    y = frame['species_num'].to_numpy()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    classifier = Knn(3)
    classifier.fit(X_train, y_train)
    predicted = classifier.predict(X_test)

    # Pair every prediction with its true label and count the matches.
    pairs = list(zip(predicted, y_test))
    hits = sum(1 for guess, truth in pairs if guess == truth)
    print("acc :", hits / len(pairs))
Exemplo n.º 2
0
 def test_minkowski_distance(self):
     """Check the Minkowski distance (p=5) against known reference values."""
     expected = [2.01234, 6.419382]
     model = Knn(n_neighbors=3, p=5)
     model.fit(np.array(little_X), little_Y)
     query = np.array([3, 4])
     distances = model._minkowski_distance(query)
     # Floating-point results are compared with a tolerance.
     assert np.allclose(distances, expected), "Minkowski Distance is not correct"
Exemplo n.º 3
0
    def test_k_5(self):
        """Test to compare our knn with Sklearn knn when k=5 and distance is euclidean.

        Both classifiers are fitted on the same training data and must
        produce identical predictions for the test data.
        """
        knn = KNeighborsClassifier(n_neighbors=5)
        knn.fit(X_train, y_train)
        prediction = knn.predict(X_test)

        knn2 = Knn(n_neighbors=5)
        knn2.fit(X_train, y_train)
        prediction2 = knn2.predict(X_test)

        # np.alltrue was deprecated in NumPy 1.25 and removed in NumPy 2.0;
        # np.all is the supported equivalent.
        assert np.all(
            prediction == prediction2), "Error testing knn with k=5"
Exemplo n.º 4
0
    def test_distance_weight_2(self):
        """Test to compare our knn with Sklearn when k=5 and weights are the inverse of distance.

        Both classifiers are fitted on the same training data and must
        produce identical predictions for the test data.
        """
        knn = KNeighborsClassifier(n_neighbors=5, weights='distance')
        knn.fit(X_train, y_train)
        prediction = knn.predict(X_test)

        knn2 = Knn(n_neighbors=5, weights='distance')
        knn2.fit(X_train, y_train)
        prediction2 = knn2.predict(X_test)

        # np.alltrue was deprecated in NumPy 1.25 and removed in NumPy 2.0;
        # np.all is the supported equivalent.
        assert np.all(prediction == prediction2
                      ), "Error testing knn with k=5 and weights=distance"
Exemplo n.º 5
0
    def test_k_5_distance_minkowski(self):
        """Test to compare our knn with Sklearn knn when k=5 and distance is minkowski with p=3.

        Both classifiers are fitted on the same training data and must
        produce identical predictions for the test data.
        """
        knn = KNeighborsClassifier(n_neighbors=5, metric="minkowski", p=3)
        knn.fit(X_train, y_train)
        prediction = knn.predict(X_test)

        knn2 = Knn(n_neighbors=5, metric="minkowski", p=3)
        knn2.fit(X_train, y_train)
        prediction2 = knn2.predict(X_test)

        # np.alltrue was deprecated in NumPy 1.25 and removed in NumPy 2.0;
        # np.all is the supported equivalent.
        assert np.all(prediction == prediction2
                      ), "Error testing knn (minkowski) with k=5 and p=3"
Exemplo n.º 6
0
def main():
    """Classify the MNIST test set with the default Knn and show the results.

    Prints the number of correct predictions, the test-set size and the
    accuracy, then displays the first 20 test images in a 4x5 grid, each
    titled with its predicted label.
    """
    X_train, y_train, X_test, y_test = load_mnist()

    model = Knn()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # A prediction is correct where the label difference is zero.
    matches = (y_test - y_pred) == 0
    correct = sum(matches)
    total = len(X_test)

    print('==> correct:', correct)
    print('==> total:', total)
    print('==> acc:', correct / total)

    # Plot predicted samples on a shared-axis 4x5 grid.
    _, grid = plt.subplots(nrows=4, ncols=5, sharex='all', sharey='all')
    axes = grid.flatten()
    for idx in range(20):
        panel = axes[idx]
        panel.set_title(y_pred[idx])
        panel.imshow(X_test[idx], cmap='Greys', interpolation='nearest')
    # Axes are shared, so clearing the ticks on the first panel is enough.
    axes[0].set_xticks([])
    axes[0].set_yticks([])
    plt.tight_layout()
    plt.show()
def main():
    """Run the Knn classifier on scikit-learn's iris data and report results.

    Loads the dataset, plots it via ``plot_chart``, trains ``Knn(k=3)`` on
    an 80/20 split (fixed ``random_state``), prints the test samples and
    their predictions, reports accuracy via ``calculate_accuracy`` and
    finally classifies one hand-written sample.
    """
    # Each row of four feature values has exactly one integer label.
    dataset = datasets.load_iris()
    y = dataset['target']
    iris_df = pd.DataFrame(dataset['data'], columns=dataset['feature_names'])
    # Take the feature matrix before the label column is appended.
    X = iris_df.to_numpy()
    iris_df['species'] = y
    plot_chart(iris_df)

    # Hold out 20% of the samples for testing; the seed keeps it repeatable.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=1234)

    clf = Knn(k=3)
    clf.fit(X_train, y_train)
    predictions = clf.predict(X_test)

    print('Test samples shape: ', X_test.shape, sep='')
    print(X_test)
    print()
    print('Predictions shape: ', predictions.shape, sep='')
    print(predictions)

    print()
    # Compare the predicted labels with the held-out ground truth.
    calculate_accuracy(predictions, y_test)

    # Classify a single, previously unseen sample.
    new_features = np.asarray([[6.2, 2.8, 5.7, 1.8]])
    predicted_label = clf.predict(new_features)

    print()
    print('New Features: ', new_features, sep='')
    print('Predicted label: ', predicted_label, sep='')
    print('Predicted speices: ', species[int(predicted_label[0])], sep='')
Exemplo n.º 8
0
def main():
    """Evaluate the default Knn on MNIST and plot the first 20 predictions.

    Prints the count of correct predictions, the number of test samples and
    the resulting accuracy, then shows a 4x5 grid of test images titled
    with their predicted labels.
    """
    X_train, y_train, X_test, y_test = load_mnist()

    classifier = Knn()
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)

    # A zero label difference marks a correct prediction.
    is_correct = (y_test - y_pred) == 0
    correct = sum(is_correct)
    n_test = len(X_test)

    print('==> correct:', correct)
    print('==> total:', n_test)
    print('==> acc:', correct / n_test)

    # Plot predicted samples.
    _, axes_grid = plt.subplots(nrows=4, ncols=5, sharex='all', sharey='all')
    panels = axes_grid.flatten()
    for position in range(20):
        current = panels[position]
        current.set_title(y_pred[position])
        current.imshow(X_test[position], cmap='Greys', interpolation='nearest')
    # Ticks are removed on the first panel only; the axes are shared.
    panels[0].set_xticks([])
    panels[0].set_yticks([])
    plt.tight_layout()
    plt.show()
Exemplo n.º 9
0
def get_accuracy(k, trainx, trainy, testx, testy):
    """Return the accuracy score of ``Knn(k)`` for one train/test split.

    The model is fitted on ``trainx``/``trainy``; its predictions for
    ``testx`` are scored against ``testy`` with ``accuracy_score``.
    """
    model = Knn(k)
    model.fit(trainx, trainy)
    return accuracy_score(model.predict(testx), testy)
Exemplo n.º 10
0
 def test_input_dimension(self):
     """Check that fit raises ValueError when X and y dimensions are inconsistent."""
     model = Knn(n_neighbors=3)
     # Training features are paired with test labels on purpose
     # (presumably of a different length) to provoke the error.
     self.assertRaises(ValueError, model.fit, X_train, y_test)
Exemplo n.º 11
0
 def test_manhattan_distance(self):
     """Check the Manhattan distance against known reference values."""
     model = Knn(n_neighbors=3)
     model.fit(np.array(little_X), little_Y)
     query = np.array([5, 6])
     distances = model._manhattan_distance(query)
     # Element-wise comparison; every distance has to match exactly.
     matches = distances == [7, 7]
     assert matches.all(), "Manhattan Distance is not correct"
Exemplo n.º 12
0
 def test_euclidean_distance(self):
     """Check the Euclidean distance against known reference values."""
     model = Knn(n_neighbors=3)
     model.fit(np.array(little_X), little_Y)
     query = np.array([5, 6])
     distances = model._euclidean_distance(query)
     # Element-wise comparison; every distance has to match exactly.
     matches = distances == [5, 5]
     assert matches.all(), "Euclidean Distance is not correct"
Exemplo n.º 13
0
    # Split the training indices into num_folds folds (sizes may differ by
    # one when num_training is not divisible by num_folds).
    indices = np.array_split(np.arange(num_training),
                             indices_or_sections=num_folds)
    x_train_folds = [x_train[fold] for fold in indices]
    y_train_folds = [y_train[fold] for fold in indices]

    # Maps every candidate k to its list of per-fold validation accuracies.
    k_to_accuracies = {}
    for k in k_choices:
        acc = []
        for i in range(num_folds):
            # Train on every fold except fold i ...
            x = x_train_folds[0:i] + x_train_folds[i + 1:]
            x = np.concatenate(x, axis=0)

            y = y_train_folds[0:i] + y_train_folds[i + 1:]
            y = np.concatenate(y, axis=0)

            # ... and validate on the held-out fold i.
            test_x = np.array(x_train_folds[i])
            test_y = np.array(y_train_folds[i])

            classifier = Knn()
            print(x)
            classifier.fit(np.array(x), np.array(y))

            y_pred = classifier.predict(k, 'M', test_x)
            accuracy = np.mean(y_pred == test_y)
            acc.append(accuracy)
        # Store the result inside the k loop: recording it after the loop
        # kept only the last k and failed when k_choices was empty.
        k_to_accuracies[k] = acc
    for k in sorted(k_to_accuracies):
        for accuracy in k_to_accuracies[k]:
            print('k=%d,accuracy=%f' % (k, accuracy))
Exemplo n.º 14
0
from knn import Knn


# Load the data
iris = datasets.load_iris()
X = iris.data[:, :2]  # keep only the first two feature columns
y = iris.target

# Kmeans (fitted on the features only, no labels are passed)
mdl_kmeans = Kmeans(k=3)
mdl_kmeans.fit(X)

# KNN
n_neighbors = 15
mdl_knn = Knn(k=n_neighbors)
mdl_knn.fit(X, y)

# Model collection
mdls = [mdl_kmeans, mdl_knn]

for mdl in mdls:
    # Draw the prediction map
    cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
    cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])
    # Grid over both features, padded by 1 on each side, with step 0.02
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, .02),
                         np.arange(y_min, y_max, .02))
    # Predict for every grid point, then reshape back to the grid shape
    Z = mdl.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    plt.figure()