def KNNAccuracy(distance, data, k, flag):
    """Return the k-NN classification accuracy, in percent, after a linear map.

    The rows of ``data[0]`` are projected with ``np.dot(data[0], distance.T)``,
    a Shogun KNN is trained on the projected points, and every point is then
    classified by a vote among its neighbours.

    Args:
        distance: Matrix applied to the points as described above.
        data: Indexable pair; ``data[0]`` is a 2-D array with one point per
            row and ``data[1]`` is the array of class labels.
        k: Number of neighbours that take part in the vote. The KNN itself is
            built with ``k + 1`` and the first neighbour row is discarded --
            presumably because each point is returned as its own nearest
            neighbour (confirm against the Shogun KNN documentation).
        flag: If truthy, each neighbour votes with weight
            ``1 / (d + 1) ** 2`` where ``d`` is its Euclidean distance to the
            query point; otherwise each neighbour votes with weight
            ``1 - rank * 1e-8``.

    Returns:
        ``(correct / number_of_points) * 100``.

    NOTE(review): the weighting distance ``d`` is measured between rows of
    ``data[0]`` (the untransformed points), not in the projected space used
    for the neighbour search -- confirm that this is intended.
    NOTE(review): the final division is between two integers; under Python 2
    it would truncate -- confirm the target interpreter.
    """
    points = data[0]

    # Project the points and build the Shogun objects needed for the search.
    projected = np.dot(points, distance.T)
    feat = RealFeatures(projected.T)
    labels = MulticlassLabels(data[1].astype(np.float64))
    knn = KNN(k + 1, EuclideanDistance(feat, feat), labels)
    knn.train(feat)

    # One column per query point, one row per neighbour; drop the first row.
    neighbours = np.delete(knn.nearest_neighbors(), 0, 0)

    classes = np.unique(labels)
    numClasses = len(classes)

    # Replace every label by the position of its value in `classes`, so that
    # labels can be used directly as indices into the vote table below.
    for row in range(points.shape[0]):
        for classIndex in range(numClasses):
            if labels[row] == classes[classIndex]:
                labels[row] = classIndex
                break

    numQueries = neighbours.shape[1]
    numNeighbours = neighbours.shape[0]
    hits = 0

    for query in range(numQueries):
        votes = [0] * numClasses

        for rank in range(numNeighbours):
            neighbour = neighbours[rank, query]

            if flag:
                gap = np.linalg.norm(points[neighbour, :] - points[query, :])
                # The constant 1 keeps the weight finite when two points
                # overlap (gap == 0).
                weight = 1 / (gap + 1) ** 2
            else:
                # A tiny rank-dependent decrement breaks ties between classes
                # without affecting the outcome of a clear majority.
                weight = 1 - rank * 1e-8

            votes[int(labels[neighbour])] += weight

        if np.argmax(votes) == labels[query]:
            hits += 1

    return (hits / numQueries) * 100
def _draw_knn_panel(axis, points, classes, classifier):
    """Plot the data and the classifier's current neighbourhood graph on `axis`.

    Forwards to ``plot_data`` and ``plot_neighborhood_graph`` and then applies
    the fixed aspect ratio and axis limits shared by every panel.
    """
    plot_data(points, classes, axis)
    plot_neighborhood_graph(points, classifier.nearest_neighbors(), axis)
    axis.set_aspect('equal')
    axis.set_xlim(-6, 4)
    axis.set_ylim(-3, 2)


# Three stacked subplots; only the first two are drawn on in this snippet.
figure, axarr = pyplot.subplots(3, 1)

# Load the data set and wrap it in Shogun containers (RealFeatures receives
# the transpose of x).
x, y = sandwich_data()
features = RealFeatures(x.T)
labels = MulticlassLabels(y)

print(
    '%d vectors with %d features'
    % (features.get_num_vectors(), features.get_num_features())
)
# Sanity check: there must be exactly one label per feature vector.
assert features.get_num_vectors() == labels.get_num_labels()

# Panel 0: neighbourhood graph under the plain Euclidean distance.
distance = EuclideanDistance(features, features)
k = 2
knn = KNN(k, distance, labels)
_draw_knn_panel(axarr[0], x, y, knn)

# Train LMNN with the same k.
lmnn = LMNN(features, labels, k)
lmnn.set_maxiter(10000)
lmnn.train()
# Kept at module level; not used again within this snippet.
L = lmnn.get_linear_transform()

# Panel 1: same points, neighbourhood graph under the distance LMNN provides.
knn.set_distance(lmnn.get_distance())
_draw_knn_panel(axarr[1], x, y, knn)
def _draw_knn_panel(axis, points, classes, classifier):
    """Plot the data and the classifier's current neighbourhood graph on `axis`.

    Forwards to ``plot_data`` and ``plot_neighborhood_graph`` and then applies
    the fixed aspect ratio and axis limits shared by every panel.
    """
    plot_data(points, classes, axis)
    plot_neighborhood_graph(points, classifier.nearest_neighbors(), axis)
    axis.set_aspect('equal')
    axis.set_xlim(-6, 4)
    axis.set_ylim(-3, 2)


# Three stacked subplots; only the first two are drawn on in this snippet.
figure, axarr = pyplot.subplots(3, 1)

# Load the data set and wrap it in Shogun containers (RealFeatures receives
# the transpose of x).
x, y = sandwich_data()
features = RealFeatures(x.T)
labels = MulticlassLabels(y)

print(
    '%d vectors with %d features'
    % (features.get_num_vectors(), features.get_num_features())
)
# Sanity check: there must be exactly one label per feature vector.
assert features.get_num_vectors() == labels.get_num_labels()

# Panel 0: neighbourhood graph under the plain Euclidean distance.
distance = EuclideanDistance(features, features)
k = 2
knn = KNN(k, distance, labels)
_draw_knn_panel(axarr[0], x, y, knn)

# Train LMNN with the same k.
lmnn = LMNN(features, labels, k)
lmnn.set_maxiter(10000)
lmnn.train()
# Kept at module level; not used again within this snippet.
L = lmnn.get_linear_transform()

# Panel 1: same points, neighbourhood graph under the distance LMNN provides.
knn.set_distance(lmnn.get_distance())
_draw_knn_panel(axarr[1], x, y, knn)