def knn_classification(k, dist_func, X_train, Y_train, X_predict):
    """Predict one label per row of ``X_predict`` by k-nearest-neighbour vote.

    Every training point is stored in a KD tree with its label appended as an
    extra trailing column, so the labels of the neighbours returned by the
    search can be read directly from the last column of each neighbour.

    Args:
        k: Number of neighbours requested from ``KDTree.knn_search``.
        dist_func: Distance function forwarded unchanged to
            ``KDTree.knn_search``.
        X_train: 2-D array of training points, one row per example.
        Y_train: Array with one label per training row. It is reshaped to a
            single column, so its size must equal ``X_train.shape[0]``.
            Labels share an array with the coordinates, so they must be
            representable in that array's numeric dtype.
        X_predict: Array of query points; one prediction is made per row.

    Returns:
        1-D array (NumPy's default float dtype) holding the predicted label
        for each query row, in order.

    Raises:
        ValueError: If ``Y_train`` cannot be reshaped to one label per
            training row (raised by ``reshape``).
    """
    # NOTE(review): this file contains a second, near-identical definition of
    # this function; at import time the later definition replaces this one.
    m_examples, n_dimensions = X_train.shape

    # use kd tree structure for knn searching
    labelled_points = np.append(
        X_train, Y_train.reshape(m_examples, 1), axis=1)
    tree = KDTree.build_tree(labelled_points, n_dimensions)

    # store results in the predictions vector
    Y_predict = np.empty(X_predict.shape[0])

    # perform knn search for each test data
    for i, x in enumerate(X_predict):
        # knn_search returns a 3-tuple; only its first element (the
        # neighbours) is needed for the vote, the rest is discarded.
        labelled_nearest_neighbors, _, _ = KDTree.knn_search(
            tree, x, k, n_dimensions, dist_func)

        # nearest neighbor labels are the last column
        nearest_neighbors_labels = np.array(labelled_nearest_neighbors)[:, -1]
        Y_predict[i] = mode_with_random_tie_breaking(nearest_neighbors_labels)

    return Y_predict
def one_vs_all_knn_classification(k, dist_func, X_train, Y_train, X_predict):
    """Predict one label per row of ``X_predict`` with one-vs-all kNN voting.

    Training points are stored in a KD tree together with their row index
    (appended as a trailing column).  For each query the indices of the
    nearest neighbours are mapped back to their labels; every neighbour casts
    one vote for its own class, and the class with the most votes wins (ties
    are resolved by ``mode_with_random_tie_breaking``).

    See http://en.wikipedia.org/wiki/Multiclass_classification

    Args:
        k: Number of neighbours requested from ``KDTree.knn_search``.
        dist_func: Distance function forwarded unchanged to
            ``KDTree.knn_search``.
        X_train: 2-D array of training points, one row per example.
        Y_train: 1-D array-like with one label per training row.  Labels must
            be exactly the integers ``0 .. n_classes - 1`` (every class is
            assumed to have at least one representative).
        X_predict: Array of query points; one prediction is made per row.

    Returns:
        1-D array (NumPy's default float dtype) holding the predicted label
        for each query row, in order.

    Raises:
        ValueError: If ``Y_train`` does not hold exactly one label per
            training row.
        AssertionError: If the labels are not zero-based consecutive
            integers.
    """
    # NOTE(review): this file contains a second, near-identical definition of
    # this function; at import time the later definition replaces this one.
    m_examples, n_dimensions = X_train.shape

    Y_train = np.asarray(Y_train)
    if Y_train.shape != (m_examples,):
        raise ValueError(
            "Y_train must hold exactly one label per training row")

    # The label check depends only on the training labels, so it is done
    # once up front instead of once per query.
    possible_labels = set(Y_train.tolist())
    if not possible_labels.issubset(range(len(possible_labels))):
        # Raised explicitly (rather than via ``assert``) so the check is not
        # stripped under ``python -O``; the exception type and message are
        # the ones callers have always seen.
        raise AssertionError("accept only zero-based indexed, integer labels")

    # use kd tree structure for knn searching
    train_indices = np.arange(m_examples).reshape(m_examples, 1)
    indexed_points = np.append(X_train, train_indices, axis=1)
    tree = KDTree.build_tree(indexed_points, n_dimensions)

    # store results in the predictions vector
    Y_predict = np.empty(X_predict.shape[0])

    # perform knn search for each test data
    for i, x in enumerate(X_predict):
        indexed_nearest_neighbors = KDTree.knn_search(
            tree, x, k, n_dimensions, dist_func)[0]

        # neighbor indices are the last column
        neighbor_indices = np.array(
            indexed_nearest_neighbors)[:, -1].astype(int)

        # One vote per neighbour for its own class.  Sorting lists the votes
        # class by class (all votes for 0, then 1, ...), which is the same
        # list a per-class tally expanded in class order would give.
        votes = sorted(int(label) for label in Y_train[neighbor_indices])
        Y_predict[i] = mode_with_random_tie_breaking(votes)

    return Y_predict
def one_vs_all_knn_classification(k, dist_func, X_train, Y_train, X_predict):
    """Classify each row of ``X_predict`` by one-vs-all k-nearest-neighbour vote.

    The training points are indexed (row number appended as the last column)
    and stored in a KD tree.  For every query, the row indices of its nearest
    neighbours are translated back into labels, each neighbour votes for its
    own class, and ``mode_with_random_tie_breaking`` picks the most voted
    class.

    See http://en.wikipedia.org/wiki/Multiclass_classification

    Args:
        k: Number of neighbours requested from ``KDTree.knn_search``.
        dist_func: Distance function passed through to
            ``KDTree.knn_search``.
        X_train: 2-D array of training points, one row per example.
        Y_train: 1-D array-like of labels, one per training row.  Only the
            integers ``0 .. n_classes - 1`` are accepted, and each class is
            assumed to occur at least once.
        X_predict: Array of query points; one prediction is made per row.

    Returns:
        1-D array (NumPy's default float dtype) with the predicted label of
        each query row, in order.

    Raises:
        ValueError: If ``Y_train`` does not contain exactly one label per
            training row.
        AssertionError: If the labels are not zero-based consecutive
            integers.
    """
    # NOTE(review): an earlier, near-identical definition of this function
    # exists in this file; being the later one, this definition is the one
    # left bound to the name after import.
    m_examples, n_dimensions = X_train.shape

    Y_train = np.asarray(Y_train)
    if Y_train.shape != (m_examples,):
        raise ValueError(
            "Y_train must hold exactly one label per training row")

    # Validate the label set a single time: it is independent of the query
    # being classified, so there is no reason to repeat it inside the loop.
    possible_labels = set(Y_train.tolist())
    n_classes = len(possible_labels)
    if not possible_labels.issubset(range(n_classes)):
        # Explicit raise keeps the historical exception type and message
        # while remaining active under ``python -O`` (which drops ``assert``).
        raise AssertionError("accept only zero-based indexed, integer labels")

    # use kd tree structure for knn searching
    train_indices = np.arange(m_examples).reshape(m_examples, 1)
    indexed_points = np.append(X_train, train_indices, axis=1)
    tree = KDTree.build_tree(indexed_points, n_dimensions)

    # store results in the predictions vector
    Y_predict = np.empty(X_predict.shape[0])

    # perform knn search for each test data
    for i, x in enumerate(X_predict):
        indexed_nearest_neighbors = KDTree.knn_search(
            tree, x, k, n_dimensions, dist_func)[0]

        # neighbor indices are the last column
        neighbor_indices = np.array(
            indexed_nearest_neighbors)[:, -1].astype(int)
        neighbor_labels = Y_train[neighbor_indices]

        # Each neighbour contributes one vote for its class; the sorted list
        # groups the votes by class in ascending class order, matching what a
        # per-class vote table expanded in class order produces.
        votes = sorted(int(label) for label in neighbor_labels)
        Y_predict[i] = mode_with_random_tie_breaking(votes)

    return Y_predict
def knn_classification(k, dist_func, X_train, Y_train, X_predict):
    """Classify each row of ``X_predict`` by majority vote of its k neighbours.

    The label of every training point is appended to the point as a trailing
    column before the KD tree is built, which lets the vote read neighbour
    labels from the last column of the search result.

    Args:
        k: Number of neighbours requested from ``KDTree.knn_search``.
        dist_func: Distance function passed through to
            ``KDTree.knn_search``.
        X_train: 2-D array of training points, one row per example.
        Y_train: Array of labels, one per training row.  It is reshaped to a
            column, so its size must equal ``X_train.shape[0]``; because the
            labels live in the same array as the coordinates they must fit
            that array's numeric dtype.
        X_predict: Array of query points; one prediction is made per row.

    Returns:
        1-D array (NumPy's default float dtype) with the predicted label of
        each query row, in order.

    Raises:
        ValueError: If ``Y_train`` cannot be reshaped to one label per
            training row (raised by ``reshape``).
    """
    # NOTE(review): an earlier, near-identical definition of this function
    # exists in this file; being the later one, this definition is the one
    # left bound to the name after import.
    m_examples, n_dimensions = X_train.shape

    # use kd tree structure for knn searching
    label_column = Y_train.reshape(m_examples, 1)
    labelled_points = np.append(X_train, label_column, axis=1)
    tree = KDTree.build_tree(labelled_points, n_dimensions)

    # store results in the predictions vector
    Y_predict = np.empty(X_predict.shape[0])

    # perform knn search for each test data
    for i, x in enumerate(X_predict):
        # Only the neighbours (first element of the 3-tuple returned by
        # knn_search) take part in the vote; the other two are ignored.
        labelled_nearest_neighbors, _, _ = KDTree.knn_search(
            tree, x, k, n_dimensions, dist_func)

        # nearest neighbor labels are the last column
        nearest_neighbors_labels = np.array(labelled_nearest_neighbors)[:, -1]
        Y_predict[i] = mode_with_random_tie_breaking(nearest_neighbors_labels)

    return Y_predict
def _benchmark_tree(filenames, build_tree, radius_search, k, radius):
    """Time tree build, kNN, radius and brute-force search over point clouds.

    For every file the point cloud is loaded with ``read_velodyne_bin`` and
    its first point is used as the query for all three searches.

    Args:
        filenames: Non-empty sequence of point-cloud file paths.
        build_tree: Callable taking a point cloud and returning a built tree
            that offers ``knn_search(query, result_set)``.
        radius_search: Callable ``(tree, query, result_set)`` that runs the
            tree's radius search.
        k: Capacity given to ``KNNResultSet``.
        radius: Radius given to ``RadiusNNResultSet``.

    Returns:
        Tuple ``(build, knn, radius, brute)`` of average milliseconds per
        file.
    """
    build_sum = knn_sum = radius_sum = brute_sum = 0.0

    for filename in filenames:
        point_cloud = read_velodyne_bin(filename)

        # build tree:
        begin_t = time.perf_counter()
        tree = build_tree(point_cloud)
        build_sum += time.perf_counter() - begin_t

        query = point_cloud[0, :]

        # kNN query:
        begin_t = time.perf_counter()
        knn_result_set = KNNResultSet(capacity=k)
        tree.knn_search(query, knn_result_set)
        knn_sum += time.perf_counter() - begin_t

        # RNN query:
        begin_t = time.perf_counter()
        rnn_result_set = RadiusNNResultSet(radius=radius)
        radius_search(tree, query, rnn_result_set)
        radius_sum += time.perf_counter() - begin_t

        # brute force: the sorted distances are deliberately discarded, only
        # the cost of computing them is being measured.
        begin_t = time.perf_counter()
        diff = np.linalg.norm(point_cloud - query, axis=1)
        nn_idx = np.argsort(diff)
        _ = diff[nn_idx]
        brute_sum += time.perf_counter() - begin_t

    file_count = len(filenames)
    return tuple(
        total * 1000 / file_count
        for total in (build_sum, knn_sum, radius_sum, brute_sum)
    )


def main(data_dir):
    """Benchmark OCTree and KDTree searches on every file in ``data_dir``.

    Each directory entry is loaded as a point cloud and, for both tree types,
    the average time (in milliseconds per file) of tree construction, a kNN
    query, a radius query and a brute-force sorted-distance search is
    printed.

    Args:
        data_dir: Directory whose entries are all read with
            ``read_velodyne_bin`` (the original comment describes the data as
            KITTI Velodyne scans).

    Raises:
        ValueError: If ``data_dir`` contains no entries, since no average can
            be computed.
    """
    # kNN & RNN configuration:
    leaf_size = 32
    min_extent = 0.0001
    k = 8
    radius = 1

    filenames = [os.path.join(data_dir, name) for name in os.listdir(data_dir)]
    if not filenames:
        # Without this guard the averages below would divide by zero.
        raise ValueError("no point cloud files found in %r" % (data_dir,))

    print("OCTree --------------")
    octree_timings = _benchmark_tree(
        filenames,
        build_tree=lambda point_cloud: OCTree(
            point_cloud=point_cloud, leaf_size=leaf_size,
            min_extent=min_extent),
        radius_search=lambda tree, query, result_set: tree.rnn_fast_search(
            query, result_set),
        k=k,
        radius=radius,
    )
    print("Octree: build %.3f, knn %.3f, radius %.3f, brute %.3f"
          % octree_timings)

    print("KDTree --------------")
    kdtree_timings = _benchmark_tree(
        filenames,
        build_tree=lambda point_cloud: KDTree(
            point_cloud=point_cloud, leaf_size=leaf_size),
        radius_search=lambda tree, query, result_set: tree.rnn_search(
            query, result_set),
        k=k,
        radius=radius,
    )
    print("Kdtree: build %.3f, knn %.3f, radius %.3f, brute %.3f"
          % kdtree_timings)