def get_closest_waypoint(self, pose):
    """Return the index of the path waypoint nearest to ``pose``.

    The k-d tree over ``self.waypoints`` is built lazily, on the first call
    made once waypoints are available, and is reused on later calls.
    https://en.wikipedia.org/wiki/Closest_pair_of_points_problem

    Args:
        pose (Pose): position to match a waypoint to

    Returns:
        int: index of the closest waypoint in self.waypoints, or 0 when no
            tree exists (waypoints have not been set yet)
    """
    if self.waypoints is not None and self.kdtree is None:
        if VERBOSE:
            print('tl_detector: g_cl_wp: initializing kdtree')
        # Every tree entry is (x, y, waypoint index) so the index can be
        # read back from whatever entry the tree reports as closest.
        indexed_xy = [
            (float(wp.pose.pose.position.x),
             float(wp.pose.pose.position.y),
             idx)
            for idx, wp in enumerate(self.waypoints)
        ]
        self.kdtree = KDTree(indexed_xy)

    if self.kdtree is None:
        return 0

    query_xy = (pose.position.x, pose.position.y)
    nearest = self.kdtree.closest_point(query_xy)
    if VERBOSE:
        print('tl_detector: g_cl_wp: closest point to {} is {}'.format(
            query_xy, nearest))
    return nearest[2]
def knn_classification(k, dist_func, X_train, Y_train, X_predict):
    """Predict a label for every row of ``X_predict`` by k-nearest-neighbour vote.

    Args:
        k: number of neighbours requested from the k-d tree per query.
        dist_func: distance function forwarded to ``KDTree.knn_search``.
        X_train: 2-D array of training samples (``.shape`` is read as
            ``(examples, dimensions)``).
        Y_train: training labels; reshaped to one column per example.
        X_predict: samples to classify, iterated row by row.

    Returns:
        Array with one predicted label per row of ``X_predict``.
    """
    n_train, n_features = X_train.shape

    # Append each label to its sample as a trailing column, so the label
    # comes back together with the neighbour the search returns.
    label_column = Y_train.reshape(n_train, 1)
    tree = KDTree.build_tree(
        np.append(X_train, label_column, axis=1), n_features)

    predictions = np.empty(X_predict.shape[0])

    # Number of points visited over all queries (benchmark bookkeeping only).
    visited_total = 0

    for row, query in enumerate(X_predict):
        neighbours, _, visited = KDTree.knn_search(
            tree, query, k, n_features, dist_func)
        # The label is the last column of every returned neighbour.
        neighbour_labels = np.array(neighbours)[:, -1]
        predictions[row] = mode_with_random_tie_breaking(neighbour_labels)
        visited_total += visited

    return predictions
def test_init_with_list_dim_3(self):
    """A 3-d tree built from a list keeps the first point as root.

    The remaining two points are expected as the root's left and right
    children respectively.
    """
    initial_points = [(1, 2, 3), (0, 1, 4), (2, 4, 3)]
    tree = KDTree(3, initial_points)

    assert tree.root.data == (1, 2, 3)
    assert tree.root.left.data == (0, 1, 4)
    assert tree.root.right.data == (2, 4, 3)

    assert tree.size == 3
    assert tree.is_empty() is False
class SimpleGraph:
    """Undirected graph whose nodes are points stored in a KDTree.

    Node ids are whatever ``KDTree.insert`` returns; adjacency is kept in a
    ``defaultdict(list)`` keyed by node id.
    """

    def __init__(self, dim, capacity=100000):
        """Create an empty graph over ``dim``-dimensional points."""
        self._edges = collections.defaultdict(list)
        self._kd = KDTree(dim, capacity)
        # Ids of the start / target nodes; left unset until a caller fills them.
        self.start_id = None
        self.target_id = None

    def __len__(self):
        """Number of nodes, as reported by the underlying tree."""
        return len(self._kd)

    def insert_new_node(self, point):
        """Store ``point`` in the tree and return the id assigned to it."""
        return self._kd.insert(point)

    def add_edge(self, node_id, neighbor_id):
        """Connect two nodes in both directions."""
        for src, dst in ((node_id, neighbor_id), (neighbor_id, node_id)):
            self._edges[src].append(dst)

    def get_parent(self, child_id):
        """Return the adjacency list recorded for ``child_id``.

        Looking up an id without edges yields (and stores) an empty list,
        because the adjacency map is a ``defaultdict``.
        """
        return self._edges[child_id]

    def get_point(self, node_id):
        """Return the point stored for ``node_id``."""
        node = self._kd.get_node(node_id)
        return node.point

    def get_nearest_node(self, point):
        """Delegate a nearest-point query to the tree."""
        return self._kd.find_nearest_point(point)

    def get_neighbor_within_radius(self, point, radius):
        """ Return a list of node_id within the radius """
        return self._kd.find_points_within_radius(point, radius)
def build_function(cls, world, viewmode):
    """Populate ``world`` with a matte mesh scene accelerated by a KDTree.

    In ``"realtime"`` mode a 64x64 view plane with a regular sampler is used;
    any other mode renders 200x200 with a multi-jittered sampler. The mesh
    is read from ``meshes/mesh1.obj``, its bounding boxes are wrapped in a
    KDTree (which is printed), and the tree is the only object in the scene.

    Args:
        cls: unused; kept for the caller's calling convention.
        world: scene object whose attributes are assigned here.
        viewmode: ``"realtime"`` or any other value for the quality setup.
    """
    world.viewmode = viewmode
    if viewmode == "realtime":
        resolution = (64, 64)
        pixel_size = 5
        sampler = RegularSampler()
    else:
        resolution = (200, 200)
        pixel_size = 1.6
        sampler = MultiJitteredSampler(sample_dim=2)

    world.viewplane = ViewPlane(resolution=resolution,
                                pixel_size=pixel_size,
                                sampler=sampler)
    world.camera = PinholeCamera(eye=(5., 2., -7.),
                                 up=(0., 1., 0.),
                                 lookat=(0., .5, 0.),
                                 viewing_distance=700.)
    world.background_color = (0.0, 0.0, 0.0)
    world.tracer = Tracer(world)
    world.objects = []

    matte2 = Matte(ka=1, kd=1, cd=numpy.array([1., 1., 1.]))
    occluder = AmbientLight(numpy.array((1., 1., 1.)), .2)
    world.ambient_color = occluder

    # Close the mesh file as soon as it has been parsed instead of leaking
    # the handle until garbage collection.
    with open('meshes/mesh1.obj') as mesh_file:
        mesh = read_mesh(mesh_file)
    mesh.material = matte2

    boxes = mesh.get_bounding_boxes()
    tree = KDTree(BoundingBoxes(boxes))
    tree.print_tree()
    world.objects.append(tree)

    world.lights = [
        PointLight(numpy.array((1., 1., 1.)), 1.,
                   numpy.array((1., 2., 2.)),
                   radius=4, attenuation=2, cast_shadow=False)
    ]
class SimpleTree:
    """Rooted tree over points held in a KDTree, with a child -> parent map."""

    def __init__(self, dim):
        """Create an empty tree over ``dim``-dimensional points."""
        self._kd = KDTree(dim)
        self._parents_map = {}

    def __len__(self):
        """Number of points, as reported by the underlying KDTree."""
        return len(self._kd)

    def insert_new_node(self, point, parent=None):
        """Insert ``point`` and record ``parent`` (``None`` for a root).

        Returns the node id handed out by ``KDTree.insert``.
        """
        new_id = self._kd.insert(point)
        self._parents_map[new_id] = parent
        return new_id

    def get_parent(self, child_id):
        """Return the parent id recorded for ``child_id`` (KeyError if unknown)."""
        return self._parents_map[child_id]

    def get_point(self, node_id):
        """Return the point stored for ``node_id``."""
        node = self._kd.get_node(node_id)
        return node.point

    def get_nearest_node(self, point):
        """Delegate a nearest-point query to the KDTree."""
        return self._kd.find_nearest_point(point)

    def construct_path_to_root(self, leaf_node_id):
        """Collect the points from ``leaf_node_id`` up to the root.

        The walk follows parent links until a ``None`` parent is reached;
        the leaf's point comes first in the returned list.
        """
        points_on_path = []
        current = leaf_node_id
        while current is not None:
            points_on_path.append(self.get_point(current))
            current = self.get_parent(current)
        return points_on_path

    def get_num_nodes(self):
        """Number of nodes that have a parent entry."""
        return len(self._parents_map)
def test_init_with_list_dim_2(self):
    """A 2-d tree built from ascending points grows to the right.

    (1, 1) is expected as root with no left child, (3, 3) as its right
    child, and (2, 2) as the left child of (3, 3).
    """
    tree = KDTree(2, [(1, 1), (3, 3), (2, 2)])
    assert tree.root.data == (1, 1)
    # Identity check: an absent child must be the None singleton itself,
    # independent of any __eq__ a node class may define.
    assert tree.root.left is None
    assert tree.root.right.data == (3, 3)
    assert tree.root.right.left.data == (2, 2)
    assert tree.size == 3
    assert tree.is_empty() is False
def __init__(self, p_set, measurer=SquareDistMeasurer(2)):
    """Index every element of ``p_set`` in a KDTree.

    Args:
        p_set: non-empty indexable collection of points; element ``i`` is
            stored as ``Element(p_set[i], i)``.
        measurer: distance measurer; its ``k`` attribute is passed to the
            tree constructor. NOTE(review): the default instance is created
            once at definition time and shared by all callers.
    """
    super(KDFinder, self).__init__(None, measurer)

    assert p_set is not None
    self.count = len(p_set)
    assert self.count > 0

    self.tree = KDTree(None, 'value', measurer.k)
    for position in range(self.count):
        self.tree.insert(Element(p_set[position], position))

    self.debug = False
def test_init_with_larger_list_dim_5(self):
    """A 5-d tree built from five points has the expected shape and size."""
    initial_points = [
        (1, 2, 3, 4, 5),
        (0, 1, 4, 1, 2),
        (2, 4, 3, 6, 7),
        (9, 8, 10, 7, 3),
        (-1, 0, 0, 14, 15),
    ]
    tree = KDTree(5, initial_points)

    # First level below the root.
    assert tree.root.data == (1, 2, 3, 4, 5)
    assert tree.root.left.data == (0, 1, 4, 1, 2)
    assert tree.root.right.data == (2, 4, 3, 6, 7)

    # Second level.
    assert tree.root.right.right.data == (9, 8, 10, 7, 3)
    assert tree.root.left.left.data == (-1, 0, 0, 14, 15)

    assert tree.size == 5
    assert tree.is_empty() is False
def __init__(self, world):
    """Build the roadmap for ``world``.

    A 2-d KDTree is seeded with ``world.kdtreeStart``, then ``getPoints``
    and ``getConnections`` are run against the world.
    """
    # number of samples in the prm
    self.size = 2500
    # number of connections per sample
    self.connsPerSample = 6
    self.carSize = world.carSize

    self.points = KDTree(2, [world.kdtreeStart])
    self.getPoints(world)

    # edges of the prm
    self.connections = self.getConnections(world)
def __init__(self, start, goal):
    """Set up a tree search from ``start`` towards ``goal``.

    The KDTree dimension is taken from ``len(start)`` and the tree
    initially holds only the start point.
    """
    self.start, self.goal = start, goal

    # distance within which tree has 'reached' the goal
    self.eps = 5
    # extension distance
    self.ext = 10

    self.tree = KDTree(len(self.start), [self.start])

    # edges
    self.connections = {}
def test_nearest_neighbors(self):
    """Neighbours come back as (point, distance) pairs, nearest first."""
    tree = KDTree(2, [(1, 1), (2, 2), (4, 4)])

    expected_from_origin = [
        ((1, 1), 1.4142135623730951),
        ((2, 2), 2.8284271247461903),
        ((4, 4), 5.656854249492381),
    ]
    neighbors = tree.nearest_neighbors((0, 0))
    for rank, expected in enumerate(expected_from_origin):
        assert neighbors[rank] == expected

    # Querying a stored point reports that point at distance 0.
    expected_from_stored_point = [
        ((1, 1), 0),
        ((2, 2), 1.4142135623730951),
        ((4, 4), 4.242640687119285),
    ]
    neighbors2 = tree.nearest_neighbors((1, 1))
    for rank, expected in enumerate(expected_from_stored_point):
        assert neighbors2[rank] == expected
def fit(self, X, y):
    """Store the training data and build the search tree over ``X``.

    Args:
        X (array-like, shape (n_samples, n_features)): Training data.
        y (array, shape (n_samples,)): Target values.
    """
    features, targets = np.array(X), np.array(y)
    self.X = features
    self.y = targets

    if self.algorithm == 'kd_tree':
        # The tree receives X as passed in, not the converted array.
        self.tree = KDTree(X, self.leaf_size, self.p)
    # 'ball_tree' is recognised but not implemented: no tree is built.
def setUp(self):
    """Fill a fresh 3-d tree with 20 random integer points.

    The same points, shuffled after insertion, are kept in ``self.points``
    so tests visit them in an order unrelated to insertion order.
    """
    self.dim = 3
    self.count = 20

    random_points = np.random.randint(
        low=0, high=20, size=(self.count, self.dim)).tolist()

    self.tree = KDTree(self.dim)
    for point in random_points:
        self.tree.insert(point)

    np.random.shuffle(random_points)
    self.points = random_points
def __init__(self, X=None, y=None, kernel="se", kernel_params=(1, 1,), kernel_priors = None, kernel_extra = None, mean="constant"): self.kernel_name = kernel self.kernel_params = kernel_params self.kernel_priors = kernel_priors self.kernel = kernels.setup_kernel(kernel, kernel_params, kernel_extra, kernel_priors) self.X = X self.n = X.shape[0] if mean == "zero": self.mu = 0 self.y = y if mean == "constant": self.mu = np.mean(y) self.y = y - self.mu if mean == "linear": raise RuntimeError("linear mean not yet implemented...") self.mean = mean self.K = None self.L = None self.alpha = None self.Kinv = None self.posdef = True self.tree = KDTree(X) self.alpha = self._treeCG() print "trained CG, found alpha", self.alpha self._invert_kernel_matrix() print "true alpha is", self.alpha
def _proximity_filter(point, data, total):
    """Query a freshly built KDTree over ``data`` for ``total`` results near ``point``.

    ``data`` is a list of coordinate tuples; the result is whatever
    ``KDTree.query`` returns for ``t=total``.
    """
    return KDTree.construct_from_data(data).query(query_point=point, t=total)
def grow(self):
    """Add the ghost boundary and index the coordinate pairs in a KDTree.

    Returns:
        tuple: (value returned by the ghost-boundary step, KDTree built
        from the generated coordinate-pair list).
    """
    ghost_index_table = self.__add_ghost_boundary()
    pair_tree = KDTree(self.__gen_coords_pair_lst())
    return ghost_index_table, pair_tree
class KNeighborsBase(object):
    """Base class for k-nearest-neighbour models backed by a KDTree.

    Subclasses are expected to override ``_predict``.
    """

    def __init__(self):
        self.k_neighbors = None
        self.tree = None

    def fit(self, X, y, k_neighbors=3):
        """Remember ``k_neighbors`` and build the KDTree from ``X`` and ``y``."""
        self.k_neighbors = k_neighbors
        self.tree = KDTree()
        self.tree.build_tree(X, y)

    # 1. Get the kd tree
    # 2. Create a max-heap
    # 3. Create a queue
    # 4. The outer loop updates the max-heap
    # 5. The inner loop walks the kd tree
    # 6. The loop ends once the heap top is the k-th nearest neighbour
    def knn_search(self, Xi):
        """Collect the nearest stored nodes to ``Xi`` in a bounded max-heap.

        Args:
            Xi: query sample.

        Returns:
            The ``MaxHeap`` (capacity ``self.k_neighbors``, keyed on each
            node's ``dist`` attribute) holding the visited candidates.
        """
        # Local import: the file's import block is not visible from here.
        from collections import deque

        tree = self.tree
        heap = MaxHeap(self.k_neighbors, lambda x: x.dist)
        # Node reached by searching Xi from the root downwards
        nd = tree.search(Xi, tree.root)
        # Initialise the queue of (subtree root, node reached in it) pairs.
        # A deque gives O(1) removal at the front, unlike list.pop(0).
        que = deque([(tree.root, nd)])
        while que:
            # Distance between Xi and the subtree root
            nd_root, nd_cur = que.popleft()
            nd_root.dist = tree.get_eu_dist(Xi, nd_root)
            heap.add(nd_root)
            # Climb from the reached node back up to the subtree root.
            while nd_cur is not nd_root:
                # Distance between Xi and the current node
                nd_cur.dist = tree.get_eu_dist(Xi, nd_cur)
                # Update the best nodes and distances
                heap.add(nd_cur)
                # Queue the sibling subtree when the heap is empty or its
                # top is farther than the parent's splitting hyperplane.
                if nd_cur.brother and (
                        not heap or
                        heap.items[0].dist >
                        tree.get_hyper_plane_dist(Xi, nd_cur.father)):
                    _nd = tree.search(Xi, nd_cur.brother)
                    que.append((nd_cur.brother, _nd))
                nd_cur = nd_cur.father
        return heap

    def _predict(self, Xi):
        """Prediction hook for one sample; the base version returns NotImplemented."""
        return NotImplemented

    def predict(self, X):
        """Apply ``_predict`` to every sample of ``X`` and return the results as a list."""
        return [self._predict(Xi) for Xi in X]
def test_size(self):
    """``size`` starts at 0 and grows by one with every insert."""
    tree = KDTree(1)
    assert tree.size == 0

    for expected_size, item in enumerate(('B', 'A', 'C'), start=1):
        tree.insert(item)
        assert tree.size == expected_size
def autodiscover():
    """Rebuild the module-level ``kd_tree`` from all ``Pixel`` rows."""
    global kd_tree
    # Imported here rather than at module level, as in the original.
    from pixel.models import Pixel

    all_pixels = list(Pixel.objects.all())
    kd_tree = KDTree.construct_from_data(all_pixels)
def one_vs_all_knn_classification(k, dist_func, X_train, Y_train, X_predict):
    """Classify ``X_predict`` with a one-vs-all k-nearest-neighbour vote.

    Every training point is stored in the k-d tree together with its row
    index. For each query the k nearest rows are looked up, each class
    counts how many of them carry its label, and the class with the most
    votes wins (ties broken by ``mode_with_random_tie_breaking``).
    http://en.wikipedia.org/wiki/Multiclass_classification

    Args:
        k: number of neighbours per query.
        dist_func: distance function forwarded to ``KDTree.knn_search``.
        X_train: 2-D array of training samples.
        Y_train: training labels; must be exactly the integers
            ``0 .. n_classes - 1``.
        X_predict: samples to classify, iterated row by row.

    Returns:
        Array with one predicted label per row of ``X_predict``.

    Raises:
        AssertionError: if the labels are not zero-based integers. The
            check now runs once up front, so it also fires when
            ``X_predict`` is empty.
    """
    (m_examples, n_dimensions) = X_train.shape

    # use kd tree structure for knn searching; the appended column is the
    # training row index, so neighbours can be mapped back to their labels
    train_indices = (np.arange(0, m_examples)).reshape(m_examples, 1)
    indexed_points = np.append(X_train, train_indices, axis=1)
    t = KDTree.build_tree(indexed_points, n_dimensions)

    # The label set, its validation and the per-class indicator vectors
    # depend only on the training data, so they are computed once instead
    # of once per query.
    possible_labels = set(Y_train)  # supposing that each class has at
    # least one representative ...
    zero_based_indexed_integer_labels = range(0, len(possible_labels))
    assert possible_labels.issubset(zero_based_indexed_integer_labels), \
        "accept only zero-based indexed, integer labels"

    class_indicators = {}
    for c in zero_based_indexed_integer_labels:
        Y_c = np.zeros(m_examples)
        Y_c[Y_train == c] = 1
        class_indicators[c] = Y_c

    # store results in the predictions vector
    Y_predict = np.empty(X_predict.shape[0])

    # perform knn search for each test data
    for i, x in enumerate(X_predict):
        indexed_nearest_neighbors = \
            KDTree.knn_search(t, x, k, n_dimensions, dist_func)[0]

        # neighbor indices are the last column
        nearest_neighbors_indices = \
            np.array(indexed_nearest_neighbors)[:, -1].astype(int)

        # the predicted label will be the one from the classifier that gives
        # the most votes, so store the votes in a table
        classifier_votes_tab = {
            c: int(sum(class_indicators[c][nearest_neighbors_indices]))
            for c in zero_based_indexed_integer_labels
        }

        # Repeat each class once per vote so the mode is the winning class.
        flattened_table = list(Counter(classifier_votes_tab).elements())
        Y_predict[i] = mode_with_random_tie_breaking(flattened_table)

    return Y_predict
def __init__(self, start, goal, llim, ulim, robot_radius, obstacles, dim=2):
    """Store the planning problem and index its obstacles in a KDTree.

    Args:
        start, goal: converted to numpy arrays and passed through
            ``_check_last_axis`` before being stored.
        llim, ulim: limits, stored as numpy arrays.
        robot_radius: stored unchanged.
        obstacles: Only rectangular for now. np.array of (x,y) positions
            defining a set of obstacles; kept as given and also expanded
            through ``create_obstacles``.
        dim: dimension, stored as ``_dim``.
    """
    self._dim = dim

    start_arr = np.array(start)
    goal_arr = np.array(goal)
    self._check_last_axis(start_arr, "start")
    self._check_last_axis(goal_arr, "goal")

    self._obstacle_config = obstacles
    self._obstacles = self.create_obstacles(obstacles)
    self._obs_kdtree = KDTree(self._obstacles)

    self._robot_radius = robot_radius
    self.llim, self.ulim = np.array(llim), np.array(ulim)
    self.start, self.goal = start_arr, goal_arr
def __init__(self, eps, MinPts, pointlist):
    """Store the clustering parameters and index the points.

    Args:
        eps: stored as ``self.eps``.
        MinPts: stored as ``self.MinPts``.
        pointlist: points; each must expose ``coordinates`` and ``id``.
    """
    self.eps, self.MinPts = eps, MinPts
    self.points = pointlist

    # Positions of all points in ``pointlist``.
    self.unvisited = list(range(len(pointlist)))

    self.kdtree = KDTree.construct_from_data(self.formatpoints())

    # Reverse lookup: coordinate tuple -> id of the point at those coordinates.
    self.pointidmap = {tuple(p.coordinates): p.id for p in pointlist}
class SimpleTree:
    """Rooted tree over points held in a KDTree, with a child -> parent map."""

    def __init__(self, dim):
        """Create an empty tree over ``dim``-dimensional points."""
        self._kd = KDTree(dim)
        self._parents_map = {}

    def insert_new_node(self, point, parent=None):
        """Insert ``point``, remember its ``parent`` and return the new node id."""
        new_id = self._kd.insert(point)
        self._parents_map[new_id] = parent
        return new_id

    def get_parent(self, child_id):
        """Return the parent id recorded for ``child_id`` (KeyError if unknown)."""
        return self._parents_map[child_id]

    def get_point(self, node_id):
        """Return the point stored for ``node_id``."""
        node = self._kd.get_node(node_id)
        return node.point

    def get_nearest_node(self, point):
        """Delegate a nearest-point query to the KDTree."""
        return self._kd.find_nearest_point(point)
def createTrackpointTree(self, trackpoints):
    '''
    Create a tree out of the trackpoints

    trackpoints: iterable of points exposing getX()/getY()/getZ(). The
    argument used to be ignored in favour of self.trackpoints; it is now
    honoured, and self.trackpoints is only the fallback when None is passed.

    Side effects: self.track_tupel_list holds the (x, y, z) tuples and
    self.list4tree a copy of that list, which is what the tree is built from.
    '''
    if trackpoints is None:
        trackpoints = self.trackpoints

    # Change from Vec3 to tupel
    self.track_tupel_list = [
        (point.getX(), point.getY(), point.getZ()) for point in trackpoints
    ]
    # The tree is built from a copy, so track_tupel_list stays in its
    # original order whatever the construction does to its input.
    self.list4tree = self.track_tupel_list[:]
    return KDTree.construct_from_data(self.list4tree)
def one_vs_all_knn_classification(k, dist_func, X_train, Y_train, X_predict):
    """Classify ``X_predict`` with a one-vs-all k-nearest-neighbour vote.

    Training points are stored in the k-d tree with their row index as an
    extra column. For every query the k nearest rows are fetched, each
    class counts the neighbours carrying its label, and the class with the
    most votes is predicted (ties go through
    ``mode_with_random_tie_breaking``).
    http://en.wikipedia.org/wiki/Multiclass_classification

    Args:
        k: number of neighbours per query.
        dist_func: distance function forwarded to ``KDTree.knn_search``.
        X_train: 2-D array of training samples.
        Y_train: training labels; must be exactly the integers
            ``0 .. n_classes - 1``.
        X_predict: samples to classify, iterated row by row.

    Returns:
        Array with one predicted label per row of ``X_predict``.

    Raises:
        AssertionError: if the labels are not zero-based integers. The
            check runs once before the query loop, so it also fires when
            ``X_predict`` is empty.
    """
    (m_examples, n_dimensions) = X_train.shape

    # use kd tree structure for knn searching
    train_indices = (np.arange(0, m_examples)).reshape(m_examples, 1)
    indexed_points = np.append(X_train, train_indices, axis=1)
    t = KDTree.build_tree(indexed_points, n_dimensions)

    # Everything below up to the query loop depends on the training data
    # only; it used to be recomputed for every single query.
    possible_labels = set(Y_train)  # supposing that each class has at
    # least one representative ...
    zero_based_indexed_integer_labels = range(0, len(possible_labels))
    assert possible_labels.issubset(zero_based_indexed_integer_labels), \
        "accept only zero-based indexed, integer labels"

    # One 0/1 membership vector per class.
    membership_by_class = {}
    for c in zero_based_indexed_integer_labels:
        Y_c = np.zeros(m_examples)
        Y_c[Y_train == c] = 1
        membership_by_class[c] = Y_c

    # store results in the predictions vector
    Y_predict = np.empty(X_predict.shape[0])

    # perform knn search for each test data
    for i, x in enumerate(X_predict):
        indexed_nearest_neighbors = \
            KDTree.knn_search(t, x, k, n_dimensions, dist_func)[0]

        # neighbor indices are the last column
        neighbor_rows = np.array(indexed_nearest_neighbors)[:, -1].astype(int)

        # the predicted label will be the one from the classifier that gives
        # the most votes, so store the votes in a table
        classifier_votes_tab = {}
        for c in zero_based_indexed_integer_labels:
            classifier_votes_tab[c] = int(
                sum(membership_by_class[c][neighbor_rows]))

        # One list entry per vote, so the mode is the winning class.
        flattened_table = list(Counter(classifier_votes_tab).elements())
        Y_predict[i] = mode_with_random_tie_breaking(flattened_table)

    return Y_predict
def knn():
    """Run a 10-nearest-neighbour spot check on ``train.csv`` and print the hits.

    The first pass loads labelled rows (label in column 0, features after
    it) until the row counter passes 40000 and builds a KDTree from the
    feature tuples. The second pass re-reads the file, classifies the
    lines numbered 32000 through 32100 by majority vote of their 10 nearest
    stored rows, and prints ``True``/``False`` for each depending on whether
    the vote matches the row's own label.
    """
    data = []  # all labeled data
    lookupTable = dict()  # feature tuple -> label
    num = 0
    # ``with`` closes the file even if parsing a row raises.
    with open('train.csv', 'r') as f:
        for line in f:
            d = line.split(',')
            if d[0] == 'label':  # header row
                continue
            d = [int(value) for value in d]
            data.append(d)
            lookupTable[tuple(d[1:])] = d[0]
            if num > 40000:
                break
            num += 1

    points1 = [tuple(row[1:]) for row in data]
    tree = KDTree.construct_from_data(points1)

    num = 0
    with open('train.csv', 'r') as f:
        for line in f:
            # Line counter, header included; only lines 32000..32100 are used.
            num += 1
            if num < 32000:
                continue
            if num > 32100:
                break
            d = line.split(',')
            if d[0] == 'label':
                continue
            d = [int(value) for value in d]

            nn = tree.query(tuple(d[1:]), 10)

            # Majority vote over the neighbours' labels.
            counts = defaultdict(int)
            for x in nn:
                counts[lookupTable[x]] += 1
            predicted = sorted(counts, key=lambda x: counts[x], reverse=True)[0]
            print(str(d[0] == predicted))
def build_function(cls, world, viewmode):
    """Populate ``world`` with the teapot-on-a-mirror scene.

    In ``"realtime"`` mode a 64x64 view plane with a regular sampler is used;
    any other mode renders 500x500 with a multi-jittered sampler. The teapot
    mesh (smooth normals, matte material) is wrapped in a KDTree built from
    its bounding boxes, and a reflective ground plane is added below it.

    Args:
        cls: unused; kept for the caller's calling convention.
        world: scene object whose attributes are assigned here.
        viewmode: ``"realtime"`` or any other value for the quality setup.
    """
    world.viewmode = viewmode
    if viewmode == "realtime":
        resolution = (64, 64)
        pixel_size = 5
        sampler = RegularSampler()
    else:
        resolution = (500, 500)
        pixel_size = .64
        sampler = MultiJitteredSampler(sample_dim=1)

    world.viewplane = ViewPlane(resolution=resolution,
                                pixel_size=pixel_size,
                                sampler=sampler)
    world.camera = PinholeCamera(eye=(0., 2., 7.),
                                 up=(0., 1., 0.),
                                 lookat=(0., 1.5, 0.),
                                 viewing_distance=300.)
    world.background_color = (0.0, 0.0, 0.0)
    world.tracer = Tracer(world)
    world.objects = []

    matte1 = Matte(ka=1, kd=1, cd=numpy.array((1., .84, .1)))
    mirror_mat = Reflective(ka=0.6, kd=0.6, ks=0.6, kr=1.0, exp=100,
                            cd=numpy.array((1., 1., 1.)))
    occluder = AmbientLight(numpy.array((1., 1., 1.)), .2)
    world.ambient_color = occluder

    # Close the mesh file as soon as it has been parsed instead of leaking
    # the handle until garbage collection.
    with open('meshes/teapot.obj') as mesh_file:
        mesh = read_mesh(mesh_file)
    mesh.compute_smooth_normal()
    mesh.material = matte1

    boxes = mesh.get_bounding_boxes()
    tree = KDTree(BoundingBoxes(boxes))
    tree.print_tree()
    world.objects.append(tree)

    plane = Plane(origin=(0, 0, 0), normal=(0, 1, 0), material=mirror_mat)
    world.objects.append(plane)

    world.lights = [
        PointLight(numpy.array((1., 1., 1.)), 1.,
                   numpy.array((1., 8., 2.)),
                   radius=10, attenuation=2, cast_shadow=True)
    ]
def planning(self, sx, sy, gx, gy, ox, oy, robot_radius):
    """Plan a path from (sx, sy) to (gx, gy) over a Voronoi-sampled road map.

    Args:
        sx, sy: start position.
        gx, gy: goal position.
        ox, oy: obstacle x / y coordinates; stacked into (x, y) rows for
            the obstacle KDTree.
        robot_radius: forwarded to the road-map generation.

    Returns:
        tuple: ``(rx, ry)`` as produced by the Dijkstra search.
    """
    obstacle_xy = np.vstack((ox, oy)).T
    obstacle_tree = KDTree(obstacle_xy)

    sample_x, sample_y = self.voronoi_sampling(sx, sy, gx, gy, ox, oy)
    if show_animation:  # pragma: no cover
        plt.plot(sample_x, sample_y, ".b")

    road_map_info = self.generate_road_map_info(
        sample_x, sample_y, robot_radius, obstacle_tree)

    searcher = DijkstraSearch(show_animation)
    rx, ry = searcher.search(sx, sy, gx, gy,
                             sample_x, sample_y, road_map_info)
    return rx, ry
def build_function(cls, world, viewmode):
    """Populate ``world`` with the Cornell box, rendered by the path tracer.

    In ``"realtime"`` mode a 64x64 view plane with a regular sampler is used;
    any other mode renders 200x200 with a multi-jittered sampler. All meshes
    of ``CornellBox/CornellBox-Original.obj`` get their normals computed and
    their bounding boxes are merged into one KDTree, the only scene object.

    Args:
        cls: unused; kept for the caller's calling convention.
        world: scene object whose attributes are assigned here.
        viewmode: ``"realtime"`` or any other value for the quality setup.
    """
    world.viewmode = viewmode
    if viewmode == "realtime":
        resolution = (64, 64)
        pixel_size = 5
        sampler = RegularSampler()
    else:
        resolution = (200, 200)
        pixel_size = 1.6
        sampler = MultiJitteredSampler(sample_dim=10)
    # Separate sampler handed to the mesh loader.
    mat_sampler = MultiJitteredSampler(sample_dim=10)

    world.viewplane = ViewPlane(resolution=resolution,
                                pixel_size=pixel_size,
                                sampler=sampler)
    world.camera = PinholeCamera(eye=(0., 1, 6.),
                                 up=(0., 1., 0.),
                                 lookat=(0., 1, 0.),
                                 viewing_distance=800.)
    world.background_color = (0.0, 0.0, 0.0)
    world.tracer = PathTracer(world)
    world.objects = []

    occluder = AmbientLight(numpy.array((1., 1., 1.)), .2)
    world.ambient_color = occluder

    world_objects = read_mesh_complex('CornellBox/CornellBox-Original.obj',
                                      mat_sampler=mat_sampler)

    boxes = []
    # Only the meshes are needed; ``values()`` exists on Python 2 and 3
    # alike, unlike ``iteritems()``.
    for mesh in world_objects.values():
        mesh.compute_normal()
        boxes += mesh.get_bounding_boxes()

    tree = KDTree(BoundingBoxes(boxes))
    tree.print_tree()
    world.objects.append(tree)

    world.lights = [
        PointLight(numpy.array((1., 1., 1.)), 1.,
                   numpy.array((0., 1., 4.)),
                   radius=4, attenuation=2, cast_shadow=True)
    ]
def generate_road_map_info(self, node_x, node_y, rr, obstacle_tree):
    """
    Road map generation

    For every sampled node, the other nodes are visited in the order the
    node KDTree returns them and the first ``self.N_KNN`` that can be
    reached without collision are kept as its edges.

    node_x: [m] x positions of sampled points
    node_y: [m] y positions of sampled points
    rr: Robot Radius[m]
    obstacle_tree: KDTree object of obstacles

    Returns a list holding, per node, the list of connected node indices.
    """
    n_sample = len(node_x)
    node_tree = KDTree(np.vstack((node_x, node_y)).T)

    road_map = []
    for ix, iy in zip(node_x, node_y):
        query = np.array([ix, iy]).reshape(2, 1)
        index, _dists = node_tree.search(query, k=n_sample)
        inds = index[0]

        edge_id = []
        # The first entry is skipped (presumably the query node itself).
        for candidate in inds[1:]:
            nx = node_x[candidate]
            ny = node_y[candidate]

            if not self.is_collision(ix, iy, nx, ny, rr, obstacle_tree):
                edge_id.append(candidate)

            if len(edge_id) >= self.N_KNN:
                break

        road_map.append(edge_id)

    return road_map
def knn_classification(k, dist_func, X_train, Y_train, X_predict):
    """Label each row of ``X_predict`` with the mode of its k nearest neighbours.

    Args:
        k: number of neighbours requested per query.
        dist_func: distance function forwarded to ``KDTree.knn_search``.
        X_train: 2-D array of training samples.
        Y_train: training labels, one per training sample.
        X_predict: samples to classify, iterated row by row.

    Returns:
        Array with one predicted label per row of ``X_predict``.
    """
    sample_count, dimension_count = X_train.shape

    # The tree stores every sample with its label attached as last column.
    points_with_labels = np.append(
        X_train, Y_train.reshape(sample_count, 1), axis=1)
    kd_tree = KDTree.build_tree(points_with_labels, dimension_count)

    Y_predict = np.empty(X_predict.shape[0])

    # Running count of searched points, kept for benchmark purposes.
    searched_so_far = 0

    for position, sample in enumerate(X_predict):
        search_result = KDTree.knn_search(
            kd_tree, sample, k, dimension_count, dist_func)
        (labelled_neighbours, _, searched) = search_result

        labels_of_neighbours = np.array(labelled_neighbours)[:, -1]
        Y_predict[position] = mode_with_random_tie_breaking(
            labels_of_neighbours)
        searched_so_far += searched

    return Y_predict
def merge_graph(self, ):
    """Merge one finished work item into the graphs and re-queue its division.

    Takes one message from the graph queue; a falsy message is ignored.
    Otherwise the worker's graph is composed into both the division's graph
    and the overall graph, the overall-graph nodes found within distance 20
    of the division's scenario vertices are merged into the division's
    graph as well, and the result is put on the worker queue and stored.
    """
    raw_message = self._graph_queue.get()
    if not raw_message:
        return

    work = WorkMessage.deserialize(raw_message.decode('utf-8'))
    division_key = (tuple(work.llim), tuple(work.ulim))

    division_graph = nx.compose(self._graph_by_division[division_key],
                                work.networkx_graph)
    self._overall_graph = nx.compose(self._overall_graph,
                                     work.networkx_graph)

    overall_nodelist = list(self._overall_graph.nodes)
    overall_kdtree = KDTree(overall_nodelist)

    # Overall-graph nodes lying close to this division's vertices.
    required_vertices = []
    for vertex in self._scenario.get_vertices(work.llim, work.ulim):
        nearby = overall_kdtree.search_in_distance(vertex, 20)
        required_vertices.extend([overall_nodelist[i] for i in nearby])

    graph_to_be_sent = self._overall_graph.subgraph(required_vertices)
    division_graph = nx.compose(division_graph, graph_to_be_sent)

    outgoing = WorkMessage(work.llim, work.ulim, division_graph)
    self._worker_queue.put(outgoing.serialize())
    self._graph_by_division[division_key] = division_graph
def fitICPkdTree(model, target): Tf = np.eye(4, 4) dif = 100 nIter = 0 kdTree = KDTree(list(target)) while nIter < MAX_ITER and dif > 10: T1, pit1 = ICPstepKDTree(model, kdTree) Tf = Tf.dot(T1) saDif = vector(sum(abs(model - pit1))) dif = saDif.mag #difference with respect to the anterior model print nIter, dif #points(pos=pit1,color=(0,1,1)) model = pit1 nIter += 1 #print nIter,dif return Tf, pit1
def nearest_filtered(Primary_Technology, Role):
    """Return the rows nearest to a fixed query among those matching two filters.

    Rows of the predictions CSV are kept when column 2 equals
    ``Primary_Technology`` and column 11 equals ``Role``. Columns 0 and
    57-65 of the kept rows form the KDTree points, the 5 nearest points to
    the fixed query are looked up, and columns 1, 2, 8, 9, 10 of the
    matching rows are returned.
    """
    # Separating out the indices
    # Opening the files
    # Raw string: the backslashes of the Windows path are taken literally
    # instead of relying on "\D" and "\I" being unknown escape sequences.
    with open(r"C:\DataMining\IndexPredictionsOutput.csv", 'rb') as f:
        reader = csv.reader(f)
        data = map(tuple, reader)

    # Filtering the list
    def f1(t):
        return t[2] == Primary_Technology  # Primary Skill is Informatica

    def f2(t):
        return t[11] == Role  # Role

    filters = [f1, f2]
    filtered_data = filter_lambda(filters, data)
    kd_filtered_data = map(
        operator.itemgetter(0, 57, 58, 59, 60, 61, 62, 63, 64, 65),
        filtered_data)

    # Creating the tree
    tree = KDTree.construct_from_data(kd_filtered_data)

    # Finding the nearest neighbours, t can be varied for the number of neighbours
    nearest = tree.query(query_point=(0, 10, 10, 10, 10, 10, 10, 10, 10, 10),
                         t=5)

    # The serial number of the nearest neighbours
    nearest_index = [x[0] for x in nearest]

    # Using this to filter the original list
    kd_filtered_nearest = [
        tup for tup in filtered_data if tup[0] in nearest_index
    ]

    # Preparing the dataset to be printed
    kd_filtered_nearest_printed = map(operator.itemgetter(1, 2, 8, 9, 10),
                                      kd_filtered_nearest)
    return kd_filtered_nearest_printed
def nearest_filtered(Primary_Technology, Role):
    """Return the rows nearest to a fixed query among those matching two filters.

    Rows of the predictions CSV are kept when column 2 (stripped) equals
    ``Primary_Technology`` and column 11 (stripped) equals ``Role``.
    Columns 0 and 58-66 of the kept rows form the KDTree points, the 10
    nearest points to the fixed query are looked up, and columns
    0, 1, 2, 26, 8, 23, 10 of the matching rows are returned.
    """
    # Separating out the indices
    # Opening the files
    # Raw string: the backslashes of the Windows path are taken literally
    # instead of relying on "\D" and "\I" being unknown escape sequences.
    with open(r"C:\DataMining\IndexPredictionsOutput.csv", 'rb') as f:
        reader = csv.reader(f)
        data = map(tuple, reader)

    # Filtering the list
    def f1(t):
        return t[2].strip() == Primary_Technology  # Primary Skill is Informatica

    def f2(t):
        return t[11].strip() == Role  # Role

    filters = [f1, f2]
    filtered_data = filter_lambda(filters, data)
    kd_filtered_data = map(
        operator.itemgetter(0, 58, 59, 60, 61, 62, 63, 64, 65, 66),
        filtered_data)

    # Creating the tree
    tree = KDTree.construct_from_data(kd_filtered_data)

    # Finding the nearest neighbours, t can be varied for the number of neighbours
    nearest = tree.query(query_point=(85, 10, 10, 10, 10, 10, 10, 10, 10, 10),
                         t=10)

    # The serial number of the nearest neighbours
    nearest_index = [x[0] for x in nearest]

    # Using this to filter the original list
    kd_filtered_nearest = [
        tup for tup in filtered_data if tup[0] in nearest_index
    ]

    # Preparing the dataset to be printed
    kd_filtered_nearest_printed = map(
        operator.itemgetter(0, 1, 2, 26, 8, 23, 10), kd_filtered_nearest)
    return kd_filtered_nearest_printed
avg={} for i,fn in enumerate(fns): if fn in avg.keys(): continue if i%50 ==0: print "Calculated average of {} pictures out of {}".format(i, len(fns)) try: img = img_as_ubyte(io.imread(fn)) avg[fn] = gu.get_average_color_lab(img) except Exception,e: print "weird file {}: {} - skipped from index".format(fn,e) continue print "Building tree nao" bounds = [[0,100],[-128,128],[-128,128]] # TODO What iz bounds of lab color space? index = KDTree(bounds, leaf_size_hint) index.bulk_insert_dict_value_to_spatial(avg) pickle.dump(index, open(pickle_file, 'wb')) return index # in place! def checkerboard(img, parts): """Replace all image parts with black and white (just for fun)""" parts = list(itertools.chain.from_iterable(parts)) # flatten the list of lists for i,part in enumerate(parts): pixels = part.get_all_pixels() for px in pixels: img[px] = (0,0,0) if i%2==0 else (255,255,255) # in place!
def __init__(self, data):
    """Index ``data`` in a KDTree and collect the distinct ``rel`` values.

    Progress messages are printed before and after the tree is built.
    """
    self.data = data

    print("Kd-tree will be constructed...")
    self.tree = KDTree.construct_from_data(data)
    print("Kd-tree construction done!")

    # Distinct ``rel`` attribute values found across the vectors.
    self.rel_levels = {item.rel for item in data}
def fitting_obj_sample(param): """ computes residuals based on distance from ellipsoid can be used with different loss-functions on residual """ obj = 0 # centers cx = param[0] cy = param[1] cz = param[2] rx = param[3] ry = param[4] rz = param[5] sx, sy, sz = ellipsoid(cx, cy, cz, rx, ry, rz, 20) num_samples = len(sx) #plot_point_cloud(sx, sy, sz) print "num_samples", num_samples #import pdb #pdb.set_trace() #data = numpy.array(zip(sx, sy, sz)).T #tree = kdt.kdtree( data, leafsize=1000 ) data = zip(sx, sy, sz) tree = KDTree.construct_from_data(data) num_queries = len(x) print "num_queries", num_queries global global_loss global_loss = numpy.zeros(num_queries) for idx in range(num_queries): """ Compute the unique root tbar of F(t) on (-e2*e2,+infinity); x0 = e0*e0*y0/(tbar + e0*e0); x1 = e1*e1*y1/(tbar + e1*e1); x2 = e2*e2*y2/(tbar + e2*e2); distance = sqrt((x0 - y0)*(x0 - y0) + (x1 - y1)*(x1 - y1) + (x2 - y2)*(x2 - y2)) """ query = (x[idx], y[idx], z[idx]) nearest, = tree.query(query_point=query, t=1) residual = dist.euclidean(query, nearest) #obj += loss_functions.squared_loss(residual) #obj += loss_functions.abs_loss(residual) #obj += loss_functions.eps_loss(residual, 2) #obj += loss_functions.eps_loss_bounded(residual, 2) loss_xt = loss_functions.eps_loss_asym(residual, 2, 1.0, 0.2) obj += loss_xt global_loss[idx] = num_queries #obj += eps_loss(residual, 2)*data_intensity[idx] # add regularizer to keep radii close reg = 10 * regularizer(param) print "loss", obj print "reg", reg obj += reg return obj
class KNeighborsClassifier(object):
    """Implementation of KNeighborsClassifier.

    Attributes:
        fit: Fit the model (build tree based on X).
        kneighbors: Find the K-neighbors of a point.
        predict_proba: Return probability estimates for the test data X.
        predict: Predict the class labels for the provided data.
        score: Returns the mean accuracy on the given test data and labels.
    """

    def __init__(self, n_neighbors=5, weights='uniform', algorithm='kd_tree', leaf_size=30, p=2):
        """Init KNeighborsClassifier.

        Args:
            n_neighbors (int): Number of neighbors to use by default for kneighbors queries.
                Values below 1 are raised to 1.
            weights ({'uniform', 'distance'}): Weight function used in prediction. Possible values:
                'uniform': uniform weights. All points in each neighborhood are weighted equally.
                'distance': weight points by the inverse of their distance.
                Any other value falls back to 'uniform'.
            algorithm ({'kd_tree', 'ball_tree'}): Algorithm used to compute the nearest neighbors.
                Only 'kd_tree' is accepted; every other value falls back to it.
            leaf_size (int): Leaf size passed to BallTree or KDTree.
            p (int): Power parameter for the Minkowski metric.
        """
        self.n_neighbors = max(n_neighbors, 1)
        self.weights = weights if weights in ['uniform', 'distance'] else 'uniform'
        self.algorithm = algorithm if algorithm in ['kd_tree'] else 'kd_tree'
        self.leaf_size = leaf_size
        self.p = p
        self.tree = None
        self.X, self.y = None, None

    def fit(self, X, y):
        """Fit the model (build tree based on X).

        Args:
            X (array-like, shape (n_samples, n_features)): Training data.
            y (array, shape (n_samples,)): Target values.
        """
        self.X, self.y = np.array(X), np.array(y)
        if self.algorithm == 'kd_tree':
            self.tree = KDTree(X, self.leaf_size, self.p)
        if self.algorithm == 'ball_tree':
            pass

    def kneighbors(self, X, n_neighbors=None, return_distance=True):
        """Find the K-neighbors of a point.

        Args:
            X (array-like, shape (n_query, n_features)): The query point or points.
            n_neighbors (int): Number of neighbors to get (default is the value passed to the constructor).
            return_distance (bool): If False, distances will not be returned.

        Returns:
            list: one entry per query point, each being whatever
                ``self.tree.query`` returns for that point (``predict_proba``
                relies on (index, distance) pairs when distances are requested).
        """
        n_neighbors = n_neighbors if n_neighbors is not None else self.n_neighbors
        res = list()
        for x in np.array(X):
            res.append(self.tree.query(x, n_neighbors, return_distance=return_distance))
        return res

    @staticmethod
    def __get_proba(labels, weights):
        """Calculate probability for each label."""
        proba = dict()
        for label, weight in zip(labels, weights):
            proba.setdefault(label, 0)
            proba[label] += weight
        total_weights = sum(proba.values())
        for label in proba:
            proba[label] /= total_weights
        return proba

    @staticmethod
    def __distance_weights(distances):
        """Turn neighbor distances into inverse-distance weights.

        A neighbor at distance 0 would give an infinite weight and NaN
        probabilities; in that case the exact matches share all the weight
        and every other neighbor gets weight 0.
        """
        exact_match = distances == 0
        if exact_match.any():
            return exact_match.astype(float)
        return 1 / distances

    def predict_proba(self, X):
        """Return probability estimates for the test data X.

        Args:
            X (array-like, shape (n_query, n_features)): Query samples.

        Returns:
            p (array, shape (n_query, dict{label: proba})): The class probabilities of the query samples.
        """
        res = list()
        for x in np.array(X):
            neighbors = self.tree.query(x, self.n_neighbors, return_distance=True)
            labels = self.y[[i for i, _ in neighbors]]
            if self.weights == 'uniform':
                weights = np.ones(len(neighbors))
            else:
                distances = np.array([d for _, d in neighbors], dtype=float)
                weights = self.__distance_weights(distances)
            res.append(self.__get_proba(labels, weights))
        return res

    def predict(self, X):
        """Predict the class labels for the provided data.

        Args:
            X (array-like, shape (n_query, n_features)): Query samples.

        Returns:
            y (list, length n_query): Class label for each query sample; on
                equal probabilities the label seen first wins.
        """
        probas = self.predict_proba(X)
        res = list()
        for proba in probas:
            label, maxp = None, 0
            for l in proba:
                if proba[l] > maxp:
                    label, maxp = l, proba[l]
            res.append(label)
        return res

    def score(self, X, y):
        """Returns the mean accuracy on the given test data and labels.

        Args:
            X (array-like, shape (n_query, n_features)): Test samples.
            y (array, shape (n_query,)): True labels for X

        Returns:
            score (float): Mean accuracy of self.predict(X) wrt. y.
        """
        # Mean of the element-wise matches; always a float, whatever
        # division semantics the interpreter applies to integers.
        hits = np.array(self.predict(X)) == np.array(y)
        return float(np.mean(hits))
# For every city file: load its geo positions, then repeatedly pair each
# point with its nearest other point while more than 2000 points remain.
# NOTE(review): this excerpt appears to stop part-way through the while
# loop; nothing shown here shrinks ``points``.
for city in files:
    points = []
    # File name without its extension.
    city_name = city.split('.')[0]
    with open('geopositions/%s.txt'%city_name, 'rb') as csvfile:
        georeader = csv.reader(csvfile, delimiter=',', quotechar='|')
        for row in georeader:
            # (first column, second column, 1.0) - presumably the third
            # component is a weight; confirm against get_dist and later code.
            points.append((float(row[0]), float(row[1]), 1.0))
    block_size = 100
    start_time = time.time()
    while len(points) > 2000:
        print len(points)
        s_time = time.time()
        # The tree is rebuilt from the current point list on every pass.
        tree = KDTree.construct_from_data(points)
        # min_dist = 2000000000
        # f_point = None
        # s_point = None
        pairs = []
        for point in points:
            # Two results are requested and the second one is used -
            # presumably the first is the query point itself (confirm).
            nearest = tree.query(query_point=point, t=2)
            found_point = nearest[1]
            dist = get_dist(found_point, point)
            pairs.append((point, found_point, dist))
            # if dist < min_dist:
            #     f_point = found_point
            #     s_point = point
            #     min_dist = dist
        # Closest pairs first.
        pairs.sort(key=lambda x: x[2])
        all_pairs = []
def proximity_filter(point, data, total):
    """Return up to ``total`` points of ``data`` near ``point``.

    A KDTree is built from ``data`` on every call and queried once; the
    result is whatever ``KDTree.query`` returns for ``t=total`` (described
    by the original author as sorted by proximity).
    """
    kd_index = KDTree.construct_from_data(data)
    nearby = kd_index.query(query_point=point, t=total)
    return nearby
def buildKDTree():
    """Build a KDTree from the keys of the module-level ``topic_dict``."""
    # Create 2DTree with topics' coordinates
    return KDTree.construct_from_data(topic_dict.keys())
class TreeGP(GaussianProcess):
    """Gaussian process that solves for ``alpha`` with conjugate gradient,
    using a KD-tree to evaluate kernel matrix-vector products.

    NOTE(review): this class uses Python 2 ``print`` statements.
    """

    def __init__(self, X=None, y=None, kernel="se", kernel_params=(1, 1,), kernel_priors = None, kernel_extra = None, mean="constant"):
        """Set up the kernel, centre the targets and solve for ``alpha``.

        Args:
            X: training inputs; ``X.shape[0]`` is read, so despite the
                ``None`` default an array must be supplied.
            y: training targets.
            kernel, kernel_params, kernel_extra, kernel_priors: forwarded to
                ``kernels.setup_kernel``.
            mean: "zero" keeps ``y`` unchanged, "constant" subtracts
                ``np.mean(y)``, "linear" raises. Any other value leaves
                ``self.mu`` and ``self.y`` unset.

        Raises:
            RuntimeError: if ``mean == "linear"``.
        """
        self.kernel_name = kernel
        self.kernel_params = kernel_params
        self.kernel_priors = kernel_priors
        self.kernel = kernels.setup_kernel(kernel, kernel_params, kernel_extra, kernel_priors)
        self.X = X
        self.n = X.shape[0]
        if mean == "zero":
            self.mu = 0
            self.y = y
        if mean == "constant":
            self.mu = np.mean(y)
            self.y = y - self.mu
        if mean == "linear":
            raise RuntimeError("linear mean not yet implemented...")
        self.mean = mean
        self.K = None
        self.L = None
        self.alpha = None
        self.Kinv = None
        self.posdef = True
        self.tree = KDTree(X)
        # Tree-based CG solution, printed for comparison.
        self.alpha = self._treeCG()
        print "trained CG, found alpha", self.alpha
        # _invert_kernel_matrix is not defined in this class (presumably
        # inherited); judging by the message below it overwrites self.alpha
        # with the exact solution -- TODO confirm.
        self._invert_kernel_matrix()
        print "true alpha is", self.alpha

    def _trueMVM(self, v):
        """Return ``np.dot(self.K, v)`` after calling ``_invert_kernel_matrix``.

        NOTE(review): presumably ``_invert_kernel_matrix`` populates
        ``self.K`` -- confirm against the superclass.
        """
        self._invert_kernel_matrix()
        return np.dot(self.K, v)

    # return v multiplied by the kernel matrix
    def _treeMVM(self, v):
        """Tree-based counterpart of ``_trueMVM``.

        Calls ``self.tree.update_p(self.X, v)``, then stores the first value
        returned by ``self.tree.weighted_sum`` for each row of ``self.X``.

        Returns:
            Array with the same shape as ``v``.
        """
        result = np.zeros(v.shape)
        self.tree.update_p(self.X, v)
        for i,x in enumerate(self.X):
            # wt is returned by the tree but not used here.
            s, wt = self.tree.weighted_sum(x, self.kernel, 0, epsilon = 0.01)
            # s, wt = self.tree.exact_weighted_sum(x, self.kernel)
            result[i] = s
        # r2 = self._trueMVM(v)
        #discr = np.linalg.norm(result-r2)
        #print "result discrepancy", discr
        #if discr > 0.01:
        #    import pdb
        #    pdb.set_trace()
        return result

    # CG algorithm from Schewchuk "Without the Agonizing Pain", appendix B2
    def _treeCG(self, epsilon=0.001):
        """Run conjugate gradient on ``self.y`` using ``_treeMVM`` products.

        Starts from the zero vector and stops after ``imax`` (10) iterations
        or once ``delta_new <= epsilon**2 * delta0``.

        Args:
            epsilon: relative tolerance on the residual norm.

        Returns:
            The final iterate ``x``.
        """
        imax = 10
        b = self.y
        # x = ""a real init point""
        # r = b - self._treeMVM(x)
        x = np.zeros((self.n,))
        # With x = 0 the initial residual b - A x is just b.
        r = b
        d = r
        delta_new = np.dot(r,r)
        delta0 = delta_new
        i=0
        while i < imax and delta_new > epsilon**2 * delta0:
            print "CG iteration %d of %d, delta = %f" % (i, imax, delta_new)
            q = self._treeMVM(d)
            alpha = delta_new / np.dot(d, q)
            x = x + alpha * d
            # Every 50th iteration (including i == 0) the residual is
            # recomputed from x instead of updated recursively.
            if i % 50 == 0:
                r = b - self._treeMVM(x)
            else:
                r = r - alpha*q
            delta_old = delta_new
            delta_new = np.dot(r,r)
            beta = delta_new/delta_old
            d = r + beta*d
            i = i+1
        return x
#!/usr/bin/env python
# Manual smoke test for KDTree.closest_point: load points, pick one at random,
# nudge it, and print the closest stored point to the nudged location.
import fileinput
import random

from kdtree import KDTree

# read in the points from a file specified on the command line. E.g.:
# $ ./kdtree_test.py ../../../data/sim_waypoints.csv
# Each point is (x, y, line_index), taken from the first two CSV columns.
points = []
for i, line in enumerate(fileinput.input()):
    line_parts = line.split(',')
    points.append((float(line_parts[0]), float(line_parts[1]), i))

if not points:
    raise SystemExit("no points were read from the input")

# generate the K-D Tree from the points
kdtree = KDTree(points)

# pick a random point from the points
# randrange excludes its upper bound; randint(0, len(points)) could return
# len(points) and index one past the end of the list.
rand_index = random.randrange(len(points))

# find the closest point
point = points[rand_index]
print("randomly chose point {} at index {}".format(point, rand_index))
new_point = (point[0] + 2.0, point[1] + 7.0)
print("tweaked x,y to be {}".format(new_point))
closest = kdtree.closest_point(new_point)
print("closest point to {} is {}".format(new_point, closest))
# declare a 2D array for confusion matrix confusionMatrix = [[0 for x in xrange(26)] for x in xrange(26)] listOfPoints = [] with open('training_data.txt', 'r') as f: for line in f: if counter < 15000: listOfPoints.append(getDataElementFrom(line)) counter += 1 continue if isKDTreeConstructed == False: start = time.clock() kdTree = KDTree.construct_from_data(listOfPoints) elapsedForKDTreeConstruction = (time.clock() - start) isKDTreeConstructed = True print "KDTree constructed in %.2fs" % (elapsedForKDTreeConstruction) searchStartTime = time.clock() print "Evaluating input data..." currentLine = getDataElementFrom(line) nearest = kdTree.query(currentLine) confusionMatrix[ord(currentLine.lettr) - 65][ord(nearest[0].lettr) - 65] += 1 if currentLine.lettr == nearest[0].lettr: properMatches += 1 counter += 1 if counter % 500 == 0:
def top_down(grid, output, tile_size):
    '''
    Match each grid cell to a tile, scanning from the top-left to the
    bottom-right, and paste the tiles into ``output``.

    For every cell the quantized colour is looked up in a KD-tree built from
    all ``Pixel`` objects. The first query result supplies the tile image,
    which is shrunk in place with ``thumbnail(tile_size)`` and pasted at
    ``(xPos * tile_size[0], yPos * tile_size[1])``.

    Returns a dict mapping each used pixel id to the list of ``(x, y)``
    positions where its tile was pasted.
    '''
    tile_index = KDTree.construct_from_data(list(Pixel.objects.all()))
    placements = dict()
    for yPos, _row in enumerate(grid):
        for xPos, _cell in enumerate(grid[yPos]):
            target = quantize_color(grid[yPos][xPos].color)
            tile_pixel = tile_index.query(query_point=target, t=1)[0]
            tile = tile_pixel.image
            tile.thumbnail(tile_size)
            xy = (xPos * tile_size[0], yPos * tile_size[1])
            output.paste(tile, xy)
            placements.setdefault(tile_pixel.id, list()).append((xy[0], xy[1]))
    return placements
# listXa is the list of atoms in protein a: (x, y, z) rounded to 3 decimals,
# with the matching eigenvector entry Va[i].T appended to each node
listXa = []
for i in range(len(limitedListCA_ProtA)):
    atom = limitedListCA_ProtA[i]
    tupp = (round(atom[0], 3), round(atom[1], 3), round(atom[2], 3), Va[i].T)
    listXa.append(tupp)

# listXb is the list of atoms in protein b, with Vb[i] appended to each node
listXb = []
for i in range(len(limitedListCA_ProtB)):
    atom = limitedListCA_ProtB[i]
    tupp = (round(atom[0], 3), round(atom[1], 3), round(atom[2], 3), Vb[i])
    listXb.append(tupp)

data1 = listXa
data2 = listXb
Tree1 = KDTree.construct_from_data(data1)
Tree2 = KDTree.construct_from_data(data2)
score = 0
#print("####################################")

# Times for KD Tree approach
startT = time.time()
for i, query in enumerate(data1):
    # accumulate the range-query result (r = 50) for each protein-a node
    score += Tree2.queryrange(query_point=query, r=50)
solveTime = time.time() - startT
print(solveTime)

#Time for non-tree approach
#startT = time.time()
class TestKDTree(unittest.TestCase):
    """Randomised tests for KDTree membership, minimum, deletion and
    nearest-neighbour queries."""

    def setUp(self):
        """Insert ``count`` random integer points with coordinates in [0, 20)."""
        self.dim = 3
        self.count = 20
        points = np.random.randint(low=0, high=20,
                                   size=(self.count, self.dim)).tolist()
        self.tree = KDTree(self.dim)
        for p in points:
            self.tree.insert(p)
        # The tests visit the points in a different order than insertion.
        np.random.shuffle(points)
        self.points = points

    def test_contain(self):
        """Inserted points are found; points outside the inserted range are not."""
        for p in self.points:
            self.assertTrue(self.tree.contain(p))
        # With only ``low`` given, randint samples [0, low), which overlaps
        # the inserted range [0, 20) and made this test flaky. An explicit
        # ``high`` keeps every coordinate at 30 or above.
        random_points = np.random.randint(low=30, high=60,
                                          size=(self.count, self.dim)).tolist()
        for p in random_points:
            self.assertFalse(self.tree.contain(p))

    def test_find_min(self):
        """find_min returns one of the points minimal in each dimension."""
        for d in range(self.dim):
            sorted_at_dim = sorted(self.points, key=operator.itemgetter(d))
            min_at_dim = sorted_at_dim[0][d]
            candidate_min_at_dim = list(
                filter(lambda point: point[d] == min_at_dim, sorted_at_dim))
            self.assertIn(self.tree.find_min(d, 0, self.tree.root),
                          candidate_min_at_dim)

    def test_delete(self):
        """A point is no longer contained once it has been deleted."""
        for p in self.points:
            self.tree.delete(p)
            self.assertFalse(self.tree.contain(p))

    def test_nearest_neighbor(self):
        """find_nearest matches a brute-force nearest-distance search."""
        for p in self.points:
            self.assertListEqual(self.tree.find_nearest(p), p)
        random_points = np.random.randint(low=0, high=20, size=(20, self.dim))
        for p in random_points:
            nearest_point = self.tree.find_nearest(p)
            points = np.array(self.points)
            self.assertEqual(min(linalg.norm(points - p, axis=1)),
                             linalg.norm(nearest_point - p))

    def test_k_nearest_neighbor(self):
        """find_k_nearest matches the 5 smallest brute-force distances."""
        for p in self.points:
            self.assertListEqual(self.tree.find_k_nearest(p, 1), [p])
        random_points = np.random.randint(low=0, high=20, size=(20, self.dim))
        for p in random_points:
            nearest_points = self.tree.find_k_nearest(p, 5)
            nearest_points = np.array(nearest_points)
            # Duplicate points are collapsed before computing the reference.
            points = np.unique(self.points, axis=0)
            n_dists = linalg.norm(nearest_points - p, axis=1)
            dists = np.sort(linalg.norm(points - p, axis=1))[:5]
            self.assertListEqual(n_dists.tolist(), dists.tolist())
from kdtree import KDTree

# Sample 2-D points to index.
data = [
    (1, 2),
    (4, 0),
    (8, 3),
    (10, 5),
    (9, 8),
    (4, 2),
]

tree = KDTree.construct_from_data(data)

# Query the tree for (10, 0) with t=1 and show what comes back.
nearest = tree.query(query_point=(10, 0), t=1)
print(nearest)
print "\tEvaluated %d rows for condensed training set in %.2fs" %(counter, time.clock() - start) continue elif isPrinted == False: elapsed = (time.clock() - start) print ("Condensed Training data mapped to feature space in %.4fmin." % (elapsed/60)) print ("Boundary points evaluated: %d" % getTrainingData().__len__()) classificationTimeStart = time.clock() isPrinted = True # Implement the search of the next 5000 elements using a KDTree searchStartTime = time.clock() #test: Trying with KDTree kdTree = KDTree.construct_from_data(getTrainingData()) else: currentLine = getDataElementFrom(line) # nearest = evaluateLine(line, 1) nearest = kdTree.query(currentLine) confusionMatrix[ord(currentLine.lettr) - 65][ord(nearest[0].lettr) - 65] += 1 if currentLine.lettr == nearest[0].lettr: properMatches += 1 # confusionMatrix[ord(nearest[0]) - 65][ord(nearest[1]) - 65] += 1 # # if nearest[0] == nearest[1]: # properMatches += 1 counter += 1
def __init__(self, dim):
    """Start with an empty parents map and a fresh ``KDTree(dim)``."""
    self._parents_map = dict()
    self._kd = KDTree(dim)