def test_3_filter_galaxies(self):
    """Filter galaxies.

    Update 3/14/2022 - filter_galaxies no longer returns the grid_shape
    and coords_min parameters; instead they are calculated inside the
    main body of find_voids() for consistency among the 3 mask_mode types.
    """
    # Take a table of galaxy coordinates, the name of the survey, and the
    # output directory, and return astropy tables of the Cartesian
    # coordinates of the wall and field galaxies.
    f_wall, f_field = filter_galaxies(self.galaxies_shuffled,
                                      'test_',
                                      '',
                                      dist_metric='redshift')

    # Check the wall galaxy coordinates: wall galaxies are those whose
    # third-nearest-neighbor distance is within 1.5 standard deviations
    # of the mean.
    gal_tree = neighbors.KDTree(self.gal)
    distances, indices = gal_tree.query(self.gal, k=4)
    dist3 = distances[:, 3]
    TestVoidFinder.wall = self.gal[dist3 < (np.mean(dist3) + 1.5*np.std(dist3))]
    self.assertTrue(np.isclose(f_wall, TestVoidFinder.wall).all())

    # Check the field galaxy coordinates
    field = self.gal[dist3 >= (np.mean(dist3) + 1.5*np.std(dist3))]
    self.assertTrue(np.isclose(f_field, field).all())
def kdd_Neigbors_2(dta, index_ano):
    # The input data should be a pandas DataFrame.
    start_anp = index_ano
    X = list(map(lambda x: [x, dta.values[x][1]], np.arange(len(dta.values))))
    tree = nb.KDTree(X, leaf_size=20)

    # Iteratively expand the anomaly set until no new points are found.
    flag_finding = 0
    initial_index = []
    while flag_finding == 0:
        new_anomaly_point = find_anomaly_point(tree, X, index_ano, initial_index)
        if len(index_ano) < len(new_anomaly_point):
            initial_index = index_ano
            index_ano = np.array(new_anomaly_point, dtype=np.int32)
        else:
            flag_finding = 1
    inverse_neighboor = index_ano
    print("Nearest neighbors of anomaly point:", len(inverse_neighboor))

    plt.figure(1)
    plt.subplot(211)
    plt.plot(dta.value.values)
    plt.plot(np.array(inverse_neighboor, dtype=np.int32),
             dta.value.values[np.array(inverse_neighboor, dtype=np.int32)], 'o')
    plt.plot(np.array(start_anp, dtype=np.int32),
             dta.value.values[np.array(start_anp, dtype=np.int32)], 'x')
    plt.show()
    return inverse_neighboor
def build(self, embs, labels):
    self.labels = labels
    if isinstance(embs, np.ndarray):
        self.embs = embs
    else:
        self.embs = np.vstack(embs)
    self.tree = neighbors.KDTree(self.embs)
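# A minimal usage sketch for the build method above, assuming it belongs to a
# matcher-like class (the `Matcher` name here is hypothetical) that exposes
# the stored tree, embeddings, and labels.
import numpy as np
from sklearn import neighbors

matcher = Matcher()
matcher.build([np.random.rand(128) for _ in range(10)], labels=list(range(10)))
dist, ind = matcher.tree.query(matcher.embs[:1], k=3)   # 3 closest embeddings
print([matcher.labels[i] for i in ind[0]])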
def build_kdt(X_norm, **kwargs):
    kdt_kwds = dict(leaf_size=40, metric="minkowski")
    kdt_kwds.update(kwargs)
    kdt = neighbours.KDTree(X_norm, **kdt_kwds)
    return kdt
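# A usage sketch for build_kdt; the data and query are illustrative, and the
# `neighbours` alias for sklearn.neighbors matches the function above.
import numpy as np
from sklearn import neighbors as neighbours

X_norm = np.random.rand(100, 3)          # 100 points in 3-D, already normalized
kdt = build_kdt(X_norm, leaf_size=20)    # kwargs override the default leaf_size
dist, ind = kdt.query(X_norm[:1], k=5)   # 5 nearest neighbors of the first point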
def leiden_clustering(umap_res, resolution_range=(0, 1), random_state=2,
                      kdtree_dist='euclidean'):
    tree = neighbors.KDTree(umap_res, metric=kdtree_dist)

    # Collect each point's 25 nearest neighbors as (row, col, distance) triples.
    vals, i, j = [], [], []
    for idx in range(umap_res.shape[0]):
        dist, ind = tree.query([umap_res[idx]], k=25)
        vals.extend(list(dist.squeeze()))
        j.extend(list(ind.squeeze()))
        i.extend([idx] * len(ind.squeeze()))
    print(len(vals))

    # Build a sparse k-nearest-neighbor graph and run the Leiden optimiser
    # across the requested resolution range.
    ginput = sps.csc_matrix(
        (numpy.array(vals), (numpy.array(i), numpy.array(j))),
        shape=(umap_res.shape[0], umap_res.shape[0]))
    sources, targets = ginput.nonzero()
    edgelist = zip(sources.tolist(), targets.tolist())
    G = ig.Graph(edges=list(edgelist))
    optimiser = leidenalg.Optimiser()
    optimiser.set_rng_seed(random_state)
    profile = optimiser.resolution_profile(G, leidenalg.CPMVertexPartition,
                                           resolution_range=resolution_range,
                                           number_iterations=0)
    print([len(elt) for elt in profile])
    return profile
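# A usage sketch for leiden_clustering; the random 2-D array stands in for a
# UMAP embedding, and the module-level imports (neighbors, numpy, sps, ig,
# leidenalg) are assumed as in the function above.
import numpy

umap_res = numpy.random.rand(100, 2)
profile = leiden_clustering(umap_res, resolution_range=(0, 0.5))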
def test_add_tracker(self, mock_write, mock_color, mock_mkdir):
    mock_mkdir.side_effect = fake_os
    print("\ntest add tracker")
    matcher = FakeMatcher()
    embs = np.eye(12)
    labels = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
    matcher.build(embs, labels)

    # 12 trackers in total, one per basis vector.
    zero = np.zeros(12)
    trackers = {}
    for i in range(12):
        vec = zero.copy()
        vec[i] = 1
        tracker = create_tracker([vec] * 40)
        trackers[i] = tracker

    tracker_history = TrackersHistory()
    tracker_history.trackers = trackers
    tracker_history.current_id = 13
    tracker_history.start_time = 0
    tracker_history.labels = labels
    tracker_history.embs = [embs[i, :] for i in range(12)]
    history_matcher = neighbors.KDTree(
        embs, leaf_size=Config.Matcher.INDEX_LEAF_SIZE, metric='euclidean')
    tracker_history.history_matcher = history_matcher

    mock = MagicMock(side_effect=fake_os)
    with patch('os.mkdir', mock):
        tracker_history.add_tracker(trackers[0], matcher, Mock())
def __predict_proba(self, X):
    """ __predict_proba

    Private implementation of the predict_proba method.

    Parameters
    ----------
    X: numpy.ndarray of shape (n_samples, n_features)

    Returns
    -------
    tuple of lists
        One list with the k nearest neighbors' distances and another
        one with their indexes.

    Notes
    -----
    To use our own KDTree implementation, comment out the sk.KDTree call
    below and uncomment the custom KDTree call instead.
    """
    # tree = KDTree(self.window.get_attributes_matrix(), metric='euclidean',
    #               categorical_list=self.categorical_list, return_distance=True)
    tree = sk.KDTree(self.window.get_attributes_matrix(), self.leaf_size,
                     metric='euclidean')
    dist, ind = tree.query(np.asarray(X), k=self.k)
    return dist, ind
def _compute_connectivity(positions, radius, add_self_edges):
    """Get the indices of connected edges with radius connectivity.

    Args:
      positions: Positions of nodes in the graph.
        Shape: [num_nodes_in_graph, num_dims].
      radius: Radius of connectivity.
      add_self_edges: Whether to include self edges or not.

    Returns:
      sender indices [num_edges_in_graph]
      receiver indices [num_edges_in_graph]
    """
    tree = neighbors.KDTree(positions)
    receivers_list = tree.query_radius(positions, r=radius)
    num_nodes = len(positions)
    senders = np.repeat(range(num_nodes), [len(a) for a in receivers_list])
    receivers = np.concatenate(receivers_list, axis=0)

    if not add_self_edges:
        # Remove self edges.
        mask = senders != receivers
        senders = senders[mask]
        receivers = receivers[mask]

    return senders, receivers
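# A usage sketch for _compute_connectivity; the point set and radius are
# illustrative. Every pair of points within `radius` contributes one directed
# edge in each direction.
import numpy as np
from sklearn import neighbors

positions = np.array([[0.0, 0.0], [0.0, 1.0], [3.0, 3.0]])
senders, receivers = _compute_connectivity(positions, radius=1.5,
                                           add_self_edges=False)
# Only the two nearby points connect: senders=[0, 1], receivers=[1, 0].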
def comparison_kd_tree_library():
    # Data: 6 points with 2 attributes each.
    data = np.column_stack(([9, 4, 5, 7, 8, 2], [6, 7, 4, 2, 1, 3]))

    # Target query point.
    target = np.array([10, 10])

    # KD-tree initialization. Note: the custom KDTree is assumed to be
    # constructed from the data, like the scikit-learn tree; the original
    # assigned the bare class without instantiating it.
    kdtree_lucas = KDTree(data)
    kdtree_scikit = neighbors.KDTree(data, metric='euclidean')

    # KD-tree queries. scikit-learn's query expects a 2-D array.
    k = data.shape[0]
    distances_lucas, indices_lucas = kdtree_lucas.query(target, k)
    distances_scikit, indices_scikit = kdtree_scikit.query(target.reshape(1, -1), k=k)
    nn_scikit = data[indices_scikit[0]]
    nn_lucas = data[indices_lucas]

    # Difference.
    print('nearest neighbors lucas')
    print(nn_lucas)
    print('')
    print('nearest neighbors scikit')
    print(nn_scikit)
    print('')
    print('difference')
    print(nn_lucas - nn_scikit)
def __predict_proba(self, X):
    """ __predict_proba

    Private implementation of the predict_proba method.

    Parameters
    ----------
    X: numpy.ndarray of shape (n_samples, n_features)

    Returns
    -------
    tuple of lists
        One list with the k nearest neighbors' distances and another
        one with their indexes.
    """
    # To use our own KDTree implementation, replace the call below with:
    # tree = KDTree(self.window.get_attributes_matrix(), metric='euclidean',
    #               nominal_attributes=self._nominal_attributes, return_distance=True)
    tree = sk.KDTree(self.window.get_attributes_matrix(), self.leaf_size,
                     metric='euclidean')
    dist, ind = tree.query(np.asarray(X), k=self.n_neighbors)
    return dist, ind
def k_dist(X, metric, k=3):
    """Return each point's distance to its k-th nearest neighbor."""
    data = []
    tree = sk.KDTree(X, leaf_size=30)
    for n, point in enumerate(X):
        dist, ind = tree.query([point], k=k)
        data.append(dist[0].tolist()[k - 1])
    return data
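# A usage sketch for k_dist; sorting the k-distances gives the "elbow" curve
# commonly used to choose an eps value for DBSCAN. The data is illustrative,
# and note the metric argument is unused by the function as written.
import numpy as np
from sklearn import neighbors as sk

X = np.random.rand(200, 2)
distances = sorted(k_dist(X, metric='euclidean', k=4))
# Plot `distances` and look for the elbow; that y-value is a candidate eps.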
def quantize(img, representative_points):
    """Replace every pixel with its nearest representative color."""
    quantized_img = np.zeros_like(img)
    tree = neighbors.KDTree(representative_points, leaf_size=30)
    for x in range(img.shape[0]):
        for y in range(img.shape[1]):
            _, index = tree.query([img[x][y]], k=1)
            quantized_img[x][y] = representative_points[index[0][0]]
    return quantized_img
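# A usage sketch for quantize, assuming `img` is an H x W x 3 array; the
# palette below (black, white, pure red) is illustrative.
import numpy as np
from sklearn import neighbors

img = np.random.randint(0, 256, size=(32, 32, 3))
palette = np.array([[0, 0, 0], [255, 255, 255], [255, 0, 0]])
quantized = quantize(img, palette)  # every pixel snapped to its nearest palette color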
def kdtree(df_reduced, lat_col, long_col, leaf_size, k):
    """
    Takes in: a DataFrame and the names of its latitude/longitude columns,
        plus the KD-tree leaf size and the number of neighbors k.
    Returns: the fitted tree and, for every point, the distances and
        indices of its k nearest neighbors.
    """
    position = df_reduced[[lat_col, long_col]]
    tree = neighbors.KDTree(position, leaf_size=leaf_size)
    dist, ind = tree.query(position, k=k)
    return tree, dist, ind
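# A usage sketch for kdtree; the DataFrame and column names are illustrative.
import numpy as np
import pandas as pd
from sklearn import neighbors

df = pd.DataFrame({'lat': np.random.uniform(52.4, 52.6, 100),
                   'lng': np.random.uniform(13.3, 13.5, 100)})
tree, dist, ind = kdtree(df, 'lat', 'lng', leaf_size=30, k=4)
# dist[:, 0] is each point's distance to itself (0); dist[:, 1] is the
# distance to its nearest other point.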
def store_best_threshold(self, stored_embeddings, dir):
    thresholds = np.arange(0, 1, 0.0025)
    embedding_size = 512

    # Count all stored embeddings.
    size = 0
    for i, cls in enumerate(stored_embeddings):
        for embs in stored_embeddings[cls].values():
            size += len(embs)

    # Flatten the embeddings into one matrix and remember each row's class.
    plain_embeddings = np.zeros([size, embedding_size])
    class_index = []
    emb_i = 0
    for i, cls in enumerate(stored_embeddings):
        for embs in stored_embeddings[cls].values():
            for emb in embs:
                plain_embeddings[emb_i] = emb
                emb_i += 1
                class_index.append(cls)

    embeddings = (plain_embeddings + 1.) / 2.
    kd_tree = neighbors.KDTree(embeddings, metric='euclidean')

    dists = np.zeros([size], dtype=np.float32)
    recognized = np.zeros([size], dtype=bool)
    for i, emb in enumerate(embeddings):
        # k=2 because each embedding's nearest neighbor is itself; take the
        # second-nearest instead. (The original recomputed np.argmax on the
        # already-reduced distance, which always returned the query point
        # itself; fixed here.)
        dist, idx = kd_tree.query(emb.reshape([1, 512]), k=2)
        nearest = np.argmax(dist[0])
        detected_class = class_index[idx[0][nearest]]
        recognized[i] = detected_class == class_index[i]
        dists[i] = dist[0][nearest]

    # Pick the threshold that maximizes correct recognitions.
    best_threshold = 0
    max_detect = 0
    if recognized.all():
        best_threshold = np.max(dists)
    else:
        for threshold in thresholds:
            detected = len(dists[recognized & (dists < threshold)])
            if detected > max_detect:
                max_detect = detected
                best_threshold = threshold

    threshold_file = os.path.join(dir, 'threshold.txt')
    print_fun('=' * 50)
    print_fun('Found best threshold = %s' % best_threshold)
    # print_fun('Written to %s.' % threshold_file)
    print_fun('=' * 50)
def getKDTrees(places):
    trees = {}
    for place_type in places.keys():
        coords = [[i['lat'], i['lng']] for i in places[place_type]]
        X = np.array(coords)
        tree = neighbors.KDTree(X, leaf_size=2)
        trees[place_type] = tree
    return trees
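# A usage sketch for getKDTrees; the place data and query point are
# illustrative. Each place type gets its own tree, so nearest-place lookups
# stay per-category.
import numpy as np
from sklearn import neighbors

places = {'cafe': [{'lat': 52.52, 'lng': 13.40}, {'lat': 52.50, 'lng': 13.42}]}
trees = getKDTrees(places)
dist, ind = trees['cafe'].query(np.array([[52.51, 13.41]]), k=1)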
def which_points_inside_source(source, target):
    # Convert the point clouds to np.array.
    s_points = np.asarray(source.pcd.points)
    t_points = np.asarray(target.pcd.points)

    # Slice the ligand along z in steps of 0.2.
    cut_surface = [i for i in np.arange(s_points[:, 2].max(),
                                        s_points[:, 2].min(), -0.2)]

    # Inside/outside test.
    points_inside_idx = []
    for l in range(len(cut_surface) - 1):
        # Indices of the points belonging to each slice.
        s_c = list((s_points[:, 2] > cut_surface[l + 1]) & (s_points[:, 2] < cut_surface[l]))
        s_c = [i for i, x in enumerate(s_c) if x]
        t_c = list((t_points[:, 2] > cut_surface[l + 1]) & (t_points[:, 2] < cut_surface[l]))
        t_c = [i for i, x in enumerate(t_c) if x]

        # Extract the xy coordinates of the slice.
        layer = s_points[s_c][:, 0:2]
        points = t_points[t_c][:, 0:2]

        # Create a polygon via a nearest-neighbor walk.
        if len(layer) > 2:
            tree = neighbors.KDTree(layer)
            order = []
            q = 0  # query index
            k = 2  # number of nearest neighbors
            # Walk nearest neighbors until every layer point is ordered.
            while len(order) != len(layer):
                _, idx = tree.query([layer[q]], k=k)
                if idx[0, k - 1] not in order:
                    order.append(idx[0, k - 1])
                    q = idx[0, k - 1]
                    k = 2
                else:
                    k = k + 1

            # Build the polygon.
            polygon = Polygon(layer[order])

            # Collect the indices of the target points inside the polygon.
            in_or_out = [Point(p).within(polygon) for p in points]
            c = [i for i, x in enumerate(in_or_out) if x]
            for i in c:
                points_inside_idx.append(t_c[i])

    return points_inside_idx, t_points[points_inside_idx]
def classify_nearest_neighbor_kd_tree_sk(k):
    print('k = {}'.format(k))
    labels = load_labels()

    # Index the first half of each genre's songs.
    song_samples = []
    indexed_genres = []
    for genre, song_genres_ids in labels.groupby('category'):
        print('Indexing genre: {}'.format(genre))
        num_values = len(song_genres_ids.values)
        for i in range(int(num_values / 2)):
            val = song_genres_ids.values[i]
            song_id = val[0]
            song = pd.read_csv('song_data/training/{}'.format(song_id), header=None)
            for val in song.values:
                song_samples.append(val)
                indexed_genres.append(genre)
    kd_tree = nb.KDTree(np.vstack(song_samples))

    # Classify the second half by majority vote over each sample's
    # k nearest indexed samples.
    total_count = 0
    match_count = 0
    for genre, song_genres_ids in labels.groupby('category'):
        print('Expected genre: {}'.format(genre))
        num_values = len(song_genres_ids.values)
        for i in range(int(num_values / 2), num_values):
            val = song_genres_ids.values[i]
            song_id = val[0]
            song = pd.read_csv('song_data/training/{}'.format(song_id), header=None)
            genre_freqs = {}
            for s in song.values:
                genre_indices = kd_tree.query([s], k, return_distance=False)
                logging.debug('Length of indexed genres: {}'.format(len(indexed_genres)))
                logging.debug('genre_indices: {}'.format(genre_indices))
                for index in genre_indices[0]:
                    g = indexed_genres[index]
                    genre_freqs[g] = genre_freqs.get(g, 0) + 1
            actual_genre = max(genre_freqs, key=genre_freqs.get)
            print('Predicted genre: {}'.format(actual_genre))
            total_count += 1
            if genre == actual_genre:
                match_count += 1
    print('Matched {} out of {} songs: {}%'.format(
        match_count, total_count, (match_count / total_count) * 100))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("events", help="Path to events in CSV format")
    args = parser.parse_args()
    events_path = args.events

    events = pd.read_csv(events_path)
    i = 19750
    events = events.iloc[i:i + 2000]
    events["timestamp"] -= events["timestamp"].min()

    # Keep only events whose 5th-nearest neighbor is within 3 pixels
    # (a simple density-based noise filter).
    coordinates = events[["x", "y"]].values
    kdtree = skn.KDTree(coordinates, metric="euclidean")
    nn_distance = kdtree.query(coordinates, k=5)[0][:, 4]
    events = events.loc[nn_distance <= 3]
    nn_distance = nn_distance[nn_distance <= 3]

    fig = pp.figure(figsize=(8, 8))
    ax = fig.add_subplot(111, projection="3d")
    ax.w_xaxis.line.set_visible(False)
    ax.w_yaxis.line.set_visible(False)
    ax.w_zaxis.line.set_visible(False)
    ax.set_xlabel("$x$")
    ax.set_ylabel("$t$")
    ax.set_zlabel("$y$")
    ax.w_xaxis.set_ticklabels([])
    ax.w_yaxis.set_ticklabels([])
    ax.w_zaxis.set_ticklabels([])
    ax.set_ylim(bottom=-1000, top=events["timestamp"].max())
    ax.view_init(10, 60)
    ax.scatter3D(events["x"], np.full(len(events), -1000), events["y"],
                 s=10, depthshade=False)
    ax.scatter3D(events["x"], events["timestamp"], events["y"],
                 s=8, c=events["timestamp"], cmap="Reds")
    fig.savefig("hand.pdf", bbox_inches=mpt.Bbox.from_bounds(1, 1.4, 6.0, 4.7))
def __init__(self, resolution, width, height, landmarks, robot):
    m = int(height / resolution)
    n = int(width / resolution)
    # Start from a uniform belief over the m x n grid.
    self.belief = np.zeros((m, n)) + (1.0 / (m * n))
    self.landmarks = landmarks
    self.landmarks_tree = neighbors.KDTree([(l.x, l.y) for l in landmarks])
    self.resolution = resolution
    self.z_noise = robot.z_noise
    self.x_noise = robot.x_noise
    self.y_noise = robot.y_noise
    self.phit = robot.phit
    self.pfalse = robot.pfalse
    self.max_sense_dist = robot.max_sense_dist
    self.correspondence_hash = self.setup_correspondence_hash(m, n)
def perform_test(x_set, x_test, y_set, y_test, k, dist_met, approach):
    # Start time.
    start_time = time.time()
    # Prediction list to store the prediction for each data point.
    pred = []

    # Brute-force approach.
    if approach == 'Brute_Force':
        # Iterate over each test point.
        for pt_test in x_test:
            # Compute the distances between the training points and the test point.
            x_dist_list = []
            for d in range(0, len(x_set)):
                x_dist = dist_met(x_set[d], pt_test)
                x_dist_list.append((x_dist, y_set[d]))
            # Sort the distance list from smallest to largest distance.
            x_dist_list.sort(key=lambda x: x[0])
            # Average the y values of the k nearest neighbours.
            new_dist_list = x_dist_list[:k]
            y_tot = [y_dist[1] for y_dist in new_dist_list]
            y_avg = sum(y_tot) / len(y_tot)
            # Add the average value of y to the prediction list.
            pred.append(y_avg)
        # Compute the RMSE between the predictions and the testing set.
        error_test = rmse(pred, y_test)

    # KD-tree approach.
    elif approach == 'kd_tree':
        kd_t = neighbors.KDTree(x_set)
        # Compute the distances and indices of the k nearest neighbours.
        dist, ind = kd_t.query(x_test, k=k)
        # Average the y values of the k nearest neighbours per test point.
        pred = np.sum(y_set[ind], axis=1) / k
        # Compute the RMSE between the predictions and the testing set.
        error_test = rmse(pred, y_test)

    # Total time used to run the approach.
    run_time = time.time() - start_time
    return run_time, error_test
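# A usage sketch for perform_test on random regression data; the rmse helper
# is assumed from the surrounding module. dist_met is unused by the kd_tree
# branch, so None is passed here.
import numpy as np

x_train = np.random.rand(200, 3)
y_train = np.random.rand(200)
x_test = np.random.rand(20, 3)
y_test = np.random.rand(20)
run_time, error = perform_test(x_train, x_test, y_train, y_test,
                               k=5, dist_met=None, approach='kd_tree')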
def process(inputs, ctx, **kwargs):
    original, is_video = helpers.load_image(inputs, 'input')
    image = original.copy()
    if kwargs.get('detect') == 'false' or len(ctx.drivers) == 1:
        detect_driver = None
        reid_driver = ctx.drivers[0]
    else:
        detect_driver = ctx.drivers[0]
        reid_driver = ctx.drivers[1]

    input_name = list(reid_driver.inputs.keys())[0]
    if detect_driver is not None:
        boxes = get_boxes(detect_driver, image, threshold=0.3)
    else:
        boxes = np.array([[0, 0, image.shape[1], image.shape[0]]])
    print(f'boxes={len(boxes)}')

    for box in boxes:
        box = box.astype(int)
        img = crop_by_box(image, box)
        img = cv2.resize(img, tuple(PARAMS['input_shape'][::-1]),
                         interpolation=cv2.INTER_AREA)
        prepared = norm(img, need_transpose=PARAMS['driver_type'] == 'pytorch')
        prepared = np.expand_dims(prepared, axis=0)
        outputs = reid_driver.predict({input_name: prepared})

        # Build the tree from the first embedding; match later ones against it.
        global kd_tree
        embedding = list(outputs.values())[0]
        embedding = (embedding + 1.) / 2.
        if kd_tree is None:
            kd_tree = neighbors.KDTree(embedding, metric='euclidean')
        else:
            dist, idx = kd_tree.query(embedding, k=1)
            print(f'distance={dist}')

        cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]),
                      color=(0, 250, 0), thickness=2, lineType=cv2.LINE_AA)

    if is_video:
        output = image
    else:
        _, buf = cv2.imencode('.jpg', image[:, :, ::-1])
        output = buf.tobytes()
    return {'output': output}
def over_sample_smote(x, n, k):
    minority, _ = x.shape
    new_x = []
    tree = neighbors.KDTree(x)
    for i in range(n):
        # Choose x_i randomly.
        index = random.choice(range(minority))
        # Query the k nearest neighbors. Note: KDTree.query expects a 2-D
        # array; the original passed a scalar, which raises an error.
        dist, ind = tree.query(np.asarray(x[index]).reshape(1, -1), k=k)
        # Average the neighbor *values* (the original averaged the neighbor
        # indices, which looks like a bug).
        x_mean = x[ind[0], 0].mean()
        # Generate a new sample between x_i and the neighbor mean.
        new_x_one = x[index, 0] + random.random() * (x_mean - x[index, 0])
        new_x.append(new_x_one)
    new_x = np.matrix(new_x).T
    return new_x
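# A usage sketch for over_sample_smote on a single-column minority matrix;
# the data and counts are illustrative.
import numpy as np

x_minority = np.matrix(np.random.rand(30, 1))
synthetic = over_sample_smote(x_minority, n=10, k=5)   # 10 new samples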
def test_unsupervised_inputs():
    """Test the types of valid input into NearestNeighbors."""
    X = rng.random_sample((10, 3))

    nbrs_fid = neighbors.NearestNeighbors(n_neighbors=1)
    nbrs_fid.fit(X)
    dist1, ind1 = nbrs_fid.kneighbors(X)

    nbrs = neighbors.NearestNeighbors(n_neighbors=1)
    for input in (nbrs_fid, neighbors.BallTree(X), neighbors.KDTree(X)):
        nbrs.fit(input)
        dist2, ind2 = nbrs.kneighbors(X)
        assert_array_almost_equal(dist1, dist2)
        assert_array_almost_equal(ind1, ind2)
async def calculate_z_value(alpha, limit_size, normal_point, result_dta, Z):
    X = list(map(lambda x: [x, result_dta.values[x][1]],
                 np.arange(len(result_dta.values))))
    tree = nb.KDTree(X, leaf_size=50)
    await loop.run_in_executor(ProcessPoolExecutor(), sleep, delay)
    nomaly_neighboor = np.array(
        cmfunc.find_inverneghboor_of_point(tree, X, normal_point, limit_size),
        dtype=np.int32)
    for NN_pair in nomaly_neighboor:
        # Propagate the score to each neighbor only when it is positive.
        delta = (1 - result_dta['anomaly_score'][normal_point]) - NN_pair[0] * alpha
        if delta > 0:
            Z[NN_pair[1]] = Z[NN_pair[1]] + delta
    return True
def create_graph(nodes, k):
    g = nx.Graph()
    tree = neighbors.KDTree(nodes)
    for n1 in nodes:
        # Indices of the k nearest nodes to n1 (including n1 itself).
        ind = tree.query([n1], k, return_distance=False)[0]
        for j in ind:
            n2 = nodes[j]
            if n2 == n1:
                continue
            if can_connect(n1, n2):
                dist = np.linalg.norm(np.array(n1) - np.array(n2))
                g.add_edge(n1, n2, weight=dist)
    return g
def fit_predict(self, X):
    """
    Performs clustering on data X and returns cluster labels
    :param X: array with data
    :return: array of cluster labels
    """
    print("# Fit predict ( eps = ", self.eps, ", min_samples = ", self.min_samples, ") ...")
    start_time = time.time()
    clusters = 0
    labels = [0] * len(X)
    tree = sknei.KDTree(X)
    for i in range(len(X)):
        if labels[i] > 0:
            # Point is already assigned to a cluster.
            continue
        # Get indexes of X[i]'s neighbours within the distance eps.
        if self.metric == 'euclidean':
            N = tree.query_radius([X[i]], r=self.eps)[0]
        if len(N) < self.min_samples:
            # Mark as noise.
            labels[i] = -1
            continue
        clusters += 1
        labels[i] = clusters
        index = 0
        while index < len(N):
            # Expand the cluster over all neighbours of point X[i].
            if labels[N[index]] > 0:
                # Neighbour is already in a cluster.
                index += 1
                continue
            labels[N[index]] = clusters
            # Get indexes of X[N[index]]'s neighbours within the distance eps.
            if self.metric == 'euclidean':
                Nq = tree.query_radius([X[N[index]]], r=self.eps)[0]
            if len(Nq) >= self.min_samples:
                for n in Nq:
                    if n not in N:
                        N = np.append(N, n)
            index += 1
    print("#", clusters, " clusters found.")
    print("# Fit predict finished in ", (time.time() - start_time), " sec.")
    return np.array(labels)
async def calculate_z_value(alpha, limit_size, normal_point, result_dta, Z):
    # Restrict the KD-tree to a window of 100 points around normal_point.
    if 50 < normal_point < (len(result_dta.values) - 50):
        point_range = np.arange(normal_point - 50, normal_point + 50)
    elif normal_point <= 50:
        point_range = np.arange(0, 100)
    else:
        point_range = np.arange(len(result_dta.values) - 100,
                                len(result_dta.values) - 1)
    X = list(map(lambda x: [x, result_dta.values[x][1]], point_range))
    tree = nb.KDTree(X, leaf_size=50)
    await loop.run_in_executor(ProcessPoolExecutor(), sleep, delay)
    nomaly_neighboor = np.array(
        cmfunc.find_inverneghboor_of_point(tree, X, normal_point, limit_size),
        dtype=np.int32)
    for NN_pair in nomaly_neighboor:
        # Propagate the score to each neighbor only when it is positive.
        delta = (1 - result_dta['anomaly_score'][normal_point]) - NN_pair[0] * alpha
        if delta > 0:
            Z[NN_pair[1]] = Z[NN_pair[1]] + delta
    return True
def _kd_match(self, treated_group, control_group, observation_count):
    tree = sk.KDTree([[x] for x in control_group], leaf_size=1,
                     metric='minkowski', p=2)
    matches = self._make_match_array(treated_group, control_group)
    # Query all treated observations at once instead of one at a time.
    queries = treated_group[treated_group.index]
    dist, ind = tree.query([[x] for x in queries], k=1, breadth_first=True)
    matches[treated_group.index] = control_group.index[
        [x for x in ind]].values.flatten()
    return matches
def DBSCAN(D, eps, MinPts):
    labels = [0] * len(D)
    kdTree = neighbors.KDTree(D)
    C = 0
    for P in range(0, len(D)):
        if not (labels[P] == 0):
            continue
        NeighborPts = kdTree.query_radius(D[P].reshape(1, -1), r=eps)[0]
        if NeighborPts.shape[0] < MinPts:
            labels[P] = -1
        else:
            C += 1
            growCluster(D, kdTree, labels, P, NeighborPts, C, eps, MinPts)
    return labels
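# A usage sketch for the DBSCAN function above, assuming growCluster is
# defined elsewhere in its module. The two well-separated blobs and the
# parameters are illustrative.
import numpy as np
from sklearn import neighbors

D = np.vstack([np.random.randn(50, 2), np.random.randn(50, 2) + 10])
labels = DBSCAN(D, eps=1.5, MinPts=5)   # -1 marks noise, 1..C are clusters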
def build_kdtree(X, relative_scales=None, **kwargs):
    """
    Build a KD-tree from the finite values in the given array.
    """
    offset = np.mean(X, axis=0)
    if relative_scales is None:
        # Whiten the data using the peak-to-peak range of each dimension.
        relative_scales = np.ptp(X, axis=0)
    X_norm = (X - offset) / relative_scales

    kdt_kwds = dict(leaf_size=40, metric="minkowski")
    kdt_kwds.update(kwargs)
    kdt = neighbours.KDTree(X_norm, **kdt_kwds)

    return (kdt, relative_scales, offset)
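# A usage sketch for build_kdtree; the data is illustrative, and the
# `neighbours` alias for sklearn.neighbors matches the function above.
# Queries must be shifted and scaled the same way as the tree's data.
import numpy as np
from sklearn import neighbors as neighbours

X = np.random.rand(500, 4) * [1, 10, 100, 1000]   # very different scales
kdt, scales, offset = build_kdtree(X)
query = (X[:1] - offset) / scales                 # normalize like the tree data
dist, ind = kdt.query(query, k=3)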