class Matcher:
    """Match two descriptor sets by nearest neighbour.

    Optional filters, both driven by the configuration dict:

    * ratio test    -- keep a match only when the nearest distance divided by
                       the second-nearest distance is below ``ratio``;
    * mutual best   -- keep a match only when the reverse search from
                       ``desc1`` back to ``desc0`` returns the same pair.
    """

    def __init__(self, cfg):
        """Read the settings and pick the search back-end.

        Parameters
        ----------
        cfg : mapping
            Must provide the keys ``'mutual_best'``, ``'ratio_test'``,
            ``'ratio'`` and ``'cuda'``.
        """
        self.mutual_best = cfg['mutual_best']
        self.ratio_test = cfg['ratio_test']
        self.ratio = cfg['ratio']
        self.use_cuda = cfg['cuda']
        self.flann = FLANN()
        # Both callables take (query, database); the underlying search
        # functions take the database first, hence the swapped arguments.
        if self.use_cuda:
            self.match_fn_1 = lambda desc0, desc1: find_nearest_point_idx(desc1, desc0)
            self.match_fn_2 = lambda desc0, desc1: find_first_and_second_nearest_point(desc1, desc0)
        else:
            self.match_fn_1 = lambda desc0, desc1: self.flann.nn(desc1, desc0, 1, algorithm='linear')
            self.match_fn_2 = lambda desc0, desc1: self.flann.nn(desc1, desc0, 2, algorithm='linear')

    def match(self, desc0, desc1, *args, **kwargs):
        """Return the accepted matches between ``desc0`` and ``desc1``.

        Parameters
        ----------
        desc0, desc1 : array
            Descriptor arrays; only ``desc0.shape[0]`` is read directly here,
            everything else is delegated to the search back-end.
        *args, **kwargs
            Accepted and ignored.

        Returns
        -------
        ndarray with two columns
            Each row is ``(index into desc0, index into desc1)`` for a match
            that survived every enabled filter.
        """
        n_query = desc0.shape[0]
        # The builtin ``bool`` replaces ``np.bool``, which NumPy 1.24 removed.
        mask = np.ones(n_query, dtype=bool)

        if self.ratio_test:
            idxs, dists = self.match_fn_2(desc0, desc1)
            dists = np.sqrt(dists)  # note the distance is squared
            ratio_mask = dists[:, 0] / dists[:, 1] < self.ratio
            mask &= ratio_mask
            idxs = idxs[:, 0]
        else:
            idxs, _ = self.match_fn_1(desc0, desc1)

        if self.mutual_best:
            idxs_mutual, _ = self.match_fn_1(desc1, desc0)
            # Keep i only if the best match of desc1[idxs[i]] is i again.
            mutual_mask = np.arange(n_query) == idxs_mutual[idxs]
            mask &= mutual_mask

        matches = np.concatenate(
            [np.arange(n_query)[:, None], idxs[:, None]], axis=1)
        return matches[mask]
def assign_nearest_jobs(agent_idle, agent_job, agent_pos, blocked, jobs, left_jobs, n):
    """Build child states by giving each agent one of its nearest open jobs.

    For every agent a reference position is chosen: the first element of the
    last job already assigned to it, or the agent's own position when it has
    no assignment. The (up to) ``n`` open jobs whose first element lies
    closest to that position are looked up with FLANN, and one child state is
    created per (agent, candidate job) pair.

    Parameters
    ----------
    agent_idle, blocked
        Passed through unchanged to ``comp2state``.
    agent_job : list of tuple
        Per agent, the tuple of job indices already assigned.
    agent_pos : sequence
        Per agent, its current position.
    jobs : list
        All jobs; ``job[0]`` is used as the job's coordinate.
    left_jobs : sequence
        The jobs that are still open; each must be an element of ``jobs``.
    n : int
        Maximum number of candidate jobs per agent.

    Returns
    -------
    list
        The child states produced by ``comp2state``; empty when there are no
        open jobs or no agents.
    """
    # Nothing to assign: FLANN cannot be queried with an empty set or with
    # zero neighbours, so answer directly.
    if len(left_jobs) == 0 or len(agent_pos) == 0:
        return []

    from pyflann import FLANN

    # this makes many children ...
    ends = [left_job[0] for left_job in left_jobs]
    ends_job = [jobs.index(left_job) for left_job in left_jobs]

    starts = []
    for i_a, position in enumerate(agent_pos):
        if agent_job[i_a]:  # has assignment
            i_j = agent_job[i_a][-1]
            starts.append(jobs[i_j][0])
        else:
            starts.append(position)

    flann = FLANN()
    result, dists = flann.nn(
        np.array(ends, dtype=float),
        np.array(starts, dtype=float),
        min(n, len(ends)),
        algorithm="kmeans",
        branching=32,
        iterations=7,
        checks=16)
    assert len(agent_pos) == len(result), "Not the right amount of results"

    # With a single neighbour FLANN hands back a 1-D array; normalise once to
    # one row of candidates per agent.
    if len(result.shape) == 1:
        result = result.reshape(-1, 1)

    children = []
    for i_a in range(len(agent_pos)):
        for res in result[i_a]:
            agent_job_new = agent_job.copy()
            agent_job_new[i_a] += (ends_job[res],)
            children.append(comp2state(tuple(agent_job_new), agent_idle, blocked))
    return children
def get_closest(possible_starts, free_tasks_starts, grid, n):
    """Pick the (task start, possible start) pair joined by the shortest path.

    FLANN returns, for every entry of ``free_tasks_starts``, its ``n`` nearest
    entries of ``possible_starts``. The ``n`` globally closest pairs are then
    checked with ``path``; among those for which a path exists, the pair with
    the fewest path elements wins.

    Parameters
    ----------
    possible_starts : array
        Candidate start points (the FLANN data set).
    free_tasks_starts : array
        Start points of the free tasks (the FLANN queries).
    grid
        Passed through to ``path``.
    n : int
        Number of neighbours per query and number of pairs examined.

    Returns
    -------
    tuple
        ``(index into free_tasks_starts, index into possible_starts, path)``.

    Raises
    ------
    ValueError
        If none of the examined pairs is connected by a path.
    """
    flann = FLANN()
    result, dists = flann.nn(possible_starts, free_tasks_starts, n,
                             algorithm="kmeans",
                             branching=32,
                             iterations=7,
                             checks=16)
    # One row per query, ``n`` columns -- also when n == 1, where FLANN
    # returns 1-D arrays. ``dists`` is copied because entries get overwritten.
    result = np.asarray(result).reshape(-1, n)
    dists = np.array(dists, dtype=float).reshape(-1, n)

    # Keep index pair, path and length together so they can never get out of
    # step when a candidate turns out to have no path.
    candidates = []
    for _attempt in range(n):
        i_task, i_neighbour = np.unravel_index(np.argmin(dists), dists.shape)
        dists[i_task, i_neighbour] = np.inf  # never pick this pair again
        i_start = result[i_task, i_neighbour]
        p, _rest = path(tuple(possible_starts[i_start]),
                        tuple(free_tasks_starts[i_task]),
                        grid, [])
        if p:
            candidates.append((len(p), i_task, i_start, p))

    if not candidates:
        raise ValueError(
            "no path found between any of the %d closest start pairs" % n)

    # ``min`` returns the first shortest candidate, like ``np.argmin`` did.
    _length, i_free_tasks_start, i_possible_starts, best = min(
        candidates, key=lambda candidate: candidate[0])
    return i_free_tasks_start, i_possible_starts, best
class NearestFilter:
    """Select the points (and their labels) that lie nearest to a query."""

    def __init__(self, k: int):
        """Remember the neighbour count ``k`` and create the FLANN index."""
        self.k = k
        self.flann = FLANN()

    def filter(self, v, points, labels):
        """Return the at most ``k`` nearest ``points`` to ``v`` with labels.

        ``v`` is cast to float32 and searched exhaustively
        (``algorithm='linear'``). The neighbour count is capped at
        ``len(points)``. The search must return exactly one row of indices,
        i.e. ``v`` is treated as a single query.
        """
        query = v.astype('float32')
        neighbour_count = min(self.k, len(points))
        index_rows, _distances = self.flann.nn(
            points,
            query,
            num_neighbors=neighbour_count,
            algorithm='linear',
        )
        # Exactly one row is expected; unpacking fails loudly otherwise.
        (chosen,) = index_rows
        return points[chosen], labels[chosen]
def create_affinity(X, knn, scale=None, alg="annoy", savepath=None, W_path=None):
    """Load or compute a symmetric k-nearest-neighbour affinity matrix.

    Parameters
    ----------
    X : ndarray, 2-D
        Data matrix, one sample per row.
    knn : int
        Number of neighbours requested per sample. The first returned
        neighbour is dropped, so each row contributes ``knn - 1`` entries.
    scale : None, True or number
        ``None``  -> every edge gets weight 1;
        ``True``  -> Gaussian kernel whose width is the median of the kept
        neighbour distances;
        number    -> Gaussian kernel with that width.
    alg : str
        ``"flann"`` uses FLANN's kd-tree search; any other value (including
        the default) uses ``NearestNeighbors``.
    savepath : str or None
        If it ends in ``.npz`` or ``.mat`` the matrix is written there.
    W_path : str or None
        If given, the matrix is loaded from this ``.mat``/``.npz`` file
        instead of being computed.

    Returns
    -------
    The affinity matrix ``W`` (sparse when computed here).

    Raises
    ------
    ValueError
        If ``W_path`` has an unsupported extension.
    """
    N, _n_features = X.shape

    if W_path is not None:
        if W_path.endswith('.mat'):
            W = sio.loadmat(W_path)['W']
        elif W_path.endswith('.npz'):
            W = sparse.load_npz(W_path)
        else:
            # Previously fell through and failed later with an unbound ``W``.
            raise ValueError(
                "W_path must end in '.mat' or '.npz', got %r" % (W_path,))
    else:
        print('Compute Affinity ')
        start_time = timeit.default_timer()
        if alg == "flann":
            print('with Flann')
            flann = FLANN()
            knnind, dist = flann.nn(X, X, knn, algorithm="kdtree",
                                    target_precision=0.9, cores=5)
            # FLANN's Euclidean metric reports squared distances; take the
            # root so both back-ends feed the kernel below the same quantity.
            dist = np.sqrt(dist)
        else:
            nbrs = NearestNeighbors(n_neighbors=knn).fit(X)
            dist, knnind = nbrs.kneighbors(X)

        # Column 0 (expected to be the sample itself) is skipped.
        neighbour_dist = dist[:, 1:]
        row = np.repeat(np.arange(N), knn - 1)
        col = knnind[:, 1:].flatten()
        if scale is None:
            data = np.ones(N * (knn - 1))
        else:
            if scale is True:
                scale = np.median(neighbour_dist)
            data = np.exp((-neighbour_dist ** 2) / (2 * scale ** 2)).flatten()

        # The builtin ``float`` replaces ``np.float`` (removed in NumPy 1.24).
        W = sparse.csc_matrix((data, (row, col)), shape=(N, N), dtype=float)
        W = (W + W.transpose(copy=True)) / 2
        elapsed = timeit.default_timer() - start_time
        print(elapsed)

    # NOTE(review): saving is done for loaded matrices too; confirm this
    # matches the intended layout of the original code.
    if isinstance(savepath, str):
        if savepath.endswith('.npz'):
            sparse.save_npz(savepath, W)
        elif savepath.endswith('.mat'):
            sio.savemat(savepath, {'W': W})
    return W
def match(desc1, desc2, dist_ratio=0.6, num_trees=4):
    """Match every descriptor of ``desc1`` against ``desc2`` with a ratio test.

    The two nearest neighbours in ``desc2`` are looked up with a FLANN
    kd-tree index (``num_trees`` trees). A match is accepted only when the
    nearest distance is smaller than ``dist_ratio`` times the second-nearest
    distance, as reported by FLANN.

    Returns an integer array with one entry per row of ``desc1`` holding the
    matched index into ``desc2``. Entries start at 0, so 0 means either
    "matched index 0" or "no match".
    """
    index = FLANN()
    neighbours, distances = index.nn(
        desc2, desc1, 2, algorithm='kdtree', trees=num_trees)

    matchscores = zeros((desc1.shape[0]), 'int')
    for query_i, neighbour_pair in enumerate(neighbours):
        best_idx, _second_idx = neighbour_pair
        best_dist, second_dist = distances[query_i]
        if not best_dist < dist_ratio * second_dist:
            continue  # ambiguous: the runner-up is almost as close
        matchscores[query_i] = best_idx
    return matchscores
def nn_match(descs1, descs2):
    """
    Perform nearest neighbor match, using descriptors.

    This function uses pyflann (kd-tree index with 4 trees).

    :param descs1: descriptors from image 1, (N1, D)
    :param descs2: descriptors from image 2, (N2, D)
    :return indices: for each descriptor of image 1, the index of its match
        among the keypoints of image 2 -- presumably shape (N1,), confirm
        against pyflann's return value
    """
    # Brute-force NumPy and OpenCV FlannBasedMatcher variants were tried
    # before; pyflann is the one in use.
    matcher = FLANN()
    nearest, _distances = matcher.nn(descs2, descs1, algorithm="kdtree", trees=4)
    return nearest
class KNeighborsClassifier():
    """k-nearest-neighbour classifier that uses FLANN for the search.

    ``fit`` stores the training data and encodes the labels, ``kneighbors``
    runs the FLANN query and ``predict`` takes the (optionally weighted)
    mode of the neighbours' labels.
    """

    def __init__(self, n_neighbors=5, weights='uniform'):
        """Store the kNN settings and create the FLANN index object.

        Parameters
        ----------
        n_neighbors : int
            Number of neighbours used by ``kneighbors``/``predict``.
        weights
            Passed to ``_get_weights`` in ``predict``.
        """
        # Hyper-parameters of the FLANN algorithm.
        self.algrithm_choice = "kmeans"
        self.branching = 32
        self.iterations = 7
        self.checks = 16
        # Basic KNN parameters.
        self.n_neighbors = n_neighbors
        self.weights = weights
        self.flann = FLANN()

    def fit(self, X, Y):
        """Store the training samples and encode the labels.

        Parameters
        ----------
        X : array-like
            Training samples; stored as float32 in ``train_data``.
        Y : array-like, 1-D or 2-D
            Labels. A single column is treated as 1-D (with a warning);
            more than one column means multi-output.

        Sets ``train_data``, ``classes_``, ``train_label`` and
        ``outputs_2d_``.
        """
        self.train_data = np.asarray(X).astype(np.float32)
        Y = np.asarray(Y)  # also accept plain sequences
        if Y.ndim == 1 or Y.ndim == 2 and Y.shape[1] == 1:
            if Y.ndim != 1:
                warnings.warn("A column-vector y was passed when a 1d array "
                              "was expected. Please change the shape of y to "
                              "(n_samples, ), for example using ravel().",
                              DataConversionWarning, stacklevel=2)
            self.outputs_2d_ = False
            Y = Y.reshape((-1, 1))
        else:
            self.outputs_2d_ = True

        self.classes_ = []
        # Builtin ``int`` replaces ``np.int`` (removed in NumPy 1.24).
        self.train_label = np.empty(Y.shape, dtype=int)
        for k in range(self.train_label.shape[1]):
            # Labels are stored as indices into the sorted unique classes.
            classes, self.train_label[:, k] = np.unique(Y[:, k], return_inverse=True)
            self.classes_.append(classes)

        if not self.outputs_2d_:
            self.classes_ = self.classes_[0]
            self.train_label = self.train_label.ravel()

    def predict(self, X, n_neighbors=None):
        """Predict the class labels for the provided data.

        Parameters
        ----------
        X : array-like, shape (n_queries, n_features)
            Test samples.
        n_neighbors : int, optional
            If given, replaces ``self.n_neighbors`` -- for this call and all
            later ones.

        Returns
        -------
        y : array of shape [n_queries] or [n_queries, n_outputs]
            Class labels for each data sample.
        """
        if n_neighbors is not None:
            self.n_neighbors = n_neighbors
        X = check_array(X, accept_sparse='csr')
        X = X.astype(np.float32)

        neigh_dist, neigh_ind = self.kneighbors(X)

        classes_ = self.classes_
        _y = self.train_label
        if not self.outputs_2d_:
            # Treat the single-output case as multi-output with one column.
            _y = self.train_label.reshape((-1, 1))
            classes_ = [self.classes_]

        n_outputs = len(classes_)
        n_queries = X.shape[0]
        weights = _get_weights(neigh_dist, self.weights)

        y_pred = np.empty((n_queries, n_outputs), dtype=classes_[0].dtype)
        for k, classes_k in enumerate(classes_):
            if weights is None:
                mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
            else:
                mode, _ = weighted_mode(_y[neigh_ind, k], weights, axis=1)

            mode = np.asarray(mode.ravel(), dtype=np.intp)
            y_pred[:, k] = classes_k.take(mode)

        if not self.outputs_2d_:
            y_pred = y_pred.ravel()
        return y_pred

    def kneighbors(self, test_data):
        """Query FLANN for the ``n_neighbors`` nearest training samples.

        Returns
        -------
        (dists, nearest_neighbours)
            Distances as reported by FLANN, and indices into the training
            data. Both are 2-D with one row per query; a 1-D FLANN result is
            reshaped into a single column.
        """
        nearest_neighbours, dists = self.flann.nn(
            self.train_data, test_data, self.n_neighbors,
            algorithm=self.algrithm_choice,
            branching=self.branching,
            iterations=self.iterations,
            checks=self.checks)
        if len(nearest_neighbours.shape) == 1:
            nearest_neighbours = nearest_neighbours.reshape((-1, 1))
            dists = dists.reshape((-1, 1))
        return dists, nearest_neighbours
def stacksize(since=0.0):
    """Return stack size in bytes, relative to ``since``.

    The figure is whatever ``_VmB`` reports for the ``'VmStk:'`` key.
    """
    current = _VmB('VmStk:')
    return current - since


if __name__ == '__main__':
    # Endless leak check: build and drop a FLANN index on random data over
    # and over while reporting Python-heap and whole-process memory.
    for banner in (
            'Profiling Memory usage for pyflann; CTRL-C to stop.',
            'Increasing total process memory, relative to the python memory, ',
            'implies a memory leak in the external libs.',
            'Increasing python memory implies a memory leak in the python code.'):
        print(banner)

    h = hpy()
    while True:
        s = str(h.heap())
        # Only the first line of the heap report is shown.
        heap_summary = s[: s.find('\n')]
        print('Python: %s; Process Total: %s' % (heap_summary, memory()))

        X1 = rand(50000, 2)
        X2 = rand(50000, 2)
        pf = FLANN()
        nnlist = pf.nn(X1, X2)

        # Drop every reference before collecting so that anything still
        # growing afterwards is a real leak.
        del X1, X2, nnlist, pf
        gc.collect()
def make_test(test_start=1000, test_end=1050): f1 = open('states.pkl', 'r') f2 = open('states_for_test.pkl', 'r') data_states = cPickle.load(f1) test_states = cPickle.load(f2) f1.close() f2.close() time_brute = [] time_sk_kd = [] time_sk_ball = [] time_kdtree = [] time_annoy = [] time_flann = [] time_brute_tot = time_sk_kd_tot = time_sk_ball_tot = time_kdtree_tot = time_annoy_tot = time_flann_tot = 0 kdtree_tree = None for items in xrange(test_start, test_end): print "item:", items ground_truth = np.zeros((test_num_for_each, K), dtype=np.int32) time_brute_start = time.time() for no_test in xrange(test_num_for_each): distance_list = [] current_state = test_states[items, no_test] for target in xrange(items): target_state = data_states[target] distance_list.append(DistanceNode(np.sum(np.absolute(current_state - target_state)**2), target)) smallest = heapq.nsmallest(K, distance_list, key=lambda x: x.distance) ground_truth[no_test] = [x.index for x in smallest] time_brute_end = time.time() time_brute.append(time_brute_end - time_brute_start) time_brute_tot += time_brute[-1] # print ground_truth time_sk_kd_start = time.time() tree = KDTree(data_states[:items, :]) dist, indices = tree.query(test_states[items], K) time_sk_kd_end = time.time() time_sk_kd.append(time_sk_kd_end - time_sk_kd_start) time_sk_kd_tot += time_sk_kd[-1] # print indices time_sk_ball_start = time.time() tree = BallTree(data_states[:items, :], 10000) dist, indices = tree.query(test_states[items], K) time_sk_ball_end = time.time() time_sk_ball.append(time_sk_ball_end - time_sk_ball_start) time_sk_ball_tot += time_sk_ball[-1] # print indices """ annoy is absolutely disappointing for its low speed and poor accuracy. 
""" time_annoy_start = time.time() annoy_result = np.zeros((test_num_for_each, K), dtype=np.int32) tree = AnnoyIndex(dimension_result) for i in xrange(items): tree.add_item(i, data_states[i, :]) tree.build(10) for no_test in xrange(test_num_for_each): current_state = test_states[items, no_test] annoy_result[no_test] = tree.get_nns_by_vector(current_state, K) time_annoy_end = time.time() time_annoy.append(time_annoy_end - time_annoy_start) time_annoy_tot += time_annoy[-1] # print annoy_result # print annoy_result - indices """ flann is still not very ideal """ time_flann_start = time.time() flann = FLANN() result, dist = flann.nn(data_states[:items, :], test_states[items], K, algorithm='kdtree', trees=10, checks=16) time_flann_end = time.time() time_flann.append(time_flann_end - time_flann_start) time_flann_tot += time_flann[-1] # print result-indices """ This kdtree module is so disappointing!!!! It is 100 times slower than Sklearn and even slower than brute force, more over it even makes mistakes. This kdtree module supports online insertion and deletion. I thought it would be much faster than Sklearn KdTree which rebuilds the tree every time. But the truth is the opposite. 
""" # time_kdtree_start = time.time() # if kdtree_tree is None: # point_list = [MyTuple(data_states[i, :], i) for i in xrange(items)] # kdtree_tree = kdtree.create(point_list) # else: # point = MyTuple(data_states[items, :], items) # kdtree_tree.add(point) # kdtree_result = np.zeros((test_num_for_each, K), dtype=np.int32) # for no_test in xrange(test_num_for_each): # current_state = test_states[items, no_test] # smallest = kdtree_tree.search_knn(MyTuple(current_state, -1), K) # kdtree_result[no_test] = [x[0].data.pos for x in smallest] # time_kdtree_end = time.time() # time_kdtree.append(time_kdtree_end - time_kdtree_start) # time_kdtree_tot += time_kdtree[-1] # print kdtree_result # print kdtree_result-indices print 'brute force:', time_brute_tot print 'sklearn KDTree', time_sk_kd_tot print 'sklearn BallTree', time_sk_ball_tot print 'approximate annoy', time_annoy_tot print 'approximate flann', time_flann_tot print 'kdtree (deprecated)', time_kdtree_tot
# For every sample size ``n`` in ``ratio``: draw 50 random subsets of the
# atom table, keep the Sc atoms and look at how much each atom's
# nearest-neighbour distances spread.
# NOTE(review): this fragment looks truncated -- ``vol`` and ``com_num`` are
# never filled in the visible part; confirm against the full script.
for n in ratio:
    vol = []
    com_num = []
    for i in range(50):
        # Random row positions in [0, N); the same row may be drawn twice.
        ind = np.random.randint(N, size=n)
        dpos = dpos_org.iloc[ind]
        #Al_pos = dpos.loc[dpos.element == 'Al', :]
        Sc_pos = dpos.loc[dpos.element == 'Sc', :]
        data_Sc = Sc_pos.loc[:, ['x', 'y', 'z']].values
        #ind_al = np.random.randint(5000, size=len(Al_pos))
        #Al_pos =Al_pos.iloc[ind_al]
        ## Remove single atoms.
        # Each Sc atom is queried against the same set, 8 neighbours each.
        results, dists = fl.nn(scale(data_Sc), scale(data_Sc), 8)
        # Standard deviation (not a covariance, despite the name) of the
        # neighbour distances, skipping the first neighbour -- presumably the
        # atom itself.
        cov_dists = np.asarray([np.std(d[1:]) for d in dists])
        # Histogram plotting is switched off; set ``viz = True`` to enable.
        viz = False
        if viz is True:
            fig = plt.figure()
            mng = plt.get_current_fig_manager()
            mng.full_screen_toggle()
            ax = fig.add_subplot(111)
            ax.hist(cov_dists)
            ax.set_xlabel('Covariance of Nearest Neighbor Distance')
            ax.set_ylabel('Frequency')
            ax.xaxis.label.set_size(26)
            ax.yaxis.label.set_size(26)
            # Only the x tick labels are resized in the visible code.
            for xtick, ytick in zip(ax.xaxis.get_major_ticks(), ax.yaxis.get_major_ticks()):
                xtick.label.set_fontsize(20)
class Test_PyFLANN_nn(unittest.TestCase):
    """Nearest-neighbour tests for ``FLANN.nn`` across the index algorithms."""

    def setUp(self):
        self.nn = FLANN()

    ##########################################################################
    # The typical

    def test_nn_2d_10pt_kmeans(self):
        self.__nd_random_test(2, 2, algorithm='kdtree')

    def test_nn_2d_1000pt_kmeans(self):
        self.__nd_random_test(2, 1000, algorithm='kmeans')

    def test_nn_100d_1000pt_kmeans(self):
        self.__nd_random_test(100, 1000, algorithm='kmeans')

    def test_nn_500d_100pt_kmeans(self):
        self.__nd_random_test(500, 100, algorithm='kmeans')

    def test_nn_2d_1000pt_kdtree(self):
        self.__nd_random_test(2, 1000, algorithm='kdtree')

    def test_nn_100d_1000pt_kdtree(self):
        self.__nd_random_test(100, 1000, algorithm='kdtree')

    def test_nn_500d_100pt_kdtree(self):
        self.__nd_random_test(500, 100, algorithm='kdtree')

    def test_nn_2d_1000pt_linear(self):
        self.__nd_random_test(2, 1000, algorithm='linear')

    def test_nn_100d_50pt_linear(self):
        self.__nd_random_test(100, 50, algorithm='linear')

    def test_nn_2d_1000pt_composite(self):
        self.__nd_random_test(2, 1000, algorithm='composite')

    def test_nn_100d_1000pt_composite(self):
        self.__nd_random_test(100, 1000, algorithm='composite')

    def test_nn_500d_100pt_composite(self):
        self.__nd_random_test(500, 100, algorithm='composite')

    def test_nn_multtrees_2d_1000pt_kmeans(self):
        self.__nd_random_test(2, 1000, algorithm='kmeans', trees=8)

    def test_nn_multtrees_100d_1000pt_kmeans(self):
        self.__nd_random_test(100, 1000, algorithm='kmeans', trees=8)

    def test_nn_multtrees_500d_100pt_kmeans(self):
        self.__nd_random_test(500, 100, algorithm='kmeans', trees=8)

    ##########################################################################
    # Stress it should handle

    def test_nn_stress_1d_1pt_kmeans(self):
        self.__nd_random_test(1, 1, algorithm='kmeans')

    def test_nn_stress_1d_1pt_linear(self):
        self.__nd_random_test(1, 1, algorithm='linear')

    def test_nn_stress_1d_1pt_kdtree(self):
        self.__nd_random_test(1, 1, algorithm='kdtree')

    def test_nn_stress_1d_1pt_composite(self):
        self.__nd_random_test(1, 1, algorithm='composite')

    @staticmethod
    def _duplicate_recall(indices, N, num_neighbors):
        """Mean fraction of each point's duplicates found among its neighbours.

        The data set holds ``num_neighbors`` stacked copies of the ``N``
        query points, so the copies of point ``i`` sit at ``i + n * N``.
        """
        total = 0.0
        for i in range(N):
            expected = [i + n * N for n in range(num_neighbors)]
            total += float(len(set(indices[i]).intersection(expected))) / num_neighbors
        return total / N

    def __nd_random_test(self, dim, N, type=np.float32, num_neighbors=10, **kwargs):
        """
        Make a set of random points, then pass the same ones to the
        query points. Each point should be closest to itself.

        Three checks are made:
        1. a permutation of the points is mapped back to itself;
        2. with every point duplicated ``num_neighbors`` times, the
           neighbours of a point are its duplicates (recall >= 0.99);
        3. the same after adding a little noise to the duplicates.
        """
        np.random.seed(0)
        x = np.array(np.random.rand(N, dim), dtype=type)
        perm = np.random.permutation(N)

        idx, dists = self.nn.nn(x, x[perm], **kwargs)
        self.assertTrue(all(idx == perm))

        # Make sure it's okay if we do make all the points equal
        x_mult_nn = np.concatenate([x for i in range(num_neighbors)])
        nidx, ndists = self.nn.nn(x_mult_nn, x, num_neighbors=num_neighbors, **kwargs)

        correctness = self._duplicate_recall(nidx, N, num_neighbors)
        self.assertTrue(
            correctness >= 0.99,
            'failed #1: N=%d,correctness=%f' % (N, correctness),
        )

        # now what happens if they are slightly off
        x_mult_nn += (np.random.randn(x_mult_nn.shape[0], x_mult_nn.shape[1]) * 0.0001 / dim)
        n2idx, n2dists = self.nn.nn(x_mult_nn, x, num_neighbors=num_neighbors, **kwargs)

        # Measured from scratch: carrying the first score over would let this
        # check pass no matter what the second query returns.
        correctness = self._duplicate_recall(n2idx, N, num_neighbors)
        self.assertTrue(
            correctness >= 0.99,
            'failed #2: N=%d,correctness=%f' % (N, correctness),
        )
# Load every '.npy' feature file in ``src`` and, in 'chamfer' mode, compute
# the symmetric chamfer distance between all pairs of files. The result is
# written to '<src>/dists.json' as {file: {other file: distance}}.
files = os.listdir(src)
filenames = []
vectors = []
for f in files:
    # need to convert into list of vectors
    if f.endswith('.npy'):
        # Drops the last 8 characters of the file name -- presumably '.npy'
        # plus a 4-character suffix; confirm against the naming scheme.
        filenames.append(f[:-8])
        data = np.load(os.path.join(src, f))
        # print data.shape
        #vectors.append(np.reshape(data, (14 * 14, 512)))
        vectors.append(np.reshape(data, (28 * 28, 512)))

flann = FLANN()
distMap = {}
if mode == 'chamfer':
    for i, dataset in enumerate(vectors):
        print('Computing chamfer for file ' + str(i))
        imgDists = {}
        for j, testset in enumerate(vectors):
            # Chamfer distance: nearest-neighbour distances summed in both
            # directions.
            _, dists = flann.nn(dataset, testset, 1)
            _, rdists = flann.nn(testset, dataset, 1)
            # ``.item()`` replaces ``np.asscalar`` (removed in NumPy 1.23);
            # it yields a plain Python number that ``json`` can serialise.
            imgDists[filenames[j]] = (np.sum(dists) + np.sum(rdists)).item()
        distMap[filenames[i]] = imgDists

# NOTE(review): written for every mode (an empty map when mode is not
# 'chamfer'); confirm this matches the intended layout of the original code.
with open(os.path.join(src, 'dists.json'), 'w') as outfile:
    json.dump(distMap, outfile, sort_keys=True, indent=2)
class Test_PyFLANN_nn(unittest.TestCase):
    """Checks that the autotuned FLANN index reaches its target precision."""

    def setUp(self):
        self.nn = FLANN(log_level='warning')

    ##########################################################################
    # The typical

    def test_nn_2d_2pt(self):
        self.__nd_random_test_autotune(2, 2)

    def test_nn_autotune_2d_10pt(self):
        self.__nd_random_test_autotune(2, 10)

    # def test_nn_autotune_100d_1000pt(self):
    #     self.__nd_random_test_autotune(100, 1000)

    # def test_nn_autotune_500d_100pt(self):
    #     self.__nd_random_test_autotune(500, 100)

    #
    # ####################################################################
    # # Stress it should handle
    #
    def test_nn_stress_1d_1pt_kmeans_autotune(self):
        self.__nd_random_test_autotune(1, 1)

    def __ensure_list(self, arg):
        """Return ``arg`` as a flat list of neighbour indices.

        A list is returned unchanged. A scalar (one neighbour) becomes a
        one-element list and an array row (several neighbours) becomes the
        list of its elements, so the result can always go into ``set()``.
        """
        if isinstance(arg, list):
            return arg
        return np.atleast_1d(arg).tolist()

    def __nd_random_test_autotune(self, dim, N, num_neighbors=1, **kwargs):
        """
        Make a set of random data points and a set of random query points,
        compute the exact neighbours with the linear algorithm, and check
        that the autotuned index agrees with them often enough.

        For each target precision ``tp`` in 0.70, 0.80, 0.90 the fraction of
        exact neighbours recovered must be at least ``tp * 0.9``.
        """
        np.random.seed(0)
        x = np.random.rand(N, dim)
        xq = np.random.rand(N, dim)
        # perm = np.random.permutation(N)

        # compute ground truth nearest neighbors
        gt_idx, gt_dist = self.nn.nn(x, xq,
                                     algorithm='linear',
                                     num_neighbors=num_neighbors)

        for tp in [0.70, 0.80, 0.90]:
            nidx, ndist = self.nn.nn(x, xq,
                                     algorithm='autotuned',
                                     sample_fraction=1.0,
                                     num_neighbors=num_neighbors,
                                     target_precision=tp,
                                     checks=-2,
                                     **kwargs)

            correctness = 0.0
            for i in range(N):
                l1 = self.__ensure_list(nidx[i])
                l2 = self.__ensure_list(gt_idx[i])
                correctness += float(len(set(l1).intersection(l2))) / num_neighbors
            correctness /= N

            self.assertTrue(
                correctness >= tp * 0.9,
                'failed #1: targ_prec=%f, N=%d,correctness=%f' % (tp, N, correctness),
            )
from pyflann import FLANN
import numpy as np

# Index settings shared by both demo queries below.
_KMEANS_OPTIONS = dict(algorithm="kmeans", branching=32, iterations=7, checks=16)

# --- Demo 1: a tiny hand-written example --------------------------------
# the base points
dataset = np.array([
    [1., 1, 1, 2, 3],
    [10, 10, 10, 3, 2],
    [100, 100, 2, 30, 1],
])
# the points to measure
testset = np.array([
    [1., 1, 1, 1, 1],
    [90, 90, 10, 10, 1],
])

flann = FLANN()
result, dists = flann.nn(dataset, testset, 2, **_KMEANS_OPTIONS)

# For each point of the testset, ``result`` holds the 2 closest points of
# the dataset (2 because of the neighbour count passed above).
print(result)
print(dists)

print("-----")

# --- Demo 2: random data -------------------------------------------------
dataset = np.random.rand(10000, 128)  # 10 000 points with 128 dimensions
testset = np.random.rand(1000, 128)   # 1 000 points with 128 dimensions

flann = FLANN()
result, dists = flann.nn(dataset, testset, 5, **_KMEANS_OPTIONS)

print(result)
print(dists)
print(np.shape(dists))
def make_test(test_start=1000, test_end=1050): f1 = open('states.pkl', 'r') f2 = open('states_for_test.pkl', 'r') data_states = cPickle.load(f1) test_states = cPickle.load(f2) f1.close() f2.close() time_brute = [] time_sk_kd = [] time_sk_ball = [] time_kdtree = [] time_annoy = [] time_flann = [] time_brute_tot = time_sk_kd_tot = time_sk_ball_tot = time_kdtree_tot = time_annoy_tot = time_flann_tot = 0 kdtree_tree = None for items in xrange(test_start, test_end): print "item:", items ground_truth = np.zeros((test_num_for_each, K), dtype=np.int32) time_brute_start = time.time() for no_test in xrange(test_num_for_each): distance_list = [] current_state = test_states[items, no_test] for target in xrange(items): target_state = data_states[target] distance_list.append( DistanceNode( np.sum(np.absolute(current_state - target_state)**2), target)) smallest = heapq.nsmallest(K, distance_list, key=lambda x: x.distance) ground_truth[no_test] = [x.index for x in smallest] time_brute_end = time.time() time_brute.append(time_brute_end - time_brute_start) time_brute_tot += time_brute[-1] # print ground_truth time_sk_kd_start = time.time() tree = KDTree(data_states[:items, :]) dist, indices = tree.query(test_states[items], K) time_sk_kd_end = time.time() time_sk_kd.append(time_sk_kd_end - time_sk_kd_start) time_sk_kd_tot += time_sk_kd[-1] # print indices time_sk_ball_start = time.time() tree = BallTree(data_states[:items, :], 10000) dist, indices = tree.query(test_states[items], K) time_sk_ball_end = time.time() time_sk_ball.append(time_sk_ball_end - time_sk_ball_start) time_sk_ball_tot += time_sk_ball[-1] # print indices """ annoy is absolutely disappointing for its low speed and poor accuracy. 
""" time_annoy_start = time.time() annoy_result = np.zeros((test_num_for_each, K), dtype=np.int32) tree = AnnoyIndex(dimension_result) for i in xrange(items): tree.add_item(i, data_states[i, :]) tree.build(10) for no_test in xrange(test_num_for_each): current_state = test_states[items, no_test] annoy_result[no_test] = tree.get_nns_by_vector(current_state, K) time_annoy_end = time.time() time_annoy.append(time_annoy_end - time_annoy_start) time_annoy_tot += time_annoy[-1] # print annoy_result # print annoy_result - indices """ flann is still not very ideal """ time_flann_start = time.time() flann = FLANN() result, dist = flann.nn(data_states[:items, :], test_states[items], K, algorithm='kdtree', trees=10, checks=16) time_flann_end = time.time() time_flann.append(time_flann_end - time_flann_start) time_flann_tot += time_flann[-1] # print result-indices """ This kdtree module is so disappointing!!!! It is 100 times slower than Sklearn and even slower than brute force, more over it even makes mistakes. This kdtree module supports online insertion and deletion. I thought it would be much faster than Sklearn KdTree which rebuilds the tree every time. But the truth is the opposite. 
""" # time_kdtree_start = time.time() # if kdtree_tree is None: # point_list = [MyTuple(data_states[i, :], i) for i in xrange(items)] # kdtree_tree = kdtree.create(point_list) # else: # point = MyTuple(data_states[items, :], items) # kdtree_tree.add(point) # kdtree_result = np.zeros((test_num_for_each, K), dtype=np.int32) # for no_test in xrange(test_num_for_each): # current_state = test_states[items, no_test] # smallest = kdtree_tree.search_knn(MyTuple(current_state, -1), K) # kdtree_result[no_test] = [x[0].data.pos for x in smallest] # time_kdtree_end = time.time() # time_kdtree.append(time_kdtree_end - time_kdtree_start) # time_kdtree_tot += time_kdtree[-1] # print kdtree_result # print kdtree_result-indices print 'brute force:', time_brute_tot print 'sklearn KDTree', time_sk_kd_tot print 'sklearn BallTree', time_sk_ball_tot print 'approximate annoy', time_annoy_tot print 'approximate flann', time_flann_tot print 'kdtree (deprecated)', time_kdtree_tot