def fit(self, X):
    """Build an autotuned MRPT index over ``X``.

    ``X`` is cast to ``float32`` when it has any other dtype, and its rows
    are scaled to unit L2 norm when ``self._metric`` is ``'angular'``. The
    resulting index is stored on ``self._index_autotuned``.
    """
    data = X.astype(numpy.float32) if X.dtype != numpy.float32 else X
    if self._metric == 'angular':
        data = sklearn.preprocessing.normalize(data, axis=1, norm='l2')

    self._index_autotuned = mrpt.MRPTIndex(data)
    # target_recall=None: no recall level is fixed at build time.
    # NOTE(review): presumably the recall is selected later - confirm
    # against the mrpt documentation.
    autotune_args = {"target_recall": None, "k": self._k, "n_test": 1000}
    self._index_autotuned.build_autotune_sample(**autotune_args)
def fit(self, X):
    """Build an MRPT index over ``X`` and store it on ``self._index``.

    When ``self._metric`` is ``'angular'`` the rows of ``X`` are first scaled
    to unit L2 norm. The index is created with ``self._depth`` and
    ``self._n_trees``.
    """
    is_angular = self._metric == 'angular'
    data = (
        sklearn.preprocessing.normalize(X, axis=1, norm='l2')
        if is_angular
        else X
    )
    index_params = {"depth": self._depth, "n_trees": self._n_trees}
    self._index = mrpt.MRPTIndex(data, **index_params)
    self._index.build()
def RunAnnMrpt():
    """Time an MRPT approximate nearest-neighbour search.

    Reads ``self`` (dataset paths, verbosity) and ``options`` (a dict of
    parameters) from the enclosing scope. Recognised option keys are ``k``
    (required), ``num_trees`` (required), ``depth`` (defaults to 2) and
    ``votes_required`` (optional); recognised keys are popped from
    ``options`` as they are read.

    Returns:
        The value of ``totalTimer.ElapsedTime()`` on success, or -1 when a
        required option is missing, ``k`` is out of range, or building or
        querying the index raises.

    Raises:
        Exception: if ``options`` still contains keys after all recognised
            parameters have been consumed.
    """
    totalTimer = Timer()

    # Load input dataset.
    Log.Info("Loading dataset", self.verbose)
    referenceData = np.genfromtxt(self.dataset[0], delimiter=',')
    queryData = np.genfromtxt(self.dataset[1], delimiter=',')
    # Only the training part is indexed; the second value is not needed.
    train, _ = SplitTrainData(self.dataset)

    # Get all the parameters.
    if "k" not in options:
        Log.Fatal("Required option: Number of furthest neighbors to find.")
        return -1
    k = int(options.pop("k"))
    if k < 1 or k > referenceData.shape[0]:
        # k is already an int here, so format it with str(); the previous
        # k.group(1) raised AttributeError instead of reporting the problem.
        Log.Fatal("Invalid k: " + str(k) + "; must be greater than 0"
                  + " and less or equal than " + str(referenceData.shape[0]))
        return -1

    build_dict = {}
    run_dict = {}

    if "num_trees" not in options:
        Log.Fatal("Required option: Number of trees to build")
        return -1
    build_dict["n_trees"] = int(options.pop("num_trees"))

    # Depth falls back to 2 when not supplied (an arbitrary default).
    build_dict["depth"] = int(options.pop("depth")) if "depth" in options else 2

    if "votes_required" in options:
        run_dict["votes_required"] = int(options.pop("votes_required"))

    if len(options) > 0:
        Log.Fatal("Unknown parameters: " + str(options))
        raise Exception("unknown parameters")

    with totalTimer:
        try:
            # Perform Approximate Nearest-Neighbors.
            index = mrpt.MRPTIndex(np.float32(train), **build_dict)
            index.build()

            approximate_neighbors = np.zeros((len(queryData), k))
            for i in range(len(queryData)):
                approximate_neighbors[i] = index.ann(
                    np.float32(queryData[i]), k, **run_dict)
        except Exception as e:
            # Still signal failure with -1, but leave a trace of the cause
            # instead of discarding it silently.
            Log.Info("MRPT search failed: " + str(e), self.verbose)
            return -1

    return totalTimer.ElapsedTime()
def metric(self):
    """Measure the time to build an MRPT index and query every test point.

    The index is created from ``self.data[0]`` (cast to ``float32``) and
    built with ``self.build_dict``; every entry of ``self.data[1]`` is then
    queried for ``self.k`` neighbours using ``self.run_dict``.

    Returns:
        A dict with a single key ``"runtime"`` holding the value of the
        timer's ``ElapsedTime()``.
    """
    stopwatch = Timer()
    with stopwatch:
        reference = self.data[0]
        queries = self.data[1]
        n_neighbors = self.k

        index = mrpt.MRPTIndex(np.float32(reference))
        index.build(**self.build_dict)

        n_queries = len(queries)
        neighbors = np.zeros((n_queries, n_neighbors))
        for row in range(n_queries):
            point = np.float32(queries[row])
            neighbors[row] = index.ann(point, n_neighbors, **self.run_dict)

    return {"runtime": stopwatch.ElapsedTime()}
def RunAnnMrpt(q):
    """Time an MRPT approximate nearest-neighbour search and report via ``q``.

    Reads ``self`` (dataset paths, verbosity) and ``options`` (a
    command-line style string) from the enclosing scope. Recognised flags:
    ``-k`` neighbours (required), ``-n`` trees (required), ``-d`` tree depth
    (default 5) and ``-v`` votes required (default 4).

    Args:
        q: Object with a ``put`` method; receives the same value that is
            returned.

    Returns:
        The value of ``totalTimer.ElapsedTime()`` on success, or -1 when a
        required flag is missing, ``k`` is out of range, or building or
        querying the index raises.
    """
    totalTimer = Timer()

    # Load input dataset.
    Log.Info("Loading dataset", self.verbose)
    referenceData = np.genfromtxt(self.dataset[0], delimiter=',')
    queryData = np.genfromtxt(self.dataset[1], delimiter=',')
    # Only the training part is indexed; the second value is not needed.
    train, _ = SplitTrainData(self.dataset)

    # Get all the parameters. Raw strings keep "\d" a regex escape rather
    # than an (invalid) string escape.
    k_match = re.search(r"-k (\d+)", options)
    n_match = re.search(r"-n (\d+)", options)  # Number of trees.
    d_match = re.search(r"-d (\d+)", options)  # The tree depth.
    v_match = re.search(r"-v (\d+)", options)  # Number of votes_required.

    if not k_match:
        Log.Fatal("Required option: Number of furthest neighbors to find.")
        q.put(-1)
        return -1
    k = int(k_match.group(1))
    if k < 1 or k > referenceData.shape[0]:
        # Report the text the user supplied. The match object is kept
        # separate from the parsed int; previously k was rebound to an int
        # and k.group(1) raised AttributeError here.
        Log.Fatal("Invalid k: " + k_match.group(1) + "; must be greater than 0"
                  + " and less or equal than " + str(referenceData.shape[0]))
        q.put(-1)
        return -1

    if not n_match:
        Log.Fatal("Required option: Number of trees to build")
        q.put(-1)
        return -1
    n = int(n_match.group(1))

    d = int(d_match.group(1)) if d_match else 5
    v = int(v_match.group(1)) if v_match else 4

    with totalTimer:
        try:
            # Perform Approximate Nearest-Neighbors.
            index = mrpt.MRPTIndex(np.float32(train), depth=d, n_trees=n)
            index.build()

            approximate_neighbors = np.zeros((len(queryData), k))
            for i, query in enumerate(queryData):
                approximate_neighbors[i] = index.ann(
                    np.float32(query), k, votes_required=v)
        except Exception as e:
            Log.Info(e)
            q.put(-1)
            return -1

    # Named "elapsed" so the local does not shadow the time module.
    elapsed = totalTimer.ElapsedTime()
    q.put(elapsed)
    return elapsed
# Record the results of the Annoy run.
# NOTE(review): the code that computes these values is not visible in this
# fragment; the appends are reproduced unchanged.
reacll.append(annoyRecall)
algorithm.append('Annoy-trees-' + str(numTrees))
construciotnTimes.append(constructionTime)
searchTimes.append(searchTime)
avgdistances.append(avgDist)

# MRPT (multiple random projection trees) benchmark.
import mrpt

# Each pair is (target_recall, m). m is not used in the visible part of this
# fragment - NOTE(review): confirm whether it was meant to drive trees_max.
for a in [(0.5, 5), (0.6, 6), (0.8, 8), (0.9, 10)]:
    target_recall, m = a

    # Index construction, timed (includes the float32 conversion).
    startTime = time.perf_counter()
    index = mrpt.MRPTIndex(train.astype(np.float32))
    # Autotune for this iteration's recall target. A hard-coded 0.65 was
    # passed before, which made every iteration of the sweep build the same
    # index regardless of target_recall.
    index.build_autotune_sample(target_recall, k, trees_max=10)
    end_time = time.perf_counter()
    constructionTime = end_time - startTime

    mrtpquery = query.astype(np.float32)
    rez = []
    dist = []

    # Query phase, timed: one ann() call per query point, collecting both the
    # neighbour indices and their distances.
    startTime = time.perf_counter()
    for q in mrtpquery:
        res, d = index.ann(q, return_distances=True)
        rez.append(res)
        dist.append(d)
    end_time = time.perf_counter()
# Generate synthetic test data
k = 10
n_queries = 100
# Array dimensions must be integers: np.random.rand(1e5, 5) passes a float
# and is rejected by NumPy with a TypeError.
n_data = 100000
data = np.dot(np.random.rand(n_data, 5), np.random.rand(5, 100)).astype('float32')
queries = np.dot(np.random.rand(n_queries, 5), np.random.rand(5, 100)).astype('float32')

# Solve exact nearest neighbors with standard methods from scipy and numpy for reference
exact_search_time = time()
exact_neighbors = np.zeros((n_queries, k))
for i, query in enumerate(queries):
    # Distances from this query to every data point, sorted ascending; keep
    # the indices of the k closest.
    exact_neighbors[i] = np.argsort(cdist([query], data))[0, :k]
exact_search_time = time() - exact_search_time

# Offline phase: Indexing the data. This might take some time.
indexing_time = time()
index = mrpt.MRPTIndex(data, depth=5, n_trees=100)
index.build()
indexing_time = time() - indexing_time

# Online phase: Finding nearest neighbors stupendously fast.
approximate_search_time = time()
approximate_neighbors = np.zeros((n_queries, k))
for i, query in enumerate(queries):
    approximate_neighbors[i] = index.ann(query, k, votes_required=4)
approximate_search_time = time() - approximate_search_time

# Print some stats
print('Indexing time: %1.3f seconds' % indexing_time)
print('%d approximate queries time: %1.3f seconds' % (n_queries, approximate_search_time))
print('%d exact queries time: %1.3f seconds' % (n_queries, exact_search_time))
def fit(self, X):
    """Create an MRPT index over ``X`` and build it.

    The index is constructed with ``self._depth`` and ``self._n_trees`` and
    stored on ``self._index`` before ``build()`` is called on it.
    """
    index_params = {"depth": self._depth, "n_trees": self._n_trees}
    self._index = mrpt.MRPTIndex(X, **index_params)
    self._index.build()