class LOPQRetriever(BaseRetriever):
    """Approximate retriever that keeps LOPQ codes in an in-memory ``LOPQSearcher``.

    Entries are stored in ``self.entries`` in insertion order; the position of
    an entry in that list is the id under which its codes are registered with
    the searcher, so search hits can be mapped back with ``self.entries[hit.id]``.
    """

    def __init__(self, name, approximator):
        """Load the approximator's model and build an empty searcher around it.

        :param name: retriever name, forwarded to ``BaseRetriever``.
        :param approximator: object providing ``load()``, ``model`` and
            ``get_pca_vector(vector)``.
        """
        super(LOPQRetriever, self).__init__(name=name, approximator=approximator, algorithm="LOPQ")
        self.approximate = True
        self.name = name
        self.loaded_entries = {}
        self.entries = []
        self.support_batching = False
        self.approximator = approximator
        # The model must be loaded before the searcher is constructed from it.
        self.approximator.load()
        self.searcher = LOPQSearcher(model=self.approximator.model)

    def load_index(self, numpy_matrix=None, entries=None):
        """Register the codes of ``entries`` with the searcher.

        :param numpy_matrix: unused; kept for signature compatibility.
        :param entries: iterable of mappings whose ``'codes'`` item is a pair of
            sequences (presumably the coarse and fine LOPQ codes - confirm
            against the producer). ``None`` (the default) is a no-op.
        """
        # The default used to crash with TypeError in enumerate(None).
        if entries is None:
            return
        codes = []
        ids = []
        # New ids continue after the entries that are already indexed.
        last_index = len(self.entries)
        for i, e in enumerate(entries):
            codes.append((tuple(e['codes'][0]), tuple(e['codes'][1])))
            ids.append(i + last_index)
            self.entries.append(e)
        self.searcher.add_codes(codes, ids)

    def nearest(self, vector=None, n=12):
        """Return the stored entries matching ``vector``, in searcher order.

        :param vector: query vector; converted with
            ``approximator.get_pca_vector`` before searching.
        :param n: forwarded to the searcher as ``quota``.
            NOTE(review): the result list is not truncated here - confirm
            whether the searcher may return more than ``n`` hits.
        :returns: list of entries previously passed to ``load_index``.
        """
        pca_vec = self.approximator.get_pca_vector(vector)
        # The second element of the search result (``visited``) is not needed.
        results_indexes, _visited = self.searcher.search(pca_vec, quota=n)
        return [self.entries[r.id] for r in results_indexes]
class LOPQRetriever(object):
    """Deprecated and soon to be removed.

    Keeps LOPQ codes in an in-memory ``LOPQSearcher`` and remembers, for every
    indexed code, a small descriptor dict (``id``, ``type``, ``video``) stored
    at the list position that doubles as the searcher id.
    """

    def __init__(self, name, approximator):
        """Load the approximator's model and wrap it in a fresh searcher."""
        self.name = name
        self.approximate = True
        self.support_batching = False
        self.loaded_entries = set()
        self.entries = []
        self.approximator = approximator
        # The model has to be loaded before a searcher can be built from it.
        approximator.load()
        self.searcher = LOPQSearcher(model=approximator.model)

    def add_entries(self, entries, video_id, entry_type):
        """Index ``entries`` and record a descriptor for each of them.

        Each entry is indexed as ``(entry_id, (codes_a, codes_b))``; both code
        sequences are converted to tuples before being handed to the searcher.
        """
        code_pairs = []
        searcher_ids = []
        # Numbering continues after whatever has been indexed so far.
        for position, item in enumerate(entries, len(self.entries)):
            first, second = item[1][0], item[1][1]
            code_pairs.append((tuple(first), tuple(second)))
            searcher_ids.append(position)
            self.entries.append({"id": item[0], "type": entry_type, "video": video_id})
        self.searcher.add_codes(code_pairs, searcher_ids)

    def nearest(self, vector=None, n=12):
        """Return the descriptors of the hits the searcher reports for ``vector``.

        ``n`` is forwarded to the searcher as ``quota``.
        """
        query = self.approximator.get_pca_vector(vector)
        hits, _visited = self.searcher.search(query, quota=n)
        return [self.entries[hit.id] for hit in hits]
class ApproximateIndexer(object):
    """Fit, persist and query an LOPQ model on PCA-reduced vectors.

    ``fit`` learns a two-stage preprocessing (a 256-component ``PCA`` followed
    by the rotation ``P`` / offset ``mu`` returned by ``pca``) and fits the
    LOPQ model on the preprocessed data; ``transform`` applies the same
    preprocessing to new vectors.

    NOTE(review): ``load`` restores only the LOPQ model. ``pca_reduction``,
    ``P`` and ``mu`` exist only after ``fit``, and ``searcher`` only after
    ``fit_model``.
    """

    def __init__(self, index_name, model_path, lmdb_path, V=16, M=16):
        """Create an unfitted indexer.

        :param index_name: name stored on the instance.
        :param model_path: path used by ``load`` / ``fit_model`` for the model proto.
        :param lmdb_path: stored on the instance; not used by the in-memory searcher.
        :param V: first positional argument to ``LOPQModel``.
        :param M: second positional argument to ``LOPQModel``.
        """
        self.model = LOPQModel(V, M)
        self.index_name = index_name
        self.searcher = None
        self.model_path = model_path
        self.lmdb_path = lmdb_path

    def load(self):
        """Load the LOPQ model from ``self.model_path``."""
        self.model.load_proto(self.model_path)

    def fit(self, train):
        """Learn the preprocessing from ``train`` and fit the LOPQ model on it.

        :param train: training matrix accepted by ``PCA.fit``.
        """
        print(train.shape)
        self.pca_reduction = PCA(n_components=256)
        self.pca_reduction.fit(train)
        train = self.pca_reduction.transform(train)
        self.P, self.mu = pca(train)
        # Apply exactly the preprocessing that transform() applies (offset
        # removal, then rotation) so the model is fitted on the same
        # representation that is later indexed and queried.
        train = np.dot(train - self.mu, self.P)
        print(train.shape)
        self.model.fit(train, n_init=1)

    def transform(self, test):
        """Apply the preprocessing learned by ``fit`` to ``test`` and return the result."""
        print(test.shape)
        test = self.pca_reduction.transform(test)
        test = test - self.mu
        test = np.dot(test, self.P)
        print(test.shape)
        return test

    def fit_model(self, train):
        """Fit on ``train``, export the model to ``self.model_path`` and create the searcher."""
        self.fit(train)
        self.model.export_proto(self.model_path)
        # In-memory searcher; the LMDB-backed alternative would be
        # LOPQSearcherLMDB(self.model, self.lmdb_path).
        self.searcher = LOPQSearcher(self.model)

    def experiment(self, data):
        """Hold out 10% of ``data``, fit on the rest and print the recall.

        :param data: matrix passed to ``train_test_split``.
        """
        train, test = train_test_split(data, test_size=0.1)
        print('{} {} {}'.format(data.shape, train.shape, test.shape))
        # Neighbours are computed on the raw vectors, before any preprocessing.
        nns = compute_all_neighbors(test, train)
        self.fit_model(train)
        self.searcher.add_data(self.transform(train))
        recall, _ = get_recall(self.searcher, self.transform(test), nns)
        print('Recall (V={}, M={}, subquants={}): {}'.format(
            self.model.V, self.model.M, self.model.subquantizer_clusters, str(recall)))

    def add_data(self, data):
        """Add ``data`` to the searcher (requires ``fit_model`` to have been called)."""
        self.searcher.add_data(data)

    def search(self, x):
        """Return the searcher's result for ``x`` with a fixed quota of 100."""
        return self.searcher.search(x, quota=100)
class LOPQ(BaseANN):
    """``BaseANN`` wrapper around an LOPQ model and its in-memory searcher."""

    def __init__(self, v):
        """Create an unfitted model with ``V=v`` and a fixed ``M`` of 4."""
        m_param = 4
        self.name = 'LOPQ(v={}, m={})'.format(v, m_param)
        self._m = m_param
        self._model = LOPQModel(V=v, M=m_param)
        # Created by fit(); querying before fitting is not supported.
        self._searcher = None

    def fit(self, X):
        """Fit the model on ``X`` (cast to float32) and index the same data."""
        data = numpy.array(X).astype(numpy.float32)
        self._model.fit(data)
        searcher = LOPQSearcher(self._model)
        self._searcher = searcher
        searcher.add_data(data)

    def query(self, v, n):
        """Search for ``v`` (cast to float32) with a fixed quota of 100.

        ``n`` is accepted but not used; the searcher's result is returned
        unchanged.
        """
        return self._searcher.search(v.astype(numpy.float32), quota=100)
# Evaluation fragment: loads the annotation JSON, builds a per-query result
# dict when none was supplied, and prints the mean of the official mAP scores.
# Relies on names defined outside this fragment (annotation_path, results,
# names, global_features, searcher, gtobj, relevant_labels_mapping, task_name).
with open(annotation_path, 'r') as annotation_file:
    json_obj = json.load(annotation_file)

if results is None:
    # The keys of the annotation JSON are the query names.
    query_names = json_obj.keys()
    query_names = [str(query_name) for query_name in query_names]

    # Look up the first position of every query name in `names`.
    query_indexs = []
    for query_name in query_names:
        tmp = np.where(names == query_name)
        if len(tmp) != 0 and len(tmp[0]) != 0:
            query_indexs.append(tmp[0][0])
        else:
            # NOTE(review): a skipped name stays in query_names, so positions in
            # query_indexs / query_features no longer line up with the
            # enumerate(query_names) loop below.
            print('skip query: ', query_name)

    query_features = np.squeeze(global_features[query_indexs])
    # similarities = calculate_similarities(query_features, global_features)
    # NOTE(review): whether the two print calls belong inside this loop is not
    # recoverable from the collapsed source; nns is overwritten on every
    # iteration and is not used afterwards.
    for query_feature in query_features:
        nns = searcher.search(query_feature, quota=100)
        # NOTE(review): assumes the search result has a .shape attribute - confirm.
        print(nns.shape)
        print(nns[0])

    results = dict()
    for query_idx, query_name in enumerate(query_names):
        # NOTE(review): `similarities` is only assigned in the commented-out
        # line above; unless it is defined outside this fragment this raises
        # NameError.
        cur_sim = similarities[query_idx]
        # Map (index, score) pairs to {name: score}.
        query_result = dict(map(lambda v: (names[v[0]], v[1]), cur_sim))
        # A query must not be counted as its own result.
        del query_result[query_name]
        results[query_name] = query_result

mAPOffcial, precisions = evaluateOfficial(
    annotations=gtobj.annotations,
    results=results,
    relevant_labels=relevant_labels_mapping[task_name],
    dataset=gtobj.dataset,
    quiet=False)
print('{} mAPOffcial is {}'.format(task_name, np.mean(mAPOffcial)))
def main(new=True):
    """Compare an exact nearest-neighbour search with an LOPQ search.

    Builds (or reloads) a 3000 x 128 data set and an LOPQ model, searches for
    the neighbours of a fixed query vector with both methods, and prints the
    timings and how many of the LOPQ hits are among the exact top 10.

    :param new: when true, generate random data and fit a new model, saving
        both to ``data.npy`` / ``params.mat``; otherwise load them from those
        files.
    """
    # data: 3000 x 128dim
    if not new:
        # load data
        data = np.load('./data.npy')
    else:
        data = np.vstack((np.random.rand(1000, 128),
                          np.random.rand(1000, 128) + 1,
                          np.random.rand(1000, 128) - 1))
        print('make data')
        # save data
        np.save('data.npy', data)

    # The query whose nearest neighbours we want to know.
    x = np.ones(128) * 2

    print('naive implementation')
    start = time.time()
    dist = np.sum(np.power((data - x), 2), axis=1)
    res = np.argsort(dist)
    print(res[0:10])  # indices of the exact top 10
    print('{} s taken for naive NNsearch'.format(time.time() - start))

    if not new:
        # load model
        model = LOPQModel.load_mat('params.mat')
    else:
        # Define a model and fit it to data
        model = LOPQModel(V=3, M=2, subquantizer_clusters=64)
        start = time.time()
        model.fit(data)
        print('{} s taken for model fitting'.format(time.time() - start))
        # save model
        model.export_mat('params.mat')

    # Compute the LOPQ codes for a vector
    # if we define SC as subquantizer_clusters,
    # input vec(128dim); output: coarse codes(V, V), fine codes(SC, SC) because M = 2
    #
    #     for i in xrange(10):
    #         y = np.random.rand(128)
    #         code = model.predict(y)
    #         print 'output: ', code

    # Create a searcher to index data with the model
    searcher = LOPQSearcher(model)
    searcher.add_data(data)

    start = time.time()
    # Retrieve ranked nearest neighbors
    nns = searcher.search(x, quota=10)
    # nns[0] holds the hits and the first field of a hit is its id; slicing
    # avoids an IndexError when fewer than 10 hits come back.
    ans = [hit[0] for hit in nns[0][:10]]
    print(ans)
    print('{} s taken for prediction top 10'.format(time.time() - start))

    exact_top = res[0:10]
    count = sum(1 for element in ans if element in exact_top)
    # Spacing matches the original comma-separated print statement.
    print('accuracy:  {} / {}'.format(count, 10))