def search(self, queries, topk=None, **kwargs):
    """
    Find the `topk` nearest neighbors of each query by asymmetric distance
    computation (ADC) over the stored PQ codes.
    """
    nq = queries.shape[0]
    dsub = self.encoder.ecdat['dsub']
    nsubq = self.encoder.ecdat['nsubq']
    ksub = self.encoder.ecdat['ksub']
    centroids = self.encoder.ecdat['centroids']

    distab = np.zeros((nsubq, ksub), np.single)
    dis = np.ones((nq, topk), np.single) * np.inf
    ids = np.ones((nq, topk), np.int32) * -1

    profiler = Profiler()
    interval = 100 if nq >= 100 else 10
    time_total = 0.0    # total time for all queries
    logging.info('Start Querying ...')
    for qry_id in xrange(nq):
        profiler.start("distab")    # time for computing distance table
        # pre-compute the table of squared distances to the sub-centroids
        for qnt_id in range(nsubq):
            vsub = queries[qry_id:qry_id + 1,
                           qnt_id * dsub:(qnt_id + 1) * dsub]
            distab[qnt_id:qnt_id + 1, :] = distFunc['euclidean'](
                centroids[qnt_id], vsub)
        profiler.end()

        profiler.start("distance")  # time for computing the distances
        # sum the tabulated distances to construct the distance estimators
        idsquerybase, disquerybase = self.sumidxtab(distab)
        profiler.end()

        profiler.start("knn")       # time for finding the kNN
        realk = min(disquerybase.shape[0], topk)
        cur_ids = pq_knn(disquerybase, realk)
        profiler.end()

        profiler.start("result")    # time for getting the final result
        # only the first `realk` slots are filled when the database holds
        # fewer than `topk` items
        ids[qry_id, :realk] = idsquerybase[cur_ids]
        dis[qry_id, :realk] = disquerybase[cur_ids]
        profiler.end()

        if (qry_id + 1) % interval == 0:
            time_total += profiler.sum_overall()
            logging.info('\t%d/%d: %.3fms per query' % (
                qry_id + 1, nq, profiler.sum_average() * 1000))
            logging.info("\t\t%s" % profiler.str_average())
            profiler.reset()
    logging.info('Querying Finished!')
    time_total += profiler.sum_overall()
    logging.info("Average querying time: %.3fms" % (time_total * 1000 / nq))
    return ids, dis
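# A minimal standalone sketch of the lookup that `self.sumidxtab` is assumed to
# perform: given the PQ codes of the database vectors and the per-subquantizer
# distance table `distab`, the approximate squared distance from a database
# vector to the query is the sum of the table entries selected by its codes.
# The helper name `adc_distances` and the `codes` layout (one code per
# subquantizer and database vector) are illustrative assumptions; the real
# method also returns the corresponding ids.
import numpy as np


def adc_distances(codes, distab):
    """
    codes:  (ndb, nsubq) PQ codes of the database vectors.
    distab: (nsubq, ksub) squared distances from the query sub-vectors to the
            sub-centroids.
    Returns an (ndb,) array of approximate squared distances.
    """
    ndb, nsubq = codes.shape
    dis = np.zeros(ndb, np.single)
    for qnt_id in range(nsubq):
        # look up the pre-computed distance between the query sub-vector and
        # the centroid chosen by each database vector in this sub-space
        dis += distab[qnt_id, codes[:, qnt_id]]
    return dis

# Usage sketch: `np.argsort(adc_distances(codes, distab))[:topk]` gives the
# ranking that `pq_knn` is assumed to produce on the summed table.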
def search(self, queries, topk=None, **kwargs):
    """
    Find the `topk` nearest neighbors with an inverted-file PQ index (IVFADC):
    each query is assigned to its `nn_coa` nearest coarse centroids, and ADC is
    performed on the residuals within those cells.
    """
    nn_coa = kwargs.get('nn_coa', 8)

    nq = queries.shape[0]
    dsub = self.encoder.ecdat['dsub']
    nsubq = self.encoder.ecdat['nsubq']
    ksub = self.encoder.ecdat['ksub']
    centroids = self.encoder.ecdat['centroids']
    coa_centroids = self.encoder.ecdat['coa_centroids']

    # pre-compute the squared L2 norms of the centroids for fast distance
    # evaluation
    centroids_l2norm = []
    for i in xrange(nsubq):
        centroids_l2norm.append((centroids[i] ** 2).sum(1))
    coa_centroids_l2norm = (coa_centroids ** 2).sum(1)

    distab = np.zeros((nsubq, ksub), np.single)
    dis = np.ones((nq, topk), np.single) * np.inf
    ids = np.ones((nq, topk), np.int32) * -1

    profiler = Profiler()
    interval = 100 if nq >= 100 else 10
    time_total = 0.0    # total time for all queries
    logging.info('Start Querying ...')
    for qry_id in xrange(nq):
        # `copy()` ensures the original queries are not modified
        query = queries[qry_id:qry_id + 1, :].copy()

        profiler.start("coa_knn")   # time for the coarse quantizer
        coa_dist = fast_euclidean(
            coa_centroids, query, coa_centroids_l2norm).reshape(-1)
        # profiler.end()
        # profiler.start("coa_knn_knn")
        coa_knn = pq_knn(coa_dist, nn_coa)
        profiler.end()

        profiler.start("distab+distance")
        # compute the residuals w.r.t. the `nn_coa` nearest coarse centroids
        query = query - coa_centroids[coa_knn, :]
        v_idsquerybase = []
        v_disquerybase = []
        for coa_idx in range(nn_coa):
            # pre-compute the table of squared distances to the sub-centroids
            for qnt_id in range(nsubq):
                vsub = query[coa_idx:coa_idx + 1,
                             qnt_id * dsub:(qnt_id + 1) * dsub]
                distab[qnt_id:qnt_id + 1, :] = fast_euclidean(
                    centroids[qnt_id], vsub, centroids_l2norm[qnt_id])
            # construct the distance estimators from the tabulated distances
            idsquerybase, disquerybase = self.sumidxtab(
                distab, coa_knn[coa_idx])
            v_idsquerybase.append(idsquerybase)
            v_disquerybase.append(disquerybase)
        idsquerybase = np.hstack(tuple(v_idsquerybase))
        disquerybase = np.hstack(tuple(v_disquerybase))
        profiler.end()

        profiler.start("knn")       # time for finding the kNN
        realk = min(disquerybase.shape[0], topk)
        cur_ids = pq_knn(disquerybase, realk)
        profiler.end()

        profiler.start("result")    # time for getting the final result
        ids[qry_id, :realk] = idsquerybase[cur_ids]
        dis[qry_id, :realk] = disquerybase[cur_ids]
        profiler.end()

        if (qry_id + 1) % interval == 0:
            time_total += profiler.sum_overall()
            logging.info('\t%d/%d: %.3fms per query' % (
                qry_id + 1, nq, profiler.sum_average() * 1000))
            logging.info("\t\t%s" % profiler.str_average())
            profiler.reset()
    logging.info('Querying Finished!')
    time_total += profiler.sum_overall()
    logging.info("Average querying time: %.3fms" % (time_total * 1000 / nq))
    return ids, dis
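# A minimal sketch of what the `fast_euclidean` helper used above is assumed to
# compute: squared Euclidean distances between one query row vector and a
# matrix of centroids, using the expansion ||c - q||^2 = ||c||^2 - 2 c.q +
# ||q||^2 with the pre-computed centroid norms, so each call costs only one
# matrix-vector product. The exact signature of the library helper is an
# assumption; `fast_euclidean_sketch` is a hypothetical name.
import numpy as np


def fast_euclidean_sketch(centroids, query, centroids_l2norm):
    """
    centroids:        (k, d) centroid matrix.
    query:            (1, d) query row vector.
    centroids_l2norm: (k,) pre-computed squared norms of the centroids.
    Returns a (1, k) array of squared distances.
    """
    # ||q||^2 is a per-query constant and does not change the ranking, but
    # keeping it makes the returned values true squared distances
    query_l2norm = (query ** 2).sum(1)           # shape (1,)
    cross = np.dot(query, centroids.T)           # shape (1, k)
    return (centroids_l2norm[np.newaxis, :]
            - 2.0 * cross
            + query_l2norm[:, np.newaxis])

# Pre-computing `centroids_l2norm` once outside the query loop is what lets the
# IVFADC search above evaluate the coarse-quantizer distances and all
# per-subquantizer distance tables without recomputing centroid norms per query.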