Exemplo n.º 1
0
Arquivo: vq.py Projeto: xdTin/hdidx
    def search(self, queries, topk=None, **kwargs):
        """Return the ``topk`` best-matching items for every query row.

        For each row of ``queries`` a table of distances between the row's
        sub-vectors and the encoder's centroids is computed,
        ``self.sumidxtab`` turns that table into per-item distance
        estimates, and ``pq_knn`` picks the best of them.

        Args:
            queries: 2-D array holding one query vector per row.
            topk: number of results per query.  Despite the ``None``
                default an integer is required, because the result arrays
                are allocated with ``topk`` columns.
            **kwargs: accepted for interface compatibility; not used here.

        Returns:
            Tuple ``(ids, dis)``: an int32 array of item ids and a float32
            array of the matching distance estimates, both of shape
            ``(nq, topk)``.  Slots that cannot be filled keep their initial
            padding (id ``-1``, distance ``inf``).
        """
        nq = queries.shape[0]

        dsub = self.encoder.ecdat['dsub']
        nsubq = self.encoder.ecdat['nsubq']
        ksub = self.encoder.ecdat['ksub']
        centroids = self.encoder.ecdat['centroids']

        # Scratch table reused by every query: one row per sub-quantizer,
        # one column per centroid.
        distab = np.zeros((nsubq, ksub), np.single)
        # Results start out as "nothing found" padding.
        dis = np.ones((nq, topk), np.single) * np.inf
        ids = np.ones((nq, topk), np.int32) * -1

        profiler = Profiler()
        interval = 100 if nq >= 100 else 10
        time_total = 0.0  # total time for all queries
        logging.info('Start Querying ...')
        for qry_id in range(nq):
            profiler.start("distab")  # time for computing distance table
            # pre-compute the table of squared distance to centroids
            for qnt_id in range(nsubq):
                vsub = queries[qry_id:qry_id + 1,
                               qnt_id * dsub:(qnt_id + 1) * dsub]
                distab[qnt_id:qnt_id + 1, :] = distFunc['euclidean'](
                    centroids[qnt_id], vsub)
            profiler.end()

            profiler.start("distance")  # time for computing the distances
            # add the tabulated distances to construct the distance estimators
            idsquerybase, disquerybase = self.sumidxtab(distab)
            profiler.end()

            profiler.start("knn")  # time for finding the kNN
            # There may be fewer candidates than requested results.
            realk = min(disquerybase.shape[0], topk)
            cur_ids = pq_knn(disquerybase, realk)
            profiler.end()

            profiler.start("result")  # time for getting final result
            # Write only the first ``realk`` slots: assigning to the whole
            # row fails (or silently broadcasts) when realk < topk.
            ids[qry_id, :realk] = idsquerybase[cur_ids]
            dis[qry_id, :realk] = disquerybase[cur_ids]
            profiler.end()

            if (qry_id + 1) % interval == 0:
                # Bank the elapsed time before the profiler is reset.
                time_total += profiler.sum_overall()
                logging.info('\t%d/%d: %.3fms per query',
                             qry_id + 1, nq, profiler.sum_average() * 1000)
                logging.info("\t\t%s", profiler.str_average())
                profiler.reset()
        logging.info('Querying Finished!')
        # Pick up the queries processed since the last reset.
        time_total += profiler.sum_overall()
        # An empty batch must not trigger a division by zero.
        avg_ms = time_total * 1000 / nq if nq else 0.0
        logging.info("Average querying time: %.3fms", avg_ms)

        return ids, dis
Exemplo n.º 2
0
    def search(self, queries, topk=None,  **kwargs):
        """Look up the ``topk`` best-matching items for each query vector.

        Every query row is split into ``nsubq`` consecutive slices of width
        ``dsub``; the distances from each slice to the corresponding
        centroids are tabulated, ``self.sumidxtab`` combines the table into
        per-item distance estimates, and ``pq_knn`` selects the best ones.

        Args:
            queries: 2-D array with one query vector per row.
            topk: number of results wanted per query.  An integer must be
                supplied even though the default is ``None``: the output
                arrays are created with ``topk`` columns.
            **kwargs: ignored by this implementation.

        Returns:
            ``(ids, dis)`` -- int32 item ids and float32 distance estimates,
            each shaped ``(nq, topk)``.  Unfilled slots remain ``-1`` and
            ``inf`` respectively.
        """
        nq = queries.shape[0]

        dsub = self.encoder.ecdat['dsub']
        nsubq = self.encoder.ecdat['nsubq']
        ksub = self.encoder.ecdat['ksub']
        centroids = self.encoder.ecdat['centroids']

        # Per-query scratch table, overwritten on each iteration.
        distab = np.zeros((nsubq, ksub), np.single)
        # Output buffers pre-filled with "no result" markers.
        dis = np.ones((nq, topk), np.single) * np.inf
        ids = np.ones((nq, topk), np.int32) * -1

        profiler = Profiler()
        interval = 100 if nq >= 100 else 10
        time_total = 0.0    # total time for all queries
        logging.info('Start Querying ...')
        for qry_id in range(nq):
            profiler.start("distab")    # time for computing distance table
            # pre-compute the table of squared distance to centroids
            for qnt_id in range(nsubq):
                lo = qnt_id * dsub
                vsub = queries[qry_id:qry_id + 1, lo:lo + dsub]
                distab[qnt_id:qnt_id + 1, :] = distFunc['euclidean'](
                    centroids[qnt_id], vsub)
            profiler.end()

            profiler.start("distance")  # time for computing the distances
            # add the tabulated distances to construct the distance estimators
            idsquerybase, disquerybase = self.sumidxtab(distab)
            profiler.end()

            profiler.start("knn")       # time for finding the kNN
            # Never ask for more neighbours than there are candidates.
            realk = min(disquerybase.shape[0], topk)
            cur_ids = pq_knn(disquerybase, realk)
            profiler.end()

            profiler.start("result")    # time for getting final result
            # Only ``realk`` values are available; a full-row assignment
            # would raise (or broadcast one value) whenever realk < topk.
            ids[qry_id, :realk] = idsquerybase[cur_ids]
            dis[qry_id, :realk] = disquerybase[cur_ids]
            profiler.end()

            if (qry_id + 1) % interval == 0:
                # Accumulate before reset() discards the timings.
                time_total += profiler.sum_overall()
                logging.info(
                    '\t%d/%d: %.3fms per query',
                    qry_id + 1, nq, profiler.sum_average() * 1000)
                logging.info("\t\t%s", profiler.str_average())
                profiler.reset()
        logging.info('Querying Finished!')
        # Add the timings gathered since the last periodic report.
        time_total += profiler.sum_overall()
        # Guard the average against an empty query batch (nq == 0).
        avg_ms = time_total * 1000 / nq if nq else 0.0
        logging.info("Average querying time: %.3fms", avg_ms)

        return ids, dis
Exemplo n.º 3
0
    def search(self, queries, topk=None, **kwargs):
        """Return the ``topk`` best-matching items for every query row.

        Each query is first compared against the coarse centroids and the
        ``nn_coa`` closest ones are kept.  For every kept coarse centroid
        the residual (query minus centroid) is compared against the
        sub-quantizer centroids, and ``self.sumidxtab`` converts the
        resulting table into distance estimates for that centroid's items.
        The candidates from all kept centroids are merged and ``pq_knn``
        selects the final results.

        Args:
            queries: 2-D array holding one query vector per row.
            topk: number of results per query.  Despite the ``None``
                default an integer is required, because the result arrays
                are allocated with ``topk`` columns.
            **kwargs: optional ``nn_coa`` (default 8), the number of coarse
                centroids to visit per query.  It is capped at the number
                of coarse centroids available.

        Returns:
            Tuple ``(ids, dis)``: an int32 array of item ids and a float32
            array of the matching distance estimates, both of shape
            ``(nq, topk)``.  Slots that cannot be filled keep their initial
            padding (id ``-1``, distance ``inf``).
        """
        nq = queries.shape[0]

        dsub = self.encoder.ecdat["dsub"]
        nsubq = self.encoder.ecdat["nsubq"]
        ksub = self.encoder.ecdat["ksub"]
        centroids = self.encoder.ecdat["centroids"]
        coa_centroids = self.encoder.ecdat["coa_centroids"]

        # Visiting more coarse centroids than exist would index past the
        # end of ``coa_knn`` below, so cap the request.
        nn_coa = min(kwargs.get("nn_coa", 8), coa_centroids.shape[0])

        # Squared norms are query-independent; compute them once up front.
        centroids_l2norm = [(centroids[i] ** 2).sum(1) for i in range(nsubq)]
        coa_centroids_l2norm = (coa_centroids ** 2).sum(1)

        # Scratch table reused for every (query, coarse centroid) pair.
        distab = np.zeros((nsubq, ksub), np.single)
        # Results start out as "nothing found" padding.
        dis = np.ones((nq, topk), np.single) * np.inf
        ids = np.ones((nq, topk), np.int32) * -1

        profiler = Profiler()
        interval = 100 if nq >= 100 else 10
        time_total = 0.0  # total time for all queries
        logging.info("Start Querying ...")
        for qry_id in range(nq):
            # Work on a copy so the caller's ``queries`` is never modified.
            query = queries[qry_id : qry_id + 1, :].copy()
            profiler.start("coa_knn")
            coa_dist = fast_euclidean(coa_centroids, query, coa_centroids_l2norm).reshape(-1)
            coa_knn = pq_knn(coa_dist, nn_coa)
            profiler.end()

            profiler.start("distab+distance")
            # Broadcasting yields one residual row per selected coarse
            # centroid.
            query = query - coa_centroids[coa_knn, :]
            v_idsquerybase = []
            v_disquerybase = []
            for coa_idx in range(nn_coa):
                # pre-compute the table of squared distance to centroids
                for qnt_id in range(nsubq):
                    vsub = query[coa_idx : coa_idx + 1, qnt_id * dsub : (qnt_id + 1) * dsub]
                    distab[qnt_id : qnt_id + 1, :] = fast_euclidean(centroids[qnt_id], vsub, centroids_l2norm[qnt_id])

                # construct the distance estimators from tabulated distances
                idsquerybase, disquerybase = self.sumidxtab(distab, coa_knn[coa_idx])
                v_idsquerybase.append(idsquerybase)
                v_disquerybase.append(disquerybase)

            # Merge the candidates gathered from all visited coarse cells.
            idsquerybase = np.hstack(tuple(v_idsquerybase))
            disquerybase = np.hstack(tuple(v_disquerybase))
            profiler.end()

            profiler.start("knn")  # time for finding the kNN
            # There may be fewer candidates than requested results.
            realk = min(disquerybase.shape[0], topk)
            cur_ids = pq_knn(disquerybase, realk)
            profiler.end()

            profiler.start("result")  # time for getting final result
            ids[qry_id, :realk] = idsquerybase[cur_ids]
            dis[qry_id, :realk] = disquerybase[cur_ids]
            profiler.end()

            if (qry_id + 1) % interval == 0:
                # Bank the elapsed time before the profiler is reset.
                time_total += profiler.sum_overall()
                logging.info("\t%d/%d: %.3fms per query", qry_id + 1, nq, profiler.sum_average() * 1000)
                logging.info("\t\t%s", profiler.str_average())
                profiler.reset()
        logging.info("Querying Finished!")
        # Pick up the queries processed since the last reset.
        time_total += profiler.sum_overall()
        # An empty batch must not trigger a division by zero.
        avg_ms = time_total * 1000 / nq if nq else 0.0
        logging.info("Average querying time: %.3fms", avg_ms)

        return ids, dis
Exemplo n.º 4
0
Arquivo: vq.py Projeto: xdTin/hdidx
    def search(self, queries, topk=None, **kwargs):
        """Look up the ``topk`` best-matching items for each query vector.

        The search runs in two stages per query: the ``nn_coa`` coarse
        centroids closest to the query are selected, then for each of them
        the residual (query minus that centroid) is tabulated against the
        sub-quantizer centroids and handed to ``self.sumidxtab`` to obtain
        distance estimates for the items of that coarse cell.  All
        candidates are concatenated and ``pq_knn`` picks the final results.

        Args:
            queries: 2-D array with one query vector per row.
            topk: number of results wanted per query.  An integer must be
                supplied even though the default is ``None``: the output
                arrays are created with ``topk`` columns.
            **kwargs: may contain ``nn_coa`` (default 8), how many coarse
                centroids to visit per query; values above the number of
                coarse centroids are capped to that number.

        Returns:
            ``(ids, dis)`` -- int32 item ids and float32 distance estimates,
            each shaped ``(nq, topk)``.  Unfilled slots remain ``-1`` and
            ``inf`` respectively.
        """
        nq = queries.shape[0]

        dsub = self.encoder.ecdat['dsub']
        nsubq = self.encoder.ecdat['nsubq']
        ksub = self.encoder.ecdat['ksub']
        centroids = self.encoder.ecdat['centroids']
        coa_centroids = self.encoder.ecdat['coa_centroids']

        # Cap the number of visited cells: asking for more coarse centroids
        # than exist would run past the end of ``coa_knn`` further down.
        nn_coa = min(kwargs.get('nn_coa', 8), coa_centroids.shape[0])

        # Squared norms do not depend on the query; compute them only once.
        centroids_l2norm = [(centroids[i]**2).sum(1) for i in range(nsubq)]
        coa_centroids_l2norm = (coa_centroids**2).sum(1)

        # Per-cell scratch table, overwritten on each inner iteration.
        distab = np.zeros((nsubq, ksub), np.single)
        # Output buffers pre-filled with "no result" markers.
        dis = np.ones((nq, topk), np.single) * np.inf
        ids = np.ones((nq, topk), np.int32) * -1

        profiler = Profiler()
        interval = 100 if nq >= 100 else 10
        time_total = 0.0  # total time for all queries
        logging.info('Start Querying ...')
        for qry_id in range(nq):
            # Copy the row so the caller's ``queries`` array stays intact.
            query = queries[qry_id:qry_id + 1, :].copy()
            profiler.start("coa_knn")
            coa_dist = fast_euclidean(coa_centroids, query,
                                      coa_centroids_l2norm).reshape(-1)
            coa_knn = pq_knn(coa_dist, nn_coa)
            profiler.end()

            profiler.start("distab+distance")
            # One residual row per selected coarse centroid (broadcast).
            query = query - coa_centroids[coa_knn, :]
            v_idsquerybase = []
            v_disquerybase = []
            for coa_idx in range(nn_coa):
                # pre-compute the table of squared distance to centroids
                for qnt_id in range(nsubq):
                    vsub = query[coa_idx:coa_idx + 1,
                                 qnt_id * dsub:(qnt_id + 1) * dsub]
                    distab[qnt_id:qnt_id + 1, :] = fast_euclidean(
                        centroids[qnt_id], vsub, centroids_l2norm[qnt_id])

                # construct the distance estimators from tabulated distances
                idsquerybase, disquerybase = self.sumidxtab(
                    distab, coa_knn[coa_idx])
                v_idsquerybase.append(idsquerybase)
                v_disquerybase.append(disquerybase)

            # Concatenate the candidates of every visited coarse cell.
            idsquerybase = np.hstack(tuple(v_idsquerybase))
            disquerybase = np.hstack(tuple(v_disquerybase))
            profiler.end()

            profiler.start("knn")  # time for finding the kNN
            # Never ask for more neighbours than there are candidates.
            realk = min(disquerybase.shape[0], topk)
            cur_ids = pq_knn(disquerybase, realk)
            profiler.end()

            profiler.start("result")  # time for getting final result
            ids[qry_id, :realk] = idsquerybase[cur_ids]
            dis[qry_id, :realk] = disquerybase[cur_ids]
            profiler.end()

            if (qry_id + 1) % interval == 0:
                # Accumulate before reset() discards the timings.
                time_total += profiler.sum_overall()
                logging.info('\t%d/%d: %.3fms per query',
                             qry_id + 1, nq, profiler.sum_average() * 1000)
                logging.info("\t\t%s", profiler.str_average())
                profiler.reset()
        logging.info('Querying Finished!')
        # Add the timings gathered since the last periodic report.
        time_total += profiler.sum_overall()
        # Guard the average against an empty query batch (nq == 0).
        avg_ms = time_total * 1000 / nq if nq else 0.0
        logging.info("Average querying time: %.3fms", avg_ms)

        return ids, dis