Exemplo n.º 1
0
def knn_faiss(database, queries, dim=300, k=5):
    """Return the k nearest database vectors for every query vector.

    Both inputs are converted to float32, the database is added to a flat
    L2 Faiss index that has been moved to GPU 0, and all queries are
    searched in a single call.

    Args:
        database: array of vectors in which neighbours are searched.
        queries: array of vectors whose neighbours are wanted.
        dim: dimensionality the index is created with.
        k: number of neighbours returned per query.

    Returns:
        dist: matrix of shape (queries.shape[0], k) holding the distances
            reported by the index for each query's neighbours.
        idxs: matrix of shape (queries.shape[0], k) holding the positions
            of those neighbours inside ``database``.

    NOTE(review): ``res`` is not defined in this function; presumably a
    module-level ``faiss.StandardGpuResources`` instance -- confirm.
    """
    database_f32 = database.astype('float32')
    queries_f32 = queries.astype('float32')

    # Flat index on GPU 0, built from a throw-away CPU index.
    gpu_index = faiss.index_cpu_to_gpu(res, 0, faiss.IndexFlatL2(dim))
    gpu_index.add(database_f32)

    return gpu_index.search(queries_f32, k)
Exemplo n.º 2
0
def search_against_fragment(fragment: NpArray,
                            test_vectors: NpArray) -> TwoNpArrays:
    """Search ``test_vectors`` against the vectors stored in one fragment.

    ``fragment`` is handed to ``np.load``; the loaded object must expose an
    ``"images"`` entry (names) and a ``"features"`` entry (vectors). The
    vectors are added to a flat L2 index on GPU 0 and the ``K`` nearest
    entries are looked up for every test vector.

    Returns:
        A pair ``(names, distances)``: the ``"images"`` entries selected by
        the neighbour positions, and the distances from the search.

    NOTE(review): ``d``, ``K`` and ``res`` are read from the enclosing
    module; presumably the vector dimension, the neighbour count and a
    shared ``faiss.StandardGpuResources`` -- confirm.
    """
    # Flat CPU index, immediately cloned to GPU 0.
    gpu_index_flat = faiss.index_cpu_to_gpu(res, 0, faiss.IndexFlatL2(d))

    print("loading", fragment)
    archive = np.load(fragment)
    names = archive["images"]
    vectors = archive["features"]
    print("index_names:", names.shape)
    print("vectors shape", vectors.shape)

    gpu_index_flat.add(vectors)
    print("total size of index:", gpu_index_flat.ntotal)

    print("searching")
    distances, positions = gpu_index_flat.search(test_vectors, K)
    # Translate neighbour positions into the stored names.
    matched_names = names[positions]
    print(matched_names[:10, :5])
    print(distances[:10, :5])
    return matched_names, distances
Exemplo n.º 3
0
def load_globally(word_vectors_fpath: str, faiss_gpu: bool):
    """Load word vectors and build an inner-product index in module globals.

    Paths ending in ``.vec.gz`` are read with
    ``KeyedVectors.load_word2vec_format`` (text format, decoding errors
    ignored); any other path goes through ``KeyedVectors.load``. After
    ``init_sims(replace=True)`` the ``syn0norm`` matrix is added to a flat
    inner-product Faiss index.

    Args:
        word_vectors_fpath: location of the word vectors.
        faiss_gpu: when true the index is moved to device ``GPU_DEVICE``.

    Returns:
        The loaded vectors (also stored in the global ``wv``; the index is
        stored in the global ``index_faiss``).
    """
    global wv
    global index_faiss

    print("Loading word vectors from:", word_vectors_fpath)
    started = time()
    if not word_vectors_fpath.endswith(".vec.gz"):
        wv = KeyedVectors.load(word_vectors_fpath)
    else:
        wv = KeyedVectors.load_word2vec_format(word_vectors_fpath,
                                               binary=False,
                                               unicode_errors="ignore")
    print("Loaded in {} sec.".format(time() - started))

    wv.init_sims(replace=True)

    if faiss_gpu:
        gpu_resources = faiss.StandardGpuResources()  # single GPU
        cpu_index = faiss.IndexFlatIP(wv.vector_size)
        index_faiss = faiss.index_cpu_to_gpu(gpu_resources, GPU_DEVICE,
                                             cpu_index)
    else:
        index_faiss = faiss.IndexFlatIP(wv.vector_size)

    # Both variants are filled with the same vectors.
    index_faiss.add(wv.syn0norm)
    return wv
Exemplo n.º 4
0
    def __init__(self,
                 target,
                 nprobe=128,
                 num_gpu=None,
                 index_factory_str=None,
                 verbose=False,
                 mode='proxy',
                 using_gpu=True):
        """Build, train and fill a Faiss index over ``target``.

        Args:
            target: 2-D array of shape (size, dim); row ``i`` is stored
                under id ``i``.
            nprobe: value assigned to ``nprobe`` on the CPU index before it
                is cloned.
            num_gpu: number of sub-indexes to create. ``None``, or a value
                above the number of devices listed in
                ``CUDA_VISIBLE_DEVICES``, falls back to that number.
            index_factory_str: Faiss factory string. When ``None`` an
                ``"IVF{nlist},PQ32"`` string is derived from ``size``.
            verbose: assigned to the ``verbose`` attribute of the final
                index.
            mode: only ``'proxy'`` is implemented.
            using_gpu: when false, the CPU index itself is registered as
                every sub-index instead of a GPU clone.

        Raises:
            RuntimeError: ``CUDA_VISIBLE_DEVICES`` is unset or lists no
                device.
            NotImplementedError: ``mode == 'shard'``.
            KeyError: any other unknown ``mode``.
        """
        self._res_list = []

        # ``"".split(",")`` yields ``[""]`` (length 1), so empty entries
        # have to be dropped for the "no GPU" check below to ever trigger;
        # ``.get`` keeps an unset variable from surfacing as a KeyError.
        visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES', '')
        found_gpu = len(
            [dev for dev in visible_devices.split(",") if dev.strip()])
        if found_gpu == 0:
            raise RuntimeError(
                "No GPU found. Please export CUDA_VISIBLE_DEVICES")
        if num_gpu is None or num_gpu > found_gpu:
            num_gpu = found_gpu
        print('[faiss gpu] #GPU: {}'.format(num_gpu))

        size, dim = target.shape
        assert size > 0, "size: {}".format(size)
        if index_factory_str is None:
            index_factory_str = "IVF{},PQ{}".format(
                min(8192, 16 * round(np.sqrt(size))), 32)
        cpu_index = faiss.index_factory(dim, index_factory_str)
        cpu_index.nprobe = nprobe

        if mode == 'proxy':
            co = faiss.GpuClonerOptions()
            co.useFloat16 = True
            co.usePrecomputed = False

            index = faiss.IndexProxy()
            for i in range(num_gpu):
                res = faiss.StandardGpuResources()
                # Keep the resources referenced for the lifetime of self.
                self._res_list.append(res)
                sub_index = faiss.index_cpu_to_gpu(
                    res, i, cpu_index, co) if using_gpu else cpu_index
                index.addIndex(sub_index)
        elif mode == 'shard':
            raise NotImplementedError
        else:
            raise KeyError("Unknown index mode")

        index = faiss.IndexIDMap(index)
        index.verbose = verbose

        # training
        if not index.is_trained:
            # nlist (taken from the "IVF<nlist>" component) decides how
            # many samples are drawn for training; it is only parsed when
            # training is actually required.
            nlist = int([
                item for item in index_factory_str.split(",")
                if 'IVF' in item
            ][0].replace("IVF", ""))
            indexes_sample_for_train = np.random.randint(0, size, nlist * 256)
            index.train(target[indexes_sample_for_train])

        # add with ids
        target_ids = np.arange(0, size)
        index.add_with_ids(target, target_ids)
        self.index = index
Exemplo n.º 5
0
    def graph_init(self, X):
        """Build the initial k-NN affinity graph of ``X`` and its Laplacian.

        The ``self.m`` nearest neighbours of every row of ``X`` are found
        with a flat L2 Faiss index on GPU 0 and stored in ``self.D``
        (distances) and ``self.I`` (indices). For each row, neighbour
        columns ``1 .. self.k + 1`` are turned into affinities
        ``(d_last - d_j) / (k * d_last - sum(d_0 .. d_{k-1}))``, where
        ``d_last`` is the distance in the last of those columns.

        The denominator uses the first ``k`` of these distances, the same
        slice as the ``rr`` term, so the ``k + 1`` affinities of a row sum
        to one (up to the epsilon guard). The previous ``di[1:k]`` slice
        left out ``d_0``.

        Side effects: sets ``self.D``, ``self.I``, ``self.r`` and
        ``self.Lambda``; prints the distance slice of row 0.

        NOTE(review): requires ``self.m >= self.k + 2``; column 0 of the
        search result is skipped, presumably because it is the point
        itself -- confirm.

        Args:
            X: 2-D array with one sample per row.

        Returns:
            ``(L0, A0)``: the Laplacian ``D0 - A0`` and the symmetrised
            affinity matrix ``A0 = (A + A.T) / 2``.
        """
        d = X.shape[1]
        res = faiss.StandardGpuResources()
        index_flat = faiss.IndexFlatL2(d)
        gpu_index_flat = faiss.index_cpu_to_gpu(res, 0, index_flat)
        gpu_index_flat.add(X)
        N = X.shape[0]
        self.D, self.I = gpu_index_flat.search(X, self.m)

        k = self.k
        eps = 2.2204e-16  # guards against a zero denominator
        rr = np.zeros(N)
        A = np.zeros((N, N))
        for i in range(N):
            di = np.array(self.D[i, 1:k + 2])
            if i == 0:
                print(di)
            # k * d_last - sum of the k closest distances; shared by the
            # regularisation estimate and the affinity normalisation.
            gap = k * di[k] - sum(di[0:k])
            rr[i] = 0.5 * gap
            neighbour_ids = self.I[i, 1:k + 2]
            A[i, neighbour_ids] = (di[k] - di) / (gap + eps)
        self.r = sum(rr) / len(rr)
        self.Lambda = self.r
        A0 = (A + A.transpose()) / 2
        # Degree matrix: identity scaled column-wise by the row sums of A0.
        D0 = np.eye(N) * A0.sum(axis=1)
        L0 = D0 - A0

        return L0, A0
def doRetrieval(Q, X, k=100, verbose=True):
    """Find the ``k`` nearest rows of ``X`` for every row of ``Q``.

    ``X`` is added to a flat L2 Faiss index on GPU 0, which is then
    searched with ``Q``. Progress messages and the elapsed search time are
    printed when ``verbose`` is true.

    Args:
        Q: query matrix.
        X: database matrix; its second dimension sets the index dimension.
        k: number of neighbours per query.
        verbose: print progress information.

    Returns:
        ``(D, I)`` exactly as returned by the index search.
    """
    def report(message):
        # Single place for the verbosity check.
        if verbose:
            print(message)

    res = faiss.StandardGpuResources()
    report("creating indexFlatl2")
    cpu_index = faiss.IndexFlatL2(X.shape[1])
    report("put to gpu")
    index = faiss.index_cpu_to_gpu(res, 0, cpu_index)
    report("adding index to faiss")

    index.add(X)
    if verbose:
        print("num of index: " + str(index.ntotal))
    report("searching")

    started = time.time()
    D, I = index.search(Q, k)
    report('Computing dot product')
    elapse = time.time() - started
    report(elapse)

    return D, I
Exemplo n.º 7
0
    def __init__(self, dim=10, nlist=100, gpu=-1):
        """Create an IVF-Flat L2 index, optionally placed on GPU.

        Moving the index to GPU is best effort: if it fails, the CPU index
        is kept and a ``RuntimeWarning`` is emitted instead of the failure
        being silently discarded.

        Args:
            dim: dimensionality of the indexed vectors.
            nlist: number of cluster centres (inverted lists).
            gpu: a negative value keeps the index on CPU; ``0`` moves it to
                GPU 0; any positive value clones it to all GPUs.
        """
        self.dim = dim
        self.nlist = nlist  # number of cluster centres
        quantizer = faiss.IndexFlatL2(dim)  # coarse quantizer

        # Faiss defines two similarity metrics: faiss.METRIC_L2 (Euclidean
        # distance) and faiss.METRIC_INNER_PRODUCT (inner product).
        # METRIC_L2 is requested explicitly here.
        self.index = faiss.IndexIVFFlat(quantizer, dim, self.nlist,
                                        faiss.METRIC_L2)

        try:
            if gpu >= 0:
                if gpu == 0:
                    # use a single GPU
                    res = faiss.StandardGpuResources()
                    gpu_index = faiss.index_cpu_to_gpu(res, 0, self.index)
                else:
                    gpu_index = faiss.index_cpu_to_all_gpus(self.index)

                self.index = gpu_index
        except Exception as exc:
            # Falling back to CPU is intended, but the failure is reported
            # rather than swallowed. Catching Exception instead of using a
            # bare except lets KeyboardInterrupt/SystemExit propagate.
            import warnings
            warnings.warn(
                "Could not move the Faiss index to GPU ({!r}); "
                "continuing with the CPU index.".format(exc),
                RuntimeWarning)

        # data
        self.xb = None
Exemplo n.º 8
0
def KNN(query, gallery, K=10, mode='ones'):
    '''Replace each query by a weighted average of its K nearest gallery rows.

    Query and gallery are passed through ``L2norm``; the ``K`` nearest
    gallery rows of every query are found with a flat L2 Faiss index (on
    GPU 0 when ``torch.cuda.is_available()``, otherwise on CPU) and
    averaged with rank-dependent weights.

    Args:
        query: 2-D array, one query per row.
        gallery: 2-D array, one gallery item per row.
        K: number of neighbours to average.
        mode: weighting scheme over neighbour rank ``0 .. K-1``:
            ``'lin'``  -> ``(K - rank) / K``,
            ``'exp'``  -> ``exp(-rank)``,
            ``'ones'`` -> uniform weights.

    Returns:
        float32 array with one averaged vector per query.

    Raises:
        ValueError: ``mode`` is not one of the supported schemes.
    '''
    # Resolve the weights first so that a bad ``mode`` fails immediately
    # with a clear error instead of an unbound-variable error after the
    # search has already run.
    if mode == 'lin':
        weights = (float(K) - np.arange(0, K)) / float(K)
    elif mode == 'exp':
        weights = np.exp(-np.arange(0, K))
    elif mode == 'ones':
        weights = np.ones(K)
    else:
        raise ValueError(
            "Unknown mode {!r}; expected 'lin', 'exp' or 'ones'".format(mode))
    weights_sum = weights.sum()

    d = query.shape[1]
    query = L2norm(query)
    gallery = L2norm(gallery)

    index = faiss.IndexFlatL2(d)
    if torch.cuda.is_available():
        # GPU resources are created only on the GPU path, so the CPU
        # fallback also works where the GPU API is unavailable.
        res = faiss.StandardGpuResources()
        index = faiss.index_cpu_to_gpu(res, 0, index)
    index.add(gallery)
    _, I = index.search(query, K)

    new_queries = [
        np.dot(weights, gallery[I[i, :K], :]) / weights_sum
        for i in range(len(query))
    ]
    return np.asarray(new_queries, dtype=np.float32)
Exemplo n.º 9
0
def detect_border_gpu(pcs):
    """Return indices of points lying close to points of another cloud.

    All clouds in ``pcs`` are concatenated (as float32) and every point is
    labelled with the position of its cloud in ``pcs``. A random tenth of
    the points (drawn without replacement) is placed in a flat L2 Faiss
    index on GPU 0. Every point is then queried in batches; a point is a
    border point when the sampled neighbours whose reported distance is
    below the threshold carry more than one distinct label.

    Args:
        pcs: sequence of point arrays that can be concatenated along axis 0.

    Returns:
        Array of indices into the concatenated point array.
    """
    dim = 3
    labels_all = np.array(
        [i for i, pc in enumerate(pcs) for _ in range(len(pc))])
    points_all = np.concatenate(pcs, axis=0).astype(np.float32)
    n_points = len(points_all)

    # Reference subset: one tenth of all points.
    sample = np.random.choice(n_points, int(n_points / 10), replace=False)
    points_ref = points_all[sample]
    labels_ref = labels_all[sample]

    res = faiss.StandardGpuResources()
    gpu_index = faiss.index_cpu_to_gpu(res, 0, faiss.IndexFlatL2(dim))
    gpu_index.add(points_ref)

    k = 1024        # neighbours fetched per query
    nq = 100000     # queries per batch
    thresh = 0.01   # upper bound on the reported distance
    border = []

    n_batches = int(n_points / nq) + 1
    for j in range(n_batches):
        offset = j * nq
        query = points_all[offset:offset + nq]
        print("{} queries for {}th batch".format(len(query), j))
        D, I = gpu_index.search(query, k)
        border.extend(
            offset + i
            for i, (dis, ind) in enumerate(zip(D, I))
            if len(np.unique(labels_ref[ind[dis < thresh]])) > 1)

    return np.array(border)
Exemplo n.º 10
0
    def __init__(self,
                 vectors,
                 identifiers,
                 dim=512,
                 gpu=True,
                 inbuilt_index=False):
        """
        Validate the inputs, build the Faiss index and populate it.

        :param vectors: vectors to index; checked together with
            ``identifiers`` by ``Embeddings.validate``
        :param identifiers: identifiers belonging to ``vectors``
        :param dim: dimensionality of the flat L2 index
        :param gpu: when true the index is moved to GPU 0 using the
            module-level ``GPU`` object (presumably shared
            ``faiss.StandardGpuResources`` -- confirm)
        :param inbuilt_index: when true the flat index is wrapped in a
            ``faiss.IndexIDMap2``
        """
        Embeddings.validate(vectors, identifiers)

        # Plain configuration.
        self.__vectors = vectors
        self.__identifiers = identifiers
        self.__dimension = dim
        self.__gpu = gpu
        self.__inbuilt_index = inbuilt_index

        # Flat L2 index, optionally wrapped with an id map.
        self.__quantizer = faiss.IndexFlatL2(dim)
        self.__indexmap = (faiss.IndexIDMap2(self.__quantizer)
                           if self.__inbuilt_index else self.__quantizer)

        # Final index: GPU clone or the CPU index itself.
        self.__index = (faiss.index_cpu_to_gpu(GPU, 0, self.__indexmap)
                        if self.__gpu else self.__indexmap)
        self.__add()
Exemplo n.º 11
0
    def fit(self, X):
        """Index the rows of ``X`` in a flat L2 Faiss index.

        The index is stored in ``self.index``. It is moved to GPU 0 when
        ``self.device == 'gpu'``; GPU resources are created only in that
        case, so the CPU path no longer touches the GPU API.

        Args:
            X: 2-D array with one vector per row; converted to float32
                before being added.
        """
        self.index = faiss.IndexFlatL2(X.shape[1])
        if self.device == 'gpu':
            res = faiss.StandardGpuResources()
            self.index = faiss.index_cpu_to_gpu(res, 0, self.index)
        self.index.add(X.astype(np.float32))
Exemplo n.º 12
0
def train_coarse_quantizer(data,
                           quantizer_path,
                           num_clusters,
                           hnsw=False,
                           niter=10,
                           cuda=False):
    """Cluster ``data`` and save an index over the centroids.

    ``faiss.Clustering`` is run with ``num_clusters`` centroids for
    ``niter`` iterations, using a flat L2 index (on GPU 0 when ``cuda``)
    for the assignment step. The centroids are then added to either an
    HNSW index or a flat L2 index, which is written to ``quantizer_path``.

    Args:
        data: 2-D training matrix.
        quantizer_path: destination for ``faiss.write_index``.
        num_clusters: number of centroids.
        hnsw: store the centroids in ``IndexHNSWFlat(d, 32)`` with
            ``efSearch = 128`` instead of a flat index.
        niter: number of clustering iterations.
        cuda: run the assignment index on GPU 0.
    """
    d = data.shape[1]

    assign_index = faiss.IndexFlatL2(d)
    if cuda:
        # Named local so the resources stay referenced during training.
        gpu_resources = faiss.StandardGpuResources()
        assign_index = faiss.index_cpu_to_gpu(gpu_resources, 0, assign_index)

    clus = faiss.Clustering(d, num_clusters)
    clus.verbose = True
    clus.niter = niter
    clus.train(data, assign_index)

    flat_centroids = faiss.vector_float_to_array(clus.centroids)
    centroids = flat_centroids.reshape(num_clusters, d)

    if not hnsw:
        quantizer = faiss.IndexFlatL2(d)
    else:
        quantizer = faiss.IndexHNSWFlat(d, 32)
        quantizer.hnsw.efSearch = 128
        quantizer.train(centroids)
    quantizer.add(centroids)

    faiss.write_index(quantizer, quantizer_path)
Exemplo n.º 13
0
def search_against_fragment(train_features: np.ndarray, test_features: np.ndarray) \
    -> Tuple[np.ndarray, np.ndarray]:
    """Search ``test_features`` against ``train_features`` with a flat index.

    The metric follows ``USE_COSINE_DIST`` on both devices: inner product
    when it is set, L2 otherwise. Previously the CPU path always used
    inner product regardless of that flag, so CPU and GPU runs could
    disagree. With ``USE_GPU`` the index is moved to GPU 0 using the
    module-level ``res``.

    Args:
        train_features: database vectors.
        test_features: query vectors.

    Returns:
        ``(index, distances)`` for the ``K`` nearest database entries of
        every query, as returned by the search (note the order: indices
        first).
    """
    # build a flat index (CPU)
    if USE_COSINE_DIST:
        index_flat = faiss.IndexFlat(DIMS, faiss.METRIC_INNER_PRODUCT)
    else:
        index_flat = faiss.IndexFlatL2(DIMS)

    if USE_GPU:
        # make it into a GPU index
        index_flat = faiss.index_cpu_to_gpu(res, 0, index_flat)

    index_flat.add(train_features)
    print("total size of the database:", index_flat.ntotal)

    print("searching")
    distances, index = index_flat.search(test_features, K)  # actual search
    dprint(index)
    dprint(distances)
    dprint(describe(distances.flatten()))
    return index, distances
Exemplo n.º 14
0
def build_faiss_index(nd_feats_array, mode):
    """
    Build a flat L2 index over the features on CPU, one GPU or all GPUs.

    :param nd_feats_array: 2-D array with one feature vector per row
    :param mode: 0: CPU; 1: GPU; 2: Multi-GPU
    :return: the populated index
    :raises ValueError: if ``mode`` is not 0, 1 or 2 (previously such a
        call silently returned ``None``)
    """
    if mode not in (0, 1, 2):
        raise ValueError(
            "Unknown mode {!r}; expected 0 (CPU), 1 (GPU) or 2 (Multi-GPU)"
            .format(mode))

    d = nd_feats_array.shape[1]
    index = faiss.IndexFlatL2(d)  # build the index on CPU

    if mode == 0:
        print("[INFO] Is trained? >> {}".format(index.is_trained))
    elif mode == 1:
        ngpus = faiss.get_num_gpus()
        print("[INFO] number of GPUs:", ngpus)
        res = faiss.StandardGpuResources()  # use a single GPU
        index = faiss.index_cpu_to_gpu(res, 0, index)
    else:
        index = faiss.index_cpu_to_all_gpus(index)  # all visible GPUs

    index.add(nd_feats_array)  # add vectors to the index
    print("[INFO] Capacity of gallery: {}".format(index.ntotal))

    return index
Exemplo n.º 15
0
    def create_index(cls, embeddings):
        """Create, train and fill an inner-product index for ``embeddings``.

        The index type comes from ``FLAGS.faiss_index_factory``. After
        ``cls.index_init`` has been applied, the index is optionally moved
        to GPU 0 (``FLAGS.faiss_use_gpu``), trained on all embeddings and
        filled with them. Training and insertion times are printed.

        Args:
            embeddings: 2-D array with one embedding per row.

        Returns:
            The populated index.
        """
        # Debug switch: set to an integer to index only the first N rows.
        USE_SUBSET = None
        if USE_SUBSET is not None:
            print(f">> CAREFUL. Using subset {USE_SUBSET} / {len(embeddings)},"
                  f" {USE_SUBSET/len(embeddings):0.2%}")
            embeddings = embeddings[:USE_SUBSET]

        dim = embeddings.shape[1]
        index = faiss.index_factory(dim, FLAGS.faiss_index_factory,
                                    faiss.METRIC_INNER_PRODUCT)
        cls.index_init(index)

        if FLAGS.faiss_use_gpu:
            print("\t- Moving to gpu")
            gpu_resources = faiss.StandardGpuResources()
            index = faiss.index_cpu_to_gpu(gpu_resources, 0, index)

        print("\t- Training the index")
        tic = time.time()
        index.train(embeddings)
        print(f"\t- Training took "
              f"{tqdm.tqdm.format_interval(time.time() - tic)}")

        print("\t- Adding the embeddings...")
        tic = time.time()
        index.add(embeddings)
        print(f"\t- Adding took "
              f"{tqdm.tqdm.format_interval(time.time() - tic)}")

        return index
Exemplo n.º 16
0
    def __call__(self, target_labels, features):
        """Compute a mean average-precision score over a full ranking.

        Every feature is searched against all features with a flat L2
        index (moved to GPU 0 only when ``features`` arrives as a
        ``torch.Tensor``). The first result column is dropped, leaving
        ``R = len(features)`` ranked neighbours per sample. For each
        sample, precision is accumulated at the ranks whose neighbour
        shares the sample's label and divided by that label's frequency.

        Args:
            target_labels: array of labels, one per feature row.
            features: feature matrix (NumPy array or ``torch.Tensor``).

        Returns:
            Mean of the per-sample scores.
        """
        labels, freqs = np.unique(target_labels, return_counts=True)
        R = len(features)

        faiss_search_index = faiss.IndexFlatL2(features.shape[-1])
        if isinstance(features, torch.Tensor):
            features = features.detach().cpu().numpy()
            res = faiss.StandardGpuResources()
            faiss_search_index = faiss.index_cpu_to_gpu(
                res, 0, faiss_search_index)
        faiss_search_index.add(features)

        _, ranked = faiss_search_index.search(features, int(R + 1))
        # Drop the first column of every ranking.
        nearest_neighbours = ranked[:, 1:]

        target_labels = target_labels.reshape(-1)
        nn_labels = target_labels[nearest_neighbours]

        # Rank positions 1..R; identical for every row.
        ranks = np.arange(1, R + 1)

        per_sample_scores = []
        for label, freq in zip(labels, freqs):
            for row in np.where(target_labels == label)[0]:
                is_hit = nn_labels[row, :] == label
                hits_so_far = np.cumsum(is_hit)
                score = np.sum(hits_so_far * is_hit / ranks) / freq
                per_sample_scores.append(score)

        return np.mean(per_sample_scores)
Exemplo n.º 17
0
 def __init__(self, d=64, GPU=False, GPU_Number=0):
     """Create a flat L2 index of dimension ``d`` (default 64).

     When ``GPU`` is true, the index is moved to device ``GPU_Number`` and
     the GPU resources are kept in ``self.res``.
     """
     self.idx = faiss.IndexFlatL2(d)
     self.GPU = GPU
     if not self.GPU:
         return
     # Single-GPU resources, kept on the instance.
     self.res = faiss.StandardGpuResources()
     self.idx = faiss.index_cpu_to_gpu(self.res, GPU_Number, self.idx)
Exemplo n.º 18
0
    def Indexes_of_inliers(self, Keypoints, Descriptors, buffersize):
        """Return a boolean mask marking keypoints considered inliers.

        An IVF-Flat index (256 dimensions, 100 lists) on GPU 0 is trained
        on and filled with the preprocessed first ``buffersize``
        descriptors. Every descriptor is then searched for its 100 nearest
        entries; the median of those distances is its outlier score.
        Keypoints whose score lies strictly below the score found at the
        ``1 - self.remove_superpoint_outliers_percentage`` position of the
        sorted scores are inliers.

        Args:
            Keypoints: sequence whose length sets the size of the result.
            Descriptors: 2-D descriptor matrix, sliced by row.
            buffersize: number of leading descriptors used for the index.

        Returns:
            Boolean array of length ``len(Keypoints)``.
        """
        res = faiss.StandardGpuResources()
        nlist = 100
        quantizer = faiss.IndexFlatL2(256)
        ivf_index = faiss.IndexIVFFlat(quantizer, 256, nlist)
        gpu_index_flat = faiss.index_cpu_to_gpu(res, 0, ivf_index)

        gpu_index_flat.train(
            clustering.preprocess_features(Descriptors[:buffersize]))
        gpu_index_flat.add(
            clustering.preprocess_features(Descriptors[:buffersize]))

        # Descriptors are processed in batches of about 10000 vectors.
        n_batches = math.ceil(len(Descriptors) / 10000)
        bounds = np.linspace(0, len(Descriptors), n_batches + 1, dtype=int)

        keypoints_outlier_score = np.zeros(len(Keypoints))
        for lo, hi in zip(bounds[:-1], bounds[1:]):
            descr = clustering.preprocess_features(Descriptors[lo:hi, :])
            distances, _ = gpu_index_flat.search(descr, 100)
            keypoints_outlier_score[lo:hi] = np.median(distances, axis=1)

        ranked_scores = np.sort(keypoints_outlier_score)
        cut = int((1 - self.remove_superpoint_outliers_percentage) *
                  (len(ranked_scores) - 1))
        return keypoints_outlier_score < ranked_scores[cut]
Exemplo n.º 19
0
    def _faiss_index_to_device(
            index: "faiss.Index",
            device: Optional[Union[int, List[int]]] = None) -> "faiss.Index":
        """
        Place a faiss index on the requested device(s).

        ``device`` selects the target:
            - `None`: the index is returned unchanged (CPU),
            - a non-negative integer: that GPU id,
            - a negative integer: all GPUs,
            - a list or tuple of integers: those GPUs.
        Any other type raises `TypeError`.
        """
        # Nothing to do for CPU; faiss is not even imported in this case.
        if device is None:
            return index

        import faiss  # noqa: F811

        if isinstance(device, int):
            # Negative ids mean "use every GPU".
            if device < 0:
                return faiss.index_cpu_to_all_gpus(index)
            # Otherwise the integer is the GPU id itself.
            faiss_res = faiss.StandardGpuResources()
            return faiss.index_cpu_to_gpu(faiss_res, device, index)

        # A sequence of ids maps the index onto exactly those devices.
        if isinstance(device, (list, tuple)):
            return faiss.index_cpu_to_gpus_list(index, gpus=list(device))

        raise TypeError(
            f"The argument type: {type(device)} is not expected. " +
            "Please pass in either nothing, a positive int, a negative int, or a list of positive ints."
        )
Exemplo n.º 20
0
def _knn_faiss(data_numpy, k, metric='euclidean', use_gpu=False):
    """Search the ``k`` nearest neighbours of every row among all rows.

    A flat Faiss index is built for the requested metric, filled with the
    data and queried with the same data.

    Args:
        data_numpy: 2-D array with one sample per row. It is never
            modified: the function works on its own float32 copy.
        k: number of neighbours per row.
        metric: ``'euclidean'`` (L2 index), ``'manhattan'`` (L1) or
            ``'cosine'`` (inner product over L2-normalised rows).
        use_gpu: move the index to GPU 0.

    Returns:
        ``(distances, neighbors)`` as returned by the index search.

    Raises:
        ValueError: ``metric`` is not one of the supported names
            (previously this surfaced as an unbound-variable error).
    """
    if metric not in ('euclidean', 'manhattan', 'cosine'):
        raise ValueError(
            "Unknown metric {!r}; expected 'euclidean', 'manhattan' or "
            "'cosine'".format(metric))

    import faiss

    # One private, C-contiguous float32 copy replaces the repeated
    # conversions; the copy also keeps normalize_L2 (which works in place)
    # away from the caller's array.
    data_numpy = data_numpy.astype(np.float32, order='C')

    if use_gpu:
        print('Using GPU for Faiss...')
        res = faiss.StandardGpuResources()
    else:
        print('Using CPU for Faiss...')

    dim = data_numpy.shape[1]
    if metric == 'euclidean':
        index = faiss.IndexFlatL2(dim)
    elif metric == 'manhattan':
        index = faiss.IndexFlat(dim, faiss.METRIC_L1)
    else:  # 'cosine'
        index = faiss.IndexFlat(dim, faiss.METRIC_INNER_PRODUCT)
        faiss.normalize_L2(data_numpy)

    if use_gpu:
        index = faiss.index_cpu_to_gpu(res, 0, index)

    index.train(data_numpy)
    assert index.is_trained

    index.add(data_numpy)
    # NOTE(review): nprobe is presumably irrelevant for flat indexes; kept
    # so behaviour is unchanged -- confirm before removing.
    index.nprobe = data_numpy.shape[0]
    distances, neighbors = index.search(data_numpy, k)

    return distances, neighbors
Exemplo n.º 21
0
def train_index(data,
                quantizer_path,
                trained_index_path,
                fine_quant='SQ8',
                cuda=False):
    """Train an IVF index on top of a stored coarse quantizer and save it.

    The quantizer is read from ``quantizer_path``; its ``d`` and
    ``ntotal`` give the dimension and the number of lists of the IVF
    index. The index is trained on ``data`` and written to
    ``trained_index_path``.

    With ``cuda=True`` and a PQ index, training now falls back to the CPU.
    Previously that combination only printed a message and wrote an
    untrained index.

    Args:
        data: training vectors.
        quantizer_path: path of the coarse quantizer index.
        trained_index_path: destination of the trained index.
        fine_quant: ``'SQ8'`` or ``'PQ<m>'`` where ``<m>`` is the number
            of PQ sub-quantizers (8 bits each).
        cuda: train on GPU 0 where supported (not for PQ).

    Raises:
        ValueError: ``fine_quant`` is neither ``'SQ8'`` nor ``'PQ<m>'``.
    """
    quantizer = faiss.read_index(quantizer_path)
    if fine_quant == 'SQ8':
        # NOTE(review): the fourth positional argument is presumably the
        # scalar-quantizer type rather than the metric -- confirm against
        # the IndexIVFScalarQuantizer constructor. Left unchanged here.
        trained_index = faiss.IndexIVFScalarQuantizer(quantizer, quantizer.d,
                                                      quantizer.ntotal,
                                                      faiss.METRIC_L2)
    elif fine_quant.startswith('PQ'):
        m = int(fine_quant[2:])
        trained_index = faiss.IndexIVFPQ(quantizer, quantizer.d,
                                         quantizer.ntotal, m, 8)
    else:
        raise ValueError(fine_quant)

    train_on_gpu = cuda and not fine_quant.startswith('PQ')
    if cuda and not train_on_gpu:
        print('PQ not supported on GPU; keeping CPU.')

    if train_on_gpu:
        res = faiss.StandardGpuResources()
        gpu_index = faiss.index_cpu_to_gpu(res, 0, trained_index)
        gpu_index.train(data)
        trained_index = faiss.index_gpu_to_cpu(gpu_index)
    else:
        trained_index.train(data)
    faiss.write_index(trained_index, trained_index_path)
Exemplo n.º 22
0
 def __init__(self, database, method):
     """Set up a flat Faiss index for ``method`` and populate it.

     ``'cosine'`` selects an inner-product index and ``'euclidean'`` an L2
     index, both of dimension ``self.D``; any other ``method`` raises
     ``KeyError``. The index is moved to GPU 0 when the
     ``CUDA_VISIBLE_DEVICES`` environment variable is set and non-empty.
     ``self.add()`` is called last.
     """
     super().__init__(database, method)

     index_types = {
         'cosine': faiss.IndexFlatIP,
         'euclidean': faiss.IndexFlatL2,
     }
     build_index = index_types[method]
     self.index = build_index(self.D)

     if os.environ.get('CUDA_VISIBLE_DEVICES'):
         gpu_resources = faiss.StandardGpuResources()
         self.index = faiss.index_cpu_to_gpu(gpu_resources, 0, self.index)
     self.add()
Exemplo n.º 23
0
def get_nn_avg_dist(emb, query, knn):
    """Return, per query row, the mean score of its ``knn`` nearest rows.

    ``emb`` is added to a flat inner-product index on GPU 0, the index is
    searched with ``query``, and the values returned as the first element
    of the search result are averaged over axis 1.
    """
    # GPU mode: a single GPU (device 0) is used.
    gpu_resources = faiss.StandardGpuResources()
    cpu_index = faiss.IndexFlatIP(emb.shape[1])
    gpu_index = faiss.index_cpu_to_gpu(gpu_resources, 0, cpu_index)
    gpu_index.add(emb)
    scores = gpu_index.search(query, knn)[0]
    return scores.mean(1)
Exemplo n.º 24
0
def main():
    """Assign every feature produced by the iterator to its nearest centroid.

    The Faiss spec is parsed from the basename of ``args.path``. Centroids
    (and, if the spec asks for PCA, the ``pca_A``/``pca_b`` arrays) are
    loaded from that directory and the centroids are placed in a flat
    index on GPU 0 (inner product when ``faiss_spec.sphere``, L2
    otherwise). For every item from the iterator, the optionally
    projected and normalised features are searched for their single
    nearest centroid.

    Three files are written into ``args.path``:
        ``<split>.src``       centroid ids, one line per item,
        ``<split>.tsv``       ``root`` followed by one file name per line,
        ``<split>.<labels>``  the labels; removed again if none were seen.
    """
    args = get_parser().parse_args()

    spec = osp.basename(args.path)

    try:
        faiss_spec = parse_faiss_specs(spec.rstrip("/"))[0]
    except:
        # Show the offending spec, then let the original error propagate.
        print(spec)
        raise

    print("Faiss Spec:", faiss_spec, file=sys.stderr)

    if faiss_spec.pca:
        A = torch.from_numpy(np.load(osp.join(args.path, "pca_A.npy"))).cuda()
        b = torch.from_numpy(np.load(osp.join(args.path, "pca_b.npy"))).cuda()
        print("Loaded PCA", file=sys.stderr)

    centroids = np.load(osp.join(args.path, "centroids.npy"))
    print("Loaded centroids", centroids.shape, file=sys.stderr)

    res = faiss.StandardGpuResources()
    if faiss_spec.sphere:
        index_flat = faiss.IndexFlatIP(centroids.shape[1])
    else:
        index_flat = faiss.IndexFlatL2(centroids.shape[1])
    faiss_index = faiss.index_cpu_to_gpu(res, 0, index_flat)
    faiss_index.add(centroids)

    generator, num, root = get_iterator(args)
    iterator = generator()

    had_labels = False
    src_path = osp.join(args.path, f"{args.split}.src")
    tsv_path = osp.join(args.path, f"{args.split}.tsv")
    label_path = osp.join(args.path, f"{args.split}.{args.labels}")

    with torch.no_grad(), \
            open(src_path, "w") as fp, \
            open(tsv_path, "w") as pp, \
            open(label_path, "w") as lp:
        print(root, file=pp)
        for f, fname, lbl in tqdm.tqdm(iterator, total=num):
            if faiss_spec.pca:
                f = torch.mm(f, A) + b
            if faiss_spec.norm:
                f = F.normalize(f, p=2, dim=-1)

            f = f.cpu().numpy()

            # Nearest centroid for every feature row.
            _, z = faiss_index.search(f, 1)

            print(" ".join(str(x.item()) for x in z), file=fp)
            print(fname, file=pp)

            if lbl is not None:
                print(lbl, file=lp)
                had_labels = True

    if not had_labels:
        os.remove(label_path)
Exemplo n.º 25
0
    def add_vectors(
        self,
        vectors: Union[np.array, "Dataset"],
        column: Optional[str] = None,
        batch_size: int = 1000,
        train_size: Optional[int] = None,
        faiss_verbose: Optional[bool] = None,
    ):
        """
        Add vectors to the index, creating the index first if needed.

        When the vectors live inside a column of ``vectors``, name it with
        the `column` argument. A missing index is built from
        ``self.string_factory`` / ``self.metric_type`` (a flat index when no
        factory string is set) and moved to GPU ``self.device`` when that is
        a non-negative integer. The first `train_size` vectors are used for
        training when `train_size` is given; the vectors are then added in
        slices of `batch_size`.
        """
        import faiss  # noqa: F811

        def pick(rows):
            # Select the requested column, or use the rows as they are.
            return rows if column is None else rows[column]

        # Create index
        if self.faiss_index is None:
            size = len(pick(vectors[0]))
            # The metric is passed only when one was configured.
            metric_args = () if self.metric_type is None else (self.metric_type,)
            if self.string_factory is None:
                index = faiss.IndexFlat(size, *metric_args)
            else:
                index = faiss.index_factory(size, self.string_factory, *metric_args)
            if self.device is not None and self.device > -1:
                self.faiss_res = faiss.StandardGpuResources()
                index = faiss.index_cpu_to_gpu(self.faiss_res, self.device, index)
            self.faiss_index = index
            logger.info(f"Created faiss index of type {type(self.faiss_index)}")

        # Set verbosity level on the index and on any nested index it exposes
        if faiss_verbose is not None:
            self.faiss_index.verbose = faiss_verbose
            for nested_name in ("index", "quantizer", "clustering_index"):
                nested = getattr(self.faiss_index, nested_name, None)
                if nested is not None:
                    nested.verbose = faiss_verbose

        # Train
        if train_size is None:
            logger.info("Ignored the training step of the faiss index as `train_size` is None.")
        else:
            train_vecs = pick(vectors[:train_size])
            logger.info(f"Training the index with the first {len(train_vecs)} vectors")
            self.faiss_index.train(train_vecs)

        # Add vectors
        logger.info(f"Adding {len(vectors)} vectors to the faiss index")
        batch_starts = range(0, len(vectors), batch_size)
        hide_progress = bool(logging.get_verbosity() == logging.NOTSET)
        for start in utils.tqdm(batch_starts, disable=hide_progress):
            self.faiss_index.add(pick(vectors[start : start + batch_size]))
Exemplo n.º 26
0
    def indexer(self):
        """Index the SIFT features of all images and persist the result.

        Every file from ``self.iterate_files()`` is passed to
        ``self.calc_sift``. Images with usable features get a running
        integer id that is shared by all of their feature rows. Features
        are accumulated and flushed into the index in batches (training
        the index first when it needs it), then the index is written to
        ``INDEX_PATH`` and the ``{id: (file_name, features)}`` mapping is
        pickled to ``IDS_VECTORS_PATH``.

        When ``USE_GPU`` is set the index is copied back to the CPU before
        it is written, since GPU indexes have to be converted with
        ``index_gpu_to_cpu`` before ``write_index`` can serialise them.

        Returns:
            Number of vectors stored in the index.
        """
        index = faiss.index_factory(self.dimensions, INDEX_KEY)
        if USE_GPU:
            res = faiss.StandardGpuResources()
            index = faiss.index_cpu_to_gpu(res, 0, index)

        def add_batch(batch_features, batch_ids):
            # Train on the first batch that reaches an untrained index.
            if not index.is_trained and INDEX_KEY != "IDMap,Flat":
                index.train(batch_features)
            index.add_with_ids(batch_features, batch_ids)

        images_list = self.iterate_files()
        # prepare ids
        ids_count = 0
        index_dict = {}
        ids = None
        features = np.matrix([])
        for file_name in images_list:
            ret, sift_feature = self.calc_sift(file_name)

            if ret == 0 and sift_feature.any():
                # record id and path
                index_dict[ids_count] = (file_name, sift_feature)
                # One copy of the image id per feature row.
                ids_list = np.linspace(ids_count,
                                       ids_count,
                                       num=sift_feature.shape[0],
                                       dtype="int64")
                ids_count += 1
                if features.any():
                    features = np.vstack((features, sift_feature))
                    ids = np.hstack((ids, ids_list))
                else:
                    features = sift_feature
                    ids = ids_list
                # Periodic flush keeps the pending batch bounded.
                if ids_count % 500 == 499:
                    add_batch(features, ids)
                    ids = None
                    features = np.matrix([])

        if features.any():
            print("training..")
            add_batch(features, ids)

        # save index
        print("saving index..")
        index_to_save = faiss.index_gpu_to_cpu(index) if USE_GPU else index
        faiss.write_index(index_to_save, INDEX_PATH)
        # save ids; the ``with`` block closes the file on every path
        with open(IDS_VECTORS_PATH, "wb+") as f:
            try:
                # Third positional argument is the pickle protocol
                # (True behaves as 1); kept for file compatibility.
                pickle.dump(index_dict, f, True)
            except EnvironmentError as e:
                print("Failed to save index file error:[{}]".format(e))
            except RuntimeError as v:
                print("Failed to save index file error:[{}]".format(v))
        return index.ntotal
Exemplo n.º 27
0
 def __init__(self, dimensions, gpu=False):
     """Create an empty flat inner-product index of size ``dimensions``.

     With ``gpu`` set, the index is moved to GPU 0 (requires faiss-gpu).
     ``self.ids`` starts as an empty list.
     """
     if not gpu:
         self.index = faiss.IndexFlatIP(dimensions)
     else:
         # Single-GPU resources for device 0.
         gpu_resources = faiss.StandardGpuResources()
         cpu_index = faiss.IndexFlatIP(dimensions)
         self.index = faiss.index_cpu_to_gpu(gpu_resources, 0, cpu_index)
     self.ids = []
Exemplo n.º 28
0
 def __init__(self, x, gpu_id, verbose=False):
     """Initialise the base class and create a GPU flat L2 index.

     The index dimension is ``x.shape[1]``. A non-negative ``gpu_id``
     places the index on that GPU; otherwise (e.g. -1) it is cloned to
     all GPUs. ``verbose`` is accepted but not used here.
     """
     DatasetAssign.__init__(self, x)
     cpu_index = faiss.IndexFlatL2(x.shape[1])
     if gpu_id >= 0:
         self.index = faiss.index_cpu_to_gpu(faiss.StandardGpuResources(),
                                             gpu_id, cpu_index)
         return
     # -1 -> assign to all GPUs
     self.index = faiss.index_cpu_to_all_gpus(cpu_index)
Exemplo n.º 29
0
def samplePC(cubes, flip_bbox=False, split_bbox=False):
    """Place the template points on every cuboid and index each cloud on GPU.

    Every cube is flattened into one row of ``scene_geom`` laid out as
    ``[xd, yd, zd, center, xdir, ydir]``; the slices used below (``:3``,
    ``3:6``, ``6:9``, ``9:12``) treat ``center``, ``xdir`` and ``ydir`` as
    3-vectors.  For each cube the module-level template ``s_xyz`` is shifted
    by -0.5, scaled by the cube's three dimensions, rotated by the frame
    ``[x, y, x cross y]`` built from the normalized direction vectors, and
    translated by the cube's center.

    Relies on module-level names that are not defined in this function:
    ``device``, ``s_xyz``, ``resource`` and, depending on the flags,
    ``bb_mask``, ``ft``, ``fbo``, ``bot_mask`` and ``top_mask``.

    Args:
        cubes: iterable of mappings holding tensors under the keys 'xd',
            'yd', 'zd', 'center', 'xdir' and 'ydir'.
        flip_bbox: when true, ``bb_mask`` is added to cloud 0 and its rows
            selected by ``ft`` and ``fbo`` are swapped.
        split_bbox: when true, cloud 0 is removed and replaced by two
            copies stored under keys -2 (plus ``bot_mask``) and -1 (plus
            ``top_mask``).

    Returns:
        A pair ``(clouds, scene_geom)``.  ``clouds`` maps each key to a tuple
        ``(points, index)`` where ``index`` is a GPU ``IndexFlatL2`` of
        dimension 3 filled with ``points``; ``scene_geom`` is the stacked
        per-cube geometry moved to ``device``.
    """
    cube_geom = [
        torch.cat((c['xd'].unsqueeze(0), c['yd'].unsqueeze(0),
                   c['zd'].unsqueeze(0), c['center'], c['xdir'], c['ydir']))
        for c in cubes
    ]

    scene_geom = torch.stack(cube_geom).to(device)
    ind_to_pc = {}

    for i in range(scene_geom.shape[0]):
        xyz = s_xyz

        # One index per template point, all equal to i, so the gather below
        # repeats cube i's geometry row for every point: shape (1, N, 12).
        s_inds = (torch.ones(1, xyz.shape[1]) * i).long().to(device)
        geom = scene_geom[s_inds]

        # Normalized local axes; the epsilon guards against zero-length
        # direction vectors.
        x_axis = geom[:, :, 6:9]
        x_axis = x_axis / (x_axis.norm(dim=2).unsqueeze(2) + 1e-8)
        y_axis = geom[:, :, 9:12]
        y_axis = y_axis / (y_axis.norm(dim=2).unsqueeze(2) + 1e-8)
        # Pin the cross product to the coordinate axis: without ``dim``,
        # torch.cross uses the first dimension of size 3, which would be the
        # point axis whenever the template has exactly three points.
        z_axis = torch.cross(x_axis, y_axis, dim=2)

        # Rotation matrices whose columns are the x, y and z axes.
        s_r = torch.cat(
            (x_axis.unsqueeze(3), y_axis.unsqueeze(3), z_axis.unsqueeze(3)),
            dim=3)

        # NOTE(review): the bare squeeze() calls drop every size-1 axis, so a
        # single-point template would also lose its point axis -- kept as in
        # the original; confirm the template always has more than one point.
        s_out = ((s_r @ ((xyz - .5) * geom[:, :, :3]).unsqueeze(-1)
                  ).squeeze() + geom[:, :, 3:6]).squeeze()
        ind_to_pc[i] = s_out

    if flip_bbox:
        ind_to_pc[0] += bb_mask
        # Swap through a snapshot so the second assignment reads the
        # pre-swap values.
        temp = ind_to_pc[0].clone()
        ind_to_pc[0][ft] = temp[fbo]
        ind_to_pc[0][fbo] = temp[ft]

    if split_bbox:
        bbox_pc = ind_to_pc.pop(0)
        ind_to_pc[-2] = bbox_pc.clone() + bot_mask
        ind_to_pc[-1] = bbox_pc.clone() + top_mask

    clouds = {}
    for key, points in ind_to_pc.items():
        index_cpu = faiss.IndexFlatL2(3)

        index = faiss.index_cpu_to_gpu(resource, torch.cuda.current_device(),
                                       index_cpu)
        # The points are handed to faiss as a C-contiguous host array.
        index.add(np.ascontiguousarray(points.cpu().numpy()))
        clouds[key] = (points, index)

    return clouds, scene_geom
Exemplo n.º 30
0
 def fit(self, X):
     """Build, train and fill a GPU IVF/PQ64 index from ``X``.

     ``X`` is cast to float32; the length of its first row is used as the
     index dimension and ``self._n_bits`` is interpolated after ``IVF`` in
     the factory string.  The index is cloned to GPU 0 using ``self._res``
     with float16 enabled in the cloner options, trained on ``X``, filled
     with ``X`` and finally its probe count is set from ``self._n_probes``.
     """
     X = X.astype(numpy.float32)

     dim = len(X[0])
     factory_key = "IVF%d,PQ64" % self._n_bits
     self._index = faiss.index_factory(dim, factory_key)

     cloner_opts = faiss.GpuClonerOptions()
     cloner_opts.useFloat16 = True
     self._index = faiss.index_cpu_to_gpu(self._res, 0, self._index,
                                          cloner_opts)

     gpu_index = self._index
     gpu_index.train(X)
     gpu_index.add(X)
     gpu_index.setNumProbes(self._n_probes)
Exemplo n.º 31
0
    def make_index(self, flags_obj):
        """Build the brute-force index and optionally clone it to a GPU.

        ``make_index_brute_force`` is called first (presumably it populates
        ``self.index`` -- confirm in that method).  When
        ``flags_obj.cg_use_gpu`` is truthy, ``self.index`` is replaced by a
        GPU clone on device ``flags_obj.cg_gpu_id``.
        """
        self.make_index_brute_force(flags_obj)

        if not flags_obj.cg_use_gpu:
            return

        gpu_resources = faiss.StandardGpuResources()
        device_id = flags_obj.cg_gpu_id
        self.index = faiss.index_cpu_to_gpu(gpu_resources, device_id,
                                            self.index)
Exemplo n.º 32
0
    def do_cpu_to_gpu(self, index_key):
        """Check that GPU clones of a CPU index return the same neighbors.

        A CPU index is built from ``index_key``, trained, filled with
        explicit ids and searched with ``nprobe = 4`` to obtain reference
        results.  The index is then cloned to GPU 0 and, when more than one
        GPU is reported, to two GPUs both without and with sharding; every
        clone must reproduce the reference neighbor ids exactly.

        Args:
            index_key: factory string passed to ``faiss.index_factory``.
        """
        # Timestamps after each stage, reported relative to the first one.
        ts = [time.time()]
        (xt, xb, xq) = self.get_dataset(small_one=True)
        nb, d = xb.shape

        index = faiss.index_factory(d, index_key)
        if index.__class__ == faiss.IndexIVFPQ:
            # speed up test
            index.pq.cp.niter = 2
            index.do_polysemous_training = False
        ts.append(time.time())

        index.train(xt)
        ts.append(time.time())

        # adding some ids because there was a bug in this case
        index.add_with_ids(xb, np.arange(nb) * 3 + 12345)
        ts.append(time.time())

        index.nprobe = 4
        D, Iref = index.search(xq, 10)
        ts.append(time.time())

        res = faiss.StandardGpuResources()
        gpu_index = faiss.index_cpu_to_gpu(res, 0, index)
        ts.append(time.time())

        gpu_index.setNumProbes(4)

        D, Inew = gpu_index.search(xq, 10)
        ts.append(time.time())
        # Single pre-formatted argument: prints the same text whether
        # ``print`` is the Python 2 statement or the Python 3 function.
        print('times: %s' % [t - ts[0] for t in ts])

        # ">= size" on a count that cannot exceed size means every returned
        # id must match the reference.
        self.assertGreaterEqual((Iref == Inew).sum(), Iref.size)

        if faiss.get_num_gpus() == 1:
            return

        for shard in False, True:

            # test on just 2 GPUs
            res = [faiss.StandardGpuResources() for _ in range(2)]
            co = faiss.GpuMultipleClonerOptions()
            co.shard = shard

            gpu_index = faiss.index_cpu_to_gpu_multiple_py(res, index, co)

            faiss.GpuParameterSpace().set_index_parameter(
                gpu_index, 'nprobe', 4)

            D, Inew = gpu_index.search(xq, 10)

            self.assertGreaterEqual((Iref == Inew).sum(), Iref.size)
Exemplo n.º 33
0
# keep track of optimal operating points seen so far
op = faiss.OperatingPoints()


# For every factory key: build the index (optionally on GPU), train it, add
# the database vectors and explore its operating points.  Progress lines are
# single pre-formatted strings so they print identically whether ``print`` is
# the Python 2 statement or the Python 3 function.
for index_key in keys_to_test:

    print("============ key %s" % (index_key,))

    # make the index described by the key
    index = faiss.index_factory(d, index_key)


    if use_gpu:
        # transfer to GPU (may be partial)
        index = faiss.index_cpu_to_gpu(res, dev_no, index)
        params = faiss.GpuParameterSpace()
    else:
        params = faiss.ParameterSpace()

    params.initialize(index)

    print("[%.3f s] train & add" % (time.time() - t0))

    index.train(xt)
    index.add(xb)

    print("[%.3f s] explore op points" % (time.time() - t0))

    # find operating points for this index
    opi = params.explore(index, xq, crit)
Exemplo n.º 34
0
 def test_set_gpu_param(self):
     """Set ``nprobe`` on a GPU clone of a ``PCAR8,IVF10,PQ4`` index.

     The test makes no assertions; it passes as long as the calls complete
     without raising.
     """
     cpu_index = faiss.index_factory(12, "PCAR8,IVF10,PQ4")
     gpu_resources = faiss.StandardGpuResources()
     gpu_index = faiss.index_cpu_to_gpu(gpu_resources, 0, cpu_index)
     param_space = faiss.GpuParameterSpace()
     param_space.set_index_parameter(gpu_index, "nprobe", 3)
# Exact (flat L2) k-nearest-neighbor search on a single GPU: load the
# database (xb) and query (xq) arrays, index xb, search xq and save the
# resulting distance/id matrices.
# TODO: the test set needs to be concatenated as well
# NOTE(review): xb is read from "train_filenames.txt" but is added to the
# index below as vectors, and `d` is not defined anywhere in this snippet --
# confirm both against the full script.
xb = np.load("train_filenames.txt")
xq = np.load("train_featues.txt")
print(xb.shape)
print(xq.shape)


print(xq.shape)
import faiss
res = faiss.StandardGpuResources()  # use a single GPU

# build a flat (CPU) index
index_flat = faiss.IndexFlatL2(d)
# make it into a GPU index on device 0
gpu_index_flat = faiss.index_cpu_to_gpu(res, 0, index_flat)

gpu_index_flat.add(xb)         # add vectors to the index
print(gpu_index_flat.ntotal)

k = 100                          # number of nearest neighbors to retrieve per query
D, I = gpu_index_flat.search(xq, k)  # actual search: D = distances, I = neighbor ids
# print(I[:5])                   # neighbors of the 5 first queries
# print(I[-5:])                  # neighbors of the 5 last queries

# persist the neighbor ids and distances for later use
np.save("output/I.npy", I)
np.save("output/D.npy", D)

### make submission
index_path = "input/index/"
# sorted listing of everything under index_path
# (presumably 1091756 is the expected number of entries -- verify)
index_list = sorted(glob.glob(index_path + "*")) # 1091756
Exemplo n.º 36
0
#################################################################

# Approximate search on GPU with an IVF4096,PQ64 index: clone to GPU 0,
# train, add the database and run one warm-up search.  Progress messages use
# the call form of print so the script also parses on Python 3; with a
# single string argument the output is unchanged on Python 2.
print("============ Approximate search")

index = faiss.index_factory(d, "IVF4096,PQ64")

# faster, uses more memory
# index = faiss.index_factory(d, "IVF16384,Flat")

co = faiss.GpuClonerOptions()

# here we are using a 64-byte PQ, so we must set the lookup tables to
# 16 bit float (this is due to the limited temporary memory).
co.useFloat16 = True

index = faiss.index_cpu_to_gpu(res, 0, index, co)

print("train")

index.train(xt)

print("add vectors to index")

index.add(xb)

print("warmup")

# result is discarded: this call only warms up the index before timing
index.search(xq, 123)

print("benchmark")