def train_index(start_data,
                quantizer_path,
                trained_index_path,
                num_clusters,
                fine_quant='SQ4',
                cuda=False,
                hnsw=False):
    """Build an untrained faiss index, train it on ``start_data``, and save it.

    Args:
        start_data: 2-D array of training vectors; ``start_data.shape[1]`` is
            used as the vector dimensionality.
        quantizer_path: Not used by this function; kept in the signature so
            existing callers keep working.
        trained_index_path: Destination handed to ``faiss.write_index``.
        num_clusters: Number of inverted lists for the IVF-based index types.
        fine_quant: Index type selector:
            * ``'SQ4'`` -- IVF with a 4-bit scalar quantizer.
            * a string containing ``'OPQ<n>'`` -- OPQ transform followed by
              IVF-PQ with code size ``n`` (or an HNSW+PQ index when ``hnsw``
              is true).
            * a string containing ``'none'`` -- flat inner-product index.
        cuda: If true, clone the index to GPU 0 for training and copy the
            trained index back to the CPU before saving.
        hnsw: Only consulted for the ``'OPQ'`` type; selects an HNSW+PQ index
            instead of OPQ + IVF-PQ.

    Raises:
        ValueError: If ``fine_quant`` matches none of the supported types
            (or the digits after ``'OPQ'`` cannot be parsed as an int).
    """
    ds = start_data.shape[1]
    quantizer = faiss.IndexFlatIP(ds)

    # Tracks whether the index built below is the HNSW variant, which has no
    # IVF component and therefore no direct map to configure later on.
    uses_hnsw = False

    # Used only for reimplementation
    if fine_quant == 'SQ4':
        start_index = faiss.IndexIVFScalarQuantizer(
            quantizer, ds, num_clusters, faiss.ScalarQuantizer.QT_4bit,
            faiss.METRIC_INNER_PRODUCT)

    # Default index type
    elif 'OPQ' in fine_quant:
        code_size = int(fine_quant[fine_quant.index('OPQ') + 3:])
        if hnsw:
            # "HNSW32,PQ96" is an index_factory description, not a valid
            # argument to the IndexHNSWPQ constructor (which takes integer
            # pq_m / M values), so build the index through the factory.
            # NOTE(review): how the metric argument is honoured for HNSW+PQ
            # may depend on the installed faiss version -- confirm.
            start_index = faiss.index_factory(ds, "HNSW32,PQ96",
                                              faiss.METRIC_INNER_PRODUCT)
            uses_hnsw = True
        else:
            opq_matrix = faiss.OPQMatrix(ds, code_size)
            opq_matrix.niter = 10
            sub_index = faiss.IndexIVFPQ(quantizer, ds, num_clusters,
                                         code_size, 8,
                                         faiss.METRIC_INNER_PRODUCT)
            start_index = faiss.IndexPreTransform(opq_matrix, sub_index)
    elif 'none' in fine_quant:
        start_index = faiss.IndexFlatIP(ds)
    else:
        raise ValueError(fine_quant)

    start_index.verbose = False
    if cuda:
        # Convert to GPU index
        res = faiss.StandardGpuResources()
        co = faiss.GpuClonerOptions()
        co.useFloat16 = True
        gpu_index = faiss.index_cpu_to_gpu(res, 0, start_index, co)
        gpu_index.verbose = False

        # Train on GPU and back to CPU
        gpu_index.train(start_data)
        start_index = faiss.index_gpu_to_cpu(gpu_index)
    else:
        start_index.train(start_data)

    # Make sure to set direct map again. Only IVF-based indexes have one:
    # calling extract_index_ivf on the HNSW index would fail, so skip it.
    if 'none' not in fine_quant and not uses_hnsw:
        index_ivf = faiss.extract_index_ivf(start_index)
        index_ivf.make_direct_map()
        index_ivf.set_direct_map_type(faiss.DirectMap.Hashtable)
    faiss.write_index(start_index, trained_index_path)
Пример #2
0
    def recommend(self, users: torch.Tensor, k: int = 20):
        """Return, for each requested user, the ids of ``k`` neighbouring items.

        A fresh faiss HNSW+PQ index is trained on and filled with all item
        embeddings on every call, then queried with the embeddings of the
        requested users.

        Args:
            users: Tensor of user indices; cast to ``long`` and used to index
                the user-embedding tensor returned by ``self.computer()``.
            k: Number of neighbours to retrieve per user.

        Returns:
            The second element of the faiss ``search`` result, i.e. the array
            of neighbour (item) indices, one row per requested user.
        """
        all_users, all_items = self.computer()
        users_emb = all_users[users.long()].numpy()
        items_emb = all_items.numpy()

        # Take the dimensionality from the embeddings instead of hard-coding
        # 64, so the index always matches the data it is trained on.
        # NOTE(review): the PQ setting below (4 sub-quantizers) presumably
        # requires the dimensionality to be divisible by 4 -- confirm.
        d = items_emb.shape[1]

        index = faiss.IndexHNSWPQ(d, 4, 32)
        index.train(items_emb)
        index.add(items_emb)
        return index.search(users_emb, k)[1]
Пример #3
0
 def __init__(self) -> None:
     """Create the HNSW+PQ faiss index and set its graph parameters.

     Reads ``self.d`` for the vector dimensionality; it is not assigned
     here, so it must already be available on the instance or class.
     """
     hnsw_pq = faiss.IndexHNSWPQ(self.d, 8, 16)
     # Graph parameters: efConstruction is set to 80, efSearch to 64.
     hnsw_pq.hnsw.efConstruction = 80
     hnsw_pq.hnsw.efSearch = 64
     self.index = hnsw_pq
Пример #4
0
        vectors = []
        with open(os.path.join(args.output, 'docid'), 'w') as f_out:
            for filename in tqdm(os.listdir(args.input)):
                path = os.path.join(args.input, filename)
                with open(path) as f_in:
                    for line in f_in:
                        info = json.loads(line)
                        docid = info['id']
                        vector = info['vector']
                        f_out.write(f'{docid}\n')
                        vectors.append(vector)
    vectors = np.array(vectors, dtype='float32')
    print(vectors.shape)

    if args.hnsw and args.pq:
        index = faiss.IndexHNSWPQ(args.dim, args.pq_m, args.M)
        index.hnsw.efConstruction = args.efC
        index.metric_type = faiss.METRIC_INNER_PRODUCT
    elif args.hnsw:
        index = faiss.IndexHNSWFlat(args.dim, args.M,
                                    faiss.METRIC_INNER_PRODUCT)
        index.hnsw.efConstruction = args.efC
    elif args.pq:
        index = faiss.IndexPQ(args.dim, args.pq_m, args.pq_nbits,
                              faiss.METRIC_INNER_PRODUCT)
    else:
        index = faiss.IndexFlatIP(args.dim)
    index.verbose = True

    if args.pq:
        index.train(vectors)