def train_index(start_data, quantizer_path, trained_index_path, num_clusters,
                fine_quant='SQ4', cuda=False, hnsw=False):
    """Create a faiss index, train it on ``start_data`` and write it to disk.

    Args:
        start_data: 2-D array of training vectors; ``start_data.shape[1]`` is
            used as the vector dimensionality.
        quantizer_path: Not used by this function; kept so existing call
            sites keep working.
        trained_index_path: Path handed to ``faiss.write_index``.
        num_clusters: Number of inverted lists for the IVF-based index types.
        fine_quant: Index flavour. ``'SQ4'`` builds an IVF index with a 4-bit
            scalar quantizer; a value containing ``'OPQ<n>'`` builds an
            OPQ + IVFPQ index with code size ``n`` (or an HNSW/PQ index when
            ``hnsw`` is set); a value containing ``'none'`` builds a flat
            inner-product index.
        cuda: When true, the index is cloned to GPU 0 (float16 enabled in the
            cloner options), trained there and copied back to the CPU.
        hnsw: Only consulted on the ``'OPQ'`` path, where it selects the
            HNSW/PQ index instead of OPQ + IVFPQ.

    Raises:
        ValueError: If ``fine_quant`` matches none of the supported flavours
            (the offending value is the exception argument), or if the text
            following ``'OPQ'`` is not an integer.
    """
    ds = start_data.shape[1]
    built_hnsw = False

    # The coarse quantizer is created only by the branches that use it, so an
    # unsupported ``fine_quant`` is rejected before any index is allocated.
    if fine_quant == 'SQ4':
        # Used only for reimplementation
        quantizer = faiss.IndexFlatIP(ds)
        start_index = faiss.IndexIVFScalarQuantizer(
            quantizer, ds, num_clusters,
            faiss.ScalarQuantizer.QT_4bit, faiss.METRIC_INNER_PRODUCT)
    elif 'OPQ' in fine_quant:
        # Default index type
        code_size = int(fine_quant[fine_quant.index('OPQ') + 3:])
        if hnsw:
            # "HNSW32,PQ96" is an index_factory description, not a valid
            # IndexHNSWPQ constructor argument (that constructor expects
            # integers), so the index has to be built through the factory.
            # NOTE(review): the description pins the PQ size to 96, so
            # code_size is not used on this path - confirm that is intended.
            start_index = faiss.index_factory(
                ds, "HNSW32,PQ96", faiss.METRIC_INNER_PRODUCT)
            built_hnsw = True
        else:
            quantizer = faiss.IndexFlatIP(ds)
            opq_matrix = faiss.OPQMatrix(ds, code_size)
            opq_matrix.niter = 10
            sub_index = faiss.IndexIVFPQ(
                quantizer, ds, num_clusters, code_size, 8,
                faiss.METRIC_INNER_PRODUCT)
            start_index = faiss.IndexPreTransform(opq_matrix, sub_index)
    elif 'none' in fine_quant:
        start_index = faiss.IndexFlatIP(ds)
    else:
        raise ValueError(fine_quant)

    start_index.verbose = False

    if cuda:
        # Convert to GPU index
        res = faiss.StandardGpuResources()
        co = faiss.GpuClonerOptions()
        co.useFloat16 = True
        gpu_index = faiss.index_cpu_to_gpu(res, 0, start_index, co)
        gpu_index.verbose = False

        # Train on GPU and back to CPU
        gpu_index.train(start_data)
        start_index = faiss.index_gpu_to_cpu(gpu_index)
    else:
        start_index.train(start_data)

    # Make sure to set direct map again. Only indexes that wrap an IVF index
    # have one to extract; the HNSW index is skipped because it has no
    # inverted lists.
    if 'none' not in fine_quant and not built_hnsw:
        index_ivf = faiss.extract_index_ivf(start_index)
        index_ivf.make_direct_map()
        index_ivf.set_direct_map_type(faiss.DirectMap.Hashtable)

    faiss.write_index(start_index, trained_index_path)
def recommend(self, users: torch.Tensor, k: int = 20):
    """Return, for each requested user, the ids of its ``k`` nearest items.

    A fresh HNSW/PQ index is trained and filled with every item embedding on
    each call, then queried with the selected user embeddings.

    Args:
        users: Tensor of user indices; it is cast with ``.long()`` and used
            to select rows of the user embedding table.
        k: Number of neighbours to retrieve per user.

    Returns:
        The label half of ``index.search`` (its second result): the
        positions of the retrieved items in the item embedding table, one
        row of ``k`` entries per query user.
    """
    all_users, all_items = self.computer()
    users_emb = all_users[users.long()].numpy()
    items_emb = all_items.numpy()

    # Size the index from the embeddings themselves rather than assuming a
    # fixed width of 64, so other embedding sizes work as well.
    # NOTE(review): the PQ argument of 4 presumably requires the dimension
    # to be a multiple of 4 - confirm against the embedding sizes in use.
    d = items_emb.shape[1]
    index = faiss.IndexHNSWPQ(d, 4, 32)
    index.train(items_emb)
    index.add(items_emb)

    _, neighbours = index.search(users_emb, k)
    return neighbours
def __init__(self) -> None:
    """Create the HNSW/PQ index and set its graph build/search parameters.

    ``self.d`` is read before anything is assigned here, so it has to be
    supplied by the class (or a base class).
    """
    hnsw_pq = faiss.IndexHNSWPQ(self.d, 8, 16)

    # HNSW settings: efConstruction applies while the graph is built,
    # efSearch while it is queried.
    graph = hnsw_pq.hnsw
    graph.efConstruction = 80
    graph.efSearch = 64

    self.index = hnsw_pq
# Read every JSON-lines file under args.input, writing each record's id to
# <args.output>/docid in the same order its vector is collected, so line i of
# the docid file corresponds to row i of the vector matrix.
vectors = []
docid_path = os.path.join(args.output, 'docid')
with open(docid_path, 'w') as docid_file:
    for filename in tqdm(os.listdir(args.input)):
        with open(os.path.join(args.input, filename)) as shard:
            for raw_line in shard:
                record = json.loads(raw_line)
                # Look up both fields before writing so a malformed record
                # does not leave a docid without a matching vector.
                docid, vector = record['id'], record['vector']
                docid_file.write(f'{docid}\n')
                vectors.append(vector)

vectors = np.array(vectors, dtype='float32')
print(vectors.shape)

# Choose the index type from the --hnsw / --pq switches.
if args.pq:
    if args.hnsw:
        index = faiss.IndexHNSWPQ(args.dim, args.pq_m, args.M)
        index.hnsw.efConstruction = args.efC
        index.metric_type = faiss.METRIC_INNER_PRODUCT
    else:
        index = faiss.IndexPQ(
            args.dim, args.pq_m, args.pq_nbits, faiss.METRIC_INNER_PRODUCT)
elif args.hnsw:
    index = faiss.IndexHNSWFlat(args.dim, args.M, faiss.METRIC_INNER_PRODUCT)
    index.hnsw.efConstruction = args.efC
else:
    index = faiss.IndexFlatIP(args.dim)

# Verbosity is switched on before training so the training run reports too.
index.verbose = True

# Only the product-quantized variants are trained here.
if args.pq:
    index.train(vectors)