def test_indices_ivfflat(self):
    """64-bit ids survive with default options; INDICES_32_BIT storage
    strips the high bits of ids that do not fit in an int32."""
    resources = faiss.StandardGpuResources()
    dim, count, nlist = 128, 5000, 10
    rng = np.random.RandomState(567)
    vectors = rng.rand(count, dim).astype('float32')
    base_ids = np.arange(count, dtype=np.int64)
    # Shift ids past the int32 range so any truncation is observable.
    big_ids = (base_ids + 4294967296).astype('int64')

    config = faiss.GpuIndexIVFFlatConfig()

    def build_and_search():
        index = faiss.GpuIndexIVFFlat(resources, dim, nlist,
                                      faiss.METRIC_L2, config)
        index.train(vectors)
        index.add_with_ids(vectors, big_ids)
        _, labels = index.search(vectors[10:20], 5)
        return labels

    # Default (64-bit) id storage keeps the ids intact.
    labels = build_and_search()
    self.assertTrue(np.array_equal(big_ids[10:20], labels[:, 0]))

    # 32-bit id storage strips the high bit.
    config.indicesOptions = faiss.INDICES_32_BIT
    labels = build_and_search()
    self.assertTrue(np.array_equal(base_ids[10:20], labels[:, 0]))
def fit(self, Ciu): import faiss # train the model super(FaissAlternatingLeastSquares, self).fit(Ciu) self.quantizer = faiss.IndexFlat(self.factors) if self.gpu: self.gpu_resources = faiss.StandardGpuResources() item_factors = self.item_factors.astype('float32') if self.approximate_recommend: log.debug("Building faiss recommendation index") # build up a inner product index here if self.gpu: index = faiss.GpuIndexIVFFlat(self.gpu_resources, self.factors, self.nlist, faiss.METRIC_INNER_PRODUCT) else: index = faiss.IndexIVFFlat(self.quantizer, self.factors, self.nlist, faiss.METRIC_INNER_PRODUCT) index.train(item_factors) index.add(item_factors) index.nprobe = self.nprobe self.recommend_index = index if self.approximate_similar_items: log.debug("Building faiss similar items index") # likewise build up cosine index for similar_items, using an inner product # index on normalized vectors` norms = numpy.linalg.norm(item_factors, axis=1) norms[norms == 0] = 1e-10 normalized = (item_factors.T / norms).T.astype('float32') if self.gpu: index = faiss.GpuIndexIVFFlat(self.gpu_resources, self.factors, self.nlist, faiss.METRIC_INNER_PRODUCT) else: index = faiss.IndexIVFFlat(self.quantizer, self.factors, self.nlist, faiss.METRIC_INNER_PRODUCT) index.train(normalized) index.add(normalized) index.nprobe = self.nprobe self.similar_items_index = index
def __init__(self, cell_size=20, nr_cells=1024, K=4, num_lists=32, probes=32,
             res=None, train=None, gpu_id=-1):
    """Build a GPU IVF-flat index over `nr_cells * 100` training vectors.

    Args:
        cell_size: dimensionality of each stored vector.
        nr_cells: number of memory cells; training data is 100x this count.
        K: number of neighbours to retrieve per query.
        num_lists: number of IVF inverted lists (coarse clusters).
        probes: number of lists probed per search.
        res: optional pre-built faiss.StandardGpuResources to share.
        train: optional training tensor; random normal data if omitted.
        gpu_id: CUDA device to initialise for, or -1 for the default.
    """
    super(FAISSIndex, self).__init__()
    self.cell_size = cell_size
    self.nr_cells = nr_cells
    self.probes = probes
    self.K = K
    self.num_lists = num_lists
    self.gpu_id = gpu_id
    # BEWARE: if this variable gets deallocated, FAISS crashes
    self.res = res if res else faiss.StandardGpuResources()
    # self.res.setTempMemoryFraction(0.01)
    # Cap FAISS scratch memory at 1% of 4 GiB (~40 MiB).
    self.res.setTempMemory(int(0.01 * 4 * 1024 * 1024 * 1024))
    if self.gpu_id != -1:
        self.res.initializeForDevice(self.gpu_id)
    # (Removed an unused `nr_samples` local that was computed here and
    # never read.)
    train = train if train is not None else T.randn(
        self.nr_cells * 100, self.cell_size)
    self.index = faiss.GpuIndexIVFFlat(self.res, self.cell_size,
                                       self.num_lists, faiss.METRIC_L2)
    self.index.setNumProbes(self.probes)
    self.train(train)
def fit(self, X):
    """Train a GPU IVF-flat L2 index on X and add every row to it."""
    data = X.astype(numpy.float32)
    dim = len(data[0])
    # `_n_bits` doubles as the IVF list count here.
    self._index = faiss.GpuIndexIVFFlat(self._res, dim, self._n_bits,
                                        faiss.METRIC_L2)
    self._index.train(data)
    self._index.add(data)
    self._index.setNumProbes(self._n_probes)
def test_serialize(self):
    """Round-trip several GPU index types through CPU serialization and
    verify search results and addability are preserved."""
    res = faiss.StandardGpuResources()
    d = 32
    k = 10
    nlist = 5

    train = make_t(10000, d)
    add = make_t(10000, d)
    query = make_t(10, d)

    # One of each GPU index family: flat, IVF-flat, IVF-SQ, IVF-PQ.
    indexes = [
        faiss.GpuIndexFlatL2(res, d),
        faiss.GpuIndexIVFFlat(res, d, nlist, faiss.METRIC_L2),
        faiss.GpuIndexIVFScalarQuantizer(res, d, nlist,
                                         faiss.ScalarQuantizer.QT_fp16),
        faiss.GpuIndexIVFPQ(res, d, nlist, 4, 8, faiss.METRIC_L2),
    ]

    for index in indexes:
        index.train(train)
        index.add(add)
        orig_d, orig_i = index.search(query, k)

        ser = faiss.serialize_index(faiss.index_gpu_to_cpu(index))
        cpu_index = faiss.deserialize_index(ser)
        restored = faiss.index_cpu_to_gpu(res, 0, cpu_index)

        restore_d, restore_i = restored.search(query, k)
        self.assertTrue(np.array_equal(orig_d, restore_d))
        self.assertTrue(np.array_equal(orig_i, restore_i))

        # The restored index must still accept new vectors without error.
        restored.add(query)
def fit(self, data):
    """Train an IVF-flat inner-product index on `data` and add all rows."""
    vectors = data.astype('float32')
    dim = vectors.shape[1]
    if self.gpu:
        # GPU path: the resources object must outlive the index.
        self.res = faiss.StandardGpuResources()
        self.index = faiss.GpuIndexIVFFlat(self.res, dim, self.nlist,
                                           faiss.METRIC_INNER_PRODUCT)
    else:
        # CPU path needs an explicit coarse quantizer.
        self.quantizer = faiss.IndexFlat(dim)
        self.index = faiss.IndexIVFFlat(self.quantizer, dim, self.nlist,
                                        faiss.METRIC_INNER_PRODUCT)
    self.index.train(vectors)
    self.index.add(vectors)
    self.index.nprobe = self.nprobe
def inference(discriminator, dev_src, dev_tgt):
    """Evaluate on dev data: retrieve KB concept vectors per token via a
    faiss index, run CRF inference, and return averaged precision/recall/F1.

    Relies on module-level `encoder`, `vqcrf`, `args`, `batch_size`,
    `USE_CUDA`, `batchize`, `load_kb` and `evaluate_acc`.
    """
    discriminator.eval()
    # NOTE(review): `volatile=True` is from pre-0.4 PyTorch; modern code
    # would use torch.no_grad() — confirm the installed torch version.
    datapairslist = batchize(dev_src, dev_tgt, batch_size, volatile=True)
    score = 0  # NOTE(review): never used below
    len_datalist = len(datapairslist)
    prec, rec, f1 = 0, 0, 0
    # Load KB concept embeddings and index them with IVF-flat on GPU 0.
    _, e_v = load_kb.loadvec()
    flat_config = faiss.GpuIndexIVFFlatConfig()
    flat_config.device = 0
    res = faiss.StandardGpuResources()
    index = faiss.GpuIndexIVFFlat(res, args.concept_size, 1000,
                                  faiss.METRIC_L2, flat_config)
    index.train(e_v)
    index.add(e_v)
    for i, item in enumerate(datapairslist):
        src_seqs, tgt_seqs, mask = item
        batch_len, maxlen = src_seqs.size()
        # Encode and zero out padded positions.
        embbed, pre_kb_emb = encoder(src_seqs)
        embbed = embbed * mask.unsqueeze(-1)
        pre_kb_emb = (pre_kb_emb * mask.unsqueeze(-1)).permute(1, 0, 2)
        if not isinstance(pre_kb_emb, np.ndarray):
            pre_kb_emb = pre_kb_emb.data.cpu().numpy()
        # Retrieve the top `num_kb` KB vectors for each sequence.
        # NOTE(review): this inner `item` shadows the loop variable above.
        v_list = []
        for item in pre_kb_emb:
            D, I = index.search(item, args.num_kb)
            v_can = e_v[I]
            v_list.append(torch.from_numpy(v_can))
        v = Variable(torch.stack(v_list, 0))
        if USE_CUDA:
            v = v.cuda()
        v = v * mask.transpose(1, 0).unsqueeze(-1).unsqueeze(-1)
        # CRF decoding and micro-averaged metric accumulation.
        scores, preds = vqcrf.inference(embbed, v, mask)
        micro_prec, micro_rec, micro_f1 = evaluate_acc(tgt_seqs, preds)
        prec += micro_prec
        rec += micro_rec
        f1 += micro_f1
    return prec / len_datalist, rec / len_datalist, f1 / len_datalist
def test_train_add_with_ids(self):
    """add_with_ids must accept torch-GPU, torch-CPU and numpy inputs."""
    d = 32
    nlist = 5
    res = faiss.StandardGpuResources()
    res.noTempMemory()
    index = faiss.GpuIndexIVFFlat(res, d, nlist, faiss.METRIC_L2)

    device = torch.device('cuda', 0)
    xb = torch.rand(1000, d, device=device, dtype=torch.float32)
    index.train(xb)
    ids = torch.arange(1000, 1000 + xb.shape[0],
                       device=device, dtype=torch.int64)

    # torch GPU tensors
    index.add_with_ids(xb, ids)
    _, labels = index.search(xb[10:20], 1)
    self.assertTrue(torch.equal(labels.view(10), ids[10:20]))

    # torch CPU tensors
    index.reset()
    xb_cpu = xb.cpu()
    ids_cpu = ids.cpu()
    index.train(xb_cpu)
    index.add_with_ids(xb_cpu, ids_cpu)
    _, labels = index.search(xb_cpu[10:20], 1)
    self.assertTrue(torch.equal(labels.view(10), ids_cpu[10:20]))

    # numpy arrays
    index.reset()
    xb_np = xb.cpu().numpy()
    ids_np = ids.cpu().numpy()
    index.train(xb_np)
    index.add_with_ids(xb_np, ids_np)
    _, labels = index.search(xb_np[10:20], 1)
    self.assertTrue(np.array_equal(labels.reshape(10), ids_np[10:20]))
def test_ivfflat(self):
    """Smoke test: a GPU IVF-flat index trains on the fixture data."""
    resources = faiss.StandardGpuResources()
    index = faiss.GpuIndexIVFFlat(resources, self.d, self.nlist,
                                  faiss.METRIC_L2)
    index.train(self.xb)
d = np.load('amazon_title_bow.npy') else: print("Running pipeline...") pipeline = make_pipeline(CountVectorizer(stop_words='english', max_features=10000), TfidfTransformer(), TruncatedSVD(n_components=128)) d = pipeline.fit_transform(product_text).astype('float32') print("Saving BOW array.") np.save('amazon_title_bow.npy', d) print(d.shape) ncols = np.shape(d)[1] if use_gpu: gpu_resources = faiss.StandardGpuResources() index = faiss.GpuIndexIVFFlat(gpu_resources, ncols, 400, faiss.METRIC_INNER_PRODUCT) else: quantizer = faiss.IndexFlat(ncols) index = faiss.IndexIVFFlat(quantizer, ncols, 400, faiss.METRIC_INNER_PRODUCT) print(index.is_trained) index.train(d) print(index.is_trained) index.add(d) print(index.ntotal) rec_asins = ["0001048775"] for asin in rec_asins: idx = -1 for i in range(len(product_asin)):
def train_epoch(discriminator, train_src, train_tgt, epoch_index, lr):
    """Run one training epoch: per batch, retrieve KB concept vectors via a
    faiss index, compute the CRF negative log score, backprop, and step the
    optimizers. Returns the mean epoch loss.

    Relies on module-level `encoder`, `vqcrf`, `encoder_optimizer`,
    `vqcrf_optimizer`, `args`, `batch_size`, `len_trainset`, `USE_CUDA`,
    `print_every_train`, `batchize`, `shuffle` and `load_kb`.
    NOTE(review): the `lr` parameter is never used in this body.
    """
    discriminator.train()
    datapairslist = batchize(train_src, train_tgt, batch_size)
    epoch_loss = 0
    start_time = time.time()
    #encoder_optimizer = getattr(optim, args.optim)(encoder.parameters(), weight_decay=L2)
    #vqcrf_optimizer = getattr(optim, args.optim)(vqcrf.parameters(), weight_decay=L2)
    len_traintensorlist = len(train_src)
    idx_list = list(range(len_traintensorlist))  # NOTE(review): unused
    shuffle(datapairslist)
    # Build a GPU IVF-flat index over the KB concept embeddings once per epoch.
    _, e_v = load_kb.loadvec()
    flat_config = faiss.GpuIndexIVFFlatConfig()
    flat_config.device = 0
    res = faiss.StandardGpuResources()
    index = faiss.GpuIndexIVFFlat(res, args.concept_size, 1000,
                                  faiss.METRIC_L2, flat_config)
    index.train(e_v)
    index.add(e_v)
    for i, item in enumerate(datapairslist):
        total_loss = 0  # NOTE(review): unused
        src_seqs, tgt_seqs, mask = item
        batch_len, maxlen = src_seqs.size()
        encoder.zero_grad()
        vqcrf.zero_grad()
        # Encode and zero out padded positions.
        embbed, pre_kb_emb = encoder(src_seqs)
        embbed = embbed * mask.unsqueeze(-1)
        pre_kb_emb = (pre_kb_emb * mask.unsqueeze(-1)).permute(1, 0, 2)
        if not isinstance(pre_kb_emb, np.ndarray):
            pre_kb_emb = pre_kb_emb.data.cpu().numpy()
        # Retrieve the top `num_kb` KB vectors for every sequence.
        # NOTE(review): inner `item` shadows the batch loop variable.
        v_list = []
        for item in pre_kb_emb:
            D, I = index.search(item, args.num_kb)
            v_can = e_v[I]
            v_list.append(torch.from_numpy(v_can))
        v = Variable(torch.stack(v_list, 0))
        if USE_CUDA:
            v = v.cuda()
        v = v * mask.transpose(1, 0).unsqueeze(-1).unsqueeze(-1)
        # CRF loss, backprop, gradient clipping, optimizer steps.
        neglogscore = vqcrf(embbed, v, tgt_seqs, mask).mean()
        #print("neglogscore", neglogscore.size())
        #decoder_hidden = decoder.init_hidden(batch_len)
        neglogscore.backward()
        torch.nn.utils.clip_grad_norm(vqcrf.parameters(), args.clip)
        torch.nn.utils.clip_grad_norm(encoder.parameters(), args.clip)
        encoder_optimizer.step()
        vqcrf_optimizer.step()
        epoch_loss += neglogscore.data[0]
        print_loss = neglogscore.data[0] / len(tgt_seqs)
        # Periodic progress report (and always on the last batch).
        if (i % print_every_train == 0 and i != 0) or (len_traintensorlist - 1 == i):
            using_time = time.time() - start_time
            print('| epoch %3d | %4d/%5d batches | ms/batch %5.5f | '
                  'loss %5.15f | ppl: %5.2f |}' % (epoch_index, i,
                  len_trainset // batch_size,
                  using_time * 1000 / print_every_train,
                  print_loss, math.exp(print_loss)))
            print_loss = 0
            start_time = time.time()
    epoch_loss = epoch_loss / len_trainset
    return epoch_loss
# Build one IVF-flat index per GPU and combine them through an IndexProxy,
# replicating the dataset across devices for parallel search.
k = 10
# First get a StandardGpuResources object for each GPU
# (`ngpus` is the number of GPUs).
res = [faiss.StandardGpuResources() for i in range(ngpus)]
flat_config = []
for i in range(ngpus):
    # Alternatives: faiss.GpuIndexFlatConfig(), faiss.GpuIndexIVFPQConfig()
    cfg = faiss.GpuIndexIVFFlatConfig()
    # useFloat16 is a boolean value
    cfg.useFloat16 = False
    cfg.device = i
    flat_config.append(cfg)
#indexes = [faiss.GpuIndexFlatL2(res[i],d,flat_config[i]) for i in range(ngpus)]
# (works; fast; needs no training, computes L2 distances directly)
#indexes = [faiss.GpuIndexIVFPQ(res[i],d,nlist, m,4,faiss.METRIC_L2,flat_config[i]) for i in range(ngpus)]
indexes = [
    faiss.GpuIndexIVFFlat(res[i], d, nlist, faiss.METRIC_L2, flat_config[i])
    for i in range(ngpus)
]
# Then gather the per-GPU indexes into a proxy that fans out queries.
index = faiss.IndexProxy()
for sub_index in indexes:
    index.addIndex(sub_index)
# Original note: unclear what dominates PQ timing here.
index.train(pin_data_drop_new)
print(index.is_trained)
index.add(pin_data_drop_new)
print(index.ntotal)
# nprobe needs tuning: raising it enough reproduces brute-force results;
# it controls the speed/accuracy trade-off.
index.nprobe = 30
def mine_triplets(args, res, flat_config, ivfflat_config, embedding_id,
                  index_id, embedding_neg_id, index_neg_id):
    """Mine hard triplets which violate margin constraints.

    For each identity k, searches a flat L2 index of its own embeddings
    (positives) and an IVF-flat index of its negatives, then emits every
    (anchor, positive, negative) whose positive distance is not at least
    `args.margin` smaller than the negative distance.

    Args:
        args: namespace with num_identities, nprobe_gpu_limit,
            num_neighbors and margin.
        res: shared faiss.StandardGpuResources (kept alive by the caller).
        flat_config / ivfflat_config: pre-built faiss GPU index configs.
        embedding_id / embedding_neg_id: per-identity embedding matrices.
        index_id / index_neg_id: per-identity global sample-id lookups.

    Returns:
        List of (anchor_id, positive_id, negative_id) tuples.
    """
    hard = []
    # res = faiss.StandardGpuResources()
    # flat_config = faiss.GpuIndexFlatConfig()
    # flat_config.device = 0
    # ivfflat_config = faiss.GpuIndexIVFFlatConfig()
    # ivfflat_config.device = 0
    ## co = faiss.GpuClonerOptions()
    ## co.useFloat16 = True
    for k in range(args.num_identities):
        # args.ann_file = os.path.join(args.ckpt_dir, 'ann_{:s}_{:s}_{:04d}.npz'.format(args.dset_name, args.arch, k))
        d = embedding_id[k].shape[1]
        # IVF list count scaled as the 4th root of the negative-pool size.
        neg_nlist = int(math.sqrt(math.sqrt(embedding_neg_id[k].shape[0])))
        # Build positive index (exact flat L2 — needs no training).
        index = None
        index = faiss.GpuIndexFlatL2(res, d, flat_config)
        index.nprobe = args.nprobe_gpu_limit
        assert index.is_trained
        index.add(embedding_id[k])
        # Build negative index (IVF-flat — must be trained first).
        neg_index = None
        neg_index = faiss.GpuIndexIVFFlat(res, d, neg_nlist,
                                          faiss.METRIC_L2, ivfflat_config)
        # neg_index = faiss.GpuIndexFlatL2(res, d, flat_config)
        neg_index.nprobe = args.nprobe_gpu_limit
        assert not neg_index.is_trained
        neg_index.train(embedding_neg_id[k])
        assert neg_index.is_trained
        neg_index.add(embedding_neg_id[k])
        # Search both indexes with this identity's embeddings as queries.
        ann_neg_dist, ann_neg_index = neg_index.search(embedding_id[k],
                                                       args.num_neighbors)
        # print(ann_neg_dist)
        # print(ann_neg_index)
        ann_dist, ann_index = index.search(embedding_id[k],
                                           args.num_neighbors)
        # print(ann_index)
        # Generate hard triplets: positives are taken from the FAR end of
        # the neighbour list (p_ counts down from the last column).
        for a_ in range(ann_index.shape[0]):
            for p_ctr in range(args.num_neighbors):
                p_ = int(ann_index.shape[1]) - 1 - p_ctr
                a = index_id[k][a_]
                for n_ in range(args.num_neighbors):
                    p = index_id[k][ann_index[a_, p_]]
                    n = index_neg_id[k][ann_neg_index[a_, n_]]
                    if ann_dist[a_, p_] - ann_neg_dist[
                            a_, n_] + args.margin >= 0:
                        # hard example: violates margin
                        hard.append((a, p, n))
        # print('#Tuples: ', len(hard))
        # joblib.dump({'ann_index': ann_index, 'ann_dist': ann_dist,
        #              'ann_neg_index': ann_neg_index, 'ann_neg_dist': ann_neg_dist},
        #             args.ann_file
        #             )
        # Free GPU memory before the next identity.
        index.reset()
        neg_index.reset()
        index = None
        neg_index = None
        gc.collect()
    # res = None
    gc.collect()
    return hard