def search(config, model, vocab, query, n_results=10):
    """Encode a natural-language *query* and retrieve matching code snippets.

    Args:
        config: dict with at least 'desc_len' (max query length) and
            'sim_measure' ('cos' enables cosine pre-normalization).
        model: trained model exposing ``desc_encoding(desc, desc_len)``.
        vocab: word->index mapping used to encode the query.
        query: query sentence (str).
        n_results: number of candidates each search thread keeps.

    Returns:
        list populated by the ``search_thread`` workers (module-level
        ``codevecs`` chunks are scanned in parallel threads).
    """
    model.eval()
    # Derive the device from the model itself; the original built it from an
    # undefined global ``args.gpu_id`` and re-moved the model on every call.
    device = next(model.parameters()).device
    # Use the ``vocab`` parameter — the original referenced a global
    # ``vocab_desc`` and left the parameter unused.
    desc, desc_len = sent2indexes(query, vocab, config['desc_len'])  # query -> word indices
    desc = torch.from_numpy(desc).unsqueeze(0).to(device)
    desc_len = torch.from_numpy(desc_len).clamp(max=config['desc_len']).to(device)
    with torch.no_grad():
        desc_repr = model.desc_encoding(desc, desc_len).data.cpu().numpy().astype(
            np.float32)  # [1 x dim]
    if config['sim_measure'] == 'cos':
        # Pre-normalize so cosine similarity reduces to a dot product.
        desc_repr = normalize(desc_repr)  # [1 x dim]
    results = []
    threads = []
    # One worker per chunk of pre-computed code vectors; workers append into
    # the shared ``results`` list.
    for i, codevecs_chunk in enumerate(codevecs):
        t = threading.Thread(
            target=search_thread,
            args=(results, desc_repr, codevecs_chunk, i, n_results,
                  config['sim_measure']))
        threads.append(t)
    for t in threads:
        t.start()
    for t in threads:  # wait until all sub-threads have completed
        t.join()
    return results
def search(self, model, query, n_results=10):
    """Return the top *n_results* code matches for *query*.

    The query sentence is converted to word indices, embedded by the
    model's description encoder, then compared against every chunk of
    ``self.codevecs`` by one worker thread per chunk. Workers append
    their best candidates into a shared list, which is finally sorted
    in descending order and truncated.
    """
    # Encode the query sentence as word indices.
    encoded = sent2indexes(query, self.vocab_desc)  # convert desc sentence into word indices
    logger.debug("Description representation")
    batched = gVar(np.expand_dims(encoded, axis=0))
    logger.debug("Description embedding")
    query_repr = model.eval().desc_encoding(batched).data.cpu().numpy()

    candidates = []
    # One worker per code-vector chunk; each selects its own best n_results.
    workers = [
        threading.Thread(
            target=self.search_thread,
            args=(candidates, query_repr, chunk, idx, n_results))
        for idx, chunk in enumerate(self.codevecs)
    ]
    for worker in workers:
        worker.start()
    # Block until every worker has contributed its candidates.
    for worker in workers:
        worker.join()

    candidates.sort(reverse=True)
    return candidates[:n_results]
def search(self, model, query, n_results=10):
    """Search the indexed code vectors for matches to *query*.

    Encodes the query with the model's description encoder, then fans
    out one thread per chunk of ``self.codevecs``; each worker appends
    its results into the shared ``codes``/``sims`` lists.

    Returns:
        (codes, sims): candidate snippets and their similarity scores.
    """
    # Query sentence -> word indices -> batched model input.
    indices = sent2indexes(query, self.vocab_desc)  # convert desc sentence into word indices
    batched = gVar(np.expand_dims(indices, axis=0))
    query_repr = model.desc_encoding(batched).data.cpu().numpy()

    codes, sims = [], []
    # One worker thread per chunk of pre-computed code vectors.
    workers = [
        threading.Thread(
            target=self.search_thread,
            args=(codes, sims, query_repr, chunk, idx, n_results))
        for idx, chunk in enumerate(self.codevecs)
    ]
    for worker in workers:
        worker.start()
    # Wait for every worker to finish before returning the shared lists.
    for worker in workers:
        worker.join()
    return codes, sims
def search(config, model, vocab, query, n_results=10):
    """Encode a natural-language *query* and retrieve matching code snippets.

    Args:
        config: dict with at least 'desc_len' (max query length) and
            'sim_measure' ('cos' enables cosine pre-normalization).
        model: trained model exposing ``desc_encoding(desc, desc_len)``.
        vocab: word->index mapping used to encode the query.
        query: query sentence (str).
        n_results: number of candidates each search thread keeps.

    Returns:
        list populated by the ``search_thread`` workers (module-level
        ``codevecs`` chunks are scanned in parallel threads).
    """
    model.eval()
    device = next(model.parameters()).device
    # Use the ``vocab`` parameter — the original referenced a global
    # ``vocab_desc`` and left the parameter unused. (A large slab of
    # commented-out experimental code was also removed here.)
    desc, desc_len = sent2indexes(query, vocab, config['desc_len'])  # query -> word indices
    desc = torch.from_numpy(desc).unsqueeze(0).to(device)
    desc_len = torch.from_numpy(desc_len).clamp(max=config['desc_len']).to(device)
    with torch.no_grad():
        desc_repr = model.desc_encoding(desc, desc_len).data.cpu().numpy().astype(
            np.float32)  # [1 x dim]
    if config['sim_measure'] == 'cos':
        # Pre-normalize so cosine similarity reduces to a dot product.
        desc_repr = normalize(desc_repr)  # [1 x dim]
    results = []
    threads = []
    # One worker per chunk of pre-computed code vectors; workers append into
    # the shared ``results`` list.
    for i, codevecs_chunk in enumerate(codevecs):
        t = threading.Thread(
            target=search_thread,
            args=(results, desc_repr, codevecs_chunk, i, n_results,
                  config['sim_measure']))
        threads.append(t)
    for t in threads:
        t.start()
    for t in threads:  # wait until all sub-threads have completed
        t.join()
    return results
def search(config, model, vocab, query, n_results=10):
    """Encode a natural-language *query* and retrieve matching code snippets.

    Args:
        config: dict with at least 'desc_len' (max query length).
        model: trained model exposing ``desc_encoding(desc, desc_len)``.
        vocab: word->index mapping used to encode the query.
        query: query sentence (str); its whitespace-token count, clamped
            to 'desc_len', is used as the sequence length.
        n_results: number of candidates each search thread keeps.

    Returns:
        (codes, sims): lists populated by the ``search_thread`` workers
        (module-level ``codevecs`` chunks are scanned in parallel).
    """
    model.eval()
    device = next(model.parameters()).device
    # Use the ``vocab`` parameter — the original referenced a global
    # ``vocab_desc`` and left the parameter unused.
    desc = sent2indexes(query, vocab, config['desc_len'])  # query -> word indices
    desc = torch.from_numpy(desc).unsqueeze(0).to(device)
    # Sequence length = number of whitespace tokens, capped at desc_len.
    desc_len = torch.LongTensor(1).fill_(len(query.split())).clamp(
        max=config['desc_len']).to(device)
    with torch.no_grad():
        desc_repr = model.desc_encoding(desc, desc_len).data.cpu().numpy()
    codes, sims = [], []
    threads = []
    # One worker per chunk of pre-computed code vectors; workers append into
    # the shared ``codes``/``sims`` lists.
    for i, codevecs_chunk in enumerate(codevecs):
        t = threading.Thread(
            target=search_thread,
            args=(codes, sims, desc_repr, codevecs_chunk, i, n_results))
        threads.append(t)
    for t in threads:
        t.start()
    for t in threads:  # wait until all sub-threads have completed
        t.join()
    return codes, sims