def test_padded_tensor(self):
    # list of lists
    lol = [[1, 2], [3, 4, 5]]
    output, lens = padded_tensor(lol)
    assert np.all(output.numpy() == np.array([[1, 2, 0], [3, 4, 5]]))
    assert lens == [2, 3]
    output, _ = padded_tensor(lol, left_padded=True)
    assert np.all(output.numpy() == np.array([[0, 1, 2], [3, 4, 5]]))
    output, _ = padded_tensor(lol, pad_idx=99)
    assert np.all(output.numpy() == np.array([[1, 2, 99], [3, 4, 5]]))
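
# For reference, a minimal sketch of the padded_tensor semantics exercised by
# the test above. This is an illustrative reimplementation, not the library
# code (the real ParlAI utility takes more options, e.g. use_cuda); it assumes
# torch is imported at module level, as elsewhere in this file.
def _padded_tensor_sketch(items, pad_idx=0, left_padded=False):
    lens = [len(item) for item in items]
    max_len = max(lens)
    # start from a tensor filled with the pad token
    output = torch.full((len(items), max_len), pad_idx, dtype=torch.long)
    for i, item in enumerate(items):
        row = torch.LongTensor(item)
        if left_padded:
            output[i, max_len - lens[i]:] = row  # pad on the left
        else:
            output[i, :lens[i]] = row  # pad on the right (the default)
    return output, lens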
def _add_prev_responses(self, batch, cands, cand_vecs, label_inds, source):
    assert source not in ['fixed', 'vocab']
    self._extract_prev_responses(batch)

    # Add prev_responses as negatives
    prev_cands = self.prev_responses
    prev_cand_vecs = [
        torch.Tensor(self.dict.txt2vec(cand)) for cand in prev_cands
    ]
    if source == 'batch':
        # Option 1: Change from one set of shared candidates to separate
        # per-example candidate sets
        # cands = [cands + [prev_cand] for prev_cand in prev_cands]
        # list_of_lists_of_cand_vecs = [
        #     [vec for vec in cand_vecs] + [prev_cand_vec]
        #     for prev_cand_vec in prev_cand_vecs
        # ]
        # cand_vecs = padded_3d(list_of_lists_of_cand_vecs, use_cuda=self.use_cuda,
        #                       dtype=cand_vecs[0].dtype)

        # Option 2: Just add all prev responses as shared candidates for the
        # whole batch (doubles the candidate set size, which here equals bsz)
        cands += prev_cands
        cand_vecs, _ = padded_tensor(
            [vec for vec in cand_vecs] + prev_cand_vecs,
            use_cuda=self.use_cuda,
        )
    elif source == 'inline':
        raise NotImplementedError

    return cands, cand_vecs
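
# A shape-level sketch of Option 2 above using toy vectors (illustrative only;
# the function name is hypothetical and not part of the agent). Option 1 would
# instead produce a [bsz, num_cands + 1, seqlen] tensor via padded_3d, giving
# each example its own candidate set.
def _demo_add_prev_responses_option_2():
    cand_vecs = [torch.LongTensor([1, 2, 3]), torch.LongTensor([4, 5])]
    prev_cand_vecs = [torch.LongTensor([6]), torch.LongTensor([7, 8, 9, 10])]
    merged, _ = padded_tensor([vec for vec in cand_vecs] + prev_cand_vecs)
    # merged stays 2-D but grows from [num_cands, seqlen] to
    # [2 * num_cands, seqlen]: every example is now scored against every
    # previous response as well.
    assert merged.shape == (4, 4)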
def predict_satisfaction_by_uncertainty(self, batch):
    # Use the dialog model's confidence to predict the rating of the previous
    # response.
    # HACK: this test is run using a model that was trained on dialog but is
    # now being evaluated on satisfaction. We use a satisfaction dataset so
    # that we have access to the satisfaction labels; hence this sloppy hack
    # to pull out what would have been the candidates for the dialog task. We
    # use histsz=4, ignore the last response (the user's feedback), and use
    # the penultimate utterance as the candidate. The (up to) two utterances
    # before that are context.

    # pull out dialog candidates from text_vecs since this is a satisfaction task
    assert self.opt['history_size'] > 2
    text_vecs = []
    cand_vecs = []
    for vec in batch.text_vec:
        last_p1 = (vec == self.dict.txt2vec('__p1__')[0]).nonzero()[-1].item()
        last_p2 = (vec == self.dict.txt2vec('__p2__')[0]).nonzero()[-1].item()
        text_vecs.append(vec[:last_p2])
        cand_vecs.append(vec[last_p2 + 1:last_p1])
    text_padded, _ = padded_tensor(text_vecs)
    cand_padded, _ = padded_tensor(cand_vecs)

    scores = self.model.score_dialog(text_padded, cand_padded)
    confidences = F.softmax(scores, dim=1).cpu().detach().numpy()
    preds = []
    for example in confidences:
        ranked_confidences = sorted(list(example), reverse=True)
        if self.opt['uncertainty_style'] == 'mag':
            # If the most confident choice isn't confident enough, predict
            # that the response the bot gives will be bad (pred=0)
            mag = ranked_confidences[0]
            preds.append(mag > self.opt['uncertainty_threshold'])
        elif self.opt['uncertainty_style'] == 'gap':
            # If the gap between the first and second most confident choices
            # isn't large enough, predict that the response the bot gives
            # will be bad (pred=0)
            gap = ranked_confidences[0] - ranked_confidences[1]
            preds.append(gap > self.opt['uncertainty_threshold'])

    loss = torch.tensor(0)
    preds = torch.LongTensor(preds)
    labels = torch.LongTensor([int(l) == 1 for l in batch.labels])
    batchsize = len(labels)
    self.update_sat_metrics(loss, preds, labels, batchsize)
    return preds
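
# A worked example of the two uncertainty styles above (numbers are
# illustrative). Given one row of softmaxed candidate scores,
#
#     confidences = [0.55, 0.30, 0.10, 0.05]
#
# with --uncertainty-threshold 0.5:
#   'mag' looks at the top confidence alone: mag = 0.55 > 0.5, so the
#       prediction is 1 (the response will be good);
#   'gap' looks at the margin between the top two: gap = 0.55 - 0.30 = 0.25,
#       which is below 0.5, so the prediction is 0 (the response will be bad).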
def eval_step(self, batch):
    """
    Evaluate a single batch of examples.
    """
    if batch.text_vec is None:
        return
    bsz = batch.text_vec.size(0)
    self.model.eval()
    cand_scores = None
    token_losses = None

    if batch.label_vec is not None:
        # calculate loss on targets with teacher forcing
        loss, model_output = self.compute_loss(batch, return_output=True)
        self.metrics['loss'] += loss.item()
        if self.output_token_losses:
            token_losses = self._construct_token_losses(
                batch.label_vec, model_output
            )

    preds = None
    if self.skip_generation:
        warn_once(
            "--skip-generation does not produce accurate metrics beyond ppl",
            RuntimeWarning,
        )
    else:
        maxlen = self.label_truncate or 256
        beam_preds_scores, _ = self._generate(batch, self.beam_size, maxlen)
        preds, scores = zip(*beam_preds_scores)

    cand_choices = None
    # TODO: abstract out the scoring here
    if self.rank_candidates:
        # compute roughly ppl to rank candidates
        cand_choices = []
        encoder_states = self.model.encoder(*self._model_input(batch))
        for i in range(bsz):
            num_cands = len(batch.candidate_vecs[i])
            enc = self.model.reorder_encoder_states(encoder_states, [i] * num_cands)
            cands, _ = padded_tensor(
                batch.candidate_vecs[i], self.NULL_IDX, self.use_cuda
            )
            scores, _ = self.model.decode_forced(enc, cands)
            cand_losses = F.cross_entropy(
                scores.view(num_cands * cands.size(1), -1),
                cands.view(-1),
                reduction='none',
            ).view(num_cands, cands.size(1))
            # now cand_losses is cands x seqlen size, but we still need to
            # check padding and such
            mask = (cands != self.NULL_IDX).float()
            cand_scores = (cand_losses * mask).sum(dim=1) / (mask.sum(dim=1) + 1e-9)
            _, ordering = cand_scores.sort()
            cand_choices.append([batch.candidates[i][o] for o in ordering])

    text = [self._v2t(p) for p in preds] if preds is not None else None
    return Output(text, cand_choices, token_losses=token_losses)
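
# Standalone sketch of the masked, length-normalized NLL ranking used in the
# rank_candidates branch above, so the padding arithmetic is easy to inspect
# in isolation (a hypothetical helper, not part of the agent):
def _rank_by_masked_nll(token_losses, cands, null_idx):
    """token_losses: [num_cands, seqlen] per-token cross-entropy;
    cands: [num_cands, seqlen] padded candidate tokens."""
    # count only real (non-pad) tokens toward each candidate's score
    mask = (cands != null_idx).float()
    # mean NLL per candidate; the epsilon guards against all-pad rows
    scores = (token_losses * mask).sum(dim=1) / (mask.sum(dim=1) + 1e-9)
    # lower mean NLL ~ lower perplexity ~ better candidate, so ascending sort
    _, ordering = scores.sort()
    return ordering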
def batchify(self, obs_batch):
    """
    Wizard custom batchify, which passes along the knowledge/title.

    Following the docstring of TorchAgent.batchify, it calls super, then
    uses an extended version of the torch_agent.Batch namedtuple.

    The purpose of extending the info is to keep track of some custom metrics.
    """
    batch = super().batchify(obs_batch)
    reordered_observations = [obs_batch[i] for i in batch.valid_indices]
    is_training = 'labels' in reordered_observations[0]

    # first parse and compile all the knowledge together
    all_knowledges = []  # list-of-lists knowledge items for each observation
    knowledge_counts = []  # how much knowledge each observation gets
    for obs in reordered_observations:
        obs_know = self._parse_knowledge(obs)
        # downsample if desired
        if is_training and self.max_knowledge and len(obs_know) > self.max_knowledge:
            # offset by one so that we don't choose 0
            keepers = 1 + np.random.choice(
                len(obs_know) - 1, self.max_knowledge, False
            )
            # correct answer is always the first one
            keepers[0] = 0
            obs_know = [obs_know[i] for i in keepers]
        all_knowledges.append(obs_know)
        knowledge_counts.append(len(obs_know))

    # now we want to actually pack this into a tensor, along with the mask
    N = len(reordered_observations)
    K = max(knowledge_counts)
    # round out the array so everything is equally sized
    for i in range(N):
        all_knowledges[i] += [''] * (K - knowledge_counts[i])
    flattened_knowledge = list(chain(*all_knowledges))

    knowledge_vec = [
        self._vectorize_text(
            # the beginning of the sentence is more useful
            k,
            truncate=self.knowledge_truncate,
            add_end=True,
            truncate_left=False,
        )
        for k in flattened_knowledge
    ]
    knowledge_vec, _ = padded_tensor(
        knowledge_vec, self.NULL_IDX, self.use_cuda, left_padded=True
    )
    knowledge_vec[:, -1] = self.END_IDX
    T = knowledge_vec.size(-1)
    knowledge_vec = knowledge_vec.view(N, K, T)

    # knowledge mask is an N x K tensor saying which items we're allowed to
    # attend over
    bsz = len(reordered_observations)
    ck_mask = th.zeros(bsz, K, dtype=th.uint8)
    for i, klen in enumerate(knowledge_counts):
        ck_mask[i, :klen] = 1
    ck_mask = ck_mask != 0  # for pytorch 1.0/1.2 uint8/bool compatibility
    # and the correct labels
    cs_ids = th.LongTensor(bsz).zero_()

    if self.use_cuda:
        knowledge_vec = knowledge_vec.cuda()
        ck_mask = ck_mask.cuda()
        cs_ids = cs_ids.cuda()

    batch['know_vec'] = knowledge_vec
    batch['ck_mask'] = ck_mask
    batch['cs_ids'] = cs_ids
    batch['use_cs_ids'] = is_training
    batch['knowledge'] = np.array(flattened_knowledge).reshape(N, K)
    return batch
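
# Minimal sketch of the N x K attention mask built in batchify above, isolated
# from the agent (a hypothetical helper for illustration; it mirrors the
# uint8-then-compare dance used above for PyTorch 1.0/1.2 compatibility):
def _build_knowledge_mask(knowledge_counts):
    """knowledge_counts[i] = number of real knowledge items for example i;
    padded slots beyond that count are masked out."""
    N, K = len(knowledge_counts), max(knowledge_counts)
    ck_mask = th.zeros(N, K, dtype=th.uint8)
    for i, klen in enumerate(knowledge_counts):
        ck_mask[i, :klen] = 1
    return ck_mask != 0  # cast to bool

# e.g. _build_knowledge_mask([2, 3]) ->
# tensor([[ True,  True, False],
#         [ True,  True,  True]])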
def eval_step(self, batch):
    """Process batch of inputs.

    If the batch includes labels, calculate validation metrics as well.
    If --skip-generation is not set, return a prediction for each input.

    :param batch: parlai.core.torch_agent.Batch, contains tensorized
        version of observations.
    """
    if batch.text_vec is None:
        return
    self.is_training = False
    samples = self._make_sample(batch)
    self.model.eval()

    if batch.label_vec is not None and self.trainer is not None:
        # Interactive mode won't have a gold label
        metrics = self.trainer.valid_step(samples)
        self._update_metrics(metrics, samples)

    # Output placeholders
    reranked_cands = None
    generated_output = None

    # Grade each of the candidate sequences
    if batch.candidate_vecs is not None:
        bsz = len(batch.text_vec)
        reranked_cands = []
        # score the candidates for each item in the batch separately, so that
        # we can support a variable number of candidates
        for i in range(bsz):
            cands = batch.candidate_vecs[i]
            if not cands:
                reranked_cands.append(None)
                continue
            ncand = len(cands)
            # repeat the input many times
            xs = batch.text_vec[i].unsqueeze(0).expand(ncand, -1)
            # some models crash if there's leading padding on every example
            xs = xs[:, :batch.text_lengths[i]]
            # and appropriately pack the outputs
            ys, _ = padded_tensor(cands, self.NULL_IDX, self.use_cuda)
            s = self._make_sample(xs=xs, ys=ys)
            # perform the actual grading, extract the scores
            scored = list(self.scorer.score_batched_itr([s], cuda=self.use_cuda))
            scores = [s[3][0]['score'].item() for s in scored]
            # intentional hanging comma here; argsort returns a list
            ranked, = argsort(scores, batch.candidates[i], descending=True)
            reranked_cands.append(ranked)

    # Next generate freely to create our response
    if not self.args.skip_generation:
        generated_output = self._generate(samples)
    elif reranked_cands:
        # we're skipping generation, but we're also grading candidates,
        # so output the highest-ranked candidate
        # In the case of zero candidates, we don't have anything to rank,
        # so we may need to pass on that None
        generated_output = [
            ranked[0] if ranked else None for ranked in reranked_cands
        ]
    else:
        # no output at all
        pass

    return Output(generated_output, reranked_cands)
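
# Sketch of the argsort utility as used above: it sorts any number of
# parallel lists by a shared list of keys and returns the reordered lists.
# This is a simplified reimplementation for illustration, assuming that
# contract; it is not the library code.
def _argsort_sketch(keys, *lists, descending=False):
    ind_sorted = sorted(range(len(keys)), key=lambda k: keys[k], reverse=descending)
    return [[lst[i] for i in ind_sorted] for lst in lists]

# e.g. _argsort_sketch([0.1, 0.9], ['a', 'b'], descending=True) -> [['b', 'a']],
# which is why the call site unpacks the single returned list with a hanging
# comma: `ranked, = argsort(...)`.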
def eval_step(self, batch):
    """Evaluate a single batch of examples."""
    if batch.text_vec is None:
        return
    bsz = batch.text_vec.size(0)
    self.model.eval()
    cand_scores = None

    if batch.label_vec is not None:
        # calculate loss on targets with teacher forcing
        loss = self.compute_loss(batch)  # noqa: F841 we need the side effects
        self.metrics['loss'] += loss.item()

    preds = None
    if self.skip_generation:
        warn_once(
            "--skip-generation does not produce accurate metrics beyond ppl",
            RuntimeWarning,
        )
    else:
        maxlen = self.label_truncate or 256
        beam_preds_scores, _ = self._generate(batch, self.beam_size, maxlen)
        preds, scores = zip(*beam_preds_scores)

    cand_choices = None
    # TODO: abstract out the scoring here
    if self.rank_candidates:
        # compute roughly ppl to rank candidates
        cand_choices = []
        encoder_states = self.model.encoder(*self._model_input(batch))
        for i in range(bsz):
            num_cands = len(batch.candidate_vecs[i])
            enc = self.model.reorder_encoder_states(encoder_states, [i] * num_cands)
            cands, _ = padded_tensor(
                batch.candidate_vecs[i], self.NULL_IDX, self.use_cuda
            )
            scores, _ = self.model.decode_forced(enc, cands)
            cand_losses = F.cross_entropy(
                scores.view(num_cands * cands.size(1), -1),
                cands.view(-1),
                reduction='none',
            ).view(num_cands, cands.size(1))
            # now cand_losses is cands x seqlen size, but we still need to
            # check padding and such
            mask = (cands != self.NULL_IDX).float()
            cand_scores = (cand_losses * mask).sum(dim=1) / (mask.sum(dim=1) + 1e-9)
            _, ordering = cand_scores.sort()
            cand_choices.append([batch.candidates[i][o] for o in ordering])

    if batch.label_vec is not None and not self.skip_generation:
        label_text = batch.labels
        # we are in validation mode: print some generated responses for debugging
        for i in range(len(preds)):
            if random.random() > (1 - self.opt['report_freq']):
                context_text = batch.observations[i]['text']
                print('TEXT: ', context_text)
                print('TARGET: ', self._v2t(batch.label_vec[i]))
                print('PREDICTION: ', self._v2t(preds[i]), '\n~')
    else:
        label_text = None

    if self.skip_generation:
        return None

    text = [self._v2t(p) for p in preds] if preds is not None else None
    context = [obs['text'] for obs in batch.observations]
    return Output(text, cand_choices), label_text, context
def _build_candidates(self, batch, source, mode):
    """
    Build a candidate set for this batch.

    :param batch:
        a Batch object (defined in torch_agent.py)
    :param source:
        the source from which candidates should be built, one of
        ['batch', 'batch-all-cands', 'inline', 'fixed']
    :param mode:
        'train' or 'eval'

    :return: tuple of tensors (cands, cand_vecs, label_inds)

        cands: A [num_cands] list of (text) candidates
            OR a [batchsize] list of such lists if source=='inline'
        cand_vecs: A padded [num_cands, seqlen] LongTensor of vectorized
            candidates
            OR a [batchsize, num_cands, seqlen] LongTensor if source=='inline'
        label_inds: A [bsz] LongTensor of the indices of the labels for each
            example from its respective candidate set

    Possible sources of candidates:

        * batch: the set of all labels in this batch
            Use all labels in the batch as the candidate set (with all but the
            example's label being treated as negatives).
            Note: with this setting, the candidate set is identical for all
            examples in a batch. This option may be undesirable if it is
            possible for duplicate labels to occur in a batch, since the
            second instance of the correct label will be treated as a
            negative.
        * batch-all-cands: the set of all candidates in this batch
            Use all candidates in the batch as candidate set.
            Note 1: This can result in a very large number of candidates.
            Note 2: In this case we will deduplicate candidates.
            Note 3: just like with 'batch' the candidate set is identical
            for all examples in a batch.
        * inline: batch_size lists, one list per example
            If each example comes with a list of possible candidates, use
            those.
            Note: With this setting, each example will have its own candidate
            set.
        * fixed: one global candidate list, provided in a file from the user
            If self.fixed_candidates is not None, use a set of fixed
            candidates for all examples.
            Note: this setting is not recommended for training unless the
            universe of possible candidates is very small.
        * vocab: one global candidate list, extracted from the vocabulary with
            the exception of self.NULL_IDX.
    """
    label_vecs = batch.label_vec  # [bsz, seqlen] LongTensor of padded labels
    label_inds = None
    batchsize = (
        batch.text_vec.size(0)
        if batch.text_vec is not None
        else batch.image.size(0)
    )

    if label_vecs is not None:
        assert label_vecs.dim() == 2

    if source == 'batch':
        warn_once(
            '[ Executing {} mode with batch labels as set of candidates. ]'
            ''.format(mode)
        )
        if batchsize == 1:
            warn_once(
                "[ Warning: using candidate source 'batch' and observed a "
                "batch of size 1. This may be due to uneven batch sizes at "
                "the end of an epoch. ]"
            )
        if label_vecs is None:
            raise ValueError(
                "If using candidate source 'batch', then batch.label_vec cannot be "
                "None."
            )

        cands = batch.labels
        cand_vecs = label_vecs
        label_inds = label_vecs.new_tensor(range(batchsize))

    elif source == 'batch-all-cands':
        warn_once(
            '[ Executing {} mode with all candidates provided in the batch ]'
            ''.format(mode)
        )
        if batch.candidate_vecs is None:
            raise ValueError(
                "If using candidate source 'batch-all-cands', then batch."
                "candidate_vecs cannot be None. If your task does not have "
                "inline candidates, consider using one of "
                "--{m}={{'batch','fixed','vocab'}}."
                "".format(m='candidates' if mode == 'train' else 'eval-candidates')
            )
        # initialize the list of cands with the labels
        cands = []
        all_cands_vecs = []
        # dictionary used for deduplication
        cands_to_id = {}
        for i, cands_for_sample in enumerate(batch.candidates):
            for j, cand in enumerate(cands_for_sample):
                if cand not in cands_to_id:
                    cands.append(cand)
                    cands_to_id[cand] = len(cands_to_id)
                    all_cands_vecs.append(batch.candidate_vecs[i][j])
        cand_vecs, _ = padded_tensor(
            all_cands_vecs,
            self.NULL_IDX,
            use_cuda=self.use_cuda,
            fp16friendly=self.fp16,
        )
        label_inds = label_vecs.new_tensor(
            [cands_to_id[label] for label in batch.labels]
        )

    elif source == 'inline':
        warn_once(
            '[ Executing {} mode with provided inline set of candidates ]'
            ''.format(mode)
        )
        if batch.candidate_vecs is None:
            raise ValueError(
                "If using candidate source 'inline', then batch.candidate_vecs "
                "cannot be None. If your task does not have inline candidates, "
                "consider using one of --{m}={{'batch','fixed','vocab'}}."
                "".format(m='candidates' if mode == 'train' else 'eval-candidates')
            )

        cands = batch.candidates
        cand_vecs = padded_3d(
            batch.candidate_vecs,
            self.NULL_IDX,
            use_cuda=self.use_cuda,
            fp16friendly=self.fp16,
        )
        if label_vecs is not None:
            label_inds = label_vecs.new_empty((batchsize))
            bad_batch = False
            for i, label_vec in enumerate(label_vecs):
                label_vec_pad = label_vec.new_zeros(
                    cand_vecs[i].size(1)
                ).fill_(self.NULL_IDX)
                if cand_vecs[i].size(1) < len(label_vec):
                    label_vec = label_vec[0:cand_vecs[i].size(1)]
                label_vec_pad[0:label_vec.size(0)] = label_vec
                label_inds[i] = self._find_match(cand_vecs[i], label_vec_pad)
                if label_inds[i] == -1:
                    bad_batch = True
            if bad_batch:
                if self.ignore_bad_candidates and not self.is_training:
                    label_inds = None
                else:
                    raise RuntimeError(
                        'At least one of your examples has a set of label candidates '
                        'that does not contain the label. To ignore this error '
                        'set `--ignore-bad-candidates True`.'
                    )

    elif source == 'fixed':
        if self.fixed_candidates is None:
            raise ValueError(
                "If using candidate source 'fixed', then you must provide the path "
                "to a file of candidates with the flag --fixed-candidates-path or "
                "the name of a task with --fixed-candidates-task."
            )
        warn_once(
            "[ Executing {} mode with a common set of fixed candidates "
            "(n = {}). ]".format(mode, len(self.fixed_candidates))
        )

        cands = self.fixed_candidates
        cand_vecs = self.fixed_candidate_vecs

        if label_vecs is not None:
            label_inds = label_vecs.new_empty((batchsize))
            bad_batch = False
            for batch_idx, label_vec in enumerate(label_vecs):
                max_c_len = cand_vecs.size(1)
                label_vec_pad = label_vec.new_zeros(max_c_len).fill_(self.NULL_IDX)
                if max_c_len < len(label_vec):
                    label_vec = label_vec[0:max_c_len]
                label_vec_pad[0:label_vec.size(0)] = label_vec
                label_inds[batch_idx] = self._find_match(cand_vecs, label_vec_pad)
                if label_inds[batch_idx] == -1:
                    bad_batch = True
            if bad_batch:
                if self.ignore_bad_candidates and not self.is_training:
                    label_inds = None
                else:
                    raise RuntimeError(
                        'At least one of your examples has a set of label candidates '
                        'that does not contain the label. To ignore this error '
                        'set `--ignore-bad-candidates True`.'
                    )

    elif source == 'vocab':
        warn_once(
            '[ Executing {} mode with tokens from vocabulary as candidates. ]'
            ''.format(mode)
        )
        cands = self.vocab_candidates
        cand_vecs = self.vocab_candidate_vecs
        # NOTE: label_inds is None here, as we will not find the label in
        # the set of vocab candidates

    else:
        raise Exception("Unrecognized source: %s" % source)

    return (cands, cand_vecs, label_inds)