def train_step(self, batch):
    """Train on a single batch of examples."""
    if batch.text_vec is None:
        return
    batchsize = batch.text_vec.size(0)
    self.model.train()
    self.zero_grad()

    cands, cand_vecs, label_inds = self._build_candidates(
        batch, source=self.opt['candidates'], mode='train')
    scores = self.score_candidates(batch, cand_vecs)
    loss = self.rank_loss(scores, label_inds)

    # Update loss
    self.metrics['loss'] += loss.item()
    self.metrics['examples'] += batchsize
    loss.backward()
    self.update_params()

    # Get train predictions
    if self.opt['candidates'] == 'batch':
        self.get_batch_train_metrics(scores)
        return Output()
    if not self.opt.get('train_predict', False):
        warn_once(
            "Some training metrics are omitted for speed. Set the flag "
            "`--train-predict` to calculate train metrics.")
        return Output()
    return self.get_train_preds(scores, label_inds, cands, cand_vecs)
def is_valid(self, obs):
    """
    Override from TorchAgent.

    Check to see if the label candidates contain the label.
    """
    if not self.ignore_bad_candidates:
        return super().is_valid(obs)

    if not super().is_valid(obs):
        return False

    # skip examples for which the set of label candidates does not
    # contain the label
    if 'labels_vec' in obs and 'label_candidates_vecs' in obs:
        cand_vecs = obs['label_candidates_vecs']
        label_vec = obs['labels_vec']
        matches = [x for x in cand_vecs if torch.equal(x, label_vec)]
        if len(matches) == 0:
            warn_once(
                'At least one example has a set of label candidates that '
                'does not contain the label.')
            return False

    return True
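# Hedged, standalone sketch (not from the original source): the membership
# test above is an exact element-wise tensor comparison of the vectorized
# label against each vectorized candidate. Toy tensors below are illustrative.
import torch

_label = torch.tensor([4, 8, 2])
_cands = [torch.tensor([4, 8]), torch.tensor([4, 8, 2])]
print(any(torch.equal(c, _label) for c in _cands))  # True: an exact match exists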
def receive_metrics(self, metrics_dict):
    """Use the metrics to decide when to adjust the LR schedule.

    This uses the loss as the validation metric if present; if not,
    this function does nothing. Note that the model must be reporting
    loss for this to work.

    Override this to override the behavior.
    """
    if self._is_lr_warming_up():
        # we're not done warming up, so don't start using validation
        # metrics to adjust the schedule
        return
    if self.opt['lr_scheduler'] == 'reduceonplateau':
        if 'loss' not in metrics_dict:
            # nothing to step on, just skip
            warn_once("LR scheduler expected to see loss metric, but didn't.")
            return
        self.scheduler.step(metrics_dict['loss'])
    elif self.opt['lr_scheduler'] == 'fixed':
        self.scheduler.step()
    elif self.opt['lr_scheduler'] == 'invsqrt':
        # this is a training-step lr scheduler, nothing to adjust in validation
        pass
    elif self.opt['lr_scheduler'] == 'none':
        # no adjustments, do nothing
        pass
    else:
        raise ValueError(
            "Don't know how to work with lr scheduler '{}'"
            .format(self.opt['lr_scheduler'])
        )
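# Hedged sketch (standalone, plain torch.optim; not part of the original
# source): how the 'reduceonplateau' branch above behaves. The toy model and
# loss values are assumptions for illustration only.
import torch
from torch import nn, optim

_model = nn.Linear(4, 1)                      # toy model
_optimizer = optim.SGD(_model.parameters(), lr=0.1)
_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    _optimizer, mode='min', factor=0.5, patience=1
)

for _val_loss in [1.0, 1.0, 1.0, 0.9]:        # stand-in for metrics_dict['loss']
    _scheduler.step(_val_loss)                # same call shape as self.scheduler.step(...)
print(_optimizer.param_groups[0]['lr'])       # lr reduced after the plateau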
def feedback_step(self, batch):
    batchsize = batch.text_vec.size(0)
    warn_once("WARNING: feedback candidates are hardcoded to batch")
    if self.model.training:
        cands, cand_vecs, label_inds = self._build_candidates(
            batch, source='batch', mode='train')
    else:
        cands, cand_vecs, label_inds = self._build_candidates(
            batch, source='batch', mode='eval')

    scores = self.model.score_feedback(batch.text_vec, cand_vecs)
    _, ranks = scores.sort(1, descending=True)

    if self.model.training:
        # Get predictions but not full rankings for the sake of speed
        cand_ranked = None
        preds = [cands[ordering[0]] for ordering in ranks]
    else:
        # Return full rankings to calculate hits@ metrics
        cand_ranked = []
        for ordering in ranks:
            cand_ranked.append([cands[rank] for rank in ordering])
        preds = [cand_ranked[i][0] for i in range(batchsize)]

    if label_inds is None:
        loss = None
    else:
        loss = self.rank_loss(scores, label_inds)
        self.update_fee_metrics(loss, ranks, label_inds, batchsize)
    return loss, preds, cand_ranked
def forward(self, input, encoder_state, incr_state=None):
    """
    Forward pass.

    :param LongTensor[batch,seqlen] input:
        The decoder inputs (partial or full decoded token IDs).
    :param encoder_state:
        Output from the encoder module forward pass.
    :param incr_state:
        Ignored. Should always be ``None`` in this version.
    """
    encoder_output, encoder_mask = encoder_state

    seq_len = input.size(1)
    positions = input.new(seq_len).long()
    positions = torch.arange(seq_len, out=positions).unsqueeze(0)
    tensor = self.embeddings(input)
    if self.embeddings_scale:
        tensor = tensor * np.sqrt(self.dim)
    if self.variant == 'xlm':
        tensor = _normalize(tensor, self.norm_embeddings)
    if positions.max().item() > self.n_positions:
        warn_once(
            'You are inputting a sequence of {x} length, but only have '
            '--n-positions {y}. Set --truncate or increase --n-positions'
            .format(x=positions.max().item(), y=self.n_positions))
    tensor = tensor + self.position_embeddings(positions).expand_as(tensor)
    tensor = self.dropout(tensor)  # --dropout

    for layer in self.layers:
        tensor = layer(tensor, encoder_output, encoder_mask)

    return tensor, None
def _path(opt):
    build(opt)
    datatype = opt['datatype'].split(':')[0]
    if datatype == 'test':
        warn_once("WARNING: Test set not included. Setting datatype to valid.")
        datatype = 'valid'
    return make_path(opt, datatype + '.txt')
def _path(opt):
    # Build the data if it doesn't exist.
    build(opt)
    dt = opt['datatype'].split(':')[0]
    if dt == 'test':
        warn_once("WARNING: Test set not included. Setting datatype to valid.")
        dt = 'valid'
    return os.path.join(opt['datapath'], 'CoQA', dt + '.txt')
def forward(self, input, positions=None, segments=None):
    """
    Forward pass.

    :param LongTensor[batch,seqlen] input:
        The input IDs. The attention mask is derived from them: positions
        equal to the padding index are ignored.
    :param LongTensor[batch,seqlen] positions:
        Positions for the position embeddings. If None, they are computed
        from the padding mask.
    :param LongTensor[batch,seqlen] segments:
        If provided, additionally adds ``segments`` as extra embedding
        features.
    """
    mask = input != self.padding_idx
    if positions is None:
        positions = (mask.cumsum(dim=1, dtype=torch.int64) - 1).clamp_(min=0)
    tensor = self.embeddings(input)
    if self.embeddings_scale:
        tensor = tensor * np.sqrt(self.dim)
    if positions.max().item() > self.n_positions:
        warn_once(
            'You are inputting a sequence of {x} length, but only have '
            '--n-positions {y}. Set --truncate or increase --n-positions'
            .format(x=positions.max().item(), y=self.n_positions)
        )
    tensor = tensor + self.position_embeddings(positions).expand_as(tensor)

    if self.n_segments >= 1:
        if segments is None:
            segments = torch.zeros_like(input)
        tensor = tensor + self.segment_embeddings(segments)

    if self.variant == 'xlm':
        tensor = _normalize(tensor, self.norm_embeddings)

    # --dropout on the embeddings
    tensor = self.dropout(tensor)

    tensor *= mask.unsqueeze(-1).type_as(tensor)
    for i in range(self.n_layers):
        tensor = self.layers[i](tensor, mask)

    tensor *= self.output_scaling
    if self.reduction_type == 'first':
        return tensor[:, 0, :]
    elif self.reduction_type == 'max':
        return tensor.max(dim=1)[0]
    elif self.reduction_type == 'mean':
        divisor = mask.float().sum(dim=1).unsqueeze(-1).clamp(min=1).type_as(tensor)
        output = tensor.sum(dim=1) / divisor
        return output
    elif self.reduction_type == 'none' or self.reduction_type is None:
        output = tensor
        return output, mask
    else:
        raise ValueError(
            "Can't handle --reduction-type {}".format(self.reduction_type)
        )
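# Hedged sketch (plain torch, not from the original source): the 'mean'
# reduction above is a masked average over non-padding positions. The toy
# tensor and mask below are assumptions for illustration.
import torch

_tensor = torch.randn(2, 3, 4)                       # [batch, seqlen, dim]
_mask = torch.tensor([[1, 1, 0], [1, 1, 1]]).bool()  # 1 = real token, 0 = padding
_divisor = _mask.float().sum(dim=1).unsqueeze(-1).clamp(min=1)
_mean = (_tensor * _mask.unsqueeze(-1).float()).sum(dim=1) / _divisor
print(_mean.shape)                                   # torch.Size([2, 4])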
def __init__(self, opt, shared=None):
    if opt['interactive']:
        print("[ Setting interactive mode defaults... ]")
        opt['prev_response_filter'] = True
        opt['person_tokens'] = True

    # Set subtasks first so that opt['subtasks'] is set before build_model()
    self.set_subtasks(opt)
    self.multitask = len(self.subtasks) > 1
    if not self.multitask:
        self.subtask = self.subtasks[0]

    if opt['prev_response_negatives']:
        if opt['candidates'] in ['fixed', 'vocab']:
            msg = (
                "[ Option --prev-response-negatives=True is incompatible with "
                "--candidates=['fixed','vocab']. Overriding it to False. ]"
            )
            warn_once(msg)
            self.opt['prev_response_negatives'] = False
        self.prev_responses = None
    if opt['prev_response_filter']:
        if not opt['interactive']:
            msg = (
                "[ Option --prev-response-filter=True can only be used when "
                "--interactive=True. Overriding it to False. ]"
            )
            warn_once(msg)
            self.opt['prev_response_filter'] = False
        self.prev_response = None

    super().__init__(opt, shared)

    self.status = NORMAL

    if self.opt['interactive']:
        assert 'dialog' in self.subtasks
        assert 'sentiment' in self.subtasks
    else:
        assert not self.opt['request_explanation']
        assert not self.opt['request_rating']

    self.task_weight = {
        'dialog': opt['dia_weight'],
        'explanation': opt['exp_weight'],
        'sentiment': opt['sen_weight'],
    }

    # dialog/explanation tasks use self.rank_loss from TorchRankerAgent
    # Don't use BCEWithLogitsLoss since we need the probs from the sigmoid anyway
    self.sentiment_criterion = nn.BCELoss(reduce=True, size_average=False)

    # Set rating classifier
    if opt['regex']:
        self.rating_classifier = FeedbackClassifierRegex()

    random.seed()

    # Overwrite the deque; keep the whole history and slice it
    self.history = []

    self.reset()
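# Hedged sketch (standalone, illustration only): nn.BCELoss as configured
# above expects probabilities that have already passed through a sigmoid.
# The deprecated reduce=True/size_average=False pair corresponds to summing
# the per-element losses (reduction='sum' in current PyTorch).
import torch
from torch import nn

_probs = torch.sigmoid(torch.randn(3))      # model outputs after sigmoid
_targets = torch.tensor([1.0, 0.0, 1.0])    # toy binary sentiment labels
print(nn.BCELoss(reduction='sum')(_probs, _targets).item())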
def eval_step(self, batch):
    """Evaluate a single batch of examples."""
    if batch.text_vec is None:
        return
    self.model.eval()

    if batch.label_vec is not None:
        # calculate loss on targets with teacher forcing
        # loss = self.compute_loss(batch)  # noqa: F841  we need the side effects
        # self.metrics['loss'] += loss.item()
        valid_loss = self.model.valid(
            batch.text_vec, batch.context_lens, batch.text_lengths,
            batch.floors, batch.label_vec, batch.label_lengths,
            self.criterion)
        self.metrics['correct_tokens'] += valid_loss['correct_tokens']
        self.metrics['nll_loss'] += valid_loss['nll_loss']
        self.metrics['num_tokens'] += valid_loss['num_tokens']
        self.metrics['loss'] += valid_loss['avg_loss']
        if self.opt.get('hred', False):
            pass
        elif self.opt.get('vhred', False):
            self.metrics['kl_loss_cnt'] += 1
            self.metrics['kl_loss'] += valid_loss['vhred_kl_loss']
            self.metrics['bow_loss_cnt'] += 1
            self.metrics['bow_loss'] += valid_loss['bow_loss']
        else:
            self.metrics['loss_G_cnt'] += 1
            self.metrics['loss_G'] += valid_loss['valid_loss_G']
            self.metrics['loss_D_cnt'] += 1
            self.metrics['loss_D'] += valid_loss['valid_loss_D']

    preds = None
    if self.skip_generation:
        # noinspection PyTypeChecker
        warn_once(
            "--skip-generation does not produce accurate metrics beyond ppl",
            RuntimeWarning)
    else:
        sample_words, sample_lens = self.model.sample(
            batch.text_vec, batch.context_lens, batch.text_lengths,
            batch.floors, self.START_IDX, self.END_IDX)
        preds = torch.from_numpy(sample_words)

    if batch.label_vec is not None:
        label_text = batch.labels
        # we are in the validation mode, print some generated responses for debugging
        for i in range(len(preds)):
            if random.random() > (1 - self.opt['report_freq']):
                context_text = batch.observations[i]['text']
                print('TEXT: ', context_text)
                print('TARGET: ', label_text[i])
                print('PREDICTION: ', self._v2t(preds[i]), '\n~')
    else:
        label_text = None

    context = [obs['text'] for obs in batch.observations]
    text = [self._v2t(p) for p in preds] if preds is not None else None
    return Output(text, None), label_text, context
def _path(opt, teacher_name):
    # Build the data if it doesn't exist.
    build(opt)
    dt = opt['datatype'].split(':')[0]
    if dt == 'test':
        warn_once('WARNING: Test set not included. Setting datatype to valid.')
        dt = 'valid'
    if dt == 'valid':
        dt = dt + '_' + teacher_name
    return os.path.join(opt['datapath'], 'HotpotQA', dt + '.txt')
def _upgrade_opt(self, opt):
    model_opt = opt['model_file'] + '.opt'
    if not os.path.isfile(model_opt):
        return
    old_opt = load_opt_file(model_opt)
    if 'add_cls_token' not in old_opt:
        # old model, make this default to False
        warn_once('Old model: overriding `add_cls_token` to False.')
        opt['add_cls_token'] = False
    return
def _path(opt, persona, use_cands):
    # Build the data if it doesn't exist.
    build(opt)
    datatype = opt['datatype'].split(':')[0]
    if datatype == 'test':
        warn_once("WARNING: Test set not included. Setting datatype to valid.")
        datatype = 'valid'
    dt = datatype + '_' + persona
    cands = '' if use_cands else '_no_cands'
    return os.path.join(opt['datapath'], 'ConvAI2', dt + cands + '.txt')
def eval_step(self, batch):
    """Evaluate a single batch of examples."""
    if batch.text_vec is None:
        return
    bsz = batch.text_vec.size(0)
    self.model.eval()
    cand_scores = None

    if batch.label_vec is not None:
        # calculate loss on targets with teacher forcing
        loss = self.compute_loss(batch)  # noqa: F841  we need the side effects
        self.metrics['loss'] += loss.item()

    preds = None
    if self.skip_generation:
        warn_once(
            "--skip-generation does not produce accurate metrics beyond ppl",
            RuntimeWarning,
        )
    else:
        maxlen = self.label_truncate or 256
        beam_preds_scores, _ = self._generate(batch, self.beam_size, maxlen)
        preds, scores = zip(*beam_preds_scores)

    cand_choices = None
    # TODO: abstract out the scoring here
    if self.rank_candidates:
        # compute roughly ppl to rank candidates
        cand_choices = []
        encoder_states = self.model.encoder(*self._model_input(batch))
        for i in range(bsz):
            num_cands = len(batch.candidate_vecs[i])
            enc = self.model.reorder_encoder_states(encoder_states, [i] * num_cands)
            cands, _ = padded_tensor(
                batch.candidate_vecs[i], self.NULL_IDX, self.use_cuda)
            scores, _ = self.model.decode_forced(enc, cands)
            cand_losses = F.cross_entropy(
                scores.view(num_cands * cands.size(1), -1),
                cands.view(-1),
                reduction='none',
            ).view(num_cands, cands.size(1))
            # now cand_losses is cands x seqlen size, but we still need to
            # check padding and such
            mask = (cands != self.NULL_IDX).float()
            cand_scores = (cand_losses * mask).sum(dim=1) / (mask.sum(dim=1) + 1e-9)
            _, ordering = cand_scores.sort()
            cand_choices.append([batch.candidates[i][o] for o in ordering])

    text = [self._v2t(p) for p in preds] if preds is not None else None
    return Output(text, cand_choices)
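# Hedged sketch (plain torch, illustration only): the candidate-ranking block
# above averages per-token cross entropy over the non-padding tokens of each
# candidate. Shapes and the toy vocabulary size are assumptions.
import torch
import torch.nn.functional as F

NULL_IDX = 0
_cands = torch.tensor([[5, 6, 0], [7, 0, 0]])          # [num_cands, seqlen], 0 = pad
_logits = torch.randn(2, 3, 10)                        # [num_cands, seqlen, vocab]
_losses = F.cross_entropy(
    _logits.view(-1, 10), _cands.view(-1), reduction='none'
).view(2, 3)
_mask = (_cands != NULL_IDX).float()
_scores = (_losses * _mask).sum(dim=1) / (_mask.sum(dim=1) + 1e-9)
print(_scores.shape)                                   # torch.Size([2]); lower is better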
def upgrade_opt(cls, opt_on_disk):
    """Upgrade opts from older model files."""
    super(BertClassifierAgent, cls).upgrade_opt(opt_on_disk)

    # 2019-06-25: previous versions of the model did not add a CLS token
    # to the beginning of text_vec.
    if 'add_cls_token' not in opt_on_disk:
        warn_once('Old model: overriding `add_cls_token` to False.')
        opt_on_disk['add_cls_token'] = False

    return opt_on_disk
def upgrade_opt(cls, opt_on_disk):
    """Upgrade opts from older model files."""
    super(SelfFeedingAgent, cls).upgrade_opt(opt_on_disk)

    # 2019-06-25: previous versions of the model did not add double person
    # tokens to text_vec.
    if 'add_double_person_tokens' not in opt_on_disk:
        warn_once('Old model: overriding `add_double_person_tokens` to True.')
        opt_on_disk['add_double_person_tokens'] = True

    return opt_on_disk
def forward(self, input, positions=None, segments=None):
    """
    Forward pass.

    ``input`` is a LongTensor of token IDs with shape [batch, seq_len].
    The attention mask is derived from it: positions equal to the padding
    index are masked out (0), all other positions are attended to (1).
    """
    mask = input != self.padding_idx
    if positions is None:
        positions = (mask.cumsum(dim=1, dtype=torch.int64) - 1).clamp_(min=0)
    tensor = self.embeddings(input)
    if self.embeddings_scale:
        tensor = tensor * np.sqrt(self.dim)
    if positions.max().item() > self.n_positions:
        warn_once(
            'You are inputting a sequence of {x} length, but only have '
            '--n-positions {y}. Set --truncate or increase --n-positions'
            .format(x=positions.max().item(), y=self.n_positions))
    tensor = tensor + self.position_embeddings(positions).expand_as(tensor)

    if self.n_segments >= 1:
        if segments is None:
            segments = torch.zeros_like(input)
        tensor = tensor + self.segment_embeddings(segments)

    if self.variant == 'xlm':
        tensor = _normalize(tensor, self.norm_embeddings)

    # --dropout on the embeddings
    tensor = self.dropout(tensor)

    tensor *= mask.unsqueeze(-1).type_as(tensor)
    for i in range(self.n_layers):
        tensor = self.layers[i](tensor, mask)

    if self.reduction_type == 'first':
        return tensor[:, 0, :]
    elif self.reduction_type == 'max':
        return tensor.max(dim=1)[0]
    elif self.reduction_type == 'mean':
        divisor = mask.float().sum(dim=1).unsqueeze(-1).clamp(min=1).type_as(tensor)
        output = tensor.sum(dim=1) / divisor
        return output
    elif self.reduction_type == 'none' or self.reduction_type is None:
        output = tensor
        return output, mask
    else:
        raise ValueError(
            "Can't handle --reduction-type {}".format(self.reduction_type))
def is_valid(self, obs):
    normally_valid = super().is_valid(obs)
    if not normally_valid:
        # shortcut boolean evaluation
        return normally_valid
    contains_empties = obs['text_vec'].shape[0] == 0
    if self.is_training and contains_empties:
        warn_once(
            'seq2seq got an empty input sequence (text_vec) during training. '
            'Skipping this example, but you should check your dataset and '
            'preprocessing.')
    elif not self.is_training and contains_empties:
        warn_once(
            'seq2seq got an empty input sequence (text_vec) in an '
            'evaluation example! This may affect your metrics!')
    return not contains_empties
def upgrade_opt(cls, opt_from_disk):
    # call the parent upgrades
    opt_from_disk = super(TorchGeneratorAgent, cls).upgrade_opt(opt_from_disk)

    # 2019-08-18: Adding support for generation other than beam search.
    # Previously, selecting --beam-size > 1 enabled beam search and == 1 was
    # greedy. New behavior is --inference greedy or --inference beam.
    if 'inference' not in opt_from_disk:
        assert 'beam_size' in opt_from_disk
        if opt_from_disk['beam_size'] == 1:
            method = 'greedy'
        else:
            method = 'beam'
        opt_from_disk['inference'] = method
        warn_once(f'Old model inference method inferred as {method}')

    return opt_from_disk
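# Hedged sketch (plain dict, no ParlAI imports): the effect of the upgrade
# above on a hypothetical on-disk opt that predates the --inference flag.
_old_opt = {'beam_size': 5}  # hypothetical saved opt
if 'inference' not in _old_opt:
    _old_opt['inference'] = 'greedy' if _old_opt['beam_size'] == 1 else 'beam'
print(_old_opt)  # {'beam_size': 5, 'inference': 'beam'}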
def train_step(self, batch):
    """Train on a single batch of examples."""
    if batch.text_vec is None:
        return
    batchsize = batch.text_vec.size(0)
    self.model.train()
    self.zero_grad()

    cands, cand_vecs, label_inds = self._build_candidates(
        batch, source=self.candidates, mode='train'
    )
    try:
        scores = self.score_candidates(batch, cand_vecs)
        loss = self.rank_loss(scores, label_inds)
        self.backward(loss)
        self.update_params()
    except RuntimeError as e:
        # catch out of memory exceptions during fwd/bck (skip batch)
        if 'out of memory' in str(e):
            print(
                '| WARNING: ran out of memory, skipping batch. '
                'if this happens frequently, decrease batchsize or '
                'truncate the inputs to the model.'
            )
            return Output()
        else:
            raise e

    # Update loss
    self.metrics['loss'] += loss.item()
    self.metrics['examples'] += batchsize

    # Get train predictions
    if self.candidates == 'batch':
        self._get_batch_train_metrics(scores)
        return Output()
    if not self.opt.get('train_predict', False):
        warn_once(
            "Some training metrics are omitted for speed. Set the flag "
            "`--train-predict` to calculate train metrics."
        )
        return Output()
    return self._get_train_preds(scores, label_inds, cands, cand_vecs)
def _extract_prev_responses(self, batch):
    # Extract prev_responses for self-feeding formatted examples
    warn_once("WARNING: This code is specific to self-feeding formatted examples")

    # TODO: Pull out p1/p2 once elsewhere, not every time
    p1 = self.dict.txt2vec('__p1__')[0]
    p2 = self.dict.txt2vec('__p2__')[0]
    self.prev_responses = []
    # Do naively for now with a for loop
    for text_vec in batch.text_vec:
        p1s = (text_vec == p1).nonzero()
        p2s = (text_vec == p2).nonzero()
        if len(p1s) and len(p2s):
            response_vec = text_vec[p2s[-1] + 1:p1s[-1]]
        else:
            response_vec = [self.NULL_IDX]  # TODO: pull in actual N
        response = self.dict.vec2txt(response_vec)
        self.prev_responses.append(response)
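# Hedged sketch (plain torch, toy vocabulary): slicing out the previous
# response between the last __p2__ and the last __p1__ token, as above.
# The token IDs below are assumptions for illustration only.
import torch

P1, P2 = 1, 2                                   # stand-ins for __p1__ / __p2__ ids
_text_vec = torch.tensor([P1, 5, 6, P2, 7, 8, P1, 9])
_p1s = (_text_vec == P1).nonzero()
_p2s = (_text_vec == P2).nonzero()
_prev = _text_vec[_p2s[-1].item() + 1:_p1s[-1].item()]
print(_prev)                                    # tensor([7, 8])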
def _rouge(guess, answers):
    """Compute ROUGE score between guess and *any* answers. Return the best."""
    global rouge
    if rouge is None:
        return None, None, None
    evaluator = rouge.Rouge(metrics=['rouge-n', 'rouge-l'], max_n=2)
    try:
        scores = [
            evaluator.get_scores(normalize_answer(guess), normalize_answer(a))
            for a in answers
        ]
    except LookupError:
        warn_once(
            'ROUGE requires the nltk punkt tokenizer. Please run '
            '`python -c "import nltk; nltk.download(\'punkt\')"`')
        rouge = None
        return None, None, None

    scores_rouge1 = [score['rouge-1']['r'] for score in scores]
    scores_rouge2 = [score['rouge-2']['r'] for score in scores]
    scores_rougeL = [score['rouge-l']['r'] for score in scores]
    return max(scores_rouge1), max(scores_rouge2), max(scores_rougeL)
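# Hedged usage sketch (standalone, guarded the same way as the module):
# scoring one toy guess against two toy references and keeping the best
# recall. The example strings are assumptions, not taken from the source.
try:
    import rouge as _rouge_lib
    _evaluator = _rouge_lib.Rouge(metrics=['rouge-n', 'rouge-l'], max_n=2)
    _scores = [
        _evaluator.get_scores('the cat sat', ref)
        for ref in ('the cat sat on the mat', 'a dog barked')
    ]
    print(max(s['rouge-1']['r'] for s in _scores))
except (ImportError, LookupError):
    pass  # py-rouge or nltk punkt missing; the module above degrades the same way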
def forward(self, input, encoder_state, incr_state=None):
    encoder_output, encoder_mask = encoder_state

    seq_len = input.size(1)
    positions = input.new(seq_len).long()
    positions = torch.arange(seq_len, out=positions).unsqueeze(0)
    tensor = self.embeddings(input)
    if self.embeddings_scale:
        tensor = tensor * np.sqrt(self.dim)
    if self.variant == 'xlm':
        tensor = _normalize(tensor, self.norm_embeddings)
    if positions.max().item() > self.n_positions:
        warn_once(
            'You are inputting a sequence of {x} length, but only have '
            '--n-positions {y}. Set --truncate or increase --n-positions'
            .format(x=positions.max().item(), y=self.n_positions))
    tensor = tensor + self.position_embeddings(positions).expand_as(tensor)
    tensor = self.dropout(tensor)  # --dropout

    for layer in self.layers:
        tensor = layer(tensor, encoder_output, encoder_mask)

    return tensor, None
def is_valid(self, obs):
    """Override from TorchAgent."""
    if not self.opt.get('ignore_bad_candidates', False):
        return super().is_valid(obs)

    if 'text_vec' not in obs and 'image' not in obs:
        # TODO: this should really be a call to super, i.e.
        # if not super().is_valid(obs): return False
        return False

    # skip examples for which the set of label candidates does not
    # contain the label
    if 'labels_vec' in obs and 'label_candidates_vecs' in obs:
        cand_vecs = obs['label_candidates_vecs']
        label_vec = obs['labels_vec']
        matches = [x for x in cand_vecs if torch.equal(x, label_vec)]
        if len(matches) == 0:
            warn_once(
                'At least one example has a set of label candidates that '
                'does not contain the label.')
            return False

    return True
def verify(opt, printargs=None, print_parser=None):
    # create repeat label agent and assign it to the specified task
    agent = RepeatLabelAgent(opt)
    world = create_task(opt, agent)

    log_every_n_secs = opt.get('log_every_n_secs', -1)
    if log_every_n_secs <= 0:
        log_every_n_secs = float('inf')
    log_time = TimeLogger()

    counts = {}
    counts['missing_text'] = 0
    counts['missing_labels'] = 0
    counts['missing_label_candidates'] = 0
    counts['empty_label_candidates'] = 0

    # Show some example dialogs.
    while not world.epoch_done():
        world.parley()

        act = world.acts[0]
        if 'text' not in act:
            warn_once("warning: missing text field")
            counts['missing_text'] += 1

        if 'labels' not in act and 'eval_labels' not in act:
            warn_once("warning: missing labels/eval_labels field")
            counts['missing_labels'] += 1
        else:
            if 'label_candidates' not in act:
                counts['missing_label_candidates'] += 1
            else:
                for c in act['label_candidates']:
                    if c == '':
                        warn_once("warning: empty string label_candidate")
                        counts['empty_label_candidates'] += 1

        if log_time.time() > log_every_n_secs:
            text, log = report(world, counts, log_time)
            if print_parser:
                print(text)

    try:
        # print dataset size if available
        print('[ loaded {} episodes with a total of {} examples ]'.format(
            world.num_episodes(), world.num_examples()))
    except Exception:
        pass
    return report(world, counts, log_time)
def eval_step(self, batch):
    """Evaluate a single batch of examples."""
    if batch.text_vec is None:
        return
    bsz = batch.text_vec.size(0)
    self.model.eval()
    cand_scores = None

    if getattr(batch, 'movies', None):
        assert hasattr(self.model, 'kbrd')
        self.model.user_representation, _ = self.model.kbrd.user_representation(
            batch.movies)
        self.model.user_representation = self.model.user_representation.detach()

    if batch.label_vec is not None:
        # calculate loss on targets with teacher forcing
        loss = self.compute_loss(batch)  # noqa: F841  we need the side effects
        self.metrics['loss'] += loss.item()

    preds = None
    if self.skip_generation:
        warn_once(
            "--skip-generation does not produce accurate metrics beyond ppl",
            RuntimeWarning)
    elif self.beam_size == 1:
        # greedy decode
        _, preds, *_ = self.model(*self._model_input(batch), bsz=bsz)
    elif self.beam_size > 1:
        out = self.beam_search(
            self.model,
            batch,
            self.beam_size,
            start=self.START_IDX,
            end=self.END_IDX,
            pad=self.NULL_IDX,
            min_length=self.beam_min_length,
            min_n_best=self.beam_min_n_best,
            block_ngram=self.beam_block_ngram)
        beam_preds_scores, _, beams = out
        preds, scores = zip(*beam_preds_scores)

        if self.beam_dot_log is True:
            self._write_beam_dots(batch.text_vec, beams)

    cand_choices = None
    # TODO: abstract out the scoring here
    if self.rank_candidates:
        # compute roughly ppl to rank candidates
        cand_choices = []
        encoder_states = self.model.encoder(*self._model_input(batch))
        for i in range(bsz):
            num_cands = len(batch.candidate_vecs[i])
            enc = self.model.reorder_encoder_states(encoder_states, [i] * num_cands)
            cands, _ = padded_tensor(
                batch.candidate_vecs[i], self.NULL_IDX, self.use_cuda)
            scores, _ = self.model.decode_forced(enc, cands)
            cand_losses = F.cross_entropy(
                scores.view(num_cands * cands.size(1), -1),
                cands.view(-1),
                reduction='none',
            ).view(num_cands, cands.size(1))
            # now cand_losses is cands x seqlen size, but we still need to
            # check padding and such
            mask = (cands != self.NULL_IDX).float()
            cand_scores = (cand_losses * mask).sum(dim=1) / (mask.sum(dim=1) + 1e-9)
            _, ordering = cand_scores.sort()
            cand_choices.append([batch.candidates[i][o] for o in ordering])

    text = [self._v2t(p) for p in preds] if preds is not None else None
    return Output(text, cand_choices)
def train(self):
    if is_distributed():
        warn_once(
            "Distributed training outputs average-per-worker metrics during "
            "training, and may be slightly distorted. Validation/test are "
            "unadulterated.")
    opt = self.opt
    world = self.world
    with world:
        while True:
            # do one example / batch of examples
            world.parley()
            self.parleys += 1
            # print(world.display())

            # get the total training examples done, compute epochs
            self._total_epochs = (
                self._preempted_epochs +
                num_workers() * self.world.get_total_epochs())
            exs_per_epoch = self.world.num_examples()
            self._total_exs = int(np.round(self._total_epochs * exs_per_epoch))

            # and use the primary worker's timings for everything
            train_time, log_time, validate_time = sync_object(
                (self.train_time.time(),
                 self.log_time.time(),
                 self.validate_time.time()))

            # check counters and timers
            if self._total_epochs >= self.max_num_epochs:
                self.log()
                print('[ num_epochs completed:{} time elapsed:{}s ]'.format(
                    self.max_num_epochs, train_time))
                break
            if train_time > self.max_train_time:
                print('[ max_train_time elapsed:{}s ]'.format(train_time))
                break
            if log_time > self.log_every_n_secs:
                self.log()
            if (validate_time > self.val_every_n_secs or
                    self._total_epochs - self.last_valid_epoch
                    >= self.val_every_n_epochs):
                stop_training = self.validate()
                self.last_valid_epoch = self._total_epochs
                if stop_training:
                    break
            if (self.save_time.time() > self.save_every_n_secs and
                    opt.get('model_file') and
                    is_primary_worker()):
                print("[ saving model checkpoint: {}.checkpoint".format(
                    opt['model_file']))
                self.save_model('.checkpoint')
                self.save_time.reset()

    if not self.saved and is_primary_worker():
        # save agent
        self.save_model()
    elif opt.get('model_file'):
        # reload best validation model
        self.agent = create_agent(opt)

    valid_world = _maybe_load_eval_world(self.agent, opt, 'valid')
    v_report = run_eval(valid_world, opt, 'valid', write_log=True)
    test_world = _maybe_load_eval_world(self.agent, opt, 'test')
    t_report = run_eval(test_world, opt, 'test', write_log=True)
    if valid_world:
        valid_world.shutdown()
    if test_world:
        test_world.shutdown()

    return v_report, t_report
def load_agent_module(opt):
    """Load agent options and module from file if an opt file exists.

    Checks to see if the file opt['model_file'] + ".opt" exists; if so, load
    the options from that file and use them to create an agent, reading the
    model type from the file and overriding any options specified in the
    'override' dict when instantiating the agent.

    If that file does not exist, return None.
    """
    model_file = opt['model_file']
    optfile = model_file + '.opt'
    if os.path.isfile(optfile):
        new_opt = _load_opt_file(optfile)
        if 'batchindex' in new_opt:
            # This saved variable can cause trouble if we switch to BS=1 at test time
            del new_opt['batchindex']
        # only override opts specified in 'override' dict
        if opt.get('override'):
            for k, v in opt['override'].items():
                if str(v) != str(new_opt.get(k, None)):
                    print("[ warning: overriding opt['{}'] to {} ("
                          "previously: {} )]".format(k, v, new_opt.get(k, None)))
                new_opt[k] = v
        # add model arguments to new_opt if they aren't in new_opt already
        for k, v in opt.items():
            if k not in new_opt:
                new_opt[k] = v
        new_opt['model_file'] = model_file
        if not new_opt.get('dict_file'):
            new_opt['dict_file'] = model_file + '.dict'
        elif new_opt.get('dict_file') and not os.path.isfile(new_opt['dict_file']):
            old_dict_file = new_opt['dict_file']
            new_opt['dict_file'] = model_file + '.dict'
        if not os.path.isfile(new_opt['dict_file']):
            warn_once(
                'WARNING: Neither the specified dict file ({}) nor the '
                '`model_file`.dict file ({}) exists, check to make sure either '
                'is correct. This may manifest as a shape mismatch later '
                'on.'.format(old_dict_file, new_opt['dict_file']))

        model_class = get_agent_module(new_opt['model'])

        # check for model version
        if hasattr(model_class, 'model_version'):
            curr_version = new_opt.get('model_version', 0)
            if curr_version != model_class.model_version():
                model = new_opt['model']
                m = ('It looks like you are trying to load an older version of'
                     ' the selected model. Change your model argument to use '
                     'the old version from parlai/agents/legacy_agents: for '
                     'example: `-m legacy:{m}:{v}` or '
                     '`--model parlai.agents.legacy_agents.{m}.{m}_v{v}:{c}`')
                if '.' not in model:
                    # give a specific error message if it's easy
                    raise RuntimeError(
                        m.format(m=model, v=curr_version, c=model_class.__name__))
                else:
                    # otherwise a generic one
                    raise RuntimeError(
                        m.format(m='modelname', v=curr_version, c='ModelAgent'))

        # if we want to load weights from --init-model, compare opts with
        # loaded ones
        compare_init_model_opts(opt, new_opt)
        return model_class(new_opt)
    else:
        return None
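# Hedged sketch (plain dicts, no ParlAI imports): how the override/merge logic
# above combines command-line opts with the opts saved next to a model file.
# All names and values below are hypothetical.
_saved_opt = {'model': 'transformer/ranker', 'lr': 0.001, 'batchindex': 3}
_cli_opt = {'override': {'lr': 0.0005}, 'gpu': 0}

_saved_opt.pop('batchindex', None)                 # dropped, as in the code above
for _k, _v in _cli_opt.get('override', {}).items():
    _saved_opt[_k] = _v                            # explicit overrides win
for _k, _v in _cli_opt.items():
    _saved_opt.setdefault(_k, _v)                  # fill in anything missing
print(_saved_opt['lr'], _saved_opt['gpu'])         # 0.0005 0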
def _build_candidates(self, batch, source, mode):
    """Build a candidate set for this batch.

    :param batch: a Batch object (defined in torch_agent.py)
    :param source: the source from which candidates should be built, one of
        ['batch', 'inline', 'fixed', 'vocab']
    :param mode: 'train' or 'eval'

    :return: tuple of (cands, cand_vecs, label_inds)
        cands: a [num_cands] list of (text) candidates
            OR a [batchsize] list of such lists if source=='inline'
        cand_vecs: a padded [num_cands, seqlen] LongTensor of vectorized
            candidates OR a [batchsize, num_cands, seqlen] LongTensor if
            source=='inline'
        label_inds: a [bsz] LongTensor of the indices of the labels for each
            example from its respective candidate set

    Possible sources of candidates:

    * batch: the set of all labels in this batch
        Use all labels in the batch as the candidate set (with all but the
        example's label being treated as negatives).
        Note: with this setting, the candidate set is identical for all
        examples in a batch. This option may be undesirable if it is
        possible for duplicate labels to occur in a batch, since the second
        instance of the correct label will be treated as a negative.
    * inline: batch_size lists, one list per example
        If each example comes with a list of possible candidates, use those.
        Note: with this setting, each example will have its own candidate set.
    * fixed: one global candidate list, provided in a file from the user
        If self.fixed_candidates is not None, use a set of fixed candidates
        for all examples.
        Note: this setting is not recommended for training unless the
        universe of possible candidates is very small.
    * vocab: one global candidate list, extracted from the vocabulary with
        the exception of self.NULL_IDX.
    """
    label_vecs = batch.label_vec  # [bsz] list of lists of LongTensors
    label_inds = None
    batchsize = batch.text_vec.shape[0]

    if label_vecs is not None:
        assert label_vecs.dim() == 2

    if source == 'batch':
        warn_once(
            '[ Executing {} mode with batch labels as set of candidates. ]'
            ''.format(mode))
        if batchsize == 1:
            warn_once(
                "[ Warning: using candidate source 'batch' and observed a "
                "batch of size 1. This may be due to uneven batch sizes at "
                "the end of an epoch. ]")
        if label_vecs is None:
            raise ValueError(
                "If using candidate source 'batch', then batch.label_vec cannot "
                "be None.")

        cands = batch.labels
        cand_vecs = label_vecs
        label_inds = label_vecs.new_tensor(range(batchsize))

    elif source == 'inline':
        warn_once(
            '[ Executing {} mode with provided inline set of candidates ]'
            ''.format(mode))
        if batch.candidate_vecs is None:
            raise ValueError(
                "If using candidate source 'inline', then batch.candidate_vecs "
                "cannot be None. If your task does not have inline candidates, "
                "consider using one of --{m}={{'batch','fixed','vocab'}}."
                "".format(m='candidates' if mode == 'train' else 'eval-candidates'))

        cands = batch.candidates
        cand_vecs = padded_3d(batch.candidate_vecs, use_cuda=self.use_cuda)
        if label_vecs is not None:
            label_inds = label_vecs.new_empty((batchsize))
            for i, label_vec in enumerate(label_vecs):
                label_vec_pad = label_vec.new_zeros(cand_vecs[i].size(1))
                label_vec_pad[0:label_vec.size(0)] = label_vec
                label_inds[i] = self._find_match(cand_vecs[i], label_vec_pad)

    elif source == 'fixed':
        if self.fixed_candidates is None:
            raise ValueError(
                "If using candidate source 'fixed', then you must provide the "
                "path to a file of candidates with the flag "
                "--fixed-candidates-path")
        warn_once(
            "[ Executing {} mode with a common set of fixed candidates "
            "(n = {}). ]".format(mode, len(self.fixed_candidates)))

        cands = self.fixed_candidates
        cand_vecs = self.fixed_candidate_vecs
        if label_vecs is not None:
            label_inds = label_vecs.new_empty((batchsize))
            for i, label_vec in enumerate(label_vecs):
                label_inds[i] = self._find_match(cand_vecs, label_vec)

    elif source == 'vocab':
        warn_once(
            '[ Executing {} mode with tokens from vocabulary as candidates. ]'
            ''.format(mode))
        cands = self.vocab_candidates
        cand_vecs = self.vocab_candidate_vecs
        if label_vecs is not None:
            label_inds = label_vecs.new_empty((batchsize))
            for i, label_vec in enumerate(label_vecs):
                label_inds[i] = self._find_match(cand_vecs, label_vec)

    return (cands, cand_vecs, label_inds)
ROUGE_METRICS = {'rouge-1', 'rouge-2', 'rouge-L'}
ALL_METRICS = DEFAULT_METRICS | ROUGE_METRICS

try:
    from nltk.translate import bleu_score as nltkbleu
except ImportError:
    # User doesn't have nltk installed, so we can't use it for bleu
    # We'll just turn off things, but we might want to warn the user
    nltkbleu = None

try:
    import rouge as rouge
except ImportError:
    # User doesn't have rouge installed, so we can't use it for rouge
    # We'll just turn off things, but we might want to warn the user
    warn_once(
        'Rouge metrics require py-rouge. Please run `pip install py-rouge`.')
    rouge = None

re_art = re.compile(r'\b(a|an|the)\b')
re_punc = re.compile(r'[!"#$%&()*+,-./:;<=>?@\[\]\\^`{|}~_\']')


def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace."""

    def remove_articles(text):
        return re_art.sub(' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    def remove_punc(text):