Example #1
    def train_step(self, batch):
        """Train on a single batch of examples."""
        if batch.text_vec is None:
            return
        batchsize = batch.text_vec.size(0)
        self.model.train()
        self.zero_grad()

        cands, cand_vecs, label_inds = self._build_candidates(
            batch, source=self.opt['candidates'], mode='train')
        scores = self.score_candidates(batch, cand_vecs)
        loss = self.rank_loss(scores, label_inds)

        # Update loss
        self.metrics['loss'] += loss.item()
        self.metrics['examples'] += batchsize
        loss.backward()
        self.update_params()

        # Get train predictions
        if self.opt['candidates'] == 'batch':
            self.get_batch_train_metrics(scores)
            return Output()
        if not self.opt.get('train_predict', False):
            warn_once(
                "Some training metrics are omitted for speed. Set the flag "
                "`--train-predict` to calculate train metrics.")
            return Output()
        return self.get_train_preds(scores, label_inds, cands, cand_vecs)
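All of the examples in this listing call ParlAI's `warn_once` helper. As a rough, hypothetical stand-in (not the library's actual implementation), a deduplicating warning function might look like this sketch:

_seen_warning_messages = set()

def warn_once(msg, warningtype=None):
    # Hypothetical stand-in: emit each distinct message at most once per process.
    if msg not in _seen_warning_messages:
        _seen_warning_messages.add(msg)
        print(msg)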
Example #2
    def is_valid(self, obs):
        """
        Override from TorchAgent.

        Check to see if label candidates contain the label.
        """
        if not self.ignore_bad_candidates:
            return super().is_valid(obs)

        if not super().is_valid(obs):
            return False

        # skip examples for which the set of label candidates does not
        # contain the label
        if 'labels_vec' in obs and 'label_candidates_vecs' in obs:
            cand_vecs = obs['label_candidates_vecs']
            label_vec = obs['labels_vec']
            matches = [x for x in cand_vecs if torch.equal(x, label_vec)]
            if len(matches) == 0:
                warn_once(
                    'At least one example has a set of label candidates that '
                    'does not contain the label.')
                return False

        return True
Example #3
    def receive_metrics(self, metrics_dict):
        """Use the metrics to decide when to adjust LR schedule.

        This uses the loss as the validation metric if present, if not this
        function does nothing. Note that the model must be reporting loss for
        this to work.
        Override this to override the behavior.
        """
        if self._is_lr_warming_up():
            # we're not done warming up, so don't start using validation
            # metrics to adjust schedule
            return

        if self.opt['lr_scheduler'] == 'reduceonplateau':
            if 'loss' not in metrics_dict:
                # nothing to step on, just skip
                warn_once("LR scheduler expected to see loss metric, but didn't.")
                return
            self.scheduler.step(metrics_dict['loss'])
        elif self.opt['lr_scheduler'] == 'fixed':
            self.scheduler.step()
        elif self.opt['lr_scheduler'] == 'invsqrt':
            # this is a training step lr scheduler, nothing to adjust in validation
            pass
        elif self.opt['lr_scheduler'] == 'none':
            # no adjustments, do nothing
            pass
        else:
            raise ValueError(
                "Don't know how to work with lr scheduler '{}'"
                .format(self.opt['lr_scheduler'])
            )
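For the 'reduceonplateau' branch above, a minimal sketch using the standard PyTorch scheduler (the model and optimizer here are placeholders) could look like:

import torch.nn as nn
import torch.optim as optim

model = nn.Linear(4, 2)                       # placeholder model
optimizer = optim.SGD(model.parameters(), lr=0.1)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.5, patience=3)

metrics_dict = {'loss': 0.42}                 # as reported at validation time
if 'loss' in metrics_dict:
    # only step when a validation loss is available, mirroring the check above
    scheduler.step(metrics_dict['loss'])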
Example #4
    def feedback_step(self, batch):
        batchsize = batch.text_vec.size(0)

        warn_once("WARNING: feedback candidates are hardcoded to batch")
        if self.model.training:
            cands, cand_vecs, label_inds = self._build_candidates(
                batch, source='batch', mode='train')
        else:
            cands, cand_vecs, label_inds = self._build_candidates(
                batch, source='batch', mode='eval')

        scores = self.model.score_feedback(batch.text_vec, cand_vecs)
        _, ranks = scores.sort(1, descending=True)

        if self.model.training:
            # Get predictions but not full rankings for the sake of speed
            cand_ranked = None
            preds = [cands[ordering[0]] for ordering in ranks]
        else:
            # Return full rankings to calculate hits@ metrics
            cand_ranked = []
            for ordering in ranks:
                cand_ranked.append([cands[rank] for rank in ordering])
            preds = [cand_ranked[i][0] for i in range(batchsize)]

        if label_inds is None:
            loss = None
        else:
            loss = self.rank_loss(scores, label_inds)
            self.update_fee_metrics(loss, ranks, label_inds, batchsize)
        return loss, preds, cand_ranked
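`feedback_step` returns full candidate rankings at eval time so that hits@k metrics can be computed. A small hypothetical helper (the names `ranks` and `label_inds` mirror the tensors above; this is not the snippet's own `update_fee_metrics`) might look like:

import torch

def hits_at_k(ranks, label_inds, k=1):
    # ranks: [bsz, num_cands] candidate indices sorted best-first
    # label_inds: [bsz] index of the gold candidate for each example
    hits = sum(
        int((ordering[:k] == label).any())
        for ordering, label in zip(ranks, label_inds)
    )
    return hits / len(label_inds)

ranks = torch.tensor([[2, 0, 1], [1, 2, 0]])
label_inds = torch.tensor([2, 0])
print(hits_at_k(ranks, label_inds, k=1))  # 0.5: only the first example is a hit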
Example #5
    def forward(self, input, encoder_state, incr_state=None):
        """
        Forward pass.

        :param LongTensor[batch,seqlen] input:
            The decoder inputs (partial or full decoded token IDs).
        :param encoder_state:
            Output from the encoder module forward pass.
        :param incr_state:
            Ignored. Should always be ``None`` in this version.
        """
        encoder_output, encoder_mask = encoder_state

        seq_len = input.size(1)
        positions = input.new(seq_len).long()
        positions = torch.arange(seq_len, out=positions).unsqueeze(0)
        tensor = self.embeddings(input)
        if self.embeddings_scale:
            tensor = tensor * np.sqrt(self.dim)
        if self.variant == 'xlm':
            tensor = _normalize(tensor, self.norm_embeddings)
        if positions.max().item() > self.n_positions:
            warn_once(
                'You are inputting a sequence of {x} length, but only have '
                '--n-positions {y}. Set --truncate or increase --n-positions'.
                format(x=positions.max().item(), y=self.n_positions))
        tensor = tensor + self.position_embeddings(positions).expand_as(tensor)
        tensor = self.dropout(tensor)  # --dropout

        for layer in self.layers:
            tensor = layer(tensor, encoder_output, encoder_mask)

        return tensor, None
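The decoder above builds [1, seq_len] position IDs with `torch.arange` and broadcast-adds their embeddings onto the token embeddings. A toy sketch of just that step, with assumed sizes:

import torch

batch, seq_len, dim, n_positions = 2, 4, 8, 512    # assumed sizes
position_embeddings = torch.nn.Embedding(n_positions, dim)
token_embeddings = torch.randn(batch, seq_len, dim)

positions = torch.arange(seq_len).unsqueeze(0)     # [1, seq_len]
out = token_embeddings + position_embeddings(positions).expand_as(token_embeddings)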
Example #6
def _path(opt):
    build(opt)
    datatype = opt['datatype'].split(':')[0]
    if datatype == 'test':
        warn_once("WARNING: Test set not included. Setting datatype to valid.")
        datatype = 'valid'
    return make_path(opt, datatype + '.txt')
def _path(opt):
    # Build the data if it doesn't exist.
    build(opt)
    dt = opt['datatype'].split(':')[0]
    if dt == 'test':
        warn_once("WARNING: Test set not included. Setting datatype to valid.")
        dt = 'valid'
    return os.path.join(opt['datapath'], 'CoQA', dt + '.txt')
Example #8
    def forward(self, input, positions=None, segments=None):
        """
        Forward pass.

        :param LongTensor[batch,seqlen] input:
            The input IDs. The attention mask is computed internally from the
            padding index (1 means attend, 0 means ignore).
        :param LongTensor[batch,seqlen] positions:
            Position IDs. If ``None``, they are computed from the padding mask.
        :param LongTensor[batch,seqlen] segments:
            If provided, additionally adds ``segments`` as extra embedding features.
        """
        mask = input != self.padding_idx
        if positions is None:
            positions = (mask.cumsum(dim=1, dtype=torch.int64) - 1).clamp_(min=0)
        tensor = self.embeddings(input)
        if self.embeddings_scale:
            tensor = tensor * np.sqrt(self.dim)

        if positions.max().item() > self.n_positions:
            warn_once(
                'You are inputting a sequence of {x} length, but only have '
                '--n-positions {y}. Set --truncate or increase --n-positions'.format(
                    x=positions.max().item(), y=self.n_positions
                )
            )
        tensor = tensor + self.position_embeddings(positions).expand_as(tensor)

        if self.n_segments >= 1:
            if segments is None:
                segments = torch.zeros_like(input)
            tensor = tensor + self.segment_embeddings(segments)

        if self.variant == 'xlm':
            tensor = _normalize(tensor, self.norm_embeddings)

        # --dropout on the embeddings
        tensor = self.dropout(tensor)

        tensor *= mask.unsqueeze(-1).type_as(tensor)
        for i in range(self.n_layers):
            tensor = self.layers[i](tensor, mask)

        tensor *= self.output_scaling
        if self.reduction_type == 'first':
            return tensor[:, 0, :]
        elif self.reduction_type == 'max':
            return tensor.max(dim=1)[0]
        elif self.reduction_type == 'mean':
            divisor = mask.float().sum(dim=1).unsqueeze(-1).clamp(min=1).type_as(tensor)
            output = tensor.sum(dim=1) / divisor
            return output
        elif self.reduction_type == 'none' or self.reduction_type is None:
            output = tensor
            return output, mask
        else:
            raise ValueError(
                "Can't handle --reduction-type {}".format(self.reduction_type)
            )
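The 'mean' reduction above averages only over non-padded positions and clamps the divisor so an all-padding row cannot divide by zero. A standalone sketch with toy tensors:

import torch

tensor = torch.randn(2, 5, 8)                          # [batch, seq_len, dim]
mask = torch.tensor([[1, 1, 1, 0, 0],
                     [1, 1, 1, 1, 1]], dtype=torch.bool)

divisor = mask.float().sum(dim=1).unsqueeze(-1).clamp(min=1)
mean_pooled = (tensor * mask.unsqueeze(-1).float()).sum(dim=1) / divisor  # [batch, dim]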
Example #9
    def __init__(self, opt, shared=None):
        if opt['interactive']:
            print("[ Setting interactive mode defaults... ]")
            opt['prev_response_filter'] = True
            opt['person_tokens'] = True

        # Set subtasks first so that opt['subtasks'] is set before build_model()
        self.set_subtasks(opt)
        self.multitask = len(self.subtasks) > 1
        if not self.multitask:
            self.subtask = self.subtasks[0]

        if opt['prev_response_negatives']:
            if opt['candidates'] in ['fixed', 'vocab']:
                msg = (
                    "[ Option --prev-response-negatives=True is incompatible with "
                    "--candidates=['fixed','vocab']. Overriding it to False. ]"
                )
                warn_once(msg)
                self.opt['prev_response_negatives'] = False
            self.prev_responses = None
        if opt['prev_response_filter']:
            if not opt['interactive']:
                msg = (
                    "[ Option --prev-response-filter=True can only be used when "
                    "--interactive=True. Overriding it to False. ]")
                warn_once(msg)
                self.opt['prev_response_filter'] = False
            self.prev_response = None

        super().__init__(opt, shared)

        self.status = NORMAL
        if self.opt['interactive']:
            assert 'dialog' in self.subtasks
            assert 'sentiment' in self.subtasks
        else:
            assert not self.opt['request_explanation']
            assert not self.opt['request_rating']

        self.task_weight = {
            'dialog': opt['dia_weight'],
            'explanation': opt['exp_weight'],
            'sentiment': opt['sen_weight'],
        }

        # dialog/explanation tasks use self.rank_loss from TorchRankerAgent
        # Don't do BCEWithLogitsLoss since we need the probs from the sigmoid anyway
        self.sentiment_criterion = nn.BCELoss(reduce=True, size_average=False)

        # Set rating classifier
        if opt['regex']:
            self.rating_classifier = FeedbackClassifierRegex()

        random.seed()
        self.history = []  # Overwrite the deque; keep the whole history and slice
        self.reset()
Example #10
    def eval_step(self, batch):
        """Evaluate a single batch of examples."""
        if batch.text_vec is None:
            return
        self.model.eval()

        if batch.label_vec is not None:
            # calculate loss on targets with teacher forcing
            # loss = self.compute_loss(batch)  # noqa: F841  we need the side effects
            # self.metrics['loss'] += loss.item()
            valid_loss = self.model.valid(batch.text_vec, batch.context_lens,
                                          batch.text_lengths, batch.floors,
                                          batch.label_vec, batch.label_lengths,
                                          self.criterion)
            self.metrics['correct_tokens'] += valid_loss['correct_tokens']
            self.metrics['nll_loss'] += valid_loss['nll_loss']
            self.metrics['num_tokens'] += valid_loss['num_tokens']
            self.metrics['loss'] += valid_loss['avg_loss']

            if self.opt.get('hred', False):
                pass
            elif self.opt.get('vhred', False):
                self.metrics['kl_loss_cnt'] += 1
                self.metrics['kl_loss'] += valid_loss['vhred_kl_loss']
                self.metrics['bow_loss_cnt'] += 1
                self.metrics['bow_loss'] += valid_loss['bow_loss']
            else:
                self.metrics['loss_G_cnt'] += 1
                self.metrics['loss_G'] += valid_loss['valid_loss_G']
                self.metrics['loss_D_cnt'] += 1
                self.metrics['loss_D'] += valid_loss['valid_loss_D']

        preds = None
        if self.skip_generation:
            # noinspection PyTypeChecker
            warn_once(
                "--skip-generation does not produce accurate metrics beyond ppl",
                RuntimeWarning)
        else:
            sample_words, sample_lens = self.model.sample(
                batch.text_vec, batch.context_lens, batch.text_lengths,
                batch.floors, self.START_IDX, self.END_IDX)
            preds = torch.from_numpy(sample_words)
        if batch.label_vec is not None:
            label_text = batch.labels
            # we are in validation mode; print some generated responses for debugging
            if preds is not None:
                for i in range(len(preds)):
                    if random.random() > (1 - self.opt['report_freq']):
                        context_text = batch.observations[i]['text']
                        print('TEXT: ', context_text)
                        print('TARGET: ', label_text[i])
                        print('PREDICTION: ', self._v2t(preds[i]), '\n~')
        else:
            label_text = None

        context = [obs['text'] for obs in batch.observations]
        text = [self._v2t(p) for p in preds] if preds is not None else None
        return Output(text, None), label_text, context
Example #11
def _path(opt, teacher_name):
    # Build the data if it doesn't exist.
    build(opt)
    dt = opt['datatype'].split(':')[0]
    if dt == 'test':
        warn_once('WARNING: Test set not included. Setting datatype to valid.')
        dt = 'valid'
    if dt == 'valid':
        dt = dt + '_' + teacher_name
    return os.path.join(opt['datapath'], 'HotpotQA', dt + '.txt')
Example #12
    def _upgrade_opt(self, opt):
        model_opt = opt['model_file'] + '.opt'
        if not os.path.isfile(model_opt):
            return
        old_opt = load_opt_file(model_opt)
        if 'add_cls_token' not in old_opt:
            # old model, make this default to False
            warn_once('Old model: overriding `add_cls_token` to False.')
            opt['add_cls_token'] = False
        return
Example #13
File: agents.py  Project: nmfisher/ParlAI
def _path(opt, persona, use_cands):
    # Build the data if it doesn't exist.
    build(opt)
    datatype = opt['datatype'].split(':')[0]
    if datatype == 'test':
        warn_once("WARNING: Test set not included. Setting datatype to valid.")
        datatype = 'valid'
    dt = datatype + '_' + persona
    cands = '' if use_cands else '_no_cands'
    return os.path.join(opt['datapath'], 'ConvAI2', dt + cands + '.txt')
    def eval_step(self, batch):
        """Evaluate a single batch of examples."""
        if batch.text_vec is None:
            return
        bsz = batch.text_vec.size(0)
        self.model.eval()
        cand_scores = None

        if batch.label_vec is not None:
            # calculate loss on targets with teacher forcing
            loss = self.compute_loss(
                batch)  # noqa: F841  we need the side effects
            self.metrics['loss'] += loss.item()

        preds = None
        if self.skip_generation:
            warn_once(
                "--skip-generation does not produce accurate metrics beyond ppl",
                RuntimeWarning,
            )
        else:
            maxlen = self.label_truncate or 256
            beam_preds_scores, _ = self._generate(batch, self.beam_size,
                                                  maxlen)
            preds, scores = zip(*beam_preds_scores)

        cand_choices = None
        # TODO: abstract out the scoring here
        if self.rank_candidates:
            # compute roughly ppl to rank candidates
            cand_choices = []
            encoder_states = self.model.encoder(*self._model_input(batch))
            for i in range(bsz):
                num_cands = len(batch.candidate_vecs[i])
                enc = self.model.reorder_encoder_states(
                    encoder_states, [i] * num_cands)
                cands, _ = padded_tensor(batch.candidate_vecs[i],
                                         self.NULL_IDX, self.use_cuda)
                scores, _ = self.model.decode_forced(enc, cands)
                cand_losses = F.cross_entropy(
                    scores.view(num_cands * cands.size(1), -1),
                    cands.view(-1),
                    reduction='none',
                ).view(num_cands, cands.size(1))
                # now cand_losses is cands x seqlen size, but we still need to
                # check padding and such
                mask = (cands != self.NULL_IDX).float()
                cand_scores = (cand_losses *
                               mask).sum(dim=1) / (mask.sum(dim=1) + 1e-9)
                _, ordering = cand_scores.sort()
                cand_choices.append([batch.candidates[i][o] for o in ordering])

        text = [self._v2t(p) for p in preds] if preds is not None else None
        return Output(text, cand_choices)
Example #15
    def upgrade_opt(cls, opt_on_disk):
        """Upgrade opts from older model files."""
        super(BertClassifierAgent, cls).upgrade_opt(opt_on_disk)

        # 2019-06-25: previous versions of the model did not add a CLS token
        # to the beginning of text_vec.
        if 'add_cls_token' not in opt_on_disk:
            warn_once('Old model: overriding `add_cls_token` to False.')
            opt_on_disk['add_cls_token'] = False

        return opt_on_disk
Example #16
    def upgrade_opt(cls, opt_on_disk):
        """Upgrade opts from older model files."""
        super(SelfFeedingAgent, cls).upgrade_opt(opt_on_disk)

        # 2019-06-25: previous versions of the model did not add a CLS token
        # to the beginning of text_vec.
        if 'add_double_person_tokens' not in opt_on_disk:
            warn_once(
                'Old model: overriding `add_double_person_tokens` to True.')
            opt_on_disk['add_double_person_tokens'] = True

        return opt_on_disk
Example #17
    def forward(self, input, positions=None, segments=None):
        """
            input data is a FloatTensor of shape [batch, seq_len, dim]
            mask is a ByteTensor of shape [batch, seq_len], filled with 1 when
            inside the sequence and 0 outside.
        """
        mask = input != self.padding_idx
        if positions is None:
            positions = (mask.cumsum(dim=1, dtype=torch.int64) -
                         1).clamp_(min=0)
        tensor = self.embeddings(input)
        if self.embeddings_scale:
            tensor = tensor * np.sqrt(self.dim)

        if positions.max().item() > self.n_positions:
            warn_once(
                'You are inputting a sequence of {x} length, but only have '
                '--n-positions {y}. Set --truncate or increase --n-positions'.
                format(x=positions.max().item(), y=self.n_positions))
        tensor = tensor + self.position_embeddings(positions).expand_as(tensor)

        if self.n_segments >= 1:
            if segments is None:
                segments = torch.zeros_like(input)
            tensor = tensor + self.segment_embeddings(segments)

        if self.variant == 'xlm':
            tensor = _normalize(tensor, self.norm_embeddings)

        # --dropout on the embeddings
        tensor = self.dropout(tensor)

        tensor *= mask.unsqueeze(-1).type_as(tensor)
        for i in range(self.n_layers):
            tensor = self.layers[i](tensor, mask)

        if self.reduction_type == 'first':
            return tensor[:, 0, :]
        elif self.reduction_type == 'max':
            return tensor.max(dim=1)[0]
        elif self.reduction_type == 'mean':
            divisor = mask.float().sum(dim=1).unsqueeze(-1).clamp(
                min=1).type_as(tensor)
            output = tensor.sum(dim=1) / divisor
            return output
        elif self.reduction_type == 'none' or self.reduction_type is None:
            output = tensor
            return output, mask
        else:
            raise ValueError("Can't handle --reduction-type {}".format(
                self.reduction_type))
Example #18
    def is_valid(self, obs):
        normally_valid = super().is_valid(obs)
        if not normally_valid:
            # shortcut boolean evaluation
            return normally_valid
        contains_empties = obs['text_vec'].shape[0] == 0
        if self.is_training and contains_empties:
            warn_once(
                'seq2seq got an empty input sequence (text_vec) during training. '
                'Skipping this example, but you should check your dataset and '
                'preprocessing.')
        elif not self.is_training and contains_empties:
            warn_once('seq2seq got an empty input sequence (text_vec) in an '
                      'evaluation example! This may affect your metrics!')
        return not contains_empties

    def upgrade_opt(cls, opt_from_disk):
        # call the parent upgrades
        opt_from_disk = super(TorchGeneratorAgent, cls).upgrade_opt(opt_from_disk)

        # 2019-08-18: Adding support for generation other than beam search
        # Previously, selecting --beam-size > 1 enabled beam search and == 1 was
        # greedy. New behavior is --inference greedy or --inference beam.
        if 'inference' not in opt_from_disk:
            assert 'beam_size' in opt_from_disk
            if opt_from_disk['beam_size'] == 1:
                method = 'greedy'
            else:
                method = 'beam'
            opt_from_disk['inference'] = method
            warn_once(f'Old model inference method inferred as {method}')
        return opt_from_disk
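The `upgrade_opt` snippet above infers the decoding method for old checkpoints from `--beam-size`. A standalone sketch of just that mapping (not the ParlAI API itself):

def infer_inference_method(opt_from_disk):
    # beam_size == 1 used to mean greedy decoding; anything larger meant beam search
    if 'inference' not in opt_from_disk:
        beam_size = opt_from_disk.get('beam_size', 1)
        opt_from_disk['inference'] = 'greedy' if beam_size == 1 else 'beam'
    return opt_from_disk

print(infer_inference_method({'beam_size': 5}))  # {'beam_size': 5, 'inference': 'beam'}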
Example #20
    def train_step(self, batch):
        """Train on a single batch of examples."""
        if batch.text_vec is None:
            return
        batchsize = batch.text_vec.size(0)
        self.model.train()
        self.zero_grad()

        cands, cand_vecs, label_inds = self._build_candidates(
            batch, source=self.candidates, mode='train'
        )
        try:
            scores = self.score_candidates(batch, cand_vecs)
            loss = self.rank_loss(scores, label_inds)
            self.backward(loss)
            self.update_params()
        except RuntimeError as e:
            # catch out of memory exceptions during fwd/bck (skip batch)
            if 'out of memory' in str(e):
                print(
                    '| WARNING: ran out of memory, skipping batch. '
                    'if this happens frequently, decrease batchsize or '
                    'truncate the inputs to the model.'
                )
                return Output()
            else:
                raise e

        # Update loss
        self.metrics['loss'] += loss.item()
        self.metrics['examples'] += batchsize

        # Get train predictions
        if self.candidates == 'batch':
            self._get_batch_train_metrics(scores)
            return Output()
        if not self.opt.get('train_predict', False):
            warn_once(
                "Some training metrics are omitted for speed. Set the flag "
                "`--train-predict` to calculate train metrics."
            )
            return Output()
        return self._get_train_preds(scores, label_inds, cands, cand_vecs)
Example #21
    def _extract_prev_responses(self, batch):
        # Extract prev_responses for self-feeding formatted examples
        warn_once(
            "WARNING: This code is specific to self-feeding formatted examples"
        )

        # TODO: Pull out p1/p2 once elsewhere, not every time
        p1 = self.dict.txt2vec('__p1__')[0]
        p2 = self.dict.txt2vec('__p2__')[0]
        self.prev_responses = []
        # Do naively for now with a for loop
        for text_vec in batch.text_vec:
            p1s = (text_vec == p1).nonzero()
            p2s = (text_vec == p2).nonzero()
            if len(p1s) and len(p2s):
                response_vec = text_vec[p2s[-1] + 1:p1s[-1]]
            else:
                response_vec = [self.NULL_IDX]  # TODO: pull in actual N
            response = self.dict.vec2txt(response_vec)
            self.prev_responses.append(response)
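`_extract_prev_responses` above slices out the span between the last `__p2__` token and the last `__p1__` token. A toy illustration with hypothetical token IDs:

import torch

p1, p2 = 3, 4                                        # hypothetical __p1__/__p2__ IDs
text_vec = torch.tensor([3, 10, 11, 4, 20, 21, 3, 30])

p1s = (text_vec == p1).nonzero()
p2s = (text_vec == p2).nonzero()
prev_response_vec = text_vec[p2s[-1] + 1:p1s[-1]]    # tensor([20, 21])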
Example #22
def _rouge(guess, answers):
    """Compute ROUGE score between guess and *any* answer. Return the best."""
    global rouge
    if rouge is None:
        return None, None, None
    evaluator = rouge.Rouge(metrics=['rouge-n', 'rouge-l'], max_n=2)
    try:
        scores = [
            evaluator.get_scores(normalize_answer(guess), normalize_answer(a))
            for a in answers
        ]
    except LookupError:
        warn_once('ROUGE requires nltk punkt tokenizer. Please run '
                  '`python -c "import nltk; nltk.download(\'punkt\')"`')
        rouge = None
        return None, None, None

    scores_rouge1 = [score['rouge-1']['r'] for score in scores]
    scores_rouge2 = [score['rouge-2']['r'] for score in scores]
    scores_rougeL = [score['rouge-l']['r'] for score in scores]
    return max(scores_rouge1), max(scores_rouge2), max(scores_rougeL)
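Assuming the py-rouge package is installed and exposes the interface used above (`Rouge(...).get_scores` returning per-metric dicts with an 'r' recall field), a minimal usage sketch:

import rouge

evaluator = rouge.Rouge(metrics=['rouge-n', 'rouge-l'], max_n=2)
scores = evaluator.get_scores('the cat sat on the mat', 'a cat sat on the mat')
print(scores['rouge-1']['r'], scores['rouge-2']['r'], scores['rouge-l']['r'])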
Example #23
    def forward(self, input, encoder_state, incr_state=None):
        encoder_output, encoder_mask = encoder_state

        seq_len = input.size(1)
        positions = input.new(seq_len).long()
        positions = torch.arange(seq_len, out=positions).unsqueeze(0)
        tensor = self.embeddings(input)
        if self.embeddings_scale:
            tensor = tensor * np.sqrt(self.dim)
        if self.variant == 'xlm':
            tensor = _normalize(tensor, self.norm_embeddings)
        if positions.max().item() > self.n_positions:
            warn_once(
                'You are inputting a sequence of {x} length, but only have '
                '--n-positions {y}. Set --truncate or increase --n-positions'.
                format(x=positions.max().item(), y=self.n_positions))
        tensor = tensor + self.position_embeddings(positions).expand_as(tensor)
        tensor = self.dropout(tensor)  # --dropout

        for layer in self.layers:
            tensor = layer(tensor, encoder_output, encoder_mask)

        return tensor, None
    def is_valid(self, obs):
        """Override from TorchAgent."""
        if not self.opt.get('ignore_bad_candidates', False):
            return super().is_valid(obs)

        if 'text_vec' not in obs and 'image' not in obs:
            # TODO: this should really be a call to super, i.e.
            # if not super().is_valid(obs): return False
            return False

        # skip examples for which the set of label candidates does not
        # contain the label
        if 'labels_vec' in obs and 'label_candidates_vecs' in obs:
            cand_vecs = obs['label_candidates_vecs']
            label_vec = obs['labels_vec']
            matches = [x for x in cand_vecs if torch.equal(x, label_vec)]
            if len(matches) == 0:
                warn_once(
                    'At least one example has a set of label candidates that '
                    'does not contain the label.')
                return False

        return True
Example #25
def verify(opt, printargs=None, print_parser=None):
    # create repeat label agent and assign it to the specified task
    agent = RepeatLabelAgent(opt)
    world = create_task(opt, agent)

    log_every_n_secs = opt.get('log_every_n_secs', -1)
    if log_every_n_secs <= 0:
        log_every_n_secs = float('inf')
    log_time = TimeLogger()

    counts = {}
    counts['missing_text'] = 0
    counts['missing_labels'] = 0
    counts['missing_label_candidates'] = 0
    counts['empty_label_candidates'] = 0

    # Show some example dialogs.
    while not world.epoch_done():
        world.parley()

        act = world.acts[0]
        if 'text' not in act:
            warn_once("warning: missing text field")
            counts['missing_text'] += 1

        if 'labels' not in act and 'eval_labels' not in act:
            warn_once("warning: missing labels/eval_labels field")
            counts['missing_labels'] += 1
        else:
            if 'label_candidates' not in act:
                counts['missing_label_candidates'] += 1
            else:
                for c in act['label_candidates']:
                    if c == '':
                        warn_once("warning: empty string label_candidate")
                        counts['empty_label_candidates'] += 1

        if log_time.time() > log_every_n_secs:
            text, log = report(world, counts, log_time)
            if print_parser:
                print(text)

    try:
        # print dataset size if available
        print('[ loaded {} episodes with a total of {} examples ]'.format(
            world.num_episodes(), world.num_examples()))
    except Exception:
        pass
    return report(world, counts, log_time)
Example #26
    def eval_step(self, batch):
        """Evaluate a single batch of examples."""
        if batch.text_vec is None:
            return
        bsz = batch.text_vec.size(0)
        self.model.eval()
        cand_scores = None
        if getattr(batch, 'movies', None):
            assert hasattr(self.model, 'kbrd')
            self.model.user_representation, _ = self.model.kbrd.user_representation(
                batch.movies)
            self.model.user_representation = self.model.user_representation.detach()

        if batch.label_vec is not None:
            # calculate loss on targets with teacher forcing
            loss = self.compute_loss(
                batch)  # noqa: F841  we need the side effects
            self.metrics['loss'] += loss.item()

        preds = None
        if self.skip_generation:
            warn_once(
                "--skip-generation does not produce accurate metrics beyond ppl",
                RuntimeWarning)
        elif self.beam_size == 1:
            # greedy decode
            _, preds, *_ = self.model(*self._model_input(batch), bsz=bsz)
        elif self.beam_size > 1:
            out = self.beam_search(self.model,
                                   batch,
                                   self.beam_size,
                                   start=self.START_IDX,
                                   end=self.END_IDX,
                                   pad=self.NULL_IDX,
                                   min_length=self.beam_min_length,
                                   min_n_best=self.beam_min_n_best,
                                   block_ngram=self.beam_block_ngram)
            beam_preds_scores, _, beams = out
            preds, scores = zip(*beam_preds_scores)

            if self.beam_dot_log is True:
                self._write_beam_dots(batch.text_vec, beams)

        cand_choices = None
        # TODO: abstract out the scoring here
        if self.rank_candidates:
            # compute roughly ppl to rank candidates
            cand_choices = []
            encoder_states = self.model.encoder(*self._model_input(batch))
            for i in range(bsz):
                num_cands = len(batch.candidate_vecs[i])
                enc = self.model.reorder_encoder_states(
                    encoder_states, [i] * num_cands)
                cands, _ = padded_tensor(batch.candidate_vecs[i],
                                         self.NULL_IDX, self.use_cuda)
                scores, _ = self.model.decode_forced(enc, cands)
                cand_losses = F.cross_entropy(
                    scores.view(num_cands * cands.size(1), -1),
                    cands.view(-1),
                    reduction='none',
                ).view(num_cands, cands.size(1))
                # now cand_losses is cands x seqlen size, but we still need to
                # check padding and such
                mask = (cands != self.NULL_IDX).float()
                cand_scores = (cand_losses *
                               mask).sum(dim=1) / (mask.sum(dim=1) + 1e-9)
                _, ordering = cand_scores.sort()
                cand_choices.append([batch.candidates[i][o] for o in ordering])

        text = [self._v2t(p) for p in preds] if preds is not None else None
        return Output(text, cand_choices)
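The candidate-ranking block above scores each candidate by its length-normalized token cross-entropy under the decoder. A self-contained sketch with random tensors (purely illustrative shapes):

import torch
import torch.nn.functional as F

num_cands, seqlen, vocab, pad = 3, 5, 11, 0          # toy sizes; 0 is the pad index
logits = torch.randn(num_cands, seqlen, vocab)       # decoder scores per candidate
cands = torch.randint(1, vocab, (num_cands, seqlen)) # candidate token IDs
cands[0, -2:] = pad                                  # pretend candidate 0 is shorter

token_nll = F.cross_entropy(
    logits.view(num_cands * seqlen, vocab), cands.view(-1), reduction='none'
).view(num_cands, seqlen)
mask = (cands != pad).float()
cand_scores = (token_nll * mask).sum(dim=1) / (mask.sum(dim=1) + 1e-9)
ordering = cand_scores.sort().indices                # lowest average NLL ranks first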
Example #27
File: train_model.py  Project: ying-A/RED
    def train(self):
        if is_distributed():
            warn_once(
                "Distributed training outputs average-per-worker metrics during "
                "training, and may be slightly distorted. Validation/test are "
                "unadulterated.")
        opt = self.opt
        world = self.world
        with world:
            while True:
                # do one example / batch of examples
                world.parley()
                self.parleys += 1
                # print(world.display())

                # get the total training examples done, compute epochs
                self._total_epochs = (
                    self._preempted_epochs +
                    num_workers() * self.world.get_total_epochs())
                exs_per_epoch = self.world.num_examples()
                self._total_exs = int(
                    np.round(self._total_epochs * exs_per_epoch))

                # and use the primary worker's timings for everything
                train_time, log_time, validate_time = sync_object(
                    (self.train_time.time(), self.log_time.time(),
                     self.validate_time.time()))

                # check counters and timers
                if self._total_epochs >= self.max_num_epochs:
                    self.log()
                    print(
                        '[ num_epochs completed:{} time elapsed:{}s ]'.format(
                            self.max_num_epochs, train_time))
                    break
                if train_time > self.max_train_time:
                    print('[ max_train_time elapsed:{}s ]'.format(train_time))
                    break
                if log_time > self.log_every_n_secs:
                    self.log()
                if (validate_time > self.val_every_n_secs
                        or self._total_epochs - self.last_valid_epoch >=
                        self.val_every_n_epochs):
                    stop_training = self.validate()
                    self.last_valid_epoch = self._total_epochs
                    if stop_training:
                        break
                if (self.save_time.time() > self.save_every_n_secs
                        and opt.get('model_file') and is_primary_worker()):
                    print("[ saving model checkpoint: {}.checkpoint".format(
                        opt['model_file']))
                    self.save_model('.checkpoint')
                    self.save_time.reset()

        if not self.saved and is_primary_worker():
            # save agent
            self.save_model()
        elif opt.get('model_file'):
            # reload best validation model
            self.agent = create_agent(opt)

        valid_world = _maybe_load_eval_world(self.agent, opt, 'valid')
        v_report = run_eval(valid_world, opt, 'valid', write_log=True)
        test_world = _maybe_load_eval_world(self.agent, opt, 'test')
        t_report = run_eval(test_world, opt, 'test', write_log=True)
        if valid_world:
            valid_world.shutdown()
        if test_world:
            test_world.shutdown()

        return v_report, t_report
Example #28
def load_agent_module(opt):
    """Load agent options and module from file if opt file exists.

    Checks to see if file exists opt['model_file'] + ".opt"; if so, load up the
    options from the file and use that to create an agent, loading the model
    type from that file and overriding any options specified in that file when
    instantiating the agent.

    If that file does not exist, return None.
    """
    model_file = opt['model_file']
    optfile = model_file + '.opt'
    if os.path.isfile(optfile):
        new_opt = _load_opt_file(optfile)
        if 'batchindex' in new_opt:
            # This saved variable can cause trouble if we switch to BS=1 at test time
            del new_opt['batchindex']
        # only override opts specified in 'override' dict
        if opt.get('override'):
            for k, v in opt['override'].items():
                if str(v) != str(new_opt.get(k, None)):
                    print("[ warning: overriding opt['{}'] to {} ("
                          "previously: {} )]".format(k, v,
                                                     new_opt.get(k, None)))
                new_opt[k] = v
        # add model arguments to new_opt if they aren't in new_opt already
        for k, v in opt.items():
            if k not in new_opt:
                new_opt[k] = v
        new_opt['model_file'] = model_file
        if not new_opt.get('dict_file'):
            old_dict_file = None
            new_opt['dict_file'] = model_file + '.dict'
        elif new_opt.get('dict_file') and not os.path.isfile(
                new_opt['dict_file']):
            old_dict_file = new_opt['dict_file']
            new_opt['dict_file'] = model_file + '.dict'
        if not os.path.isfile(new_opt['dict_file']):
            warn_once(
                'WARNING: Neither the specified dict file ({}) nor the '
                '`model_file`.dict file ({}) exists, check to make sure either '
                'is correct. This may manifest as a shape mismatch later '
                'on.'.format(old_dict_file, new_opt['dict_file']))
        model_class = get_agent_module(new_opt['model'])

        # check for model version
        if hasattr(model_class, 'model_version'):
            curr_version = new_opt.get('model_version', 0)
            if curr_version != model_class.model_version():
                model = new_opt['model']
                m = ('It looks like you are trying to load an older version of'
                     ' the selected model. Change your model argument to use '
                     'the old version from parlai/agents/legacy_agents: for '
                     'example: `-m legacy:{m}:{v}` or '
                     '`--model parlai.agents.legacy_agents.{m}.{m}_v{v}:{c}`')
                if '.' not in model:
                    # give specific error message if it's easy
                    raise RuntimeError(
                        m.format(m=model,
                                 v=curr_version,
                                 c=model_class.__name__))
                else:
                    # otherwise generic one
                    raise RuntimeError(
                        m.format(m='modelname', v=curr_version,
                                 c='ModelAgent'))

        # if we want to load weights from --init-model, compare opts with
        # loaded ones
        compare_init_model_opts(opt, new_opt)
        return model_class(new_opt)
    else:
        return None
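The override logic above boils down to a simple precedence rule: explicit `--override` values win, everything else falls back to the values saved in the `.opt` file, and stale per-run state is dropped. A toy illustration with plain dicts (not ParlAI code):

saved_opt = {'model': 'transformer/ranker', 'lr': 1e-3, 'batchindex': 7}
runtime_opt = {'override': {'lr': 5e-4}, 'gpu': 0}

saved_opt.pop('batchindex', None)                 # drop stale per-batch state
for k, v in runtime_opt.get('override', {}).items():
    saved_opt[k] = v                              # explicit overrides always win
for k, v in runtime_opt.items():
    saved_opt.setdefault(k, v)                    # fill in anything not saved
# saved_opt == {'model': 'transformer/ranker', 'lr': 0.0005,
#               'override': {'lr': 0.0005}, 'gpu': 0}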
Example #29
    def _build_candidates(self, batch, source, mode):
        """Build a candidate set for this batch

        :param batch: a Batch object (defined in torch_agent.py)
        :param source: the source from which candidates should be built, one of
            ['batch', 'inline', 'fixed']
        :param mode: 'train' or 'eval'

        :return: tuple of (cands, cand_vecs, label_inds)
            cands: A [num_cands] list of (text) candidates
                OR a [batchsize] list of such lists if source=='inline'
            cand_vecs: A padded [num_cands, seqlen] LongTensor of vectorized candidates
                OR a [batchsize, num_cands, seqlen] LongTensor if source=='inline'
            label_inds: A [bsz] LongTensor of the indices of the labels for each
                example from its respective candidate set

        Possible sources of candidates:
            * batch: the set of all labels in this batch
                Use all labels in the batch as the candidate set (with all but the
                example's label being treated as negatives).
                Note: with this setting, the candidate set is identical for all
                examples in a batch. This option may be undesirable if it is possible
                for duplicate labels to occur in a batch, since the second instance of
                the correct label will be treated as a negative.
            * inline: batch_size lists, one list per example
                If each example comes with a list of possible candidates, use those.
                Note: With this setting, each example will have its own candidate set.
            * fixed: one global candidate list, provided in a file from the user
                If self.fixed_candidates is not None, use a set of fixed candidates for
                all examples.
                Note: this setting is not recommended for training unless the
                universe of possible candidates is very small.
            * vocab: one global candidate list, extracted from the vocabulary with the
                exception of self.NULL_IDX.
        """
        label_vecs = batch.label_vec  # [bsz] list of lists of LongTensors
        label_inds = None
        batchsize = batch.text_vec.shape[0]

        if label_vecs is not None:
            assert label_vecs.dim() == 2

        if source == 'batch':
            warn_once(
                '[ Executing {} mode with batch labels as set of candidates. ]'
                ''.format(mode))
            if batchsize == 1:
                warn_once(
                    "[ Warning: using candidate source 'batch' and observed a "
                    "batch of size 1. This may be due to uneven batch sizes at "
                    "the end of an epoch. ]")
            if label_vecs is None:
                raise ValueError(
                    "If using candidate source 'batch', then batch.label_vec cannot be "
                    "None.")

            cands = batch.labels
            cand_vecs = label_vecs
            label_inds = label_vecs.new_tensor(range(batchsize))

        elif source == 'inline':
            warn_once(
                '[ Executing {} mode with provided inline set of candidates ]'
                ''.format(mode))
            if batch.candidate_vecs is None:
                raise ValueError(
                    "If using candidate source 'inline', then batch.candidate_vecs "
                    "cannot be None. If your task does not have inline candidates, "
                    "consider using one of --{m}={{'batch','fixed','vocab'}}."
                    "".format(m='candidates' if mode ==
                              'train' else 'eval-candidates'))

            cands = batch.candidates
            cand_vecs = padded_3d(batch.candidate_vecs, use_cuda=self.use_cuda)
            if label_vecs is not None:
                label_inds = label_vecs.new_empty((batchsize))
                for i, label_vec in enumerate(label_vecs):
                    label_vec_pad = label_vec.new_zeros(cand_vecs[i].size(1))
                    label_vec_pad[0:label_vec.size(0)] = label_vec
                    label_inds[i] = self._find_match(cand_vecs[i],
                                                     label_vec_pad)

        elif source == 'fixed':
            if self.fixed_candidates is None:
                raise ValueError(
                    "If using candidate source 'fixed', then you must provide the path "
                    "to a file of candidates with the flag --fixed-candidates-path"
                )
            warn_once(
                "[ Executing {} mode with a common set of fixed candidates "
                "(n = {}). ]".format(mode, len(self.fixed_candidates)))

            cands = self.fixed_candidates
            cand_vecs = self.fixed_candidate_vecs
            if label_vecs is not None:
                label_inds = label_vecs.new_empty((batchsize))
                for i, label_vec in enumerate(label_vecs):
                    label_inds[i] = self._find_match(cand_vecs, label_vec)

        elif source == 'vocab':
            warn_once(
                '[ Executing {} mode with tokens from vocabulary as candidates. ]'
                ''.format(mode))
            cands = self.vocab_candidates
            cand_vecs = self.vocab_candidate_vecs
            if label_vecs is not None:
                label_inds = label_vecs.new_empty((batchsize))
                for i, label_vec in enumerate(label_vecs):
                    label_inds[i] = self._find_match(cand_vecs, label_vec)

        return (cands, cand_vecs, label_inds)
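In the 'batch' setting above, every label in the batch doubles as a candidate, so example i's gold candidate index is simply i and the rank loss reduces to a cross-entropy over the score matrix. A toy illustration:

import torch
import torch.nn.functional as F

batchsize = 4
scores = torch.randn(batchsize, batchsize)   # [bsz, num_cands] similarity scores
label_inds = torch.arange(batchsize)         # label i sits at candidate index i
loss = F.cross_entropy(scores, label_inds)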
Example #30
ROUGE_METRICS = {'rouge-1', 'rouge-2', 'rouge-L'}
ALL_METRICS = DEFAULT_METRICS | ROUGE_METRICS

try:
    from nltk.translate import bleu_score as nltkbleu
except ImportError:
    # User doesn't have nltk installed, so we can't use it for bleu
    # We'll just turn off things, but we might want to warn the user
    nltkbleu = None

try:
    import rouge as rouge
except ImportError:
    # User doesn't have rouge installed, so we can't use it for rouge
    # We'll just turn off things, but we might want to warn the user
    warn_once(
        'Rouge metrics require py-rouge. Please run `pip install py-rouge`.')
    rouge = None

re_art = re.compile(r'\b(a|an|the)\b')
re_punc = re.compile(r'[!"#$%&()*+,-./:;<=>?@\[\]\\^`{|}~_\']')


def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace."""
    def remove_articles(text):
        return re_art.sub(' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    def remove_punc(text):