Example no. 1
    def calc_loss(self, src, trg, loss_calculator):
        self.start_sent(src)
        initial_states = self._encode_src(src)
        # Calculate losses from multiple initial states
        losses = []
        for initial_state in initial_states:
            model_loss = FactoredLossExpr()
            model_loss.add_factored_loss_expr(
                loss_calculator.calc_loss(self, initial_state, src, trg))

            if self.global_fertility != 0:
                masked_attn = self.attender.attention_vecs
                if trg.mask is not None:
                    trg_mask = 1 - (trg.mask.np_arr.transpose())
                    masked_attn = [
                        dy.cmult(attn, dy.inputTensor(mask, batched=True))
                        for attn, mask in zip(masked_attn, trg_mask)
                    ]
                model_loss.add_loss("fertility",
                                    self._global_fertility(masked_attn))
            losses.append(model_loss)
        try:
            total_loss = FactoredLossExpr()
            for loss_expr in losses:
                total_loss.add_factored_loss_expr(loss_expr)
            return total_loss
        finally:
            self.losses = losses
Example no. 2
 def calc_loss(self, src, trg, loss_calculator):
     sub_losses = collections.defaultdict(list)
     for model in self.models:
         for loss_name, loss in model.calc_loss(
                 src, trg, loss_calculator).expr_factors.items():
             sub_losses[loss_name].append(loss)
     model_loss = FactoredLossExpr()
     for loss_name, losslist in sub_losses.items():
         # TODO: dy.average(losslist)  _or_  dy.esum(losslist) / len(self.models) ?
         #       -- might not be the same if not all models return all losses
         model_loss.add_loss(loss_name, dy.average(losslist))
     return model_loss
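
The trade-off noted in the TODO above can be made concrete: `dy.average` divides by the number of models that actually reported a given loss name, while `dy.esum(losslist) / len(self.models)` divides by the full ensemble size, so the two only agree when every model returns every loss. A minimal sketch, assuming DyNet is installed and using toy scalar losses:

import dynet as dy

dy.renew_cg()
# A hypothetical 3-model ensemble in which only 2 models report a "fertility" loss.
losslist = [dy.scalarInput(0.4), dy.scalarInput(0.6)]
num_models = 3

avg = dy.average(losslist)                       # (0.4 + 0.6) / 2 = 0.5
scaled = dy.esum(losslist) * (1.0 / num_models)  # (0.4 + 0.6) / 3 ~= 0.33
print(avg.value(), scaled.value())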
Example no. 3
 def on_calc_additional_loss(self, trg, generator, generator_loss):
     assert hasattr(generator, "losses"), "Must support multi sample encoder from generator."
     if self.policy_learning is None:
         return None
     ### Calculate reward
     rewards = []
     trg_counts = dy.inputTensor([t.len_unpadded() for t in trg],
                                 batched=True)
     # Iterate through all samples
     for i, (loss, actions) in enumerate(
             zip(generator.losses, self.compose_output)):
         reward = FactoredLossExpr()
         # Add all rewards coming from the translator
         for loss_key, loss_value in loss.get_nobackprop_loss().items():
             if loss_key == 'mle':
                 reward.add_loss('mle', dy.cdiv(-loss_value, trg_counts))
             else:
                 reward.add_loss(loss_key, -loss_value)
         if self.length_prior is not None:
             reward.add_loss(
                 'seg_lp',
                 self.length_prior.log_ll(self.seg_size_unpadded[i]))
         rewards.append(dy.esum(list(reward.expr_factors.values())))
     ### Calculate losses
     return self.policy_learning.calc_loss(rewards)
Example no. 4
    def calc_loss(self, src, trg, loss_cal=None, infer_prediction=False):
        self.start_sent(src)
        if not xnmt.batcher.is_batched(src):
            src = xnmt.batcher.mark_as_batch([src])
        if not xnmt.batcher.is_batched(trg):
            trg = xnmt.batcher.mark_as_batch([trg])
        src_words = np.array([[Vocab.SS] + x.words for x in src])
        batch_size, src_len = src_words.shape

        if src.mask is None:
            src_mask = np.zeros((batch_size, src_len), dtype=np.int)
        else:
            src_mask = np.concatenate([
                np.zeros((batch_size, 1), dtype=np.int),
                src.mask.np_arr.astype(np.int)
            ], axis=1)

        src_embeddings = self.sentence_block_embed(
            self.src_embedder.embeddings, src_words, src_mask)
        src_embeddings = self.make_input_embedding(src_embeddings, src_len)

        trg_words = np.array(
            list(map(lambda x: [Vocab.SS] + x.words[:-1], trg)))
        batch_size, trg_len = trg_words.shape

        if trg.mask is None:
            trg_mask = np.zeros((batch_size, trg_len), dtype=np.int)
        else:
            trg_mask = trg.mask.np_arr.astype(np.int)

        trg_embeddings = self.sentence_block_embed(
            self.trg_embedder.embeddings, trg_words, trg_mask)
        trg_embeddings = self.make_input_embedding(trg_embeddings, trg_len)

        xx_mask = self.make_attention_mask(src_mask, src_mask)
        xy_mask = self.make_attention_mask(trg_mask, src_mask)
        yy_mask = self.make_attention_mask(trg_mask, trg_mask)
        yy_mask *= self.make_history_mask(trg_mask)

        z_blocks = self.encoder.transduce(src_embeddings, xx_mask)
        h_block = self.decoder(trg_embeddings, z_blocks, xy_mask, yy_mask)

        if infer_prediction:
            y_len = h_block.dim()[0][1]
            last_col = dy.pick(h_block, dim=1, index=y_len - 1)
            logits = self.decoder.output(last_col)
            return logits

        ref_list = list(
            itertools.chain.from_iterable(map(lambda x: x.words, trg)))
        concat_t_block = (1 - trg_mask.ravel()).reshape(-1) * np.array(ref_list)
        loss = self.decoder.output_and_loss(h_block, concat_t_block)
        return FactoredLossExpr({"mle": loss})
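
The `yy_mask *= self.make_history_mask(trg_mask)` step above combines the padding mask with a causal mask, so each decoder position can attend only to itself and earlier positions. The helper below is an illustrative stand-in for such a history mask (the real `make_history_mask` may use different shape or sign conventions), built with NumPy only:

import numpy as np

def history_mask(batch_size, length):
    # Lower-triangular matrix: position i may attend to positions j <= i.
    arange = np.arange(length)
    mask = (arange[None, :] <= arange[:, None]).astype(np.int64)  # (length, length)
    return np.broadcast_to(mask[None, :, :], (batch_size, length, length))

print(history_mask(1, 4)[0])
# [[1 0 0 0]
#  [1 1 0 0]
#  [1 1 1 0]
#  [1 1 1 1]]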
Example no. 5
    def eval(self) -> 'EvalScore':
        """
    Perform evaluation task.

    Returns:
      Evaluated score
    """
        self.model.set_train(False)
        if self.src_data is None:
            self.src_data, self.ref_data, self.src_batches, self.ref_batches = \
              xnmt.input_reader.read_parallel_corpus(src_reader=self.model.src_reader,
                                                     trg_reader=self.model.trg_reader,
                                                     src_file=self.src_file,
                                                     trg_file=self.ref_file,
                                                     batcher=self.batcher,
                                                     max_src_len=self.max_src_len,
                                                     max_trg_len=self.max_trg_len)
        loss_val = FactoredLossVal()
        ref_words_cnt = 0
        for src, trg in zip(self.src_batches, self.ref_batches):
            with util.ReportOnException({
                    "src": src,
                    "trg": trg,
                    "graph": dy.print_text_graphviz
            }):
                dy.renew_cg(immediate_compute=settings.IMMEDIATE_COMPUTE,
                            check_validity=settings.CHECK_VALIDITY)

                loss_builder = FactoredLossExpr()
                standard_loss = self.model.calc_loss(src, trg,
                                                     self.loss_calculator)
                additional_loss = self.model.calc_additional_loss(
                    trg, self.model, standard_loss)
                loss_builder.add_factored_loss_expr(standard_loss)
                loss_builder.add_factored_loss_expr(additional_loss)

                ref_words_cnt += sum([trg_i.len_unpadded() for trg_i in trg])
                loss_val += loss_builder.get_factored_loss_val(
                    comb_method=self.loss_comb_method)

        loss_stats = {k: v / ref_words_cnt for k, v in loss_val.items()}

        try:
            return LossScore(loss_stats[self.model.get_primary_loss()],
                             loss_stats=loss_stats,
                             num_ref_words=ref_words_cnt,
                             desc=self.desc)
        except KeyError:
            raise RuntimeError(
                "Did you wrap your loss calculation with FactoredLossExpr({'primary_loss': loss_value}) ?"
            )
Example no. 6
    def calc_loss(self, translator, initial_state, src, trg):
        batch_size = trg.batch_size()
        uniques = [set() for _ in range(batch_size)]
        deltas = []
        probs = []

        search_outputs = translator.search_strategy.generate_output(
            translator, initial_state, forced_trg_ids=trg)
        for search_output in search_outputs:
            logprob = search_output.logsoftmaxes
            sample = search_output.word_ids
            attentions = search_output.attentions

            logprob = dy.esum(logprob) * self.alpha
            # Calculate the evaluation score
            eval_score = np.zeros(batch_size, dtype=float)
            mask = np.zeros(batch_size, dtype=float)
            for j in range(batch_size):
                ref_j = self.remove_eos(trg[j].words)
                hyp_j = self.remove_eos(sample[j].tolist())
                if self.unique_sample:
                    hash_val = hash(tuple(hyp_j))
                    if len(hyp_j) == 0 or hash_val in uniques[j]:
                        mask[j] = -INFINITY
                        continue
                    else:
                        # Count this sample in
                        uniques[j].add(hash_val)
                    # Calc evaluation score
                eval_score[j] = self.evaluation_metric.evaluate(ref_j, hyp_j) * \
                                (-1 if self.inv_eval else 1)
            # Appending the delta and logprob of this sample
            prob = logprob + dy.inputTensor(mask, batched=True)
            deltas.append(dy.inputTensor(eval_score, batched=True))
            probs.append(prob)
        sample_prob = dy.softmax(dy.concatenate(probs))
        deltas = dy.concatenate(deltas)
        risk = dy.sum_elems(dy.cmult(sample_prob, deltas))

        ### Debug
        #print(sample_prob.npvalue().transpose()[0])
        #print(deltas.npvalue().transpose()[0])
        #print("----------------------")
        ### End debug

        return FactoredLossExpr({"risk": risk})
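
The "risk" returned above is the expected cost over the drawn samples: the scaled sequence log-probabilities are renormalized with a softmax and weighted by the per-sample evaluation deltas. A minimal numeric sketch of that expectation, assuming DyNet and two samples for a single sentence:

import dynet as dy

dy.renew_cg()
# Scaled sequence log-probabilities of two sampled hypotheses (one sentence).
logprobs = dy.concatenate([dy.scalarInput(-1.0), dy.scalarInput(-2.0)])
# Per-sample costs (e.g. negated evaluation scores) of those hypotheses.
deltas = dy.concatenate([dy.scalarInput(0.2), dy.scalarInput(0.7)])

sample_prob = dy.softmax(logprobs)                  # distribution over the samples
risk = dy.sum_elems(dy.cmult(sample_prob, deltas))  # expected cost
print(risk.value())                                 # ~= 0.73 * 0.2 + 0.27 * 0.7 ~= 0.33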
Example no. 7
 def calc_loss(self, rewards):
     loss = FactoredLossExpr()
     ## Z-Normalization
     if self.z_normalization:
         reward_batches = dy.concatenate_to_batch(rewards)
         mean_batches = dy.mean_batches(reward_batches)
         std_batches = dy.std_batches(reward_batches)
         rewards = [
             dy.cdiv(reward - mean_batches, std_batches)
             for reward in rewards
         ]
     ## Calculate baseline
     if self.baseline is not None:
         pred_reward, baseline_loss = self.calc_baseline_loss(rewards)
         loss.add_loss("rl_baseline", baseline_loss)
     ## Calculate Confidence Penalty
     if self.confidence_penalty:
         loss.add_loss("rl_confpen",
                       self.confidence_penalty.calc_loss(self.policy_lls))
     ## Calculate Reinforce Loss
     reinf_loss = []
     # Loop through all actions in one sequence
     for i, (policy, action_sample) in enumerate(zip(self.policy_lls, self.actions)):
         # Discount the reward if we use baseline
         if self.baseline is not None:
             rewards = [reward - pred_reward[i] for reward in rewards]
         # Main Reinforce calculation
         sample_loss = []
         for action, reward in zip(action_sample, rewards):
             ll = dy.pick_batch(policy, action)
             if self.valid_pos is not None:
                 ll = dy.pick_batch_elems(ll, self.valid_pos[i])
                 reward = dy.pick_batch_elems(reward, self.valid_pos[i])
             sample_loss.append(dy.sum_batches(ll * reward))
         # Take the average of the losses across multiple samples
         reinf_loss.append(dy.esum(sample_loss) / len(sample_loss))
     loss.add_loss("rl_reinf", self.weight * -dy.esum(reinf_loss))
     ## Return the composed losses
     return loss
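
The "rl_reinf" term above is built from the score-function (REINFORCE) estimator: each action contributes `log pi(action) * reward`, and the negated, weighted sum is minimized so that actions with positive (baseline-discounted) reward become more likely. A minimal single-action sketch, assuming DyNet; the parameter and variable names are illustrative only:

import dynet as dy

dy.renew_cg()
pc = dy.ParameterCollection()
scores = dy.parameter(pc.add_parameters(3))  # unnormalized scores over 3 actions
policy = dy.log_softmax(scores)              # log-probabilities, analogous to policy_lls

action, reward = 2, 1.5                      # a sampled action and its discounted reward
ll = dy.pick(policy, action)                 # log pi(action)
reinf_loss = -ll * reward                    # minimizing this raises pi(action) when reward > 0
print(reinf_loss.value())
reinf_loss.backward()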
Example no. 8
 def calc_loss(self, translator, initial_state, src, trg):
     # TODO(philip30): currently only using the best hypothesis / first sample for reinforce loss
     # A small further implementation is needed if we want to do reinforce with multiple samples.
     search_output = translator.search_strategy.generate_output(
         translator, initial_state)[0]
     # Calculate evaluation scores
     self.eval_score = []
     for trg_i, sample_i in zip(trg, search_output.word_ids):
         # Removing EOS
         sample_i = self.remove_eos(sample_i.tolist())
         ref_i = self.remove_eos(trg_i.words)
         # Evaluating
         if len(sample_i) == 0:
             score = 0
         else:
             score = self.evaluation_metric.evaluate(ref_i, sample_i) * \
                     (-1 if self.inv_eval else 1)
         self.eval_score.append(score)
     self.true_score = dy.inputTensor(self.eval_score, batched=True)
     # Composing losses
     loss = FactoredLossExpr()
     if self.use_baseline:
         baseline_loss = []
         losses = []
         for state, logsoft, mask in zip(search_output.state,
                                         search_output.logsoftmaxes,
                                         search_output.mask):
             bs_score = self.baseline(state)
             baseline_loss.append(
                 dy.squared_distance(self.true_score, bs_score))
             loss_i = dy.cmult(logsoft, self.true_score - bs_score)
             losses.append(
                 dy.cmult(loss_i, dy.inputTensor(mask, batched=True)))
         loss.add_loss("reinforce", dy.sum_elems(dy.esum(losses)))
         loss.add_loss("reinf_baseline",
                       dy.sum_elems(dy.esum(baseline_loss)))
     else:
         loss.add_loss(
             "reinforce",
             dy.sum_elems(dy.cmult(self.true_score, dy.esum(search_output.logsoftmaxes))))
     return loss
Example no. 9
    def calc_loss(self, translator: 'translator.AutoRegressiveTranslator',
                  initial_state: 'translator.AutoRegressiveDecoderState',
                  src: Union[xnmt.input.Input, 'batcher.Batch'],
                  trg: Union[xnmt.input.Input, 'batcher.Batch']):
        dec_state = initial_state
        trg_mask = trg.mask if xnmt.batcher.is_batched(trg) else None
        losses = []
        seq_len = trg.sent_len()
        if xnmt.batcher.is_batched(src):
            for j, single_trg in enumerate(trg):
                assert single_trg.sent_len() == seq_len  # assert consistent length
                assert 1 == len([
                    i for i in range(seq_len)
                    if (trg_mask is None or trg_mask.np_arr[j, i] == 0)
                    and single_trg[i] == Vocab.ES
                ])  # assert exactly one unmasked ES token
        input_word = None
        for i in range(seq_len):
            ref_word = AutoRegressiveMLELoss._select_ref_words(
                trg, i, truncate_masked=self.truncate_dec_batches)
            if self.truncate_dec_batches and xnmt.batcher.is_batched(ref_word):
                dec_state.rnn_state, ref_word = xnmt.batcher.truncate_batches(
                    dec_state.rnn_state, ref_word)
            dec_state, word_loss = translator.calc_loss_one_step(
                dec_state, ref_word, input_word)
            if (not self.truncate_dec_batches and xnmt.batcher.is_batched(src)
                    and trg_mask is not None):
                word_loss = trg_mask.cmult_by_timestep_expr(word_loss, i, inverse=True)
            losses.append(word_loss)
            input_word = ref_word

        if self.truncate_dec_batches:
            loss_expr = dy.esum([dy.sum_batches(wl) for wl in losses])
        else:
            loss_expr = dy.esum(losses)
        return FactoredLossExpr({"mle": loss_expr})