Example #1: REINFORCE loss with a learned baseline
  def calc_loss(self, translator, src, trg):
    search_outputs = translator.generate_search_output(src, self.search_strategy)
    sign = -1 if self.inv_eval else 1

    total_loss = FactoredLossExpr()
    for search_output in search_outputs:
      self.eval_score = []
      for trg_i, sample_i in zip(trg, search_output.word_ids):
        # Removing EOS
        sample_i = self.remove_eos(sample_i.tolist())
        ref_i = trg_i.words[:trg_i.len_unpadded()]
        score = self.evaluation_metric.evaluate_one_sent(ref_i, sample_i)
        self.eval_score.append(sign * score)
      self.reward = dy.inputTensor(self.eval_score, batched=True)
      # Composing losses
      loss = FactoredLossExpr()
      if self.baseline is not None:
        baseline_loss = []
        losses = []
        for state, logsoft, mask in zip(search_output.state,
                                        search_output.logsoftmaxes,
                                        search_output.mask):
          bs_score = self.baseline.transform(state)
          baseline_loss.append(dy.squared_distance(self.reward, bs_score))
          # Policy-gradient term: log-prob scaled by the baseline-corrected reward
          loss_i = dy.cmult(logsoft, self.reward - bs_score)
          losses.append(dy.cmult(loss_i, dy.inputTensor(mask, batched=True)))
        loss.add_loss("reinforce", dy.sum_elems(dy.esum(losses)))
        loss.add_loss("reinf_baseline", dy.sum_elems(dy.esum(baseline_loss)))
      else:
        # No baseline: weight the summed log-probs by the reward directly
        loss.add_loss("reinforce", dy.sum_elems(dy.cmult(self.reward, dy.esum(search_output.logsoftmaxes))))
      total_loss.add_factored_loss_expr(loss)
    return total_loss
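
A quick aside on the structure above: the snippet implements REINFORCE with a learned baseline. Below is a minimal, self-contained sketch of the two per-step terms (the policy-gradient term and the baseline regression), using DyNet as the snippets do; all values are made up and none of this is the toolkit's own code.

import dynet as dy
import numpy as np

dy.renew_cg()
batch_size = 4
# log-prob of the sampled word at one time step (made-up values)
logsoft = dy.inputTensor(np.log(np.full(batch_size, 0.25)), batched=True)
# sentence-level evaluation score used as the reward
reward = dy.inputTensor([0.9, 0.1, 0.5, 0.7], batched=True)
# the baseline's prediction of that reward
bs_score = dy.inputTensor([0.5, 0.5, 0.5, 0.5], batched=True)

# policy-gradient term: log-prob scaled by the baseline-corrected reward
loss_i = dy.cmult(logsoft, reward - bs_score)
# baseline regression: pull the baseline toward the observed reward
baseline_loss = dy.squared_distance(reward, bs_score)
print(loss_i.npvalue(), baseline_loss.npvalue())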
Example #2: weighted composition of multiple loss calculators
 def calc_loss(self,
               model: 'model_base.ConditionedModel',
               src: Union[sent.Sentence, 'batchers.Batch'],
               trg: Union[sent.Sentence, 'batchers.Batch']):
   total_loss = FactoredLossExpr()
   for loss, weight in zip(self.losses, self.loss_weight):
     total_loss.add_factored_loss_expr(loss.calc_loss(model, src, trg) * weight)
   return total_loss
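
The weighted combination above reduces, per loss factor, to a weighted sum. A toy, runnable illustration with plain dicts standing in for FactoredLossExpr (factor names borrowed from Examples #6 and #10, values made up):

factors_a = {"mle": 2.0}
factors_b = {"global_fertility": 0.6}
loss_weight = [1.0, 0.1]
total = {}
for factors, w in zip([factors_a, factors_b], loss_weight):
    for name, value in factors.items():
        total[name] = total.get(name, 0.0) + value * w
print(total)  # ≈ {'mle': 2.0, 'global_fertility': 0.06}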
Example #3: Transformer encoder-decoder MLE loss
    def calc_loss(self, src, trg, infer_prediction=False):
        event_trigger.start_sent(src)
        if not batchers.is_batched(src):
            src = batchers.mark_as_batch([src])
        if not batchers.is_batched(trg):
            trg = batchers.mark_as_batch([trg])
        src_words = np.array([[Vocab.SS] + x.words for x in src])
        batch_size, src_len = src_words.shape

        if src.mask is None:
            src_mask = np.zeros((batch_size, src_len), dtype=int)
        else:
            # Prepend a column of zeros for the added <s> token
            src_mask = np.concatenate(
                [np.zeros((batch_size, 1), dtype=int),
                 src.mask.np_arr.astype(int)],
                axis=1)

        src_embeddings = self.sentence_block_embed(
            self.src_embedder.embeddings, src_words, src_mask)
        src_embeddings = self.make_input_embedding(src_embeddings, src_len)

        # Shift the target right: feed <s> + words[:-1] as decoder input
        trg_words = np.array([[Vocab.SS] + x.words[:-1] for x in trg])
        batch_size, trg_len = trg_words.shape

        if trg.mask is None:
            trg_mask = np.zeros((batch_size, trg_len), dtype=int)
        else:
            trg_mask = trg.mask.np_arr.astype(int)

        trg_embeddings = self.sentence_block_embed(
            self.trg_embedder.embeddings, trg_words, trg_mask)
        trg_embeddings = self.make_input_embedding(trg_embeddings, trg_len)

        xx_mask = self.make_attention_mask(src_mask, src_mask)
        xy_mask = self.make_attention_mask(trg_mask, src_mask)
        yy_mask = self.make_attention_mask(trg_mask, trg_mask)
        yy_mask *= self.make_history_mask(trg_mask)

        z_blocks = self.encoder.transduce(src_embeddings, xx_mask)
        h_block = self.decoder(trg_embeddings, z_blocks, xy_mask, yy_mask)

        if infer_prediction:
            y_len = h_block.dim()[0][1]
            last_col = dy.pick(h_block, dim=1, index=y_len - 1)
            logits = self.decoder.output(last_col)
            return logits

        ref_list = list(
            itertools.chain.from_iterable(x.words for x in trg))
        # Zero out padded positions so they do not contribute to the loss
        concat_t_block = (1 - trg_mask.ravel()) * np.array(ref_list)
        loss = self.decoder.output_and_loss(h_block, concat_t_block)
        return FactoredLossExpr({"mle": loss})
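
Of the masks built above, only make_history_mask needs the causal structure: decoder position i may attend only to positions j <= i. A sketch of the standard lower-triangular construction, assuming 1 marks an allowed connection (the real make_history_mask may differ in convention or broadcasting):

import numpy as np

def history_mask(batch_size: int, trg_len: int) -> np.ndarray:
    # position i may attend to positions j <= i
    tri = np.tril(np.ones((trg_len, trg_len), dtype=np.int64))
    return np.broadcast_to(tri, (batch_size, trg_len, trg_len))

print(history_mask(1, 4)[0])
# [[1 0 0 0]
#  [1 1 0 0]
#  [1 1 1 0]
#  [1 1 1 1]]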
Example #4: repeated child loss plus event-triggered additional losses
 def calc_loss(self,
               model: 'model_base.ConditionedModel',
               src: Union[sent.Sentence, 'batchers.Batch'],
               trg: Union[sent.Sentence, 'batchers.Batch']):
   loss_builder = FactoredLossExpr()
   for _ in range(self.repeat):
     standard_loss = self.child_loss.calc_loss(model, src, trg)
     additional_loss = event_trigger.calc_additional_loss(trg, model, standard_loss)
     loss_builder.add_factored_loss_expr(standard_loss)
     loss_builder.add_factored_loss_expr(additional_loss)
   return loss_builder
Example #5: ensemble negative log-likelihood averaged across models
 def calc_nll(self, src: Union[batchers.Batch, sent.Sentence], trg: Union[batchers.Batch, sent.Sentence]) -> dy.Expression:
   sub_losses = collections.defaultdict(list)
   for model in self.models:
     for loss_name, loss in model.calc_nll(src, trg).expr_factors.items():
       sub_losses[loss_name].append(loss)
   model_loss = FactoredLossExpr()
   for loss_name, losslist in sub_losses.items():
     # TODO: dy.average(losslist)  _or_  dy.esum(losslist) / len(self.models) ?
     #       -- might not be the same if not all models return all losses
     model_loss.add_loss(loss_name, dy.average(losslist))
   return model_loss
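
Why the TODO above matters: dy.average divides by the number of losses actually collected, while dy.esum(...) / len(self.models) divides by the ensemble size. The two agree only if every model contributes every loss component, as this toy arithmetic shows:

losslist = [0.6, 0.8]                 # two of three models returned this factor
print(sum(losslist) / len(losslist))  # 0.7      (what dy.average computes)
print(sum(losslist) / 3)              # 0.466... (dy.esum / len(self.models))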
Example #6: global fertility loss over masked attention vectors
 def calc_loss(self,
               model: 'model_base.ConditionedModel',
               src: Union[sent.Sentence, 'batchers.Batch'],
               trg: Union[sent.Sentence, 'batchers.Batch']):
   assert hasattr(model, "attender") and hasattr(model.attender, "attention_vecs"), \
          "Must be called after MLELoss with models that have attender."
   masked_attn = model.attender.attention_vecs
   if trg.mask is not None:
     # Keep attention only at non-padded target steps (mask: 1 = padded)
     trg_mask = 1 - trg.mask.np_arr.transpose()
     masked_attn = [dy.cmult(attn, dy.inputTensor(mask, batched=True)) for attn, mask in zip(masked_attn, trg_mask)]
   
   loss = self.global_fertility(masked_attn)
   return FactoredLossExpr({"global_fertility": loss})
Example #7: minimum-risk training loss
    def calc_loss(self, translator, src, trg):
        batch_size = trg.batch_size()
        uniques = [set() for _ in range(batch_size)]
        deltas = []
        probs = []
        sign = -1 if self.inv_eval else 1
        search_outputs = translator.generate_search_output(
            src, self.search_strategy)
        for search_output in search_outputs:
            logprob = search_output.logsoftmaxes
            sample = search_output.word_ids

            logprob = dy.esum(logprob) * self.alpha
            # Calculate the evaluation score
            eval_score = np.zeros(batch_size, dtype=float)
            mask = np.zeros(batch_size, dtype=float)
            for j in range(batch_size):
                ref_j = self.remove_eos(trg[j].words)
                hyp_j = self.remove_eos(sample[j].tolist())
                if self.unique_sample:
                    hash_val = hash(tuple(hyp_j))
                    if len(hyp_j) == 0 or hash_val in uniques[j]:
                        mask[j] = -1e20  # effectively negative infinity
                        continue
                    uniques[j].add(hash_val)
                # Score this hypothesis against the reference
                eval_score[j] = self.evaluation_metric.evaluate_one_sent(
                    ref_j, hyp_j) * sign
            # Appending the delta and logprob of this sample
            prob = logprob + dy.inputTensor(mask, batched=True)
            deltas.append(dy.inputTensor(eval_score, batched=True))
            probs.append(prob)
        sample_prob = dy.softmax(dy.concatenate(probs))
        deltas = dy.concatenate(deltas)
        risk = dy.sum_elems(dy.cmult(sample_prob, deltas))

        return FactoredLossExpr({"risk": risk})
Example #8: policy-gradient reward from the generator loss
 def on_calc_additional_loss(self, trg, generator, generator_loss):
     if self.policy_learning is None:
         return None
     reward = FactoredLossExpr()
     reward.add_loss("generator",
                     -dy.inputTensor(generator_loss.value(), batched=True))
     if self.length_prior is not None:
         reward.add_loss('length_prior',
                         self.length_prior.log_ll(self.seg_size_unpadded))
     reward_value = reward.value()
     if trg.batch_size() == 1:
         reward_value = [reward_value]
     reward_tensor = dy.inputTensor(reward_value, batched=True)
      # Calculate the policy loss; the finally-block bookkeeping runs before returning
     try:
         return self.policy_learning.calc_loss(reward_tensor)
     finally:
         self.reward = reward
         if self.train and self.reporter is not None:
             self.reporter.report_process(self)
Example #9: policy-gradient reward with per-component normalization
 def on_calc_additional_loss(self, trg, generator, generator_loss):
     if self.policy_learning is None:
         return None
     trg_counts = dy.inputTensor([t.len_unpadded() for t in trg],
                                 batched=True)
     reward = FactoredLossExpr()
      # Add all rewards from the translator
      for loss_key, loss_value in generator_loss.get_nobackprop_loss().items():
          if loss_key == 'mle':
              # Normalize the MLE reward by the unpadded target length
              reward.add_loss('mle', dy.cdiv(-loss_value, trg_counts))
          else:
              reward.add_loss(loss_key, -loss_value)
     if self.length_prior is not None:
         reward.add_loss('seg_lp',
                         self.length_prior.log_ll(self.seg_size_unpadded))
      # Convert the factored reward into a batched tensor (avoids shadowing `reward`)
      reward_tensor = dy.inputTensor(reward.value(), batched=True)
      # Calculate the policy loss; the finally-block bookkeeping runs before returning
      try:
          return self.policy_learning.calc_loss(reward_tensor)
      finally:
          self.reward = reward_tensor
         if self.reporter is not None:
             self.reporter.report_process(self)
Example #10: standard maximum-likelihood (MLE) loss
 def calc_loss(self,
               model: 'model_base.ConditionedModel',
               src: Union[sent.Sentence, 'batchers.Batch'],
               trg: Union[sent.Sentence, 'batchers.Batch']):
   loss = model.calc_nll(src, trg)
   return FactoredLossExpr({"mle": loss})
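
FactoredLossExpr itself never appears in these examples, only its call sites. The stand-in below is inferred purely from those call sites (add_loss, add_factored_loss_expr, expr_factors, value, and multiplication by a weight in Example #2); it is an assumption about the interface, not the real implementation.

import dynet as dy

class FactoredLossSketch:
    """Stand-in guessed from usage; NOT the real FactoredLossExpr."""
    def __init__(self, init_loss=None):
        self.expr_factors = dict(init_loss or {})

    def add_loss(self, name, expr):
        self.expr_factors[name] = expr

    def add_factored_loss_expr(self, other):
        if other is None:
            return
        for name, expr in other.expr_factors.items():
            if name in self.expr_factors:
                self.expr_factors[name] = self.expr_factors[name] + expr
            else:
                self.expr_factors[name] = expr

    def __mul__(self, weight):
        return FactoredLossSketch({n: e * weight for n, e in self.expr_factors.items()})

    def value(self):
        # total loss value, summed over factors
        return dy.esum(list(self.expr_factors.values())).value()

dy.renew_cg()
loss = FactoredLossSketch({"mle": dy.scalarInput(2.0)})
loss.add_factored_loss_expr(FactoredLossSketch({"global_fertility": dy.scalarInput(0.6)}) * 0.1)
print(loss.value())  # 2.06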