Example #1
  def calc_nll(self, src: Union[batchers.Batch, sent.Sentence], trg: Union[batchers.Batch, sent.Sentence]) -> losses.LossExpr:
    # Collect the (loss expression, unit count) pair from every model in the ensemble.
    loss_values = [model.calc_nll(src, trg).loss_value() for model in self.models]
    ret_expr = []
    ret_units = []
    for loss_expr, unit in loss_values:
      ret_expr.append(loss_expr)
      ret_units.append(unit)

    # The ensemble NLL is the sum of the member losses; the unit counts are summed as well.
    return losses.LossExpr(dy.esum(ret_expr), np.sum(ret_units))
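Example #1 sums the ensemble members' NLL terms into a single LossExpr. A minimal numpy-only sketch of that aggregation, with made-up per-model (loss, units) pairs standing in for the real DyNet expressions:

import numpy as np

# Made-up per-model results: (NLL value, number of target units).
per_model = [(12.4, 7), (11.9, 7), (13.1, 7)]

ensemble_loss = sum(loss for loss, _ in per_model)        # mirrors dy.esum(ret_expr)
ensemble_units = int(np.sum([u for _, u in per_model]))   # mirrors np.sum(ret_units)
print(ensemble_loss, ensemble_units)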
Example #2
    def _perform_calc_loss(
        self, model: 'model_base.ConditionedModel',
        src: Union[sent.Sentence, 'batchers.Batch'],
        trg: Union[sent.Sentence,
                   'batchers.Batch']) -> losses.FactoredLossExpr:
        batch_size = trg.batch_size()
        uniques = [set() for _ in range(batch_size)]
        deltas = []
        probs = []
        sign = -1 if self.inv_eval else 1
        search_outputs = model.generate_search_output(src,
                                                      self.search_strategy)
        # TODO: Fix this
        for search_output in search_outputs:
            assert len(search_output.word_ids) == 1
            assert search_output.word_ids[0].shape == (len(
                search_output.state), )
            logprob = []
            for word, state in zip(search_output.word_ids[0],
                                   search_output.state):
                lpdist = model.decoder.scorer.calc_log_probs(state.as_vector())
                lp = dy.pick(lpdist, word)
                logprob.append(lp)
            sample = search_output.word_ids
            logprob = dy.esum(logprob) * self.alpha
            # Calculate the evaluation score
            eval_score = np.zeros(batch_size, dtype=float)
            mask = np.zeros(batch_size, dtype=float)
            for j in range(batch_size):
                ref_j = utils.remove_eos(trg[j].words, vocabs.Vocab.ES)
                hyp_j = utils.remove_eos(sample[j].tolist(), vocabs.Vocab.ES)
                if self.unique_sample:
                    hash_val = hash(tuple(hyp_j))
                    if len(hyp_j) == 0 or hash_val in uniques[j]:
                        mask[j] = -1e20  # represents negative infinity
                        continue
                    else:
                        uniques[j].add(hash_val)
                # Calc evaluation score
                eval_score[j] = self.evaluation_metric.evaluate_one_sent(
                    ref_j, hyp_j) * sign
            # Appending the delta and logprob of this sample
            prob = logprob + dy.inputTensor(mask, batched=True)
            deltas.append(dy.inputTensor(eval_score, batched=True))
            probs.append(prob)
        sample_prob = dy.softmax(dy.concatenate(probs))
        deltas = dy.concatenate(deltas)
        risk = dy.sum_elems(dy.cmult(sample_prob, deltas))
        units = [t.len_unpadded() for t in trg]
        return losses.FactoredLossExpr({"risk": losses.LossExpr(risk, units)})
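Example #2 computes a minimum-risk style objective: each sample's summed log-probability is scaled by alpha, duplicate or empty samples are knocked out with a -1e20 mask, the scores are renormalized with a softmax, and the expected signed evaluation score under that distribution is the risk. A small numpy sketch for a single batch element, with invented scores (the real code builds the same quantities as batched DyNet expressions):

import numpy as np

def softmax(x):
    e = np.exp(x - np.max(x))
    return e / e.sum()

alpha = 0.05                                  # plays the role of self.alpha
logprobs = np.array([-12.0, -14.5, -13.2])    # summed log p(sample) for 3 samples
mask = np.array([0.0, 0.0, -1e20])            # -1e20 disables a duplicate/empty sample
deltas = np.array([-0.42, -0.35, -0.10])      # sign * evaluation score per sample

q = softmax(alpha * logprobs + mask)          # renormalized distribution over samples
risk = float(np.sum(q * deltas))              # expected signed score under q
print(q, risk)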
Example #3
    def _perform_calc_loss(
        self, model: 'model_base.ConditionedModel',
        src: Union[sent.Sentence, 'batchers.Batch'],
        trg: Union[sent.Sentence,
                   'batchers.Batch']) -> losses.FactoredLossExpr:
        assert hasattr(model, "attender") and hasattr(model.attender, "attention_vecs"), \
               "Must be called after MLELoss with models that have attender."

        masked_attn = model.attender.attention_vecs
        if trg.mask is not None:
            trg_mask = 1 - (trg.mask.np_arr.transpose())
            masked_attn = [
                dy.cmult(attn, dy.inputTensor(mask, batched=True))
                for attn, mask in zip(masked_attn, trg_mask)
            ]
        loss = dy.sum_elems(dy.square(1 - dy.esum(masked_attn)))
        units = [t.len_unpadded() for t in trg]
        return losses.FactoredLossExpr(
            {"global_fertility": losses.LossExpr(loss, units)})
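Example #3 is a global fertility penalty: attention vectors at padded target steps are masked out, the remaining per-step attention vectors are summed per source position, and the squared deviation from 1 is accumulated. A numpy sketch with invented attention weights:

import numpy as np

# Invented attention weights: one vector over 4 source positions per target step.
attentions = [
    np.array([0.7, 0.2, 0.1, 0.0]),
    np.array([0.1, 0.6, 0.2, 0.1]),
    np.array([0.0, 0.1, 0.7, 0.2]),
]
trg_mask = np.array([1.0, 1.0, 1.0])   # 1 = real target step, 0 = padding (inverse of trg.mask)

fertility = sum(a * m for a, m in zip(attentions, trg_mask))   # total attention received per source word
penalty = float(np.sum((1.0 - fertility) ** 2))                # mirrors dy.sum_elems(dy.square(1 - dy.esum(...)))
print(penalty)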
Example #4
    def _perform_calc_loss(
        self, model: 'model_base.ConditionedModel',
        src: Union[sent.Sentence, 'batchers.Batch'],
        trg: Union[sent.Sentence,
                   'batchers.Batch']) -> losses.FactoredLossExpr:
        search_outputs = model.generate_search_output(src,
                                                      self.search_strategy)
        sign = -1 if self.inv_eval else 1

        # TODO: Fix units
        total_loss = collections.defaultdict(int)
        for search_output in search_outputs:
            # Calculate rewards
            eval_score = []
            for trg_i, sample_i in zip(trg, search_output.word_ids):
                # Removing EOS
                sample_i = utils.remove_eos(sample_i.tolist(), vocabs.Vocab.ES)
                ref_i = trg_i.words[:trg_i.len_unpadded()]
                score = self.evaluation_metric.evaluate_one_sent(
                    ref_i, sample_i)
                eval_score.append(sign * score)
            reward = dy.inputTensor(eval_score, batched=True)
            # Composing losses
            baseline_loss = []
            cur_losses = []
            for state, mask in zip(search_output.state, search_output.mask):
                bs_score = self.baseline.transform(
                    dy.nobackprop(state.as_vector()))
                baseline_loss.append(dy.squared_distance(reward, bs_score))
                logsoft = model.decoder.scorer.calc_log_probs(
                    state.as_vector())
                loss_i = dy.cmult(logsoft, reward - bs_score)
                cur_losses.append(
                    dy.cmult(loss_i, dy.inputTensor(mask, batched=True)))

            total_loss["polc_loss"] += dy.sum_elems(dy.esum(cur_losses))
            total_loss["base_loss"] += dy.sum_elems(dy.esum(baseline_loss))
        units = [t.len_unpadded() for t in trg]
        total_loss = losses.FactoredLossExpr(
            {k: losses.LossExpr(v, units)
             for k, v in total_loss.items()})
        return losses.FactoredLossExpr({"risk": total_loss})
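Example #4 is a REINFORCE-style loss with a learned baseline: the signed sentence-level evaluation score acts as the reward, a baseline predicted from each decoder state is regressed onto it, and the centered reward weights the decoder's log-probabilities at each unmasked step. A simplified single-sequence numpy sketch using the sampled words' log-probabilities (the standard estimator) and invented numbers; the snippet itself works on batched DyNet expressions:

import numpy as np

# One sampled sequence with invented numbers.
reward = -0.37                                      # sign * evaluation score of the sample
log_probs = np.array([-2.1, -0.8, -1.5, -0.4])      # log p(w_t) of the sampled words
baselines = np.array([-0.30, -0.40, -0.35, -0.38])  # per-step baseline predictions b_t
step_mask = np.array([1.0, 1.0, 1.0, 0.0])          # 0 = padded decoding step

centered = reward - baselines                                 # centered reward (reward - b_t)
polc_loss = float(np.sum(log_probs * centered * step_mask))   # policy term, cf. "polc_loss"
base_loss = float(np.sum((reward - baselines) ** 2))          # baseline regression, cf. "base_loss"
print(polc_loss, base_loss)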
Example #5
    def calc_nll(self, src_batch, trg_batch) -> losses.LossExpr:
        event_trigger.start_sent(src_batch)
        self.create_trajectories(src_batch,
                                 trg_batch,
                                 force_oracle=not self._is_action_forced())

        batch_loss = []
        for src, trg, decoder_state in zip(src_batch, trg_batch,
                                           self.decoder_states):
            seq_loss = [
                self.decoder.calc_loss(decoder_state[i], trg[i])
                for i in range(len(decoder_state))
            ]
            batch_loss.append(dy.esum(seq_loss))

        dy.forward(batch_loss)
        total_loss = dy.concatenate_to_batch(batch_loss)
        total_units = [
            trg_batch[i].len_unpadded() for i in range(trg_batch.batch_size())
        ]
        return losses.LossExpr(total_loss, total_units)
Example #6
    def calc_policy_nll(self, src_batch, trg_batch) -> losses.LossExpr:
        assert self.policy_network is not None

        event_trigger.start_sent(src_batch)
        self.create_trajectories(src_batch,
                                 trg_batch,
                                 force_oracle=not self._is_action_forced())

        batch_loss = []
        for src, action, model_states in zip(src_batch, self.actions,
                                             self.model_states):
            policy_actions = model_states[-1].find_backward("policy_action")
            seq_ll = [
                dy.pick(act.log_likelihood, act.content)
                for act in policy_actions
            ]
            batch_loss.append(-dy.esum(seq_ll))

        dy.forward(batch_loss)
        total_loss = dy.concatenate_to_batch(batch_loss)
        total_units = [len(x) for x in self.actions]
        return losses.LossExpr(total_loss, total_units)
Example #7
  def calc_loss(self, policy_reward, results={}):
    """
    Calc policy networks loss.
    """
    assert len(policy_reward) == len(self.states), "There should be a reward for every action taken"
    batch_size = self.states[0].dim()[1]
    loss = {}

    # For each timestep, compute the centered reward used by REINFORCE:
    #   b = W_b * s + b_b   (r_p, the baseline predicted from the state)
    #   R = r - b
    # and, alongside it, the baseline regression loss:
    #   loss_b = squared_distance(r_p, r_r)
    rewards = []
    baseline_loss = []
    units = np.zeros(batch_size)
    for i, state in enumerate(self.states):
      r_p = self.baseline.transform(dy.nobackprop(state))
      rewards.append(policy_reward[i] - r_p)
      if self.valid_pos[i] is not None:
        r_p = dy.pick_batch_elems(r_p, self.valid_pos[i])
        r_r = dy.pick_batch_elems(policy_reward[i], self.valid_pos[i])
        units[self.valid_pos[i]] += 1
      else:
        r_r = policy_reward[i]
        units += 1
      baseline_loss.append(dy.sum_batches(dy.squared_distance(r_p, r_r)))
    loss["rl_baseline"] = losses.LossExpr(dy.esum(baseline_loss), units)

    # Z Normalization
    # R = (R - mean(R)) / std(R)
    rewards = dy.concatenate(rewards, d=0)
    r_dim = rewards.dim()
    if self.z_normalization:
      rewards_shape = dy.reshape(rewards, (r_dim[0][0], r_dim[1]))
      rewards_mean = dy.mean_elems(rewards_shape)
      rewards_std = dy.std_elems(rewards_shape) + 1e-20
      rewards = (rewards - rewards_mean.value()) / rewards_std.value()
    rewards = dy.nobackprop(rewards)
    # Calculate Confidence Penalty
    if self.confidence_penalty:
      loss["rl_confpen"] = self.confidence_penalty.calc_loss(self.policy_lls)

    # Calculate Reinforce Loss
    # L = - sum([R-b] * pi_ll)
    reinf_loss = []
    units = np.zeros(batch_size)
    for i, (policy, action) in enumerate(zip(self.policy_lls, self.actions)):
      reward = dy.pick(rewards, i)
      ll = dy.pick_batch(policy, action)
      if self.valid_pos[i] is not None:
        ll = dy.pick_batch_elems(ll, self.valid_pos[i])
        reward = dy.pick_batch_elems(reward, self.valid_pos[i])
        units[self.valid_pos[i]] += 1
      else:
        units += 1
      reinf_loss.append(dy.sum_batches(dy.cmult(ll, reward)))
    loss["rl_reinf"] = losses.LossExpr(-dy.esum(reinf_loss), units)

    # Pack up + return
    return losses.FactoredLossExpr(loss)
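Example #7 optionally z-normalizes the centered rewards before forming the REINFORCE term. A numpy sketch of that normalization and of the per-timestep term -sum(log pi(a_t) * R_t), with invented rewards and log-likelihoods:

import numpy as np

# Invented centered rewards (R = r - b): rows are timesteps, columns are batch elements.
rewards = np.array([[0.2, -0.1, 0.4],
                    [0.0,  0.3, -0.2]])

# Z-normalization over all timesteps and batch elements, as in the snippet:
#   R = (R - mean(R)) / std(R)
normed = (rewards - rewards.mean()) / (rewards.std() + 1e-20)

# REINFORCE term for one timestep: -sum over the batch of log pi(a_t) * R_t
log_pi = np.array([-1.2, -0.7, -2.0])   # invented log-likelihoods of the chosen actions
reinf_t = -np.sum(log_pi * normed[0])
print(normed)
print(reinf_t)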