示例#1
0
    def training_step(self, src, trg):
        """
        Computes the training loss for the given minibatch.

        Every loss term produced by the model is registered in a fresh
        ``LossBuilder``, the logger is updated, and the value returned by
        ``LossBuilder.compute()`` is handed back to the caller. This method
        itself does not run a backward pass or a parameter update.

        :param src: source side of the minibatch
        :param trg: target side of the minibatch
        :returns: the result of ``loss_builder.compute()``
        """
        loss_builder = LossBuilder()
        standard_loss = self.model.calc_loss(src, trg, self.loss_calculator)
        if isinstance(standard_loss, LossBuilder):
            # Several named terms: register each one and keep their running
            # sum, which is what calc_additional_loss receives below.
            total = None
            for loss_name, loss_expr in standard_loss.loss_nodes:
                loss_builder.add_loss(loss_name, loss_expr)
                # Explicit None test: the truth value of an expression
                # object is not a reliable "has been set" signal.
                total = loss_expr if total is None else total + loss_expr
            standard_loss = total
        else:
            loss_builder.add_loss("loss", standard_loss)

        # The additional loss sees the negated loss with gradients blocked.
        additional_loss = self.model.calc_additional_loss(
            dy.nobackprop(-standard_loss))
        if additional_loss is not None:
            loss_builder.add_loss("additional_loss", additional_loss)

        loss_value = loss_builder.compute()
        self.logger.update_epoch_loss(src, trg, loss_builder)
        self.logger.report_train_process()

        return loss_value
示例#2
0
 def calc_loss(self, src, trg, loss_calculator):
   """
   Combine the losses of all member models into one ``LossBuilder``.

   Each model in ``self.models`` is asked for its loss; the entries of the
   returned ``loss_values`` are grouped by loss name and every group is
   reduced with ``dy.average``.

   :param src: source input forwarded to every model
   :param trg: target input forwarded to every model
   :param loss_calculator: loss calculator forwarded to every model
   :returns: a ``LossBuilder`` holding one averaged entry per loss name
   """
   grouped = collections.defaultdict(list)
   for member in self.models:
     member_losses = member.calc_loss(src, trg, loss_calculator).loss_values
     for name, value in member_losses.items():
       grouped[name].append(value)

   combined = LossBuilder()
   for name in grouped:
     # TODO: dy.average(losslist)  _or_  dy.esum(losslist) / len(self.models) ?
     #       -- might not be the same if not all models return all losses
     combined.add_loss(name, dy.average(grouped[name]))
   return combined
示例#3
0
    def calc_loss(self, src, trg, loss_calculator):
        """
        Encode ``src``, initialise the decoder and collect the model losses.

        The ``"mle"`` term always comes from ``loss_calculator``. The
        ``"fertility"`` and ``"H(attn)"`` terms are added only when the
        corresponding ``calc_global_fertility`` / ``calc_attention_entropy``
        flags are set.

        :param src: source sentence or batch
        :param trg: target sentence or batch; its ``mask`` (if any) is applied
                    to the attention vectors before the attention-based terms
        :param loss_calculator: callable invoked as
                                ``loss_calculator(self, dec_state, src, trg)``
        :returns: a ``LossBuilder`` with the collected terms
        """
        self.start_sent(src)
        self.attender.init_sent(
            self.encoder(self.src_embedder.embed_sent(src)))

        # Start-of-sentence input: one symbol per batch element when batched.
        if is_batched(src):
            start_symbol = mark_as_batch([Vocab.SS] * len(src))
        else:
            start_symbol = Vocab.SS
        # Initialize the hidden state from the encoder
        dec_state = self.decoder.initial_state(
            self.encoder.get_final_states(),
            self.trg_embedder.embed(start_symbol))

        # Compose losses
        model_loss = LossBuilder()
        model_loss.add_loss("mle", loss_calculator(self, dec_state, src, trg))

        if self.calc_global_fertility or self.calc_attention_entropy:
            # philip30: I assume that attention_vecs is already masked src wisely.
            # Now applying the mask to the target
            masked_attn = self.attender.attention_vecs
            if trg.mask is not None:
                target_rows = trg.mask.get_active_one_mask().transpose()
                masked = []
                for attn_vec, row in zip(masked_attn, target_rows):
                    row_tensor = dy.inputTensor(row, batched=True)
                    masked.append(dy.cmult(attn_vec, row_tensor))
                masked_attn = masked

        if self.calc_global_fertility:
            fertility = self.global_fertility(masked_attn)
            model_loss.add_loss("fertility", fertility)
        if self.calc_attention_entropy:
            entropy = self.attention_entropy(masked_attn)
            model_loss.add_loss("H(attn)", entropy)

        return model_loss
示例#4
0
  def __call__(self, translator, dec_state, src, trg):
    """
    Sample one output per batch element and build the "Reinforce" loss.

    Up to ``self.sample_length`` words are sampled from the decoder's
    log-softmax. Each sample is scored against its reference with
    ``self.evaluation_metric.evaluate_fast`` after the end-of-sentence symbol
    and everything after it has been cut off on both sides. When
    ``self.use_baseline`` is set, a baseline prediction is recorded per step
    and a ``"Baseline"`` term is added as well.

    The reference sentences in ``trg`` are only read; the EOS-stripped
    reference is a local copy, so the caller's batch is left untouched.

    Side effects: sets ``self.bs``, ``self.eval_score`` and ``self.true_score``.

    :param translator: model providing ``attender``, ``decoder`` and ``trg_embedder``
    :param dec_state: initial decoder state
    :param src: source batch (not used by this method)
    :param trg: target batch; each element exposes ``words``
    :returns: a ``LossBuilder`` with ``"Reinforce"`` and, optionally, ``"Baseline"``
    """
    # TODO: apply trg.mask ?
    samples = []
    logsofts = []
    self.bs = []
    done = [False for _ in range(len(trg))]
    for _ in range(self.sample_length):
      dec_state.context = translator.attender.calc_context(dec_state.rnn_state.output())
      if self.use_baseline:
        h_t = dy.tanh(translator.decoder.context_projector(dy.concatenate([dec_state.rnn_state.output(), dec_state.context])))
        # The baseline input is detached with dy.nobackprop.
        self.bs.append(self.baseline(dy.nobackprop(h_t)))
      logsoft = dy.log_softmax(translator.decoder.get_scores(dec_state))
      sample = logsoft.tensor_value().categorical_sample_log_prob().as_numpy()[0]
      # Keep track of previously sampled EOS
      sample = [sample_i if not done_i else Vocab.ES for sample_i, done_i in zip(sample, done)]
      # Appending and feeding in the decoder
      logsoft = dy.pick_batch(logsoft, sample)
      logsofts.append(logsoft)
      samples.append(sample)
      dec_state = translator.decoder.add_input(dec_state, translator.trg_embedder.embed(xnmt.batcher.mark_as_batch(sample)))
      # Check if we are done.
      done = [word == Vocab.ES for word in sample]
      if all(done):
        break

    # Turn the per-step lists into one word list per batch element.
    samples = np.stack(samples, axis=1).tolist()
    self.eval_score = []
    for trg_i, sample_i in zip(trg, samples):
      # Removing EOS from the sample
      try:
        sample_i = sample_i[:sample_i.index(Vocab.ES)]
      except ValueError:
        pass
      # Removing EOS from a local view of the reference; assigning the result
      # back to trg_i.words would permanently truncate the training data.
      ref_words = trg_i.words
      try:
        ref_words = ref_words[:ref_words.index(Vocab.ES)]
      except ValueError:
        pass
      # Calculate the evaluation score (an empty sample scores 0)
      score = self.evaluation_metric.evaluate_fast(ref_words, sample_i) if sample_i else 0
      self.eval_score.append(score)
    self.true_score = dy.inputTensor(self.eval_score, batched=True)
    loss = LossBuilder()

    if self.use_baseline:
      # Weight each step's log-probability by (baseline - score).
      for i, (score, logsoft) in enumerate(zip(self.bs, logsofts)):
        logsofts[i] = dy.cmult(logsoft, score - self.true_score)
      loss.add_loss("Reinforce", dy.sum_elems(dy.esum(logsofts)))
      baseline_loss = [dy.squared_distance(self.true_score, bs) for bs in self.bs]
      loss.add_loss("Baseline", dy.sum_elems(dy.esum(baseline_loss)))
    else:
      loss.add_loss("Reinforce", dy.sum_elems(dy.cmult(-self.true_score, dy.esum(logsofts))))
    return loss
示例#5
0
  def eval(self):
    """
    Compute loss statistics over the evaluation corpus, normalised per word.

    The corpus is read on first use and cached on the instance
    (``src_data``, ``ref_data``, ``src_batches``, ``ref_batches``). For every
    batch a fresh computation graph is created, the standard and additional
    losses are registered in a ``LossBuilder`` and its statistics are
    accumulated. Each accumulated statistic is then divided by the total
    reference word count.

    :returns: tuple ``(LossScore, ref_words_cnt)``
    :raises RuntimeError: if a ``KeyError`` occurs while building the score,
                          e.g. the primary loss name is missing from the stats
    """
    # Identity test: "== None" is ambiguous for array-like corpus containers.
    if self.src_data is None:
      self.src_data, self.ref_data, self.src_batches, self.ref_batches = \
        xnmt.input_reader.read_parallel_corpus(self.model.src_reader, self.model.trg_reader,
                                        self.src_file, self.ref_file, batcher=self.batcher,
                                        max_src_len=self.max_src_len, max_trg_len=self.max_trg_len)
    loss_val = LossScalarBuilder()
    ref_words_cnt = 0
    for src, trg in zip(self.src_batches, self.ref_batches):
      dy.renew_cg(immediate_compute=settings.IMMEDIATE_COMPUTE, check_validity=settings.CHECK_VALIDITY)

      loss_builder = LossBuilder()
      standard_loss = self.model.calc_loss(src, trg, self.loss_calculator)
      additional_loss = self.model.calc_additional_loss(standard_loss)
      loss_builder.add_loss("standard_loss", standard_loss)
      loss_builder.add_loss("additional_loss", additional_loss)

      ref_words_cnt += self.model.trg_reader.count_words(trg)
      loss_val += loss_builder.get_loss_stats()

    # Normalise every accumulated statistic by the number of reference words.
    loss_stats = {k: v/ref_words_cnt for k, v in loss_val.items()}

    try:
      return LossScore(loss_stats[self.model.get_primary_loss()], loss_stats=loss_stats, desc=self.desc), ref_words_cnt
    except KeyError:
      raise RuntimeError("Did you wrap your loss calculation with LossBuilder({'primary_loss': loss_value}) ?")
示例#6
0
    def one_epoch(self, update_weights=True):
        """
        Runs one pass over the training batches in random order.

        For every batch the loss terms are collected in a ``LossBuilder``,
        logged, and (optionally) back-propagated followed by a trainer
        update. Dev evaluation is triggered whenever the logger asks for it.

        :param update_weights: Whether to perform backward pass & update weights (useful for debugging)
        """

        self.logger.new_epoch()

        if self.args["reload_command"] is not None:
            self._augment_data_next_epoch()

        self.model.set_train(update_weights)
        order = list(range(0, len(self.train_src)))
        np.random.shuffle(order)
        for batch_num in order:
            src = self.train_src[batch_num]
            trg = self.train_trg[batch_num]

            # Loss calculation
            dy.renew_cg()
            loss_builder = LossBuilder()
            standard_loss = self.model.calc_loss(src, trg)

            if isinstance(standard_loss, LossBuilder):
                # Several named terms: register each one and keep their
                # running sum for calc_additional_loss below.
                total = None
                for loss_name, loss_expr in standard_loss.loss_nodes:
                    loss_builder.add_loss(loss_name, loss_expr)
                    # Explicit None test instead of relying on the truth
                    # value of an expression object.
                    total = loss_expr if total is None else total + loss_expr
                standard_loss = total
            else:
                loss_builder.add_loss("loss", standard_loss)

            # The additional loss sees the negated loss with gradients blocked.
            additional_loss = self.model.calc_additional_loss(
                dy.nobackprop(-standard_loss))
            if additional_loss is not None:
                loss_builder.add_loss("additional_loss", additional_loss)

            # Log the loss sum
            loss_value = loss_builder.compute()
            self.logger.update_epoch_loss(src, trg, loss_builder)
            if update_weights:
                loss_value.backward()
                self.trainer.update()

            # Devel reporting
            self.logger.report_train_process()
            if self.logger.should_report_dev():
                self.dev_evaluation()

            # NOTE(review): this runs once per batch, not once per epoch.
            # Kept as-is because training behaviour may depend on it --
            # confirm whether it belongs after the loop.
            self.model.new_epoch()
示例#7
0
  def on_calc_additional_loss(self, translator_loss):
    """
    Build the segmentation loss terms from the translator's loss.

    The reward is the negated ``"mle"`` entry of ``translator_loss``
    (exponentiated unless ``self.log_reward`` is set) with gradients blocked.
    It is optionally shifted by the length prior and z-normalised over the
    batch. Depending on the configuration the returned builder contains the
    terms ``"Baseline"``, ``"Reinforce"`` and ``"Confidence Penalty"``.

    :param translator_loss: object indexable by loss name; only ``"mle"`` is read
    :returns: ``None`` when segmentation is not being learned or no segment
              decisions are stored, otherwise a ``LossBuilder``
    """
    if not self.learn_segmentation or self.segment_decisions is None:
      return None
    reward = -translator_loss["mle"]
    if not self.log_reward:
      reward = dy.exp(reward)
    reward = dy.nobackprop(reward)

    # Make sure that reward is not scalar, but rather based on the each batch item
    assert reward.dim()[1] == len(self.src_sent)
    # Mask
    enc_mask = self.enc_mask.get_active_one_mask().transpose() if self.enc_mask is not None else None
    # Compose the loss
    ret = LossBuilder()
    ## Length prior
    alpha = self.length_prior_alpha.value() if self.length_prior_alpha is not None else 0
    if alpha > 0:
      reward += self.segment_length_prior * alpha
    # reward z-score normalization
    if self.z_normalization:
      reward = dy.cdiv(reward-dy.mean_batches(reward), dy.std_batches(reward) + EPS)
    ## Baseline Loss
    if self.use_baseline:
      baseline_loss = []
      for i, baseline in enumerate(self.bs):
        step_loss = dy.squared_distance(reward, baseline)
        if enc_mask is not None:
          step_loss = dy.cmult(dy.inputTensor(enc_mask[i], batched=True), step_loss)
        baseline_loss.append(step_loss)

      ret.add_loss("Baseline", dy.esum(baseline_loss))

    if self.print_sample:
      # The step to print used to come from the baseline loop's leaked index,
      # which raised NameError whenever that loop had not run. Keep that
      # index when it exists, otherwise fall back to the last step.
      if self.use_baseline and len(self.bs) > 0:
        sample_idx = len(self.bs) - 1
      else:
        sample_idx = len(self.segment_logsoftmaxes) - 1
      if sample_idx >= 0:
        print(dy.exp(self.segment_logsoftmaxes[sample_idx]).npvalue().transpose()[0])
    ## Reinforce Loss
    lmbd = self.lmbd.value()
    if lmbd > 0.0:
      reinforce_loss = []
      # Calculating the loss of the baseline and reinforce
      for i in range(len(self.segment_decisions)):
        ll = dy.pick_batch(self.segment_logsoftmaxes[i], self.segment_decisions[i])
        if self.use_baseline:
          # The baseline is detached so this term does not train it.
          r_i = reward - dy.nobackprop(self.bs[i])
        else:
          r_i = reward
        if enc_mask is not None:
          ll = dy.cmult(dy.inputTensor(enc_mask[i], batched=True), ll)
        reinforce_loss.append(r_i * -ll)
      ret.add_loss("Reinforce", dy.esum(reinforce_loss) * lmbd)
    if self.confidence_penalty:
      ls_loss = self.confidence_penalty(self.segment_logsoftmaxes, enc_mask)
      ret.add_loss("Confidence Penalty", ls_loss)
    # Total Loss
    return ret
示例#8
0
    def calc_loss(self, src, trg, loss_cal=None, infer_prediction=False):
        """
        Run the encoder/decoder on a batch and return the loss or the logits.

        Unbatched inputs are wrapped into a batch of one. A start symbol
        (``Vocab.SS``) is prepended to every source sentence; the decoder
        input is every target sentence with its last word dropped and the
        start symbol prepended.

        :param src: source sentence or batch; elements expose ``words``
        :param trg: target sentence or batch; elements expose ``words``
        :param loss_cal: accepted but not used by this method
        :param infer_prediction: if true, return ``self.decoder.output`` applied
                                 to the last index along dimension 1 of the
                                 decoder block instead of a loss
        :returns: the logits when ``infer_prediction`` is set, otherwise
                  ``LossBuilder({"mle": loss})``
        """
        self.start_sent(src)
        if not xnmt.batcher.is_batched(src):
            src = xnmt.batcher.mark_as_batch([src])
        if not xnmt.batcher.is_batched(trg):
            trg = xnmt.batcher.mark_as_batch([trg])
        src_words = np.array([[Vocab.SS] + x.words for x in src])
        batch_size, src_len = src_words.shape

        # The builtin int replaces the np.int alias, which newer NumPy
        # releases no longer provide; the resulting dtype is the same.
        if src.mask is None:
            src_mask = np.zeros((batch_size, src_len), dtype=int)
        else:
            # Extra leading zero column for the prepended start symbol.
            src_mask = np.concatenate(
                [
                    np.zeros((batch_size, 1), dtype=int),
                    src.mask.np_arr.astype(int),
                ],
                axis=1)

        src_embeddings = self.sentence_block_embed(
            self.src_embedder.embeddings, src_words, src_mask)
        src_embeddings = self.make_input_embedding(src_embeddings, src_len)

        trg_words = np.array([[Vocab.SS] + x.words[:-1] for x in trg])
        batch_size, trg_len = trg_words.shape

        if trg.mask is None:
            trg_mask = np.zeros((batch_size, trg_len), dtype=int)
        else:
            trg_mask = trg.mask.np_arr.astype(int)

        trg_embeddings = self.sentence_block_embed(
            self.trg_embedder.embeddings, trg_words, trg_mask)
        trg_embeddings = self.make_input_embedding(trg_embeddings, trg_len)

        xx_mask = self.make_attention_mask(src_mask, src_mask)
        xy_mask = self.make_attention_mask(trg_mask, src_mask)
        yy_mask = self.make_attention_mask(trg_mask, trg_mask)
        yy_mask *= self.make_history_mask(trg_mask)

        z_blocks = self.encoder(src_embeddings, xx_mask)
        h_block = self.decoder(trg_embeddings, z_blocks, xy_mask, yy_mask)

        if infer_prediction:
            y_len = h_block.dim()[0][1]
            last_col = dy.pick(h_block, dim=1, index=y_len - 1)
            logits = self.decoder.output(last_col)
            return logits

        # Flatten all reference words; the factor (1 - mask) turns entries
        # whose mask value is 1 into 0.
        ref_list = list(
            itertools.chain.from_iterable(x.words for x in trg))
        concat_t_block = (1 -
                          trg_mask.ravel()).reshape(-1) * np.array(ref_list)
        loss = self.decoder.output_and_loss(h_block, concat_t_block)
        return LossBuilder({"mle": loss})
示例#9
0
 def compute_dev_loss(self):
     """
     Accumulate the model loss over the dev set and normalise it per word.

     A new computation graph is started for every dev batch; the batch loss
     is registered under the name ``"loss"`` and computed immediately.

     :returns: tuple ``(target word count, LossScore)`` where the score wraps
               the summed loss divided by the target word count
     """
     dev_losses = LossBuilder()
     n_trg_words = 0
     for dev_src, dev_trg in zip(self.dev_src, self.dev_trg):
         dy.renew_cg()
         batch_loss = self.model.calc_loss(dev_src, dev_trg)
         dev_losses.add_loss("loss", batch_loss)
         n_trg_words = n_trg_words + self.logger.count_trg_words(dev_trg)
         dev_losses.compute()
     per_word_loss = dev_losses.sum() / n_trg_words
     return n_trg_words, LossScore(per_word_loss)
示例#10
0
 def on_calc_additional_loss(self, reward):
     """
     Build the segmentation loss terms for the given reward.

     The reward is optionally shifted by the length prior, then normalised
     over the batch (mean subtracted, divided by the batch standard
     deviation). The result may contain a ``"Baseline"`` term and, when the
     current lambda value is positive, a ``"Reinforce"`` term.

     :param reward: reward expression for the batch
     :returns: ``None`` if segmentation is not being learned, otherwise a
               ``LossBuilder``
     """
     if not self.learn_segmentation:
         return None
     ret = LossBuilder()
     if self.length_prior_alpha > 0:
         reward += self.segment_length_prior * self.length_prior_alpha
     # NOTE(review): the divisor has no stabilising epsilon; presumably a
     # zero batch standard deviation would break this -- confirm.
     centered = reward - dy.mean_batches(reward)
     reward = dy.cdiv(centered, dy.std_batches(reward))
     # Baseline Loss
     if self.use_baseline:
         baseline_terms = [
             dy.squared_distance(reward, baseline) for baseline in self.bs
         ]
         ret.add_loss("Baseline", dy.esum(baseline_terms))
     # Reinforce Loss
     lmbd = self.lmbd.get_value(self.warmup_counter)
     if lmbd > 0.0:
         reinforce_terms = []
         # One term per segmentation step: log-likelihood of the decision
         # that was taken, weighted by the squashed reward.
         for step, decisions in enumerate(self.segment_decisions):
             log_lik = dy.pick_batch(self.segment_logsoftmaxes[step],
                                     decisions)
             advantage = (reward - self.bs[step]) if self.use_baseline else reward
             reinforce_terms.append(dy.logistic(advantage) * log_lik)
         ret.add_loss("Reinforce", -dy.esum(reinforce_terms) * lmbd)
     # Total Loss
     return ret
示例#11
0
  def training_step(self, src, trg):
    """
    Computes the training loss for the given minibatch and logs it.

    The model's standard loss and the additional loss derived from it are
    registered in a ``LossBuilder``; the loss statistics are passed to the
    logger before the computed loss value is returned.

    :param src: source side of the minibatch
    :param trg: target side of the minibatch
    :returns: the result of ``LossBuilder.compute()``
    """
    builder = LossBuilder()
    main_loss = self.model.calc_loss(src, trg, self.loss_calculator)
    extra_loss = self.model.calc_additional_loss(main_loss)
    for term_name, term in (("standard_loss", main_loss),
                            ("additional_loss", extra_loss)):
      builder.add_loss(term_name, term)

    computed = builder.compute()
    stats = builder.get_loss_stats()
    self.logger.update_epoch_loss(src, trg, stats)
    self.logger.report_train_process()

    return computed
示例#12
0
 def training_step(self, src, trg):
     """
     Collects the loss terms for the given minibatch.

     :param src: source side of the minibatch
     :param trg: target side of the minibatch
     :returns: a ``LossBuilder`` holding the model's ``"standard_loss"`` and
               the ``"additional_loss"`` derived from it
     """
     builder = LossBuilder()
     main_loss = self.model.calc_loss(src, trg, self.loss_calculator)
     extra_loss = self.model.calc_additional_loss(main_loss)
     for term_name, term in (("standard_loss", main_loss),
                             ("additional_loss", extra_loss)):
         builder.add_loss(term_name, term)
     return builder
示例#13
0
    def __call__(self, translator, initial_state, src, trg):
        """
        Compute the ``"risk"`` loss over the outputs of the search strategy.

        For every search output the summed log-probabilities are scaled by
        ``self.alpha`` and each hypothesis is scored against its reference
        with ``self.evaluation_metric`` (sign flipped when ``self.inv_eval``
        is set). The scaled log-probabilities are soft-maxed across search
        outputs and the risk is the sum of probability times score.

        With ``self.unique_sample`` set, empty hypotheses and hypotheses
        already seen for the same batch element get ``-INFINITY`` added to
        their log-probability and keep a score of 0.

        :param translator: model providing ``search_strategy``
        :param initial_state: initial decoder state handed to the search
        :param src: source batch (not used by this method)
        :param trg: target batch; also passed as ``forced_trg_ids``
        :returns: ``LossBuilder({"risk": risk})``
        """
        batch_size = len(trg)
        # Hypotheses already counted, per batch element. The word tuples
        # themselves are stored, not their hash() values, so that a hash
        # collision cannot reject a distinct hypothesis.
        seen = [set() for _ in range(batch_size)]
        deltas = []
        probs = []

        search_outputs = translator.search_strategy.generate_output(
            translator, initial_state, forced_trg_ids=trg)
        for search_output in search_outputs:
            logprob = search_output.logsoftmaxes
            sample = search_output.word_ids

            logprob = dy.esum(logprob) * self.alpha
            # Calculate the evaluation score
            eval_score = np.zeros(batch_size, dtype=float)
            mask = np.zeros(batch_size, dtype=float)
            for j in range(batch_size):
                ref_j = self.remove_eos(trg[j].words)
                hyp_j = self.remove_eos(sample[j].tolist())
                if self.unique_sample:
                    hyp_key = tuple(hyp_j)
                    if len(hyp_j) == 0 or hyp_key in seen[j]:
                        mask[j] = -INFINITY
                        continue
                    # Count this sample in
                    seen[j].add(hyp_key)
                # Calc evaluation score
                eval_score[j] = self.evaluation_metric.evaluate(ref_j, hyp_j) * \
                                (-1 if self.inv_eval else 1)
            # Appending the delta and logprob of this sample
            prob = logprob + dy.inputTensor(mask, batched=True)
            deltas.append(dy.inputTensor(eval_score, batched=True))
            probs.append(prob)
        sample_prob = dy.softmax(dy.concatenate(probs))
        deltas = dy.concatenate(deltas)
        risk = dy.sum_elems(dy.cmult(sample_prob, deltas))

        return LossBuilder({"risk": risk})
示例#14
0
 def __call__(self, translator, initial_state, src, trg):
     """
     Build the ``"reinforce"`` loss from the first output of the search strategy.

     Each hypothesis is scored against its reference (both with EOS removed)
     using ``self.evaluation_metric``; empty hypotheses score 0 and the sign
     is flipped when ``self.inv_eval`` is set. With ``self.use_baseline`` a
     baseline is predicted from every search state, the masked per-step
     log-probabilities are weighted by ``score - baseline`` and a
     ``"reinf_baseline"`` term is added. Without a baseline, the summed
     log-probabilities are weighted by the score directly.

     Side effects: sets ``self.eval_score`` and ``self.true_score``.

     :param translator: model providing ``search_strategy``
     :param initial_state: initial decoder state handed to the search
     :param src: source batch (not used by this method)
     :param trg: target batch; each element exposes ``words``
     :returns: a ``LossBuilder`` with the composed terms
     """
     # TODO(philip30): currently only using the best hypothesis / first sample for reinforce loss
     # A small further implementation is needed if we want to do reinforce with multiple samples.
     search_output = translator.search_strategy.generate_output(
         translator, initial_state)[0]
     # Calculate evaluation scores
     self.eval_score = []
     for trg_i, sample_i in zip(trg, search_output.word_ids):
         # Removing EOS
         sample_i = self.remove_eos(sample_i.tolist())
         ref_i = self.remove_eos(trg_i.words)
         # Evaluating
         if len(sample_i) == 0:
             score = 0
         else:
             score = self.evaluation_metric.evaluate(ref_i, sample_i) * \
                     (-1 if self.inv_eval else 1)
         self.eval_score.append(score)
     self.true_score = dy.inputTensor(self.eval_score, batched=True)
     # Composing losses
     loss = LossBuilder()
     if self.use_baseline:
         baseline_loss = []
         losses = []
         for state, logsoft, mask in zip(search_output.state,
                                         search_output.logsoftmaxes,
                                         search_output.mask):
             bs_score = self.baseline(state)
             baseline_loss.append(
                 dy.squared_distance(self.true_score, bs_score))
             loss_i = dy.cmult(logsoft, self.true_score - bs_score)
             losses.append(
                 dy.cmult(loss_i, dy.inputTensor(mask, batched=True)))
         loss.add_loss("reinforce", dy.sum_elems(dy.esum(losses)))
         loss.add_loss("reinf_baseline",
                       dy.sum_elems(dy.esum(baseline_loss)))
     else:
         # This branch used to reference an undefined local and raised
         # NameError; the per-step log-probabilities live on search_output.
         # NOTE(review): unlike the baseline branch, search_output.mask is
         # not applied here -- confirm whether it should be.
         logsofts = list(search_output.logsoftmaxes)
         loss.add_loss(
             "reinforce",
             dy.sum_elems(dy.cmult(self.true_score, dy.esum(logsofts))))
     return loss
示例#15
0
    def calc_loss(self, src, trg, loss_calculator):
        """
        Encode token/transition pairs, initialise the decoder and collect losses.

        Every element of ``src`` is indexed as ``x[0]`` (tokens) and ``x[1]``
        (transitions); the tokens are embedded as a batch and encoded together
        with the transitions. ``loss_calculator`` must return a pair
        ``(loss, wer)``: the loss becomes the ``"mle"`` term and the second
        value is printed. The ``"fertility"`` and ``"H(attn)"`` terms are added
        only when the corresponding flags are set.

        :param src: sequence of ``(tokens, transitions)`` pairs
        :param trg: target batch; its ``mask`` (if any) is applied to the
                    attention vectors before the attention-based terms
        :param loss_calculator: callable invoked as
                                ``loss_calculator(self, dec_state, src, trg)``
        :returns: a ``LossBuilder`` with the collected terms
        """
        self.start_sent(src)
        tokens = [x[0] for x in src]
        transitions = [x[1] for x in src]
        print("Current Batch: " + str(len(tokens)) + " pairs.\n")
        # Computed once and reused below; the local name avoids shadowing any
        # imported is_batched helper.
        src_is_batched = xnmt.batcher.is_batched(src)
        tokens = xnmt.batcher.mark_as_batch(tokens)
        embeddings = self.src_embedder.embed_sent(tokens)
        encodings = self.encoder(embeddings, transitions)
        self.attender.init_sent(encodings)
        # Initialize the hidden state from the encoder
        ss = mark_as_batch(
            [Vocab.SS] * len(tokens)) if src_is_batched else Vocab.SS
        dec_state = self.decoder.initial_state(self.encoder._final_states,
                                               self.trg_embedder.embed(ss))
        # Compose losses
        model_loss = LossBuilder()
        loss, wer = loss_calculator(self, dec_state, src, trg)
        model_loss.add_loss("mle", loss)
        print("wer_b:" + str(wer))

        if self.calc_global_fertility or self.calc_attention_entropy:
            # philip30: I assume that attention_vecs is already masked src wisely.
            # Now applying the mask to the target
            masked_attn = self.attender.attention_vecs
            if trg.mask is not None:
                trg_mask = trg.mask.get_active_one_mask().transpose()
                masked_attn = [
                    dy.cmult(attn, dy.inputTensor(mask, batched=True))
                    for attn, mask in zip(masked_attn, trg_mask)
                ]

        if self.calc_global_fertility:
            model_loss.add_loss("fertility",
                                self.global_fertility(masked_attn))
        if self.calc_attention_entropy:
            model_loss.add_loss("H(attn)", self.attention_entropy(masked_attn))

        return model_loss