def calc_loss(self, translator, src, trg):
  search_outputs = translator.generate_search_output(src, self.search_strategy)
  sign = -1 if self.inv_eval else 1
  total_loss = FactoredLossExpr()
  for search_output in search_outputs:
    # Score each sampled hypothesis against its reference with the evaluation metric.
    self.eval_score = []
    for trg_i, sample_i in zip(trg, search_output.word_ids):
      # Removing EOS
      sample_i = self.remove_eos(sample_i.tolist())
      ref_i = trg_i.words[:trg_i.len_unpadded()]
      score = self.evaluation_metric.evaluate_one_sent(ref_i, sample_i)
      self.eval_score.append(sign * score)
    self.reward = dy.inputTensor(self.eval_score, batched=True)
    # Composing losses
    loss = FactoredLossExpr()
    if self.baseline is not None:
      baseline_loss = []
      losses = []
      for state, logsoft, mask in zip(search_output.state,
                                      search_output.logsoftmaxes,
                                      search_output.mask):
        # Baseline-subtracted REINFORCE term; the baseline itself is regressed
        # towards the observed reward with a squared error.
        bs_score = self.baseline.transform(state)
        baseline_loss.append(dy.squared_distance(self.reward, bs_score))
        loss_i = dy.cmult(logsoft, self.reward - bs_score)
        losses.append(dy.cmult(loss_i, dy.inputTensor(mask, batched=True)))
      loss.add_loss("reinforce", dy.sum_elems(dy.esum(losses)))
      loss.add_loss("reinf_baseline", dy.sum_elems(dy.esum(baseline_loss)))
    else:
      # Without a baseline, weight the summed log-probabilities by the raw reward.
      loss.add_loss("reinforce",
                    dy.sum_elems(dy.cmult(self.reward, dy.esum(search_output.logsoftmaxes))))
    total_loss.add_factored_loss_expr(loss)
  return total_loss
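# A minimal numpy sketch of the quantity the loop above accumulates, assuming a
# single decoding step and a scalar reward per sentence (the function and
# argument names below are illustrative, not part of the actual API): the
# REINFORCE term weights the log-probability of the sampled word by the
# baseline-subtracted reward, and the baseline is trained towards the observed
# reward with a squared error.
import numpy as np

def reinforce_terms_sketch(logprob, reward, baseline):
  """logprob, reward, baseline: 1-D arrays over the batch."""
  reinforce = logprob * (reward - baseline)   # cmult(logsoft, reward - bs_score)
  baseline_loss = (reward - baseline) ** 2    # squared_distance(reward, bs_score)
  return reinforce.sum(), baseline_loss.sum()

# Example with two sentences in the batch.
print(reinforce_terms_sketch(np.array([-1.2, -0.7]),
                             np.array([0.4, 0.9]),
                             np.array([0.5, 0.6])))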
def calc_loss(self,
              model: 'model_base.ConditionedModel',
              src: Union[sent.Sentence, 'batchers.Batch'],
              trg: Union[sent.Sentence, 'batchers.Batch']):
  total_loss = FactoredLossExpr()
  for loss, weight in zip(self.losses, self.loss_weight):
    total_loss.add_factored_loss_expr(loss.calc_loss(model, src, trg) * weight)
  return total_loss
def calc_loss(self, src, trg, infer_prediction=False):
  event_trigger.start_sent(src)
  if not batchers.is_batched(src):
    src = batchers.mark_as_batch([src])
  if not batchers.is_batched(trg):
    trg = batchers.mark_as_batch([trg])

  # Prepend <s> to the source and build the source padding mask.
  src_words = np.array([[Vocab.SS] + x.words for x in src])
  batch_size, src_len = src_words.shape
  if src.mask is None:
    src_mask = np.zeros((batch_size, src_len), dtype=int)
  else:
    src_mask = np.concatenate([np.zeros((batch_size, 1), dtype=int),
                               src.mask.np_arr.astype(int)], axis=1)
  src_embeddings = self.sentence_block_embed(self.src_embedder.embeddings, src_words, src_mask)
  src_embeddings = self.make_input_embedding(src_embeddings, src_len)

  # Prepend <s> to the target and drop its last token (teacher-forcing inputs).
  trg_words = np.array(list(map(lambda x: [Vocab.SS] + x.words[:-1], trg)))
  batch_size, trg_len = trg_words.shape
  if trg.mask is None:
    trg_mask = np.zeros((batch_size, trg_len), dtype=int)
  else:
    trg_mask = trg.mask.np_arr.astype(int)
  trg_embeddings = self.sentence_block_embed(self.trg_embedder.embeddings, trg_words, trg_mask)
  trg_embeddings = self.make_input_embedding(trg_embeddings, trg_len)

  # Encoder self-attention, decoder-encoder attention, and causal decoder self-attention masks.
  xx_mask = self.make_attention_mask(src_mask, src_mask)
  xy_mask = self.make_attention_mask(trg_mask, src_mask)
  yy_mask = self.make_attention_mask(trg_mask, trg_mask)
  yy_mask *= self.make_history_mask(trg_mask)

  z_blocks = self.encoder.transduce(src_embeddings, xx_mask)
  h_block = self.decoder(trg_embeddings, z_blocks, xy_mask, yy_mask)

  if infer_prediction:
    # At inference time, return the logits for the last generated position only.
    y_len = h_block.dim()[0][1]
    last_col = dy.pick(h_block, dim=1, index=y_len - 1)
    logits = self.decoder.output(last_col)
    return logits

  # Flatten the references; padded target positions are zeroed out.
  ref_list = list(itertools.chain.from_iterable(map(lambda x: x.words, trg)))
  concat_t_block = (1 - trg_mask.ravel()).reshape(-1) * np.array(ref_list)
  loss = self.decoder.output_and_loss(h_block, concat_t_block)
  return FactoredLossExpr({"mle": loss})
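# A minimal numpy sketch of how the decoder-side attention mask above can be
# assembled, assuming mask arrays use 1 for padded positions and 0 for real
# tokens (the helper names here are illustrative): the self-attention mask
# permits a query to attend to a key only if both are real tokens, and the
# history mask additionally blocks attention to future positions.
import numpy as np

def attention_mask_sketch(q_mask, k_mask):
  # (batch, q_len, k_len): 1 where attention is permitted.
  return (1 - q_mask)[:, :, None] * (1 - k_mask)[:, None, :]

def history_mask_sketch(trg_mask):
  trg_len = trg_mask.shape[1]
  causal = np.tril(np.ones((trg_len, trg_len), dtype=int))  # lower triangle incl. diagonal
  return causal[None, :, :]

trg_mask = np.array([[0, 0, 1]])   # one sentence, last position is padding
yy = attention_mask_sketch(trg_mask, trg_mask) * history_mask_sketch(trg_mask)
print(yy[0])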
def calc_loss(self,
              model: 'model_base.ConditionedModel',
              src: Union[sent.Sentence, 'batchers.Batch'],
              trg: Union[sent.Sentence, 'batchers.Batch']):
  loss_builder = FactoredLossExpr()
  for _ in range(self.repeat):
    standard_loss = self.child_loss.calc_loss(model, src, trg)
    additional_loss = event_trigger.calc_additional_loss(trg, model, standard_loss)
    loss_builder.add_factored_loss_expr(standard_loss)
    loss_builder.add_factored_loss_expr(additional_loss)
  return loss_builder
def calc_nll(self,
             src: Union[batchers.Batch, sent.Sentence],
             trg: Union[batchers.Batch, sent.Sentence]) -> 'FactoredLossExpr':
  # Collect each named loss factor from every model in the ensemble.
  sub_losses = collections.defaultdict(list)
  for model in self.models:
    for loss_name, loss in model.calc_nll(src, trg).expr_factors.items():
      sub_losses[loss_name].append(loss)
  model_loss = FactoredLossExpr()
  for loss_name, losslist in sub_losses.items():
    # TODO: dy.average(losslist) _or_ dy.esum(losslist) / len(self.models) ?
    #       -- might not be the same if not all models return all losses
    model_loss.add_loss(loss_name, dy.average(losslist))
  return model_loss
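# A small numpy illustration of the TODO above (the values are made up): when a
# loss factor is reported by only some of the ensemble's models, averaging the
# collected list divides by the number of models that reported it, whereas
# summing and dividing by len(self.models) divides by the full ensemble size,
# so the two are not interchangeable in that case.
import numpy as np

losslist = [1.2, 0.8]   # a factor reported by only 2 of 3 models
n_models = 3

print(np.mean(losslist))             # 1.0    -> analogous to dy.average(losslist)
print(np.sum(losslist) / n_models)   # 0.666  -> analogous to dy.esum(losslist) / len(self.models)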
def calc_loss(self,
              model: 'model_base.ConditionedModel',
              src: Union[sent.Sentence, 'batchers.Batch'],
              trg: Union[sent.Sentence, 'batchers.Batch']):
  assert hasattr(model, "attender") and hasattr(model.attender, "attention_vecs"), \
    "Must be called after MLELoss with models that have an attender."
  masked_attn = model.attender.attention_vecs
  if trg.mask is not None:
    # Zero out attention vectors at padded target positions.
    trg_mask = 1 - trg.mask.np_arr.transpose()
    masked_attn = [dy.cmult(attn, dy.inputTensor(mask, batched=True))
                   for attn, mask in zip(masked_attn, trg_mask)]
  loss = self.global_fertility(masked_attn)
  return FactoredLossExpr({"global_fertility": loss})
def calc_loss(self, translator, src, trg):
  batch_size = trg.batch_size()
  uniques = [set() for _ in range(batch_size)]
  deltas = []
  probs = []
  sign = -1 if self.inv_eval else 1
  search_outputs = translator.generate_search_output(src, self.search_strategy)
  for search_output in search_outputs:
    logprob = search_output.logsoftmaxes
    sample = search_output.word_ids
    logprob = dy.esum(logprob) * self.alpha
    # Calculate the evaluation score
    eval_score = np.zeros(batch_size, dtype=float)
    mask = np.zeros(batch_size, dtype=float)
    for j in range(batch_size):
      ref_j = self.remove_eos(trg[j].words)
      hyp_j = self.remove_eos(sample[j].tolist())
      if self.unique_sample:
        # Skip empty or duplicate samples by pushing their probability to ~0.
        hash_val = hash(tuple(hyp_j))
        if len(hyp_j) == 0 or hash_val in uniques[j]:
          mask[j] = -1e20  # represents negative infinity
          continue
        else:
          uniques[j].add(hash_val)
      # Calc evaluation score
      eval_score[j] = self.evaluation_metric.evaluate_one_sent(ref_j, hyp_j) * sign
    # Appending the delta and logprob of this sample
    prob = logprob + dy.inputTensor(mask, batched=True)
    deltas.append(dy.inputTensor(eval_score, batched=True))
    probs.append(prob)
  sample_prob = dy.softmax(dy.concatenate(probs))
  deltas = dy.concatenate(deltas)
  risk = dy.sum_elems(dy.cmult(sample_prob, deltas))
  return FactoredLossExpr({"risk": risk})
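# A minimal numpy sketch of the risk objective assembled above (the function
# name, the alpha value, and the sample scores are illustrative): the
# renormalized sample probabilities come from a softmax over the scaled
# sequence log-probabilities, and the risk is their expectation of the
# (signed) evaluation scores.
import numpy as np

def min_risk_sketch(seq_logprobs, eval_scores, alpha=0.05):
  scaled = alpha * np.asarray(seq_logprobs)
  sample_prob = np.exp(scaled - scaled.max())
  sample_prob /= sample_prob.sum()   # softmax over the drawn samples
  return float(np.sum(sample_prob * np.asarray(eval_scores)))

# Three samples for one sentence, with negated BLEU as the "delta" to minimize.
print(min_risk_sketch([-10.0, -12.5, -15.0], [-0.42, -0.35, -0.18]))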
def on_calc_additional_loss(self, trg, generator, generator_loss):
  if self.policy_learning is None:
    return None
  # Reward: the negated generator loss, optionally plus a length-prior term.
  reward = FactoredLossExpr()
  reward.add_loss("generator", -dy.inputTensor(generator_loss.value(), batched=True))
  if self.length_prior is not None:
    reward.add_loss('length_prior', self.length_prior.log_ll(self.seg_size_unpadded))
  reward_value = reward.value()
  if trg.batch_size() == 1:
    reward_value = [reward_value]
  reward_tensor = dy.inputTensor(reward_value, batched=True)
  # Calculate the policy-gradient loss, then record the reward and report.
  loss = self.policy_learning.calc_loss(reward_tensor)
  self.reward = reward
  if self.train and self.reporter is not None:
    self.reporter.report_process(self)
  return loss
def on_calc_additional_loss(self, trg, generator, generator_loss):
  if self.policy_learning is None:
    return None
  trg_counts = dy.inputTensor([t.len_unpadded() for t in trg], batched=True)
  reward = FactoredLossExpr()
  # Add all rewards derived from the translator's (non-backprop) losses.
  for loss_key, loss_value in generator_loss.get_nobackprop_loss().items():
    if loss_key == 'mle':
      # Normalize the MLE loss by the unpadded target length.
      reward.add_loss('mle', dy.cdiv(-loss_value, trg_counts))
    else:
      reward.add_loss(loss_key, -loss_value)
  if self.length_prior is not None:
    reward.add_loss('seg_lp', self.length_prior.log_ll(self.seg_size_unpadded))
  reward = dy.inputTensor(reward.value(), batched=True)
  # Calculate the policy-gradient loss, then record the reward and report.
  loss = self.policy_learning.calc_loss(reward)
  self.reward = reward
  if self.reporter is not None:
    self.reporter.report_process(self)
  return loss
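# A minimal numpy sketch of the reward fed to the policy gradient above (all
# numbers and variable names are illustrative): the MLE component is the
# per-token negative NLL, and an optional segment-length prior log-likelihood
# is added on top.
import numpy as np

mle_nll    = np.array([12.0, 20.0])   # summed NLL per sentence
trg_counts = np.array([6.0, 10.0])    # unpadded target lengths
seg_log_ll = np.array([-1.5, -2.0])   # log-likelihood under the length prior

reward = -mle_nll / trg_counts + seg_log_ll
print(reward)   # [-3.5 -4. ]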
def calc_loss(self,
              model: 'model_base.ConditionedModel',
              src: Union[sent.Sentence, 'batchers.Batch'],
              trg: Union[sent.Sentence, 'batchers.Batch']):
  loss = model.calc_nll(src, trg)
  return FactoredLossExpr({"mle": loss})