def calc_nll(self, src, trg):
    """Compute the negative log-likelihood of a language-model-style objective.

    The model predicts every token of ``src`` from its prefix: inputs are
    ``src[:-1]`` and references are ``src[1:]``; ``trg`` is not used here.

    Args:
      src: a sentence or batch of sentences.
      trg: unused (kept for interface compatibility).

    Returns:
      A scalar DyNet expression with the summed per-step loss.
    """
    if not batchers.is_batched(src):
        src = batchers.ListBatch([src])
    # Shifted views: inputs drop the last token, targets drop the first,
    # with the mask sliced accordingly.
    src_inputs = batchers.ListBatch(
        [s[:-1] for s in src],
        mask=batchers.Mask(src.mask.np_arr[:, :-1]) if src.mask else None)
    src_targets = batchers.ListBatch(
        [s[1:] for s in src],
        mask=batchers.Mask(src.mask.np_arr[:, 1:]) if src.mask else None)
    event_trigger.start_sent(src)
    embeddings = self.src_embedder.embed_sent(src_inputs)
    encodings = self.rnn.transduce(embeddings)
    encodings_tensor = encodings.as_tensor()
    ((hidden_dim, seq_len), batch_size) = encodings.dim()
    # Fold the time dimension into the batch dimension so the transform and
    # scorer are applied to every position in a single call.
    encoding_reshaped = dy.reshape(encodings_tensor, (hidden_dim, ),
                                   batch_size=batch_size * seq_len)
    outputs = self.transform.transform(encoding_reshaped)
    ref_action = np.asarray([sent.words for sent in src_targets]).reshape(
        (seq_len * batch_size, ))
    loss_expr_perstep = self.scorer.calc_loss(
        outputs, batchers.mark_as_batch(ref_action))
    # Unfold back to a (seq_len,) vector per batch element.
    loss_expr_perstep = dy.reshape(loss_expr_perstep, (seq_len, ),
                                   batch_size=batch_size)
    if src_targets.mask:
        # Zero out losses at padded target positions (mask value 1 = padded).
        loss_expr_perstep = dy.cmult(
            loss_expr_perstep,
            dy.inputTensor(1.0 - src_targets.mask.np_arr.T, batched=True))
    loss = dy.sum_elems(loss_expr_perstep)
    return loss
def test_py_lstm_encoder_len(self):
    """A 3-layer pyramidal LSTM must shorten the sequence to ceil(len/4).

    Each source sentence is padded up to a multiple of 4 first so the
    pyramid's downsampling divides evenly.
    """
    layer_dim = 512
    model = DefaultTranslator(
        src_reader=self.src_reader,
        trg_reader=self.trg_reader,
        src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
        encoder=PyramidalLSTMSeqTransducer(input_dim=layer_dim,
                                           hidden_dim=layer_dim,
                                           layers=3),
        attender=MlpAttender(input_dim=layer_dim,
                             state_dim=layer_dim,
                             hidden_dim=layer_dim),
        decoder=AutoRegressiveDecoder(
            input_dim=layer_dim,
            embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            rnn=UniLSTMSeqTransducer(input_dim=layer_dim,
                                     hidden_dim=layer_dim,
                                     decoder_input_dim=layer_dim,
                                     yaml_path="model.decoder.rnn"),
            transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim),
            scorer=Softmax(input_dim=layer_dim, vocab_size=100),
            bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )
    event_trigger.set_train(True)
    for sent_i in range(10):
        dy.renew_cg()
        # Pad so length is a multiple of 4 (note: a length already divisible
        # by 4 still receives 4 extra padding tokens).
        src = self.src_data[sent_i].create_padded_sent(
            4 - (self.src_data[sent_i].sent_len() % 4))
        event_trigger.start_sent(src)
        embeddings = model.src_embedder.embed_sent(src)
        encodings = model.encoder.transduce(embeddings)
        self.assertEqual(int(math.ceil(len(embeddings) / float(4))),
                         len(encodings))
def generate(self, src, forced_trg_ids=None, search_strategy=None):
    """Greedy step-by-step generation.

    Args:
      src: source sentence or batch (treated as a single-sentence batch).
      forced_trg_ids: currently unused (see TODO below).
      search_strategy: currently unused (see TODO below).

    Returns:
      A single-element list: a ``SimpleSentence`` if a target vocab is
      available, otherwise an ``(action list, score)`` tuple.
    """
    event_trigger.start_sent(src)
    if not batchers.is_batched(src):
        src = batchers.mark_as_batch([src])
    outputs = []
    # Seed the target with a single start token (id 0).
    trg = sent.SimpleSentence([0])
    if not batchers.is_batched(trg):
        trg = batchers.mark_as_batch([trg])
    output_actions = []
    score = 0.
    # TODO Fix this with generate_one_step and use the appropriate search_strategy
    self.max_len = 100  # This is a temporary hack
    for _ in range(self.max_len):
        # NOTE(review): the full prefix is re-scored from scratch each step,
        # so this greedy loop is quadratic in the output length.
        dy.renew_cg(immediate_compute=settings.IMMEDIATE_COMPUTE,
                    check_validity=settings.CHECK_VALIDITY)
        log_prob_tail = self.calc_loss(src, trg, loss_cal=None,
                                       infer_prediction=True)
        ys = np.argmax(log_prob_tail.npvalue(), axis=0).astype('i')
        if ys == Vocab.ES:
            # The end-of-sentence token is kept in the output before stopping.
            output_actions.append(ys)
            break
        output_actions.append(ys)
        # Rebuild the target prefix with a trailing placeholder token.
        trg = sent.SimpleSentence(words=output_actions + [0])
        if not batchers.is_batched(trg):
            trg = batchers.mark_as_batch([trg])
    # Append output to the outputs
    if hasattr(self, "trg_vocab") and self.trg_vocab is not None:
        outputs.append(sent.SimpleSentence(words=output_actions,
                                           vocab=self.trg_vocab))
    else:
        outputs.append((output_actions, score))
    return outputs
def calc_nll(self, src: Union[batchers.Batch, sent.Sentence],
             trg: Union[batchers.Batch, sent.Sentence]) \
        -> tt.Tensor:
    """Language-model NLL: predict ``src[1:]`` from ``src[:-1]``.

    ``trg`` is unused; the loss is aggregated over unmasked target positions.
    """
    if not batchers.is_batched(src):
        src = batchers.ListBatch([src])

    def shifted_batch(sl):
        # Shifted view of the batch with the mask sliced to match.
        msk = batchers.Mask(src.mask.np_arr[:, sl]) if src.mask else None
        return batchers.ListBatch([s[sl] for s in src], mask=msk)

    in_batch = shifted_batch(slice(None, -1))
    tgt_batch = shifted_batch(slice(1, None))
    event_trigger.start_sent(src)
    enc_seq = self.rnn.transduce(self.src_embedder.embed_sent(in_batch))
    enc_tensor = enc_seq.as_tensor()
    n_steps = tt.sent_len(enc_tensor)
    n_batch = tt.batch_size(enc_tensor)
    # Collapse time into batch so transform/scorer run over all positions at once.
    flat_out = self.transform.transform(tt.merge_time_batch_dims(enc_tensor))
    refs = np.asarray([s.words for s in tgt_batch]).reshape((n_steps * n_batch, ))
    per_step = self.scorer.calc_loss(flat_out, batchers.mark_as_batch(refs))
    per_step = tt.unmerge_time_batch_dims(per_step, n_batch)
    return tt.aggregate_masked_loss(per_step, tgt_batch.mask)
def generate_search_output(self,
                           src: batchers.Batch,
                           search_strategy: search_strategies.SearchStrategy,
                           forced_trg_ids: batchers.Batch = None) -> List[search_strategies.SearchOutput]:
    """
    Takes in a batch of source sentences and outputs a list of search outputs.

    Args:
      src: The source sentences
      search_strategy: The strategy with which to perform the search
      forced_trg_ids: The target IDs to generate if performing forced decoding

    Returns:
      A list of search outputs including scores, etc.
    """
    if src.batch_size() != 1:
        raise NotImplementedError("batched decoding not implemented for DefaultTranslator. "
                                  "Specify inference batcher with batch size 1.")
    event_trigger.start_sent(src)
    all_src = src
    if isinstance(src, batchers.CompoundBatch):
        src = src.batches[0]
    # Generating outputs
    cur_forced_trg = None
    src_sent = src[0]
    sent_mask = None
    if src.mask:
        sent_mask = batchers.Mask(np_arr=src.mask.np_arr[0:1])
    # FIX: previously this wrapped the module `sent` instead of the local
    # sentence `src_sent` (flagged with "#checkme" in the original).
    # NOTE(review): sent_batch is currently unused — confirm whether it can
    # be removed or should be passed to the search strategy.
    sent_batch = batchers.mark_as_batch([src_sent], mask=sent_mask)
    # Encode the sentence
    initial_state = self._encode_src(all_src)
    if forced_trg_ids is not None:
        cur_forced_trg = forced_trg_ids[0]
    search_outputs = search_strategy.generate_output(
        self, initial_state,
        src_length=[src_sent.sent_len()],
        forced_trg_ids=cur_forced_trg)
    return search_outputs
def generate(self, src: batchers.Batch, normalize_scores: bool = False,
             *args, **kwargs) -> Sequence[sent.ReadableSentence]:
    """Pick the single best label at every position and wrap the result as
    one ``SimpleSentence`` (only batch size 1 is supported)."""
    assert src.batch_size() == 1, "batch size > 1 not properly tested"
    event_trigger.start_sent(src)
    batch_size, encodings, outputs, seq_len = self._encode_src(src)
    words, word_scores = self.scorer.best_k(outputs, k=1,
                                            normalize_scores=normalize_scores)
    hyp = sent.SimpleSentence(
        words=words[0, :],
        idx=src[0].idx,
        vocab=self.trg_vocab if hasattr(self, "trg_vocab") else None,
        output_procs=self.trg_reader.output_procs,
        # Sentence score = sum of the per-position best scores.
        score=np.sum(word_scores, axis=1))
    return [hyp]
def assert_in_out_len_equal(self, model):
    """Embed and encode the first source sentence; both sequences must have
    the same length."""
    dy.renew_cg()
    event_trigger.set_train(True)
    first_sent = self.src_data[0]
    event_trigger.start_sent(first_sent)
    embedded = model.src_embedder.embed_sent(first_sent)
    encoded = model.encoder.transduce(embedded)
    self.assertEqual(len(embedded), len(encoded))
def test_transducer_composer(self):
    """Character composition through a BiLSTM seq-transducer composer runs
    end to end on one sentence."""
    seq_composer = SeqTransducerComposer(
        seq_transducer=BiLSTMSeqTransducer(input_dim=self.layer_dim,
                                           hidden_dim=self.layer_dim))
    char_embedder = CharCompositionEmbedder(emb_dim=self.layer_dim,
                                            composer=seq_composer,
                                            char_vocab=self.src_char_vocab)
    event_trigger.set_train(True)
    event_trigger.start_sent(self.src[1])
    char_embedder.embed_sent(self.src[1])
def _encode_src(self, src):
    """Embed + encode ``src``, then apply the transform to every timestep.

    Returns:
      (batch_size, encodings, transformed outputs, seq_len); the transformed
      outputs are batched as batch_size * seq_len single vectors.
    """
    event_trigger.start_sent(src)
    encodings = self.encoder.transduce(self.src_embedder.embed_sent(src))
    (hidden_dim, seq_len), batch_size = encodings.dim()
    # Fold time into the batch dimension so the transform sees one vector
    # per position.
    flat = dy.reshape(encodings.as_tensor(), (hidden_dim, ),
                      batch_size=batch_size * seq_len)
    transformed = self.transform.transform(flat)
    return batch_size, encodings, transformed, seq_len
def generate(self, src, forced_trg_ids):
    """Multi-label generation: score each label with a sigmoid and emit either
    per-step argmax labels (``generate_per_step``) or all labels whose pooled
    sigmoid score exceeds 0.5.

    Modes:
      - "avg_mlp": transform the averaged encoder states.
      - "final_mlp": transform the final encoder state.
      - "lin_sum_sig": sum per-step linear transforms (mask-aware), then pool.

    Returns:
      A single-element list containing a ``SimpleSentence`` of label ids.
    """
    assert not forced_trg_ids
    assert batchers.is_batched(src) and src.batch_size() == 1, "batched generation not fully implemented"
    src = src[0]
    # Generating outputs
    outputs = []
    event_trigger.start_sent(src)
    embeddings = self.src_embedder.embed_sent(src)
    encodings = self.encoder.transduce(embeddings)
    if self.mode in ["avg_mlp", "final_mlp"]:
        if self.generate_per_step:
            assert self.mode == "avg_mlp", "final_mlp not supported with generate_per_step=True"
            scores = [dy.logistic(self.output_layer.transform(enc_i)) for enc_i in encodings]
        else:
            if self.mode == "avg_mlp":
                # Plain average over timesteps (no mask handling here).
                encoding_fixed_size = dy.sum_dim(encodings.as_tensor(), [1]) * (1.0 / encodings.dim()[0][1])
            elif self.mode == "final_mlp":
                encoding_fixed_size = self.encoder.get_final_states()[-1].main_expr()
            scores = dy.logistic(self.output_layer.transform(encoding_fixed_size))
    elif self.mode == "lin_sum_sig":
        enc_lin = []
        for step_i, enc_i in enumerate(encodings):
            step_linear = self.output_layer.transform(enc_i)
            if encodings.mask and np.sum(encodings.mask.np_arr[:, step_i]) > 0:
                # Zero out masked batch elements at this timestep.
                step_linear = dy.cmult(
                    step_linear,
                    dy.inputTensor(1.0 - encodings.mask.np_arr[:, step_i], batched=True))
            enc_lin.append(step_linear)
        if self.generate_per_step:
            scores = [dy.logistic(enc_i) for enc_i in enc_lin]
        else:
            if encodings.mask:
                # Average over unmasked timesteps only.
                encoding_fixed_size = dy.cdiv(
                    dy.esum(enc_lin),
                    dy.inputTensor(np.sum(1.0 - encodings.mask.np_arr, axis=1), batched=True))
            else:
                encoding_fixed_size = dy.esum(enc_lin) / encodings.dim()[0][1]
            scores = dy.logistic(encoding_fixed_size)
    else:
        raise ValueError(f"unknown mode '{self.mode}'")
    if self.generate_per_step:
        # One label per timestep: take the argmax of each step's score vector.
        output_actions = [np.argmax(score_i.npvalue()) for score_i in scores]
        score = np.sum([np.max(score_i.npvalue()) for score_i in scores])
        outputs.append(sent.SimpleSentence(words=output_actions,
                                           idx=src.idx,
                                           vocab=getattr(self.trg_reader, "vocab", None),
                                           score=score,
                                           output_procs=self.trg_reader.output_procs))
    else:
        # Threshold the pooled sigmoid scores: every label above 0.5 is emitted.
        scores_arr = scores.npvalue()
        output_actions = list(np.nonzero(scores_arr > 0.5)[0])
        score = np.sum(scores_arr[scores_arr > 0.5])
        outputs.append(sent.SimpleSentence(words=output_actions,
                                           idx=src.idx,
                                           vocab=getattr(self.trg_reader, "vocab", None),
                                           score=score,
                                           output_procs=self.trg_reader.output_procs))
    return outputs
def _encode_src(self, src: Union[sent.Sentence, batchers.Batch]) -> tuple:
    """Embed + encode ``src`` and run the transform over all timesteps at once.

    Returns:
      (batch_size, encodings, transformed outputs, seq_len).
    """
    event_trigger.start_sent(src)
    encodings = self.encoder.transduce(self.src_embedder.embed_sent(src))
    enc_tensor = encodings.as_tensor()
    # Merge time into batch so the transform applies position-wise.
    transformed = self.transform.transform(tt.merge_time_batch_dims(enc_tensor))
    return (tt.batch_size(enc_tensor), encodings, transformed,
            tt.sent_len(enc_tensor))
def calc_loss(self, src, trg, infer_prediction=False):
    """Transformer-style training loss (or inference logits).

    Args:
      src: source sentence or batch.
      trg: target sentence or batch.
      infer_prediction: if True, return the logits of the final target
        position instead of a loss (used for greedy generation).

    Returns:
      Logits when ``infer_prediction`` is True, otherwise a
      ``losses.FactoredLossExpr`` with key ``"mle"``.
    """
    event_trigger.start_sent(src)
    if not batchers.is_batched(src):
        src = batchers.mark_as_batch([src])
    if not batchers.is_batched(trg):
        trg = batchers.mark_as_batch([trg])
    # Prepend the start symbol to every source sentence.
    src_words = np.array([[vocabs.Vocab.SS] + x.words for x in src])
    batch_size, src_len = src_words.shape
    # FIX: `np.int` was removed in NumPy >= 1.24; use the builtin `int`
    # (np.int was a plain alias for it, so behavior is unchanged).
    # FIX: `isinstance(x, type(None))` replaced with the idiomatic `is None`.
    if src.mask is None:
        src_mask = np.zeros((batch_size, src_len), dtype=int)
    else:
        src_mask = np.concatenate([
            np.zeros((batch_size, 1), dtype=int),
            src.mask.np_arr.astype(int)
        ], axis=1)
    src_embeddings = self.sentence_block_embed(
        self.src_embedder.embeddings, src_words, src_mask)
    src_embeddings = self.make_input_embedding(src_embeddings, src_len)
    # Teacher forcing: shift target right — prepend SS, drop the last token.
    trg_words = np.array(
        list(map(lambda x: [vocabs.Vocab.SS] + x.words[:-1], trg)))
    batch_size, trg_len = trg_words.shape
    if trg.mask is None:
        trg_mask = np.zeros((batch_size, trg_len), dtype=int)
    else:
        trg_mask = trg.mask.np_arr.astype(int)
    trg_embeddings = self.sentence_block_embed(
        self.trg_embedder.embeddings, trg_words, trg_mask)
    trg_embeddings = self.make_input_embedding(trg_embeddings, trg_len)
    # Attention masks: src-src, trg-src, and causal (history-masked) trg-trg.
    xx_mask = self.make_attention_mask(src_mask, src_mask)
    xy_mask = self.make_attention_mask(trg_mask, src_mask)
    yy_mask = self.make_attention_mask(trg_mask, trg_mask)
    yy_mask *= self.make_history_mask(trg_mask)
    z_blocks = self.encoder.transduce(src_embeddings, xx_mask)
    h_block = self.decoder(trg_embeddings, z_blocks, xy_mask, yy_mask)
    if infer_prediction:
        # Only the logits of the last target position matter for greedy search.
        y_len = h_block.dim()[0][1]
        last_col = dy.pick(h_block, dim=1, index=y_len - 1)
        logits = self.decoder.output(last_col)
        return logits
    ref_list = list(
        itertools.chain.from_iterable(map(lambda x: x.words, trg)))
    # Zero out references at padded positions (mask == 1) before scoring.
    concat_t_block = (1 - trg_mask.ravel()).reshape(-1) * np.array(ref_list)
    loss = self.decoder.output_and_loss(h_block, concat_t_block)
    return losses.FactoredLossExpr({"mle": loss})
def test_dyer_composer(self):
    """Dyer-style head composer (fwd/bwd LSTM combinators + aux transform)
    embeds one sentence without error."""
    head_composer = DyerHeadComposer(
        fwd_combinator=UniLSTMSeqTransducer(input_dim=self.layer_dim,
                                            hidden_dim=self.layer_dim),
        bwd_combinator=UniLSTMSeqTransducer(input_dim=self.layer_dim,
                                            hidden_dim=self.layer_dim),
        transform=AuxNonLinear(input_dim=self.layer_dim,
                               output_dim=self.layer_dim,
                               aux_input_dim=self.layer_dim))
    char_embedder = CharCompositionEmbedder(emb_dim=self.layer_dim,
                                            composer=head_composer,
                                            char_vocab=self.src_char_vocab)
    event_trigger.set_train(True)
    event_trigger.start_sent(self.src[1])
    char_embedder.embed_sent(self.src[1])
def calc_loss(
        self,
        model: 'model_base.ConditionedModel',
        src: Union[sent.Sentence, 'batchers.Batch'],
        trg: Union[sent.Sentence, 'batchers.Batch']) -> losses.FactoredLossExpr:
    """Normalize bare sentences into singleton batches, fire the start_sent
    event, and delegate loss computation to ``_perform_calc_loss``."""
    def ensure_batched(x):
        # Wrap a single sentence into a batch of one; pass batches through.
        return x if batchers.is_batched(x) else batchers.mark_as_batch([x])

    src = ensure_batched(src)
    trg = ensure_batched(trg)
    event_trigger.start_sent(src)
    return self._perform_calc_loss(model, src, trg)
def calc_nll(self, src: Union[batchers.Batch, sent.Sentence],
             trg: Union[batchers.Batch, sent.Sentence]) -> dy.Expression:
    """Compute the NLL of ``trg`` given ``src`` by stepping the decoder with
    teacher forcing (the reference word is fed back as the next input).

    When ``self.truncate_dec_batches`` is set, finished sentences are dropped
    from the batch at each step, so per-step batch sizes may differ.
    """
    event_trigger.start_sent(src)
    if isinstance(src, batchers.CompoundBatch):
        src = src.batches[0]
    # Encode the sentence
    initial_state = self._encode_src(src)
    dec_state = initial_state
    trg_mask = trg.mask if batchers.is_batched(trg) else None
    cur_losses = []
    seq_len = trg.sent_len()
    if settings.CHECK_VALIDITY and batchers.is_batched(src):
        for j, single_trg in enumerate(trg):
            assert single_trg.sent_len() == seq_len  # assert consistent length
            assert 1 == len([
                i for i in range(seq_len)
                if (trg_mask is None or trg_mask.np_arr[j, i] == 0)
                and single_trg[i] == vocabs.Vocab.ES
            ])  # assert exactly one unmasked ES token
    input_word = None
    for i in range(seq_len):
        ref_word = DefaultTranslator._select_ref_words(
            trg, i, truncate_masked=self.truncate_dec_batches)
        if self.truncate_dec_batches and batchers.is_batched(ref_word):
            # Shrink the RNN state and references to still-active sentences.
            dec_state.rnn_state, ref_word = batchers.truncate_batches(
                dec_state.rnn_state, ref_word)
        if input_word is not None:
            # Teacher forcing: feed the previous reference word's embedding.
            dec_state = self.decoder.add_input(
                dec_state, self.trg_embedder.embed(input_word))
        rnn_output = dec_state.rnn_state.output()
        dec_state.context = self.attender.calc_context(rnn_output)
        word_loss = self.decoder.calc_loss(dec_state, ref_word)
        if not self.truncate_dec_batches and batchers.is_batched(src) and trg_mask is not None:
            # Zero out losses at masked (padded) target positions.
            word_loss = trg_mask.cmult_by_timestep_expr(word_loss, i, inverse=True)
        cur_losses.append(word_loss)
        input_word = ref_word
    if self.truncate_dec_batches:
        # Per-step batch sizes differ, so sum each step over its own batch
        # before summing across steps.
        loss_expr = dy.esum([dy.sum_batches(wl) for wl in cur_losses])
    else:
        loss_expr = dy.esum(cur_losses)
    return loss_expr
def calc_nll(self, src_batch, trg_batch) -> dy.Expression:
    """Simultaneous-translation NLL: greedily interleave READ/WRITE actions
    per sentence and sum the losses incurred at each WRITE step.

    Side effects: records the taken actions in ``self.actions`` and the
    produced words in ``self.outputs``, one list per sentence.
    """
    self.actions.clear()
    self.outputs.clear()
    event_trigger.start_sent(src_batch)
    batch_loss = []
    # For every item in the batch
    for src, trg in zip(src_batch, trg_batch):
        # Initial state with no read/write actions being taken
        current_state = self._initial_state(src)
        src_len = src.sent_len()
        # Reading + Writing
        src_encoding = []
        loss_exprs = []
        now_action = []
        outputs = []
        # Simultaneous greedy search
        while not self._stoping_criterions_met(current_state, trg):
            # Define action based on state
            action = self.next_action(current_state, src_len, len(src_encoding))
            if action == self.Action.READ:
                # Reading + Encoding
                current_state = current_state.read(src)
                src_encoding.append(current_state.encoder_state.output())
            else:
                # Predicting next word
                current_state = current_state.calc_context(src_encoding)
                current_output = self.add_input(
                    current_state.prev_written_word, current_state)
                # Calculating losses
                ground_truth = self._select_ground_truth(current_state, trg)
                loss_exprs.append(
                    self.decoder.calc_loss(current_output.state, ground_truth))
                # Use word from ref/model depending on settings
                next_word = self._select_next_word(ground_truth,
                                                   current_output.state)
                # The produced words
                outputs.append(next_word)
                current_state = current_state.write(next_word)
            now_action.append(action.value)
        self.actions.append(now_action)
        self.outputs.append(outputs)
        # Accumulate loss
        batch_loss.append(dy.esum(loss_exprs))
    dy.forward(batch_loss)
    loss = dy.esum(batch_loss)
    # Optionally block gradients into the decoder parameters.
    return loss if not self.freeze_decoder_param else dy.nobackprop(loss)
def test_composite_composer(self):
    """A CompositeEmbedder combining a char-composition embedder and a plain
    lookup embedder can embed both a sentence and a single word."""
    head_composer = DyerHeadComposer(
        fwd_combinator=UniLSTMSeqTransducer(input_dim=self.layer_dim,
                                            hidden_dim=self.layer_dim),
        bwd_combinator=UniLSTMSeqTransducer(input_dim=self.layer_dim,
                                            hidden_dim=self.layer_dim),
        transform=AuxNonLinear(input_dim=self.layer_dim,
                               output_dim=self.layer_dim,
                               aux_input_dim=self.layer_dim))
    char_part = CharCompositionEmbedder(emb_dim=self.layer_dim,
                                        composer=head_composer,
                                        char_vocab=self.src_char_vocab)
    lookup_part = LookupEmbedder(emb_dim=self.layer_dim, vocab_size=100)
    combined = CompositeEmbedder(embedders=[char_part, lookup_part])
    event_trigger.set_train(True)
    event_trigger.start_sent(self.src[1])
    combined.embed_sent(self.src[1])
    combined.embed(self.src[1][0].words[0])
def calc_loss(self, src, trg, loss_calculator):
    """Encode source and target independently, then score their distance
    with ``loss_calculator`` under the factored-loss key "dist"."""
    event_trigger.start_sent(src)
    src_enc = self.src_encoder(self.src_embedder.embed_sent(src))
    trg_enc = self.trg_encoder(self.trg_embedder.embed_sent(trg))
    model_loss = losses.FactoredLossExpr()
    model_loss.add_loss("dist", loss_calculator(src_enc, trg_enc))
    return model_loss
def calc_nll(self, src: Union[batchers.Batch, sent.Sentence],
             trg: Union[batchers.Batch, sent.Sentence]) -> tt.Tensor:
    """Compute the NLL of ``trg`` given ``src`` by stepping the decoder with
    teacher forcing; per-step losses are masked and summed."""
    event_trigger.start_sent(src)
    if isinstance(src, batchers.CompoundBatch):
        src = src.batches[0]
    # Encode the sentence
    initial_state = self._initial_state(src)
    dec_state = initial_state
    trg_mask = trg.mask if batchers.is_batched(trg) else None
    cur_losses = []
    seq_len = trg.sent_len()
    # Sanity check if requested
    if settings.CHECK_VALIDITY and batchers.is_batched(src):
        for j, single_trg in enumerate(trg):
            # assert consistent length
            assert single_trg.sent_len() == seq_len
            # assert exactly one unmasked ES token
            assert 1 == len([
                i for i in range(seq_len)
                if (trg_mask is None or trg_mask.np_arr[j, i] == 0)
                and single_trg[i] == vocabs.Vocab.ES
            ])
    input_word = None
    for i in range(seq_len):
        ref_word = DefaultTranslator._select_ref_words(trg, i)
        if input_word is not None:
            # Teacher forcing: feed the previous reference word.
            dec_state = self.decoder.add_input(dec_state, input_word)
        rnn_output = dec_state.as_vector()
        dec_state.context = self.attender.calc_context(rnn_output)
        word_loss = self.decoder.calc_loss(dec_state, ref_word)
        if batchers.is_batched(src) and trg_mask is not None:
            # Zero out losses at masked (padded) target positions.
            word_loss = trg_mask.cmult_by_timestep_expr(word_loss, i, inverse=True)
        cur_losses.append(word_loss)
        input_word = ref_word
    loss_expr = tt.esum(cur_losses)
    return loss_expr
def generate(
        self,
        src: batchers.Batch,
        search_strategy: search_strategies.SearchStrategy
) -> Sequence[sent.Sentence]:
    """
    Takes in a batch of source sentences and outputs a list of search outputs.

    Args:
      src: The source sentences
      search_strategy: The strategy with which to perform the search

    Returns:
      A list of search outputs including scores, etc.
    """
    assert src.batch_size() == 1
    event_trigger.start_sent(src)
    search_outputs = self.generate_search_output(src, search_strategy)
    if isinstance(src, batchers.CompoundBatch):
        src = src.batches[0]
    # Best-scoring hypothesis first.
    sorted_outputs = sorted(search_outputs,
                            key=lambda x: x.score[0],
                            reverse=True)
    assert len(sorted_outputs) >= 1
    outputs = []
    for curr_output in sorted_outputs:
        output_actions = [x for x in curr_output.word_ids[0]]
        attentions = [x for x in curr_output.attentions[0]]
        score = curr_output.score[0]
        out_sent = self._emit_translation(src, output_actions, score)
        if len(sorted_outputs) == 1:
            outputs.append(out_sent)
        else:
            # Wrap n-best entries so downstream output keeps the source index.
            outputs.append(
                sent.NbestSentence(base_sent=out_sent, nbest_id=src[0].idx))
    if self.is_reporting():
        # NOTE(review): `attentions` holds the attentions of the last iterated
        # hypothesis here, while "output" is the best one — confirm intended.
        attentions = np.concatenate([x.npvalue() for x in attentions], axis=1)
        self.report_sent_info({
            "attentions": attentions,
            "src": src[0],
            "output": outputs[0]
        })
    return outputs
def test_py_lstm_mask(self):
    """The encoder's output mask must mirror the input batch's mask exactly
    (or be None when the input has no mask)."""
    layer_dim = 512
    model = DefaultTranslator(
        src_reader=self.src_reader,
        trg_reader=self.trg_reader,
        src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
        encoder=PyramidalLSTMSeqTransducer(input_dim=layer_dim,
                                           hidden_dim=layer_dim,
                                           layers=1),
        attender=MlpAttender(input_dim=layer_dim,
                             state_dim=layer_dim,
                             hidden_dim=layer_dim),
        trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
        decoder=AutoRegressiveDecoder(
            input_dim=layer_dim,
            trg_embed_dim=layer_dim,
            rnn=UniLSTMSeqTransducer(input_dim=layer_dim,
                                     hidden_dim=layer_dim,
                                     decoder_input_dim=layer_dim,
                                     yaml_path="model.decoder.rnn"),
            transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim),
            scorer=Softmax(input_dim=layer_dim, vocab_size=100),
            bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )
    # Batch by target length so some batches carry a padding mask.
    batcher = batchers.TrgBatcher(batch_size=3)
    train_src, _ = \
        batcher.pack(self.src_data, self.trg_data)
    event_trigger.set_train(True)
    for sent_i in range(3):
        dy.renew_cg()
        src = train_src[sent_i]
        event_trigger.start_sent(src)
        embeddings = model.src_embedder.embed_sent(src)
        encodings = model.encoder.transduce(embeddings)
        if train_src[sent_i].mask is None:
            assert encodings.mask is None
        else:
            np.testing.assert_array_almost_equal(
                train_src[sent_i].mask.np_arr, encodings.mask.np_arr)
def calc_nll(self, src, trg):
    """Multi-label classification loss: pool the encoder states into sigmoid
    label scores, then take the binary log loss against a multi-hot target
    built from the words of each target sentence.

    Modes:
      - "avg_mlp": transform the (mask-aware) average of encoder states.
      - "final_mlp": transform the final encoder state.
      - "lin_sum_sig": sum per-step linear transforms (mask-aware), then average.
    """
    event_trigger.start_sent(src)
    embeddings = self.src_embedder.embed_sent(src)
    encodings = self.encoder.transduce(embeddings)
    if not batchers.is_batched(trg):
        trg = batchers.mark_as_batch([trg])
    if self.mode in ["avg_mlp", "final_mlp"]:
        if self.mode == "avg_mlp":
            if encodings.mask:
                # Average over unmasked timesteps only.
                encoding_fixed_size = dy.cdiv(
                    dy.sum_dim(encodings.as_tensor(), [1]),
                    dy.inputTensor(np.sum(1.0 - encodings.mask.np_arr, axis=1),
                                   batched=True))
            else:
                encoding_fixed_size = dy.sum_dim(encodings.as_tensor(), [1]) / encodings.dim()[0][1]
        elif self.mode == "final_mlp":
            encoding_fixed_size = self.encoder.get_final_states()[-1].main_expr()
        scores = dy.logistic(self.output_layer.transform(encoding_fixed_size))
    elif self.mode == "lin_sum_sig":
        enc_lin = []
        for step_i, enc_i in enumerate(encodings):
            step_linear = self.output_layer.transform(enc_i)
            if encodings.mask and np.sum(encodings.mask.np_arr[:, step_i]) > 0:
                # Zero out masked batch elements at this timestep.
                step_linear = dy.cmult(
                    step_linear,
                    dy.inputTensor(1.0 - encodings.mask.np_arr[:, step_i],
                                   batched=True))
            enc_lin.append(step_linear)
        if encodings.mask:
            encoding_fixed_size = dy.cdiv(
                dy.esum(enc_lin),
                dy.inputTensor(np.sum(1.0 - encodings.mask.np_arr, axis=1),
                               batched=True))
        else:
            encoding_fixed_size = dy.esum(enc_lin) / encodings.dim()[0][1]
        scores = dy.logistic(encoding_fixed_size)
    else:
        raise ValueError(f"unknown mode '{self.mode}'")
    # Build a sparse multi-hot target: each distinct word of each target
    # sentence (except SS/ES) flips one coordinate on for that batch element.
    idxs = ([], [])
    for batch_i in range(trg.batch_size()):
        for word in set(trg[batch_i]):
            if word not in {vocabs.Vocab.ES, vocabs.Vocab.SS}:
                idxs[0].append(word)
                idxs[1].append(batch_i)
    trg_scores = dy.sparse_inputTensor(
        idxs,
        values=np.ones(len(idxs[0])),
        shape=scores.dim()[0] + (scores.dim()[1], ),
        batched=True,
    )
    loss_expr = dy.binary_log_loss(scores, trg_scores)
    return loss_expr
def calc_nll(self, src_batch, trg_batch) -> losses.LossExpr:
    """Per-sentence NLL over pre-computed decoder trajectories, normalized
    by the unpadded target lengths."""
    event_trigger.start_sent(src_batch)
    self.create_trajectories(src_batch, trg_batch,
                             force_oracle=not self._is_action_forced())
    per_sent = []
    for src, trg, dec_states in zip(src_batch, trg_batch, self.decoder_states):
        # One loss term per decoder state / reference word pair.
        step_losses = [self.decoder.calc_loss(dec_states[i], trg[i])
                       for i in range(len(dec_states))]
        per_sent.append(dy.esum(step_losses))
    dy.forward(per_sent)
    unit_counts = [trg_batch[i].len_unpadded()
                   for i in range(trg_batch.batch_size())]
    return losses.LossExpr(dy.concatenate_to_batch(per_sent), unit_counts)
def calc_nll(self, src, trg):
    """Step through ``trg`` computing one loss per position via
    ``calc_loss_one_step`` and sum the (mask-corrected) per-step losses.

    Returns:
      A DyNet expression holding the summed loss.
    """
    event_trigger.start_sent(src)
    if isinstance(src, batchers.CompoundBatch):
        src, _ = src.batches
    initial_state = self._encode_src(src)
    dec_state = initial_state
    trg_mask = trg.mask if batchers.is_batched(trg) else None
    # FIX: the local accumulator was named `losses`, shadowing the `losses`
    # module used elsewhere in this file; renamed to `cur_losses`, matching
    # the sibling calc_nll implementations.
    cur_losses = []
    seq_len = trg.sent_len()
    if batchers.is_batched(src):
        for j, single_trg in enumerate(trg):
            # assert consistent length
            assert single_trg.sent_len() == seq_len
            # assert exactly one unmasked ES token
            assert 1 == len([
                i for i in range(seq_len)
                if (trg_mask is None or trg_mask.np_arr[j, i] == 0)
                and single_trg[i] == vocabs.Vocab.ES
            ])
    prev_ref_word = None
    for i in range(seq_len):
        if not batchers.is_batched(trg):
            ref_word = trg[i]
        else:
            ref_word = batchers.mark_as_batch(
                [single_trg[i] for single_trg in trg])
        word_loss = self.calc_loss_one_step(
            dec_state=dec_state,
            batch_size=ref_word.batch_size(),
            ref_action=ref_word,
            prev_ref_action=prev_ref_word,
            mode=self.mode_translate)
        if batchers.is_batched(src) and trg_mask is not None:
            # Zero out losses at masked (padded) target positions.
            word_loss = trg_mask.cmult_by_timestep_expr(word_loss, i,
                                                        inverse=True)
        cur_losses.append(word_loss)
        prev_ref_word = ref_word
    return dy.esum(cur_losses)
def calc_policy_nll(self, src_batch, trg_batch) -> losses.LossExpr:
    """Negative log-likelihood of the read/write actions chosen by the
    policy network along the recorded trajectories."""
    assert self.policy_network is not None
    event_trigger.start_sent(src_batch)
    self.create_trajectories(src_batch, trg_batch,
                             force_oracle=not self._is_action_forced())
    per_sent_nll = []
    for src, action, model_states in zip(src_batch, self.actions,
                                         self.model_states):
        chosen = model_states[-1].find_backward("policy_action")
        # Log-likelihood of each taken action under the policy.
        log_likes = [dy.pick(act.log_likelihood, act.content)
                     for act in chosen]
        per_sent_nll.append(-dy.esum(log_likes))
    dy.forward(per_sent_nll)
    unit_counts = [len(acts) for acts in self.actions]
    return losses.LossExpr(dy.concatenate_to_batch(per_sent_nll), unit_counts)
def generate_search_output(
        self,
        src: batchers.Batch,
        search_strategy: search_strategies.SearchStrategy
) -> List[search_strategies.SearchOutput]:
    """
    Run ``search_strategy`` over a single-sentence source batch.

    Args:
      src: The source sentences (batch size must be 1)
      search_strategy: The strategy with which to perform the search

    Returns:
      A list of search outputs including scores, etc.
    """
    if src.batch_size() != 1:
        raise NotImplementedError(
            "batched decoding not implemented for DefaultTranslator. "
            "Specify inference batcher with batch size 1.")
    event_trigger.start_sent(src)
    if isinstance(src, batchers.CompoundBatch):
        # Only the primary factor is used for decoding.
        src = src.batches[0]
    return search_strategy.generate_output(self,
                                           self._initial_state(src),
                                           src_length=src.sent_len())
def generate(self,
             src: Union[batchers.Batch, sent.Sentence],
             normalize_scores: bool = False,
             *args, **kwargs):
    """Greedy 1-best scalar prediction for every element of the batch."""
    if not batchers.is_batched(src):
        src = batchers.mark_as_batch([src])
    event_trigger.start_sent(src)
    h = self._encode_src(src)
    best_words, best_scores = self.scorer.best_k(
        h, k=1, normalize_scores=normalize_scores)
    n = src.batch_size()
    assert best_words.shape == (1, n)
    assert best_scores.shape == (1, n)
    results = []
    for b in range(n):
        # best_k returns a 1-D result for a singleton batch.
        if n > 1:
            word, score = best_words[0, b], best_scores[0, b]
        else:
            word, score = best_words[0], best_scores[0]
        results.append(sent.ScalarSentence(value=word, score=score))
    return results
def inp_emb(self, idx=0):
    """Fire start_sent for sentence ``idx`` and return its embedded form."""
    chosen = self.src[idx]
    event_trigger.start_sent(chosen)
    return self.model.src_embedder.embed_sent(chosen)
def test_bagofwords_embedder_with_word_vocab(self):
    """A BagOfWordsEmbedder configured with an explicit word vocab embeds
    a sentence without error."""
    bow_embedder = BagOfWordsEmbedder(self.layer_dim,
                                      word_vocab=self.src_vocab,
                                      ngram_vocab=self.ngram_vocab,
                                      ngram_size=3)
    event_trigger.set_train(True)
    event_trigger.start_sent(self.src[1])
    bow_embedder.embed_sent(self.src[1])
def _encode_src(self, src):
    """Encode ``src`` and return the transformed final encoder state."""
    event_trigger.start_sent(src)
    self.encoder.transduce(self.src_embedder.embed_sent(src))
    final_h = self.encoder.get_final_states()[-1].main_expr()
    return self.transform.transform(final_h)