Example #1
    def calc_nll(self, src, trg):
        if not batchers.is_batched(src):
            src = batchers.ListBatch([src])

        src_inputs = batchers.ListBatch(
            [s[:-1] for s in src],
            mask=batchers.Mask(src.mask.np_arr[:, :-1]) if src.mask else None)
        src_targets = batchers.ListBatch(
            [s[1:] for s in src],
            mask=batchers.Mask(src.mask.np_arr[:, 1:]) if src.mask else None)

        event_trigger.start_sent(src)
        embeddings = self.src_embedder.embed_sent(src_inputs)
        encodings = self.rnn.transduce(embeddings)
        encodings_tensor = encodings.as_tensor()
        ((hidden_dim, seq_len), batch_size) = encodings.dim()
        encoding_reshaped = dy.reshape(encodings_tensor, (hidden_dim, ),
                                       batch_size=batch_size * seq_len)
        outputs = self.transform.transform(encoding_reshaped)

        ref_action = np.asarray([sent.words for sent in src_targets]).reshape(
            (seq_len * batch_size, ))
        loss_expr_perstep = self.scorer.calc_loss(
            outputs, batchers.mark_as_batch(ref_action))
        loss_expr_perstep = dy.reshape(loss_expr_perstep, (seq_len, ),
                                       batch_size=batch_size)
        if src_targets.mask:
            loss_expr_perstep = dy.cmult(
                loss_expr_perstep,
                dy.inputTensor(1.0 - src_targets.mask.np_arr.T, batched=True))
        loss = dy.sum_elems(loss_expr_perstep)

        return loss
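
Note: the reshape in Example #1 folds the time axis into the batch axis so that a single per-vector transform scores every timestep of every sentence in one call. A minimal numpy sketch of the idea (a plain-numpy stand-in with illustrative shapes, not DyNet's actual column-major layout):

import numpy as np

hidden_dim, seq_len, batch_size = 4, 5, 3
# one hidden vector per timestep per sentence
encodings = np.random.rand(batch_size, seq_len, hidden_dim)

# merge time and batch dims, mirroring
# dy.reshape(..., (hidden_dim,), batch_size=batch_size * seq_len)
merged = encodings.reshape(batch_size * seq_len, hidden_dim)

# a single affine transform now applies to all positions at once
W = np.random.rand(hidden_dim, hidden_dim)
outputs = merged @ W
assert outputs.shape == (batch_size * seq_len, hidden_dim)
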
Example #2
 def test_py_lstm_encoder_len(self):
     layer_dim = 512
     model = DefaultTranslator(
         src_reader=self.src_reader,
         trg_reader=self.trg_reader,
         src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
         encoder=PyramidalLSTMSeqTransducer(input_dim=layer_dim,
                                            hidden_dim=layer_dim,
                                            layers=3),
         attender=MlpAttender(input_dim=layer_dim,
                              state_dim=layer_dim,
                              hidden_dim=layer_dim),
         decoder=AutoRegressiveDecoder(
             input_dim=layer_dim,
             embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
             rnn=UniLSTMSeqTransducer(input_dim=layer_dim,
                                      hidden_dim=layer_dim,
                                      decoder_input_dim=layer_dim,
                                      yaml_path="model.decoder.rnn"),
             transform=NonLinear(input_dim=layer_dim * 2,
                                 output_dim=layer_dim),
             scorer=Softmax(input_dim=layer_dim, vocab_size=100),
             bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
     )
     event_trigger.set_train(True)
     for sent_i in range(10):
         dy.renew_cg()
         src = self.src_data[sent_i].create_padded_sent(
             4 - (self.src_data[sent_i].sent_len() % 4))
         event_trigger.start_sent(src)
         embeddings = model.src_embedder.embed_sent(src)
         encodings = model.encoder.transduce(embeddings)
         self.assertEqual(int(math.ceil(len(embeddings) / float(4))),
                          len(encodings))
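
The assert relies on the pyramidal encoder shortening the sequence by a factor of 4 with 3 layers, which suggests each layer after the first halves its input (an assumption inferred from the test, not from the transducer's code). A hedged sketch of the expected output length under that assumption:

import math

def pyramidal_out_len(in_len: int, layers: int, downsample: int = 2) -> int:
    # each layer after the first is assumed to halve the sequence
    factor = downsample ** (layers - 1)
    return int(math.ceil(in_len / float(factor)))

assert pyramidal_out_len(12, layers=3) == 3
assert pyramidal_out_len(14, layers=3) == 4  # non-multiples round up
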
Example #3
  def generate(self, src, forced_trg_ids=None, search_strategy=None):
    event_trigger.start_sent(src)
    if not batchers.is_batched(src):
      src = batchers.mark_as_batch([src])
    outputs = []

    trg = sent.SimpleSentence([0])

    if not batchers.is_batched(trg):
      trg = batchers.mark_as_batch([trg])

    output_actions = []
    score = 0.

    # TODO Fix this with generate_one_step and use the appropriate search_strategy
    self.max_len = 100 # This is a temporary hack
    for _ in range(self.max_len):
      dy.renew_cg(immediate_compute=settings.IMMEDIATE_COMPUTE, check_validity=settings.CHECK_VALIDITY)
      log_prob_tail = self.calc_loss(src, trg, loss_cal=None, infer_prediction=True)
      ys = np.argmax(log_prob_tail.npvalue(), axis=0).astype('i')
      output_actions.append(ys)
      if ys == Vocab.ES:
        break
      trg = sent.SimpleSentence(words=output_actions + [0])
      if not batchers.is_batched(trg):
        trg = batchers.mark_as_batch([trg])

    # Append output to the outputs
    if hasattr(self, "trg_vocab") and self.trg_vocab is not None:
      outputs.append(sent.SimpleSentence(words=output_actions, vocab=self.trg_vocab))
    else:
      outputs.append((output_actions, score))

    return outputs
Example #4
    def calc_nll(self, src: Union[batchers.Batch, sent.Sentence], trg: Union[batchers.Batch, sent.Sentence]) \
            -> tt.Tensor:
        if not batchers.is_batched(src):
            src = batchers.ListBatch([src])

        src_inputs = batchers.ListBatch(
            [s[:-1] for s in src],
            mask=batchers.Mask(src.mask.np_arr[:, :-1]) if src.mask else None)
        src_targets = batchers.ListBatch(
            [s[1:] for s in src],
            mask=batchers.Mask(src.mask.np_arr[:, 1:]) if src.mask else None)

        event_trigger.start_sent(src)
        embeddings = self.src_embedder.embed_sent(src_inputs)
        encodings = self.rnn.transduce(embeddings)
        encodings_tensor = encodings.as_tensor()

        encoding_reshaped = tt.merge_time_batch_dims(encodings_tensor)
        seq_len = tt.sent_len(encodings_tensor)
        batch_size = tt.batch_size(encodings_tensor)

        outputs = self.transform.transform(encoding_reshaped)

        ref_action = np.asarray([sent.words for sent in src_targets]).reshape(
            (seq_len * batch_size, ))
        loss_expr_perstep = self.scorer.calc_loss(
            outputs, batchers.mark_as_batch(ref_action))

        loss_expr_perstep = tt.unmerge_time_batch_dims(loss_expr_perstep,
                                                       batch_size)

        loss = tt.aggregate_masked_loss(loss_expr_perstep, src_targets.mask)

        return loss
Example #5
  def generate_search_output(self,
                             src: batchers.Batch,
                             search_strategy: search_strategies.SearchStrategy,
                             forced_trg_ids: batchers.Batch=None) -> List[search_strategies.SearchOutput]:
    """
    Takes in a batch of source sentences and outputs a list of search outputs.
    Args:
      src: The source sentences
      search_strategy: The strategy with which to perform the search
      forced_trg_ids: The target IDs to generate if performing forced decoding
    Returns:
      A list of search outputs including scores, etc.
    """
    if src.batch_size() != 1:
      raise NotImplementedError("batched decoding not implemented for DefaultTranslator. "
                                "Specify inference batcher with batch size 1.")
    event_trigger.start_sent(src)
    all_src = src
    if isinstance(src, batchers.CompoundBatch): src = src.batches[0]
    # Generating outputs
    cur_forced_trg = None
    src_sent = src[0]  # checkme
    sent_mask = None
    if src.mask: sent_mask = batchers.Mask(np_arr=src.mask.np_arr[0:1])
    sent_batch = batchers.mark_as_batch([src_sent], mask=sent_mask)

    # Encode the sentence
    initial_state = self._encode_src(all_src)

    if forced_trg_ids is not None: cur_forced_trg = forced_trg_ids[0]
    search_outputs = search_strategy.generate_output(self, initial_state,
                                                     src_length=[src_sent.sent_len()],
                                                     forced_trg_ids=cur_forced_trg)
    return search_outputs
Example #6
    def generate(self,
                 src: batchers.Batch,
                 normalize_scores: bool = False,
                 *args,
                 **kwargs) -> Sequence[sent.ReadableSentence]:
        assert src.batch_size() == 1, "batch size > 1 not properly tested"
        event_trigger.start_sent(src)

        batch_size, encodings, outputs, seq_len = self._encode_src(src)

        best_words, best_scores = self.scorer.best_k(
            outputs, k=1, normalize_scores=normalize_scores)
        best_words = best_words[0, :]
        score = np.sum(best_scores, axis=1)

        outputs = [
            sent.SimpleSentence(
                words=best_words,
                idx=src[0].idx,
                vocab=self.trg_vocab if hasattr(self, "trg_vocab") else None,
                output_procs=self.trg_reader.output_procs,
                score=score)
        ]

        return outputs
Example #7
 def assert_in_out_len_equal(self, model):
     dy.renew_cg()
     event_trigger.set_train(True)
     src = self.src_data[0]
     event_trigger.start_sent(src)
     embeddings = model.src_embedder.embed_sent(src)
     encodings = model.encoder.transduce(embeddings)
     self.assertEqual(len(embeddings), len(encodings))
Example #8
 def test_transducer_composer(self):
   composer = SeqTransducerComposer(seq_transducer=BiLSTMSeqTransducer(input_dim=self.layer_dim,
                                                                       hidden_dim=self.layer_dim))
   embedder = CharCompositionEmbedder(emb_dim=self.layer_dim,
                                      composer=composer,
                                      char_vocab=self.src_char_vocab)
   event_trigger.set_train(True)
   event_trigger.start_sent(self.src[1])
   embedder.embed_sent(self.src[1])
Example #9
 def _encode_src(self, src):
   event_trigger.start_sent(src)
   embeddings = self.src_embedder.embed_sent(src)
   encodings = self.encoder.transduce(embeddings)
   encodings_tensor = encodings.as_tensor()
   ((hidden_dim, seq_len), batch_size) = encodings.dim()
   encoding_reshaped = dy.reshape(encodings_tensor, (hidden_dim,), batch_size=batch_size * seq_len)
   outputs = self.transform.transform(encoding_reshaped)
   return batch_size, encodings, outputs, seq_len
Example #10
File: bow.py Project: seeledu/xnmt-devel
  def generate(self, src, forced_trg_ids):
    assert not forced_trg_ids
    assert batchers.is_batched(src) and src.batch_size() == 1, "batched generation not fully implemented"
    src = src[0]
    # Generating outputs
    outputs = []
    event_trigger.start_sent(src)
    embeddings = self.src_embedder.embed_sent(src)
    encodings = self.encoder.transduce(embeddings)
    if self.mode in ["avg_mlp", "final_mlp"]:
      if self.generate_per_step:
        assert self.mode == "avg_mlp", "final_mlp not supported with generate_per_step=True"
        scores = [dy.logistic(self.output_layer.transform(enc_i)) for enc_i in encodings]
      else:
        if self.mode == "avg_mlp":
          encoding_fixed_size = dy.sum_dim(encodings.as_tensor(), [1]) * (1.0 / encodings.dim()[0][1])
        elif self.mode == "final_mlp":
          encoding_fixed_size = self.encoder.get_final_states()[-1].main_expr()
        scores = dy.logistic(self.output_layer.transform(encoding_fixed_size))
    elif self.mode == "lin_sum_sig":
      enc_lin = []
      for step_i, enc_i in enumerate(encodings):
        step_linear = self.output_layer.transform(enc_i)
        if encodings.mask and np.sum(encodings.mask.np_arr[:, step_i]) > 0:
          step_linear = dy.cmult(step_linear, dy.inputTensor(1.0 - encodings.mask.np_arr[:, step_i], batched=True))
        enc_lin.append(step_linear)
      if self.generate_per_step:
        scores = [dy.logistic(enc_i) for enc_i in enc_lin]
      else:
        if encodings.mask:
          encoding_fixed_size = dy.cdiv(dy.esum(enc_lin),
                                        dy.inputTensor(np.sum(1.0 - encodings.mask.np_arr, axis=1), batched=True))
        else:
          encoding_fixed_size = dy.esum(enc_lin) / encodings.dim()[0][1]
        scores = dy.logistic(encoding_fixed_size)
    else:
      raise ValueError(f"unknown mode '{self.mode}'")

    if self.generate_per_step:
      output_actions = [np.argmax(score_i.npvalue()) for score_i in scores]
      score = np.sum([np.max(score_i.npvalue()) for score_i in scores])
      outputs.append(sent.SimpleSentence(words=output_actions,
                                         idx=src.idx,
                                         vocab=getattr(self.trg_reader, "vocab", None),
                                         score=score,
                                         output_procs=self.trg_reader.output_procs))
    else:
      scores_arr = scores.npvalue()
      output_actions = list(np.nonzero(scores_arr > 0.5)[0])
      score = np.sum(scores_arr[scores_arr > 0.5])
      outputs.append(sent.SimpleSentence(words=output_actions,
                                         idx=src.idx,
                                         vocab=getattr(self.trg_reader, "vocab", None),
                                         score=score,
                                         output_procs=self.trg_reader.output_procs))
    return outputs
Example #11
 def _encode_src(self, src: Union[sent.Sentence, batchers.Batch]) -> tuple:
     event_trigger.start_sent(src)
     embeddings = self.src_embedder.embed_sent(src)
     encodings = self.encoder.transduce(embeddings)
     encodings_tensor = encodings.as_tensor()
     encoding_reshaped = tt.merge_time_batch_dims(encodings_tensor)
     outputs = self.transform.transform(encoding_reshaped)
     return (tt.batch_size(encodings_tensor), encodings, outputs,
             tt.sent_len(encodings_tensor))
Example #12
    def calc_loss(self, src, trg, infer_prediction=False):
        event_trigger.start_sent(src)
        if not batchers.is_batched(src):
            src = batchers.mark_as_batch([src])
        if not batchers.is_batched(trg):
            trg = batchers.mark_as_batch([trg])
        src_words = np.array([[vocabs.Vocab.SS] + x.words for x in src])
        batch_size, src_len = src_words.shape

        if src.mask is None:
            src_mask = np.zeros((batch_size, src_len), dtype=int)
        else:
            src_mask = np.concatenate(
                [np.zeros((batch_size, 1), dtype=int),
                 src.mask.np_arr.astype(int)],
                axis=1)

        src_embeddings = self.sentence_block_embed(
            self.src_embedder.embeddings, src_words, src_mask)
        src_embeddings = self.make_input_embedding(src_embeddings, src_len)

        trg_words = np.array(
            list(map(lambda x: [vocabs.Vocab.SS] + x.words[:-1], trg)))
        batch_size, trg_len = trg_words.shape

        if trg.mask is None:
            trg_mask = np.zeros((batch_size, trg_len), dtype=int)
        else:
            trg_mask = trg.mask.np_arr.astype(int)

        trg_embeddings = self.sentence_block_embed(
            self.trg_embedder.embeddings, trg_words, trg_mask)
        trg_embeddings = self.make_input_embedding(trg_embeddings, trg_len)

        xx_mask = self.make_attention_mask(src_mask, src_mask)
        xy_mask = self.make_attention_mask(trg_mask, src_mask)
        yy_mask = self.make_attention_mask(trg_mask, trg_mask)
        yy_mask *= self.make_history_mask(trg_mask)

        z_blocks = self.encoder.transduce(src_embeddings, xx_mask)
        h_block = self.decoder(trg_embeddings, z_blocks, xy_mask, yy_mask)

        if infer_prediction:
            y_len = h_block.dim()[0][1]
            last_col = dy.pick(h_block, dim=1, index=y_len - 1)
            logits = self.decoder.output(last_col)
            return logits

        ref_list = list(
            itertools.chain.from_iterable(map(lambda x: x.words, trg)))
        concat_t_block = (1 - trg_mask.ravel()) * np.array(ref_list)
        loss = self.decoder.output_and_loss(h_block, concat_t_block)
        return losses.FactoredLossExpr({"mle": loss})
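
`make_history_mask` is not shown in this example; below is a hedged numpy sketch of the standard causal construction it implies, assuming 1 marks a position a decoder step may attend to (consistent with the multiplicative combination into `yy_mask` above). The project's actual implementation may differ in convention:

import numpy as np

def history_mask_sketch(batch_size: int, trg_len: int) -> np.ndarray:
    # lower-triangular: step i may attend to steps j <= i only
    history = np.tril(np.ones((trg_len, trg_len), dtype=np.float32))
    return np.broadcast_to(history, (batch_size, trg_len, trg_len))

m = history_mask_sketch(batch_size=1, trg_len=4)
assert m[0, 0, 1] == 0.0  # step 0 cannot see step 1
assert m[0, 3, 2] == 1.0  # step 3 can see step 2
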
Example #13
 def test_dyer_composer(self):
   composer = DyerHeadComposer(fwd_combinator=UniLSTMSeqTransducer(input_dim=self.layer_dim, hidden_dim=self.layer_dim),
                               bwd_combinator=UniLSTMSeqTransducer(input_dim=self.layer_dim, hidden_dim=self.layer_dim),
                               transform=AuxNonLinear(input_dim=self.layer_dim,
                                                      output_dim=self.layer_dim,
                                                      aux_input_dim=self.layer_dim))
   embedder = CharCompositionEmbedder(emb_dim=self.layer_dim,
                                      composer=composer,
                                      char_vocab=self.src_char_vocab)
   event_trigger.set_train(True)
   event_trigger.start_sent(self.src[1])
   embedder.embed_sent(self.src[1])
Example #14
    def calc_loss(
        self, model: 'model_base.ConditionedModel',
        src: Union[sent.Sentence, 'batchers.Batch'],
        trg: Union[sent.Sentence,
                   'batchers.Batch']) -> losses.FactoredLossExpr:
        if not batchers.is_batched(src):
            src = batchers.mark_as_batch([src])
        if not batchers.is_batched(trg):
            trg = batchers.mark_as_batch([trg])

        event_trigger.start_sent(src)
        return self._perform_calc_loss(model, src, trg)
Example #15
    def calc_nll(self, src: Union[batchers.Batch, sent.Sentence],
                 trg: Union[batchers.Batch, sent.Sentence]) -> dy.Expression:
        event_trigger.start_sent(src)
        if isinstance(src, batchers.CompoundBatch): src = src.batches[0]
        # Encode the sentence
        initial_state = self._encode_src(src)

        dec_state = initial_state
        trg_mask = trg.mask if batchers.is_batched(trg) else None
        cur_losses = []
        seq_len = trg.sent_len()

        if settings.CHECK_VALIDITY and batchers.is_batched(src):
            for j, single_trg in enumerate(trg):
                assert single_trg.sent_len() == seq_len  # assert consistent length
                assert 1 == len([
                    i for i in range(seq_len)
                    if (trg_mask is None or trg_mask.np_arr[j, i] == 0)
                    and single_trg[i] == vocabs.Vocab.ES
                ])  # assert exactly one unmasked ES token

        input_word = None
        for i in range(seq_len):
            ref_word = DefaultTranslator._select_ref_words(
                trg, i, truncate_masked=self.truncate_dec_batches)
            if self.truncate_dec_batches and batchers.is_batched(ref_word):
                dec_state.rnn_state, ref_word = batchers.truncate_batches(
                    dec_state.rnn_state, ref_word)

            if input_word is not None:
                dec_state = self.decoder.add_input(
                    dec_state, self.trg_embedder.embed(input_word))
            rnn_output = dec_state.rnn_state.output()
            dec_state.context = self.attender.calc_context(rnn_output)
            word_loss = self.decoder.calc_loss(dec_state, ref_word)

            if (not self.truncate_dec_batches and batchers.is_batched(src)
                    and trg_mask is not None):
                word_loss = trg_mask.cmult_by_timestep_expr(word_loss,
                                                            i,
                                                            inverse=True)
            cur_losses.append(word_loss)
            input_word = ref_word

        if self.truncate_dec_batches:
            loss_expr = dy.esum([dy.sum_batches(wl) for wl in cur_losses])
        else:
            loss_expr = dy.esum(cur_losses)
        return loss_expr
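
The masking step above (`trg_mask.cmult_by_timestep_expr(word_loss, i, inverse=True)`) zeroes the loss wherever the target is padding. A minimal numpy sketch of that operation, using the mask convention visible in the other examples (1 = padded, 0 = real token):

import numpy as np

def mask_timestep_loss(word_loss: np.ndarray, mask_np_arr: np.ndarray, i: int) -> np.ndarray:
    # multiply by (1 - mask) so padded positions contribute zero loss
    return word_loss * (1.0 - mask_np_arr[:, i])

losses_at_step_1 = np.array([0.7, 1.2, 0.4])      # one loss per batch item
mask = np.array([[0., 0.], [0., 1.], [0., 1.]])   # items 2 and 3 padded at step 1
print(mask_timestep_loss(losses_at_step_1, mask, i=1))  # [0.7 0.  0. ]
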
Example #16
 def calc_nll(self, src_batch, trg_batch) -> dy.Expression:
     self.actions.clear()
     self.outputs.clear()
     event_trigger.start_sent(src_batch)
     batch_loss = []
     # For every item in the batch
     for src, trg in zip(src_batch, trg_batch):
         # Initial state with no read/write actions being taken
         current_state = self._initial_state(src)
         src_len = src.sent_len()
         # Reading + Writing
         src_encoding = []
         loss_exprs = []
         now_action = []
         outputs = []
         # Simultaneous greedy search
         while not self._stoping_criterions_met(current_state, trg):
             # Define action based on state
             action = self.next_action(current_state, src_len,
                                       len(src_encoding))
             if action == self.Action.READ:
                 # Reading + Encoding
                 current_state = current_state.read(src)
                 src_encoding.append(current_state.encoder_state.output())
             else:
                 # Predicting next word
                 current_state = current_state.calc_context(src_encoding)
                 current_output = self.add_input(
                     current_state.prev_written_word, current_state)
                 # Calculating losses
                 ground_truth = self._select_ground_truth(
                     current_state, trg)
                 loss_exprs.append(
                     self.decoder.calc_loss(current_output.state,
                                            ground_truth))
                 # Use word from ref/model depending on settings
                 next_word = self._select_next_word(ground_truth,
                                                    current_output.state)
                 # The produced words
                 outputs.append(next_word)
                 current_state = current_state.write(next_word)
             now_action.append(action.value)
         self.actions.append(now_action)
         self.outputs.append(outputs)
         # Accumulate loss
         batch_loss.append(dy.esum(loss_exprs))
     dy.forward(batch_loss)
     loss = dy.esum(batch_loss)
     return loss if not self.freeze_decoder_param else dy.nobackprop(loss)
Example #17
 def test_composite_composer(self):
   composer = DyerHeadComposer(fwd_combinator=UniLSTMSeqTransducer(input_dim=self.layer_dim, hidden_dim=self.layer_dim),
                               bwd_combinator=UniLSTMSeqTransducer(input_dim=self.layer_dim, hidden_dim=self.layer_dim),
                               transform=AuxNonLinear(input_dim=self.layer_dim,
                                                      output_dim=self.layer_dim,
                                                      aux_input_dim=self.layer_dim))
   embedder_1 = CharCompositionEmbedder(emb_dim=self.layer_dim,
                                        composer=composer,
                                        char_vocab=self.src_char_vocab)
   embedder_2 = LookupEmbedder(emb_dim=self.layer_dim, vocab_size=100)
   embedder = CompositeEmbedder(embedders=[embedder_1, embedder_2])
   event_trigger.set_train(True)
   event_trigger.start_sent(self.src[1])
   embedder.embed_sent(self.src[1])
   embedder.embed(self.src[1][0].words[0])
Example #18
    def calc_loss(self, src, trg, loss_calculator):

        event_trigger.start_sent(src)

        src_embeddings = self.src_embedder.embed_sent(src)
        src_encodings = self.src_encoder(src_embeddings)

        trg_embeddings = self.trg_embedder.embed_sent(trg)
        trg_encodings = self.trg_encoder(trg_embeddings)

        model_loss = losses.FactoredLossExpr()
        model_loss.add_loss("dist",
                            loss_calculator(src_encodings, trg_encodings))

        return model_loss
Example #19
File: default.py Project: yzhen-li/xnmt
    def calc_nll(self, src: Union[batchers.Batch, sent.Sentence],
                 trg: Union[batchers.Batch, sent.Sentence]) -> tt.Tensor:
        event_trigger.start_sent(src)
        if isinstance(src, batchers.CompoundBatch): src = src.batches[0]
        # Encode the sentence
        initial_state = self._initial_state(src)

        dec_state = initial_state
        trg_mask = trg.mask if batchers.is_batched(trg) else None
        cur_losses = []
        seq_len = trg.sent_len()

        # Sanity check if requested
        if settings.CHECK_VALIDITY and batchers.is_batched(src):
            for j, single_trg in enumerate(trg):
                # assert consistent length
                assert single_trg.sent_len() == seq_len
                # assert exactly one unmasked ES token
                assert 1 == len([
                    i for i in range(seq_len)
                    if (trg_mask is None or trg_mask.np_arr[j, i] == 0)
                    and single_trg[i] == vocabs.Vocab.ES
                ])

        input_word = None
        for i in range(seq_len):
            ref_word = DefaultTranslator._select_ref_words(trg, i)

            if input_word is not None:
                dec_state = self.decoder.add_input(dec_state, input_word)
            rnn_output = dec_state.as_vector()
            dec_state.context = self.attender.calc_context(rnn_output)
            word_loss = self.decoder.calc_loss(dec_state, ref_word)

            if batchers.is_batched(src) and trg_mask is not None:
                word_loss = trg_mask.cmult_by_timestep_expr(word_loss,
                                                            i,
                                                            inverse=True)
            cur_losses.append(word_loss)
            input_word = ref_word

        loss_expr = tt.esum(cur_losses)
        return loss_expr
Example #20
File: default.py Project: rezahaffari/xnmt
    def generate(
        self, src: batchers.Batch,
        search_strategy: search_strategies.SearchStrategy
    ) -> Sequence[sent.Sentence]:
        """
    Takes in a batch of source sentences and outputs a list of search outputs.
    Args:
      src: The source sentences
      search_strategy: The strategy with which to perform the search
    Returns:
      A list of search outputs including scores, etc.
    """
        assert src.batch_size() == 1
        event_trigger.start_sent(src)
        search_outputs = self.generate_search_output(src, search_strategy)
        if isinstance(src, batchers.CompoundBatch): src = src.batches[0]
        sorted_outputs = sorted(search_outputs,
                                key=lambda x: x.score[0],
                                reverse=True)
        assert len(sorted_outputs) >= 1
        outputs = []
        for curr_output in sorted_outputs:
            output_actions = [x for x in curr_output.word_ids[0]]
            attentions = [x for x in curr_output.attentions[0]]
            score = curr_output.score[0]
            out_sent = self._emit_translation(src, output_actions, score)
            if len(sorted_outputs) == 1:
                outputs.append(out_sent)
            else:
                outputs.append(
                    sent.NbestSentence(base_sent=out_sent,
                                       nbest_id=src[0].idx))

        if self.is_reporting():
            attentions = np.concatenate([x.npvalue() for x in attentions],
                                        axis=1)
            self.report_sent_info({
                "attentions": attentions,
                "src": src[0],
                "output": outputs[0]
            })

        return outputs
Example #21
    def test_py_lstm_mask(self):
        layer_dim = 512
        model = DefaultTranslator(
            src_reader=self.src_reader,
            trg_reader=self.trg_reader,
            src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            encoder=PyramidalLSTMSeqTransducer(input_dim=layer_dim,
                                               hidden_dim=layer_dim,
                                               layers=1),
            attender=MlpAttender(input_dim=layer_dim,
                                 state_dim=layer_dim,
                                 hidden_dim=layer_dim),
            trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            decoder=AutoRegressiveDecoder(
                input_dim=layer_dim,
                trg_embed_dim=layer_dim,
                rnn=UniLSTMSeqTransducer(input_dim=layer_dim,
                                         hidden_dim=layer_dim,
                                         decoder_input_dim=layer_dim,
                                         yaml_path="model.decoder.rnn"),
                transform=NonLinear(input_dim=layer_dim * 2,
                                    output_dim=layer_dim),
                scorer=Softmax(input_dim=layer_dim, vocab_size=100),
                bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
        )

        batcher = batchers.TrgBatcher(batch_size=3)
        train_src, _ = batcher.pack(self.src_data, self.trg_data)

        event_trigger.set_train(True)
        for sent_i in range(3):
            dy.renew_cg()
            src = train_src[sent_i]
            event_trigger.start_sent(src)
            embeddings = model.src_embedder.embed_sent(src)
            encodings = model.encoder.transduce(embeddings)
            if train_src[sent_i].mask is None:
                assert encodings.mask is None
            else:
                np.testing.assert_array_almost_equal(
                    train_src[sent_i].mask.np_arr, encodings.mask.np_arr)
Example #22
File: bow.py Project: seeledu/xnmt-devel
  def calc_nll(self, src, trg):
    event_trigger.start_sent(src)
    embeddings = self.src_embedder.embed_sent(src)
    encodings = self.encoder.transduce(embeddings)
    if not batchers.is_batched(trg): trg = batchers.mark_as_batch([trg])

    if self.mode in ["avg_mlp", "final_mlp"]:
      if self.mode == "avg_mlp":
        if encodings.mask:
          encoding_fixed_size = dy.cdiv(dy.sum_dim(encodings.as_tensor(), [1]),
                                        dy.inputTensor(np.sum(1.0 - encodings.mask.np_arr, axis=1), batched=True))
        else:
          encoding_fixed_size = dy.sum_dim(encodings.as_tensor(), [1]) / encodings.dim()[0][1]
      elif self.mode == "final_mlp":
        encoding_fixed_size = self.encoder.get_final_states()[-1].main_expr()
      scores = dy.logistic(self.output_layer.transform(encoding_fixed_size))
    elif self.mode == "lin_sum_sig":
      enc_lin = []
      for step_i, enc_i in enumerate(encodings):
        step_linear = self.output_layer.transform(enc_i)
        if encodings.mask and np.sum(encodings.mask.np_arr[:, step_i]) > 0:
          step_linear = dy.cmult(step_linear, dy.inputTensor(1.0 - encodings.mask.np_arr[:, step_i], batched=True))
        enc_lin.append(step_linear)
      if encodings.mask:
        encoding_fixed_size = dy.cdiv(dy.esum(enc_lin),
                                      dy.inputTensor(np.sum(1.0 - encodings.mask.np_arr, axis=1), batched=True))
      else:
        encoding_fixed_size = dy.esum(enc_lin) / encodings.dim()[0][1]
      scores = dy.logistic(encoding_fixed_size)
    else:
      raise ValueError(f"unknown mode '{self.mode}'")

    idxs = ([], [])
    for batch_i in range(trg.batch_size()):
      for word in set(trg[batch_i]):
        if word not in {vocabs.Vocab.ES, vocabs.Vocab.SS}:
          idxs[0].append(word)
          idxs[1].append(batch_i)
    trg_scores = dy.sparse_inputTensor(idxs, values=np.ones(len(idxs[0])),
                                       shape=scores.dim()[0] + (scores.dim()[1],),
                                       batched=True)
    loss_expr = dy.binary_log_loss(scores, trg_scores)
    return loss_expr
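
The sparse tensor built above is a multi-hot target: one row per vocabulary entry, one column per batch item, 1.0 wherever the reference sentence contains that word (ES/SS excluded upstream). A dense numpy sketch of the same construction (`multi_hot_targets` is a hypothetical helper; the vocab size is illustrative):

import numpy as np

def multi_hot_targets(trg_word_sets, vocab_size):
    targets = np.zeros((vocab_size, len(trg_word_sets)))
    for batch_i, words in enumerate(trg_word_sets):
        targets[sorted(words), batch_i] = 1.0  # mark each word present in the reference
    return targets

# two sentences over a toy 6-word vocab
print(multi_hot_targets([{2, 5}, {3}], vocab_size=6))
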
Example #23
    def calc_nll(self, src_batch, trg_batch) -> losses.LossExpr:
        event_trigger.start_sent(src_batch)
        self.create_trajectories(src_batch,
                                 trg_batch,
                                 force_oracle=not self._is_action_forced())

        batch_loss = []
        for src, trg, decoder_state in zip(src_batch, trg_batch,
                                           self.decoder_states):
            seq_loss = [
                self.decoder.calc_loss(decoder_state[i], trg[i])
                for i in range(len(decoder_state))
            ]
            batch_loss.append(dy.esum(seq_loss))

        dy.forward(batch_loss)
        total_loss = dy.concatenate_to_batch(batch_loss)
        total_units = [
            trg_batch[i].len_unpadded() for i in range(trg_batch.batch_size())
        ]
        return losses.LossExpr(total_loss, total_units)
Example #24
    def calc_nll(self, src, trg):
        event_trigger.start_sent(src)
        if isinstance(src, batchers.CompoundBatch):
            src, _ = src.batches
        initial_state = self._encode_src(src)

        dec_state = initial_state
        trg_mask = trg.mask if batchers.is_batched(trg) else None
        losses = []
        seq_len = trg.sent_len()
        if batchers.is_batched(src):
            for j, single_trg in enumerate(trg):
                assert single_trg.sent_len() == seq_len  # assert consistent length
                assert 1 == len([
                    i for i in range(seq_len)
                    if (trg_mask is None or trg_mask.np_arr[j, i] == 0)
                    and single_trg[i] == vocabs.Vocab.ES
                ])  # assert exactly one unmasked ES token
        prev_ref_word = None
        for i in range(seq_len):
            if not batchers.is_batched(trg):
                ref_word = trg[i]
            else:
                ref_word = batchers.mark_as_batch(
                    [single_trg[i] for single_trg in trg])
            word_loss = self.calc_loss_one_step(
                dec_state=dec_state,
                batch_size=ref_word.batch_size(),
                ref_action=ref_word,
                prev_ref_action=prev_ref_word,
                mode=self.mode_translate)
            if batchers.is_batched(src) and trg_mask is not None:
                word_loss = trg_mask.cmult_by_timestep_expr(word_loss,
                                                            i,
                                                            inverse=True)
            losses.append(word_loss)
            prev_ref_word = ref_word

        return dy.esum(losses)
Example #25
    def calc_policy_nll(self, src_batch, trg_batch) -> losses.LossExpr:
        assert self.policy_network is not None

        event_trigger.start_sent(src_batch)
        self.create_trajectories(src_batch,
                                 trg_batch,
                                 force_oracle=not self._is_action_forced())

        batch_loss = []
        for src, action, model_states in zip(src_batch, self.actions,
                                             self.model_states):
            policy_actions = model_states[-1].find_backward("policy_action")
            seq_ll = [
                dy.pick(act.log_likelihood, act.content)
                for act in policy_actions
            ]
            batch_loss.append(-dy.esum(seq_ll))

        dy.forward(batch_loss)
        total_loss = dy.concatenate_to_batch(batch_loss)
        total_units = [len(x) for x in self.actions]
        return losses.LossExpr(total_loss, total_units)
Example #26
File: default.py Project: yzhen-li/xnmt
 def generate_search_output(
     self, src: batchers.Batch,
     search_strategy: search_strategies.SearchStrategy
 ) -> List[search_strategies.SearchOutput]:
     """
 Takes in a batch of source sentences and outputs a list of search outputs.
 Args:
   src: The source sentences
   search_strategy: The strategy with which to perform the search
 Returns:
   A list of search outputs including scores, etc.
 """
     if src.batch_size() != 1:
         raise NotImplementedError(
             "batched decoding not implemented for DefaultTranslator. "
             "Specify inference batcher with batch size 1.")
     event_trigger.start_sent(src)
     if isinstance(src, batchers.CompoundBatch):
         src = src.batches[0]
     search_outputs = search_strategy.generate_output(
         self, self._initial_state(src), src_length=src.sent_len())
     return search_outputs
Example #27
    def generate(self,
                 src: Union[batchers.Batch, sent.Sentence],
                 normalize_scores: bool = False,
                 *args,
                 **kwargs):
        if not batchers.is_batched(src):
            src = batchers.mark_as_batch([src])
        event_trigger.start_sent(src)
        h = self._encode_src(src)
        best_words, best_scores = self.scorer.best_k(
            h, k=1, normalize_scores=normalize_scores)
        assert best_words.shape == (1, src.batch_size())
        assert best_scores.shape == (1, src.batch_size())

        outputs = []
        for batch_i in range(src.batch_size()):
            if src.batch_size() > 1:
                word = best_words[0, batch_i]
                score = best_scores[0, batch_i]
            else:
                word = best_words[0]
                score = best_scores[0]
            outputs.append(sent.ScalarSentence(value=word, score=score))
        return outputs
Example #28
 def inp_emb(self, idx=0):
   event_trigger.start_sent(self.src[idx])
   embed = self.model.src_embedder.embed_sent(self.src[idx])
   return embed
Example #29
 def test_bagofwords_embedder_with_word_vocab(self):
   embedder = BagOfWordsEmbedder(self.layer_dim, word_vocab=self.src_vocab, ngram_vocab=self.ngram_vocab, ngram_size=3)
   event_trigger.set_train(True)
   event_trigger.start_sent(self.src[1])
   embedder.embed_sent(self.src[1])
Example #30
 def _encode_src(self, src):
     event_trigger.start_sent(src)
     embeddings = self.src_embedder.embed_sent(src)
     self.encoder.transduce(embeddings)
     h = self.encoder.get_final_states()[-1].main_expr()
     return self.transform.transform(h)
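
Taken together, the examples share one recurring call sequence: renew the computation graph, ensure the input is batched, fire `event_trigger.start_sent`, then embed and transduce. A condensed sketch of that pattern (the import paths and model attributes are assumptions inferred from the snippets above, not verified against the library):

import dynet as dy
from xnmt import batchers, event_trigger

def encode_src(model, src):
    dy.renew_cg()                            # fresh computation graph per sentence/batch
    if not batchers.is_batched(src):
        src = batchers.mark_as_batch([src])  # wrap a single sentence as a batch
    event_trigger.start_sent(src)            # notify components that a new sentence starts
    embeddings = model.src_embedder.embed_sent(src)
    return model.encoder.transduce(embeddings)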