Example #1
0
    def build_model(cls, args, task):
        """Construct a HybridTransformerRNNModel for *task*.

        Applies architecture defaults, builds source/target word
        embeddings, then assembles the encoder/decoder pair.
        """
        # Default any newly-added args before anything reads them.
        base_architecture(args)

        src_dict = task.source_dictionary
        tgt_dict = task.target_dictionary

        src_embed = pytorch_translate_transformer.build_embedding(
            dictionary=src_dict,
            embed_dim=args.encoder_embed_dim,
            path=args.encoder_pretrained_embed,
            freeze=args.encoder_freeze_embed,
        )
        tgt_embed = pytorch_translate_transformer.build_embedding(
            dictionary=tgt_dict,
            embed_dim=args.decoder_embed_dim,
            path=args.decoder_pretrained_embed,
            freeze=args.decoder_freeze_embed,
        )

        encoder = HybridTransformerRNNModel.build_encoder(
            args, src_dict, src_embed, proj_to_decoder=False
        )
        decoder = HybridTransformerRNNModel.build_decoder(
            args, src_dict, tgt_dict, tgt_embed
        )
        return HybridTransformerRNNModel(task, encoder, decoder)
Example #2
0
    def build_model(cls, args, task):
        """Construct a ClozeTransformerModel for *task*.

        Validates the embedding-sharing configuration, builds the
        (possibly shared) encoder/decoder embeddings, then assembles the
        encoder/decoder pair.
        """
        # Default any newly-added args before anything reads them.
        cloze_transformer_architecture(args)

        src_dict = task.source_dictionary
        tgt_dict = task.target_dictionary

        if args.share_all_embeddings:
            # Sharing requires one joined vocabulary, matching embedding
            # dims, and at most a single pretrained-embedding source.
            if src_dict != tgt_dict:
                raise RuntimeError(
                    "--share-all-embeddings requires a joined dictionary"
                )
            if args.encoder_embed_dim != args.decoder_embed_dim:
                raise RuntimeError(
                    "--share-all-embeddings requires --encoder-embed-dim "
                    "to match --decoder-embed-dim"
                )
            if args.decoder_pretrained_embed and (
                args.decoder_pretrained_embed != args.encoder_pretrained_embed
            ):
                raise RuntimeError(
                    "--share-all-embeddings not compatible with "
                    "--decoder-pretrained-embed"
                )

        enc_embed = build_embedding(
            dictionary=src_dict,
            embed_dim=args.encoder_embed_dim,
            path=args.encoder_pretrained_embed,
            freeze=args.encoder_freeze_embed,
        )
        if args.share_all_embeddings:
            dec_embed = enc_embed
            # Fully shared embeddings also tie the decoder's input and
            # output projections.
            args.share_decoder_input_output_embed = True
        else:
            dec_embed = build_embedding(
                dictionary=tgt_dict,
                embed_dim=args.decoder_embed_dim,
                path=args.decoder_pretrained_embed,
                freeze=args.decoder_freeze_embed,
            )

        encoder = ClozeTransformerModel.build_encoder(
            args, src_dict, embed_tokens=enc_embed
        )
        decoder = ClozeTransformerModel.build_decoder(
            args, src_dict, tgt_dict, embed_tokens=dec_embed
        )
        return ClozeTransformerModel(task, encoder, decoder)
Example #3
0
    def build_encoder(cls, args, src_dict):
        """Build a CharCNNEncoder over *src_dict*."""
        # When embedding raw bytes the index space is fixed and independent
        # of the dictionary; otherwise it comes from the char dictionary.
        if args.embed_bytes:
            char_count = vocab_constants.NUM_BYTE_INDICES + len(TAGS) + 1
        else:
            char_count = args.char_source_dict_size

        word_embed = pytorch_translate_transformer.build_embedding(
            dictionary=src_dict,
            embed_dim=args.encoder_embed_dim,
            path=args.encoder_pretrained_embed,
            freeze=args.encoder_freeze_embed,
        )
        return CharCNNEncoder(
            args,
            src_dict,
            word_embed,
            num_chars=char_count,
            embed_dim=args.char_embed_dim,
            char_cnn_params=args.char_cnn_params,
            char_cnn_nonlinear_fn=args.char_cnn_nonlinear_fn,
            char_cnn_pool_type=args.char_cnn_pool_type,
            char_cnn_num_highway_layers=args.char_cnn_num_highway_layers,
            # Optional args default conservatively when absent.
            char_cnn_output_dim=getattr(args, "char_cnn_output_dim", -1),
            use_pretrained_weights=getattr(args, "use_pretrained_weights", False),
            finetune_pretrained_weights=getattr(
                args, "finetune_pretrained_weights", False
            ),
            weights_file=getattr(args, "pretrained_weights_file", ""),
        )
    def build_decoder(cls, args, src_dict, dst_dict):
        """Build a CharAwareHybridRNNDecoder over *dst_dict*."""
        # When embedding raw bytes the index space is fixed and independent
        # of the dictionary; otherwise it comes from the char dictionary.
        if args.embed_bytes:
            char_count = vocab_constants.NUM_BYTE_INDICES + len(TAGS) + 1
        else:
            char_count = args.char_target_dict_size

        word_embed = pytorch_translate_transformer.build_embedding(
            dictionary=dst_dict,
            embed_dim=args.decoder_embed_dim,
            path=args.decoder_pretrained_embed,
            freeze=args.decoder_freeze_embed,
        )
        return CharAwareHybridRNNDecoder(
            args,
            src_dict=src_dict,
            dst_dict=dst_dict,
            embed_tokens=word_embed,
            num_chars=char_count,
            char_embed_dim=args.char_embed_dim,
            char_cnn_params=args.char_cnn_params,
            char_cnn_nonlinear_fn=args.char_cnn_nonlinear_fn,
            char_cnn_num_highway_layers=args.char_cnn_num_highway_layers,
            use_pretrained_weights=False,
            finetune_pretrained_weights=False,
        )
Example #5
0
    def test_forward_training_precompute(self):
        """Precomputed char representations must match on-the-fly embedding."""
        model_args = test_utils.ModelParamsDict(arch="char_aware_hybrid")

        embed_tokens = transformer.build_embedding(
            dictionary=self.word_dict, embed_dim=10
        )
        decoder = maybe_cuda(
            char_aware_hybrid.CharAwareHybridRNNDecoder(
                args=model_args,
                src_dict=self.word_dict,
                dst_dict=self.word_dict,
                embed_tokens=embed_tokens,
                num_chars=len(self.char_dict),
            )
        )
        # Eval mode keeps both embedding paths deterministic (no dropout).
        decoder.eval()

        words = self.word_dict.symbols[-3:]
        word_indices = [self.word_dict.indices[w] for w in words]
        prev_tokens = maybe_cuda(
            torch.LongTensor(word_indices).unsqueeze(1)
        )

        char_rows = [
            decoder._char_list_for_word(
                word_index=idx, word=w, char_dict=self.char_dict
            )
            for idx, w in zip(word_indices, words)
        ]
        prev_chars = maybe_cuda(torch.LongTensor(char_rows))

        baseline = maybe_cuda(
            decoder._embed_prev_outputs(
                prev_output_tokens=prev_tokens,
                prev_output_chars=prev_chars,
            )[0]
        )

        decoder.precompute_char_representations(
            char_dict=self.char_dict, embed_bytes=False, batch_size=30
        )
        precomputed = decoder._embed_prev_outputs(
            prev_output_tokens=prev_tokens, prev_output_chars=prev_chars
        )[0]
        # A known padding issue means the two paths are close but not
        # bit-identical, hence allclose rather than equal.
        assert torch.allclose(baseline, precomputed, rtol=1e-04, atol=1e-04)
Example #6
0
    def build_model(cls, args, task):
        """Construct a DualDecoderKDModel for *task*.

        Builds one shared transformer encoder plus two decoders over the
        same target dictionary: a teacher transformer decoder and a
        student hybrid-RNN decoder.
        """
        # Default any newly-added args before anything reads them.
        base_architecture(args)

        src_dict = task.source_dictionary
        tgt_dict = task.target_dictionary

        src_embed = pytorch_translate_transformer.build_embedding(
            dictionary=src_dict,
            embed_dim=args.encoder_embed_dim,
            path=args.encoder_pretrained_embed,
            freeze=args.encoder_freeze_embed,
        )
        teacher_embed = pytorch_translate_transformer.build_embedding(
            dictionary=tgt_dict,
            embed_dim=args.decoder_embed_dim,
            path=args.decoder_pretrained_embed,
            freeze=args.decoder_freeze_embed,
        )
        # The student decoder gets its own embedding, sized separately via
        # args.student_decoder_embed_dim.
        student_embed = pytorch_translate_transformer.build_embedding(
            dictionary=tgt_dict, embed_dim=args.student_decoder_embed_dim
        )

        encoder = pytorch_translate_transformer.TransformerEncoder(
            args, src_dict, src_embed, proj_to_decoder=False
        )
        teacher_decoder = pytorch_translate_transformer.TransformerModel.build_decoder(
            args, src_dict, tgt_dict, embed_tokens=teacher_embed
        )
        student_decoder = StudentHybridRNNDecoder(
            args, src_dict, tgt_dict, student_embed
        )

        return DualDecoderKDModel(
            task=task,
            encoder=encoder,
            teacher_decoder=teacher_decoder,
            student_decoder=student_decoder,
        )
Example #7
0
 def build_decoder(cls, args, src_dict, dst_dict):
     """Build a HybridRNNDecoder with its target-side word embedding."""
     embed_tokens = pytorch_translate_transformer.build_embedding(
         dictionary=dst_dict,
         embed_dim=args.decoder_embed_dim,
         path=args.decoder_pretrained_embed,
         freeze=args.decoder_freeze_embed,
     )
     return hybrid_transformer_rnn.HybridRNNDecoder(
         args, src_dict, dst_dict, embed_tokens
     )
Example #8
0
    def test_precompute(self):
        """
        Precomputing char representations must be batch-size invariant.

        We run precompute_char_representations with several batch sizes
        and check that the resulting combined word/char embedding table is
        numerically the same each time (eval mode, so no dropout).
        """
        model_args = test_utils.ModelParamsDict(arch="char_aware_hybrid")

        embed_tokens = maybe_cuda(
            transformer.build_embedding(dictionary=self.word_dict, embed_dim=10)
        )
        decoder = maybe_cuda(
            char_aware_hybrid.CharAwareHybridRNNDecoder(
                args=model_args,
                src_dict=self.word_dict,
                dst_dict=self.word_dict,
                embed_tokens=embed_tokens,
                num_chars=len(self.char_dict),
            )
        )
        # Making sure we do not apply dropout.
        decoder.eval()

        def _snapshot_for(batch_size):
            # One precompute pass, then a copy of the resulting table.
            decoder.precompute_char_representations(
                char_dict=self.char_dict,
                embed_bytes=False,
                batch_size=batch_size,
            )
            table = decoder.combined_word_char_embed.weight.clone()
            table.detach()  # NOTE(review): out-of-place; result discarded.
            return table

        first = _snapshot_for(5)
        second = _snapshot_for(11)
        # Due to a known issue in the char_encoder model, this does not hold
        # for torch.equal (T53048656); allclose is the intended check.
        assert torch.allclose(first, second, rtol=1e-04, atol=1e-04)

        third = _snapshot_for(23)
        assert torch.allclose(second, third, rtol=1e-04, atol=1e-04)
    def build_model(cls, args, task):
        """Construct a model with a char-CNN encoder and transformer decoder.

        Validates the embedding-sharing configuration, builds the
        (possibly shared) word embeddings, and wires up the char-aware
        encoder with the transformer decoder.
        """
        src_dict, dst_dict = task.source_dictionary, task.target_dictionary
        # Default any newly-added args before anything reads them.
        base_architecture(args)

        assert hasattr(args, "char_source_dict_size"), (
            "args.char_source_dict_size required. "
            "should be set by load_binarized_dataset()"
        )

        if args.share_all_embeddings:
            # Sharing requires one joined vocabulary, matching embedding
            # dims, and at most a single pretrained-embedding source.
            if src_dict != dst_dict:
                raise RuntimeError(
                    "--share-all-embeddings requires a joined dictionary"
                )
            if args.encoder_embed_dim != args.decoder_embed_dim:
                raise RuntimeError(
                    "--share-all-embeddings requires --encoder-embed-dim "
                    "to match --decoder-embed-dim"
                )
            if args.decoder_pretrained_embed and (
                args.decoder_pretrained_embed != args.encoder_pretrained_embed
            ):
                raise RuntimeError(
                    "--share-all-embeddings not compatible with "
                    "--decoder-pretrained-embed"
                )

        encoder_embed_tokens = transformer.build_embedding(
            src_dict,
            args.encoder_embed_dim,
            args.encoder_pretrained_embed,
            args.encoder_freeze_embed,
        )
        if args.share_all_embeddings:
            decoder_embed_tokens = encoder_embed_tokens
            # Fully shared embeddings also tie the decoder's input and
            # output projections.
            args.share_decoder_input_output_embed = True
        else:
            decoder_embed_tokens = transformer.build_embedding(
                dst_dict,
                args.decoder_embed_dim,
                args.decoder_pretrained_embed,
                args.decoder_freeze_embed,
            )

        args.embed_bytes = getattr(args, "embed_bytes", False)

        # When embedding raw bytes the index space is fixed and independent
        # of the dictionary; otherwise it comes from the char dictionary.
        if args.embed_bytes:
            num_chars = vocab_constants.NUM_BYTE_INDICES + len(TAGS) + 1
        else:
            num_chars = args.char_source_dict_size

        # Sanity-check the pretrained-weight params when both flags are set.
        if args.embed_bytes and getattr(args, "use_pretrained_weights", False):
            verify_pretrain_params(args)

        encoder = CharCNNEncoder(
            args,
            src_dict,
            encoder_embed_tokens,
            num_chars=num_chars,
            embed_dim=args.char_embed_dim,
            char_cnn_params=args.char_cnn_params,
            char_cnn_nonlinear_fn=args.char_cnn_nonlinear_fn,
            char_cnn_pool_type=args.char_cnn_pool_type,
            char_cnn_num_highway_layers=args.char_cnn_num_highway_layers,
            char_cnn_output_dim=getattr(args, "char_cnn_output_dim", -1),
            use_pretrained_weights=getattr(args, "use_pretrained_weights", False),
            finetune_pretrained_weights=getattr(
                args, "finetune_pretrained_weights", False
            ),
            weights_file=getattr(args, "pretrained_weights_file", ""),
        )
        decoder = transformer.TransformerDecoder(
            args=args,
            src_dict=src_dict,
            dst_dict=dst_dict,
            embed_tokens=decoder_embed_tokens,
        )
        return cls(task, encoder, decoder)
Example #10
0
    def build_model(cls, args, task):
        """Construct a char-CNN-encoder / hybrid-RNN-decoder model."""
        src_dict, dst_dict = task.source_dictionary, task.target_dictionary
        # Default any newly-added args before anything reads them.
        base_architecture(args)

        assert hasattr(args, "char_source_dict_size"), (
            "args.char_source_dict_size required. "
            "should be set by load_binarized_dataset()"
        )
        assert hasattr(
            args, "char_cnn_params"
        ), "Only char CNN is supported for the char encoder hybrid model"

        args.embed_bytes = getattr(args, "embed_bytes", False)

        # When embedding raw bytes the index space is fixed and independent
        # of the dictionary; otherwise it comes from the char dictionary.
        if args.embed_bytes:
            num_chars = vocab_constants.NUM_BYTE_INDICES + len(TAGS) + 1
        else:
            num_chars = args.char_source_dict_size

        # Sanity-check the pretrained-weight params when both flags are set.
        if args.embed_bytes and getattr(args, "use_pretrained_weights", False):
            char_source_model.verify_pretrain_params(args)

        word_embed = pytorch_translate_transformer.build_embedding(
            src_dict,
            args.encoder_embed_dim,
            args.encoder_pretrained_embed,
            args.encoder_freeze_embed,
        )
        encoder = CharCNNEncoder(
            args,
            src_dict,
            word_embed,
            num_chars=num_chars,
            embed_dim=args.char_embed_dim,
            char_cnn_params=args.char_cnn_params,
            char_cnn_nonlinear_fn=args.char_cnn_nonlinear_fn,
            char_cnn_pool_type=args.char_cnn_pool_type,
            char_cnn_num_highway_layers=args.char_cnn_num_highway_layers,
            char_cnn_output_dim=getattr(args, "char_cnn_output_dim", -1),
            use_pretrained_weights=getattr(args, "use_pretrained_weights", False),
            finetune_pretrained_weights=getattr(
                args, "finetune_pretrained_weights", False
            ),
            weights_file=getattr(args, "pretrained_weights_file", ""),
            left_pad=False,
        )

        dec_embed = pytorch_translate_transformer.build_embedding(
            dst_dict,
            args.decoder_embed_dim,
            args.decoder_pretrained_embed,
            args.decoder_freeze_embed,
        )
        decoder = hybrid_transformer_rnn.HybridRNNDecoder(
            args, src_dict, dst_dict, dec_embed
        )

        return cls(task, encoder, decoder)
Example #11
0
    def test_forward_training(self):
        """
        Shuffling the input batch must permute, not change, the outputs.

        We rotate the sample along the batch dimension and check that the
        embeddings, encoder outputs, and decoder logits are permuted
        accordingly, both in training mode (without dropout) and in
        eval mode. For the time being, we use an auxiliary
        hybrid_transformer_rnn model in order to get the encoder output.
        """
        test_word_decoder_args = test_utils.ModelParamsDict(
            arch="hybrid_transformer_rnn")
        self.task = tasks.DictionaryHolderTask(self.word_dict, self.word_dict)
        word_model = maybe_cuda(self.task.build_model(test_word_decoder_args))
        word_model.eval()  # Make sure we do not apply dropout.

        test_args = test_utils.ModelParamsDict(arch="char_aware_hybrid")

        decoder_embed_tokens = maybe_cuda(
            transformer.build_embedding(dictionary=self.word_dict,
                                        embed_dim=10))
        decoder = maybe_cuda(
            char_aware_hybrid.CharAwareHybridRNNDecoder(
                args=test_args,
                src_dict=self.word_dict,
                dst_dict=self.word_dict,
                embed_tokens=decoder_embed_tokens,
                num_chars=len(self.char_dict),
            ))

        src_tokens = maybe_cuda(self.sample["net_input"]["src_tokens"])
        src_lengths = maybe_cuda(self.sample["net_input"]["src_lengths"])
        # NOTE(review): takes the last slice along dim 1 and drops that dim;
        # presumably dim 1 is the output-step axis — confirm against the
        # sample builder.
        prev_output_chars = maybe_cuda(
            self.sample["net_input"]["prev_output_chars"][:,
                                                          -1:, :].squeeze(1))
        prev_output_tokens = maybe_cuda(
            self.sample["net_input"]["prev_output_tokens"][:, 0:1])

        encoder_out = word_model.encoder(src_tokens, src_lengths)

        embed_output = decoder._embed_prev_outputs(
            prev_output_tokens=prev_output_tokens,
            prev_output_chars=prev_output_chars)[0]
        forward_output = decoder(
            prev_output_tokens=prev_output_tokens,
            encoder_out=encoder_out,
            prev_output_chars=prev_output_chars,
        )
        output_logits = forward_output[0]

        # Rotate the batch by one: element 0 moves to the end, so original
        # index i ends up at shuffled index (i - 1) mod batch_size.
        prev_output_tokens_shuffled = torch.cat(
            [prev_output_tokens[1:], prev_output_tokens[0].unsqueeze(0)],
            dim=0)
        prev_output_chars_shuffled = torch.cat(
            [prev_output_chars[1:], prev_output_chars[0].unsqueeze(0)], dim=0)
        src_tokens_shuffled = torch.cat(
            [src_tokens[1:], src_tokens[0].unsqueeze(0)], dim=0)

        # Making sure shuffling is done correctly.
        assert torch.equal(src_tokens[0], src_tokens_shuffled[2])
        assert torch.equal(src_tokens[1], src_tokens_shuffled[0])
        assert torch.equal(src_tokens[2], src_tokens_shuffled[1])
        assert torch.equal(prev_output_chars[0], prev_output_chars_shuffled[2])
        assert torch.equal(prev_output_chars[1], prev_output_chars_shuffled[0])
        assert torch.equal(prev_output_chars[2], prev_output_chars_shuffled[1])
        assert torch.equal(prev_output_tokens[0],
                           prev_output_tokens_shuffled[2])
        assert torch.equal(prev_output_tokens[1],
                           prev_output_tokens_shuffled[0])
        assert torch.equal(prev_output_tokens[2],
                           prev_output_tokens_shuffled[1])

        # Making sure that we embed the inputs correctly.
        encoder_out_shuffled = word_model.encoder(src_tokens_shuffled,
                                                  src_lengths)
        embed_output_shuffled = decoder._embed_prev_outputs(
            prev_output_tokens=prev_output_tokens_shuffled,
            prev_output_chars=prev_output_chars_shuffled,
        )[0]
        # Embeddings must follow the same rotation as the inputs.
        assert embed_output[0, 0].equal(embed_output_shuffled[0, 2])
        assert embed_output[0, 1].equal(embed_output_shuffled[0, 0])
        assert embed_output[0, 2].equal(embed_output_shuffled[0, 1])

        # Making sure the output of the forward function is correct.
        forward_output_shuffled = decoder(
            prev_output_tokens=prev_output_tokens_shuffled,
            encoder_out=encoder_out_shuffled,
            prev_output_chars=prev_output_chars_shuffled,
        )
        output_logits_shuffled = forward_output_shuffled[0]

        # Encoder output is indexed (time, batch, feature) here: batch is
        # dim 1 in the comparison below.
        assert encoder_out[0][:, 0, :].equal(encoder_out_shuffled[0][:, 2, :])
        assert encoder_out[0][:, 1, :].equal(encoder_out_shuffled[0][:, 0, :])
        assert encoder_out[0][:, 2, :].equal(encoder_out_shuffled[0][:, 1, :])

        assert output_logits[0].equal(output_logits_shuffled[2])
        assert output_logits[1].equal(output_logits_shuffled[0])
        assert output_logits[2].equal(output_logits_shuffled[1])
        """
        Now trying in the eval mode.
        """
        # Repeat the logits comparison with dropout disabled via eval().
        decoder.eval()
        forward_output = decoder(
            prev_output_tokens=prev_output_tokens,
            encoder_out=encoder_out,
            prev_output_chars=prev_output_chars,
        )
        output_logits = forward_output[0]
        forward_output_shuffled = decoder(
            prev_output_tokens=prev_output_tokens_shuffled,
            encoder_out=encoder_out_shuffled,
            prev_output_chars=prev_output_chars_shuffled,
        )
        output_logits_shuffled = forward_output_shuffled[0]
        assert output_logits[0].equal(output_logits_shuffled[2])
        assert output_logits[1].equal(output_logits_shuffled[0])
        assert output_logits[2].equal(output_logits_shuffled[1])