Example #1
    def __init__(self, args, dictionary, embed_tokens, proj_to_decoder=True):
        super().__init__(dictionary)
        self.transformer_embedding = TransformerEmbedding(
            args=args, embed_tokens=embed_tokens)

        self.transformer_encoder_given_embeddings = TransformerEncoderGivenEmbeddings(
            args=args, proj_to_decoder=proj_to_decoder)

        # Variable tracker
        self.tracker = VariableTracker()

        # Initialize adversarial mode
        self.set_gradient_tracking_mode(False)
        self.set_embed_noising_mode(False)
Example #2
    def __init__(
        self,
        args,
        dictionary,
        embed_tokens,
        num_chars=50,
        embed_dim=32,
        char_cnn_params="[(128, 3), (128, 5)]",
        char_cnn_nonlinear_fn="tanh",
        char_cnn_pool_type="max",
        char_cnn_num_highway_layers=0,
        char_cnn_output_dim=-1,
        use_pretrained_weights=False,
        finetune_pretrained_weights=False,
        weights_file=None,
    ):
        super().__init__(dictionary)

        # `char_cnn_params` is a string such as "[(128, 3), (128, 5)]"; parse it
        # into a list of (num_filters, kernel_width) tuples.
        convolutions_params = literal_eval(char_cnn_params)
        self.char_cnn_encoder = char_encoder.CharCNNModel(
            dictionary,
            num_chars,
            embed_dim,
            convolutions_params,
            char_cnn_nonlinear_fn,
            char_cnn_pool_type,
            char_cnn_num_highway_layers,
            char_cnn_output_dim,
            use_pretrained_weights,
            finetune_pretrained_weights,
            weights_file,
        )

        self.embed_tokens = embed_tokens
        token_embed_dim = embed_tokens.embedding_dim
        self.word_layer_norm = nn.LayerNorm(token_embed_dim)

        char_embed_dim = (
            char_cnn_output_dim
            if char_cnn_output_dim != -1
            else sum(out_dim for (out_dim, _) in convolutions_params)
        )
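        # With char_cnn_output_dim == -1 (the default), the char embedding size
        # is the total number of CNN filters across all kernel widths.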
        self.char_layer_norm = nn.LayerNorm(char_embed_dim)
        self.word_dim = char_embed_dim + token_embed_dim
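        # Scale factors applied (in the forward pass, not shown here) when the
        # character- and word-level embeddings are concatenated: each part is
        # weighted by the square root of its share of the combined dimension.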
        self.char_scale = math.sqrt(char_embed_dim / self.word_dim)
        self.word_scale = math.sqrt(token_embed_dim / self.word_dim)
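        # If the concatenated char+word dimension differs from the transformer's
        # encoder embedding size, project it with a linear layer.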
        if self.word_dim != args.encoder_embed_dim:
            self.word_to_transformer_embed = fairseq_transformer.Linear(
                self.word_dim, args.encoder_embed_dim
            )

        self.dropout = args.dropout

        self.padding_idx = dictionary.pad()
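        # Positional embeddings over source positions (learned or sinusoidal
        # depending on args); 1024 is the maximum number of positions.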
        self.embed_positions = fairseq_transformer.PositionalEmbedding(
            1024,
            args.encoder_embed_dim,
            self.padding_idx,
            learned=args.encoder_learned_pos,
        )

        self.transformer_encoder_given_embeddings = TransformerEncoderGivenEmbeddings(
            args=args, proj_to_decoder=True
        )

        # Variable tracker
        self.tracker = VariableTracker()
        # Initialize adversarial mode
        self.set_gradient_tracking_mode(False)
        self.set_embed_noising_mode(False)

        # disables sorting and word-length thresholding if True
        # (enables ONNX tracing of length-sorted input with batch_size = 1)
        self.onnx_export_model = False
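
Below is a small self-contained sketch (not part of the original examples) that only illustrates how the char/word scale factors in Example #2 behave; the token embedding size of 512 is a made-up illustrative value.

import math
from ast import literal_eval

convolutions_params = literal_eval("[(128, 3), (128, 5)]")
char_embed_dim = sum(out_dim for (out_dim, _) in convolutions_params)  # 256
token_embed_dim = 512                                                  # illustrative
word_dim = char_embed_dim + token_embed_dim                            # 768
char_scale = math.sqrt(char_embed_dim / word_dim)
word_scale = math.sqrt(token_embed_dim / word_dim)
# char_scale**2 + word_scale**2 == 1.0, so the concatenation keeps roughly the
# same overall scale as a single embedding of dimension word_dim.
print(round(char_scale, 4), round(word_scale, 4))
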
Example #3
class TransformerEncoder(FairseqEncoder):
    """Transformer encoder."""
    def __init__(self, args, dictionary, embed_tokens, proj_to_decoder=True):
        super().__init__(dictionary)
        self.transformer_embedding = TransformerEmbedding(
            args=args, embed_tokens=embed_tokens)

        self.transformer_encoder_given_embeddings = TransformerEncoderGivenEmbeddings(
            args=args, proj_to_decoder=proj_to_decoder)

        # Variable tracker
        self.tracker = VariableTracker()

        # Initialize adversarial mode
        self.set_gradient_tracking_mode(False)
        self.set_embed_noising_mode(False)

    def forward(self, src_tokens, src_lengths):
        # Initialize the tracker to keep track of internal variables
        self.tracker.reset()
        x, encoder_padding_mask, positions = self.transformer_embedding(
            src_tokens=src_tokens, src_lengths=src_lengths)
        # Track token embeddings
        self.tracker.track(x,
                           "token_embeddings",
                           retain_grad=self.track_gradients)

        x = self.transformer_encoder_given_embeddings(
            x=x,
            positions=positions,
            encoder_padding_mask=encoder_padding_mask)

        # TODO(jamesreed): this is kinda a hack because we can't annotate an
        # Optional[Tensor] output for encoder_padding_mask
        if encoder_padding_mask is None:
            encoder_padding_mask = torch.empty([])

        return x, src_tokens, encoder_padding_mask

    def reorder_encoder_out(self, encoder_out, new_order):
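        # Reorder the batch dimension of each encoder output to match
        # `new_order` (used e.g. by beam search). `x` is laid out as
        # (time, batch, channels), so its batch axis is dim 1; the token and
        # padding-mask tensors are batch-first, so they use dim 0.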
        (x, src_tokens, encoder_padding_mask) = encoder_out
        src_tokens_tensor = pytorch_translate_utils.get_source_tokens_tensor(
            src_tokens)
        if x is not None:
            x = x.index_select(1, new_order)
        if src_tokens_tensor is not None:
            src_tokens_tensor = src_tokens_tensor.index_select(0, new_order)
        if encoder_padding_mask.shape == torch.Size([]):
            encoder_padding_mask = None
        if encoder_padding_mask is not None:
            encoder_padding_mask = encoder_padding_mask.index_select(
                0, new_order)
        return (x, src_tokens_tensor, encoder_padding_mask)

    def max_positions(self):
        """Maximum input length supported by the encoder."""
        return self.transformer_embedding.embed_positions.max_positions()

    def upgrade_state_dict_named(self, state_dict, name):
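        # Sinusoidal positional embeddings are computed on the fly rather than
        # stored, so any stale "weights" entry from an older checkpoint is
        # dropped and the dummy `_float_tensor` buffer is (re)created instead.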
        if isinstance(self.transformer_embedding.embed_positions,
                      SinusoidalPositionalEmbedding):
            if f"{name}.transformer_embedding.embed_positions.weights" in state_dict:
                del state_dict[
                    f"{name}.transformer_embedding.embed_positions.weights"]
            state_dict[
                f"{name}.transformer_embedding.embed_positions._float_tensor"] = torch.FloatTensor(
                    1)
        self.transformer_encoder_given_embeddings.upgrade_state_dict_named(
            state_dict, f"{name}.transformer_encoder_given_embeddings")
        return state_dict

    def set_gradient_tracking_mode(self, mode=True):
        self.tracker.reset()
        self.track_gradients = mode

    def set_embed_noising_mode(self, mode=True):
        """This allows adversarial trainer to turn on and off embedding noising
        layers. In regular training, this mode is off, and it is not included
        in forward pass.
        """
        self.embed_noising_mode = mode