Example No. 1
    def __init__(
        self, conv_layers_before=None, input_size=83, hidden_size=512,
        num_layers=1, dropout_in=0.1, dropout_out=0.1, bidirectional=False,
        residual=False, left_pad=False, pretrained_embed=None, padding_value=0.,
    ):
        super().__init__(None)  # no src dictionary
        self.conv_layers_before = conv_layers_before
        self.num_layers = num_layers
        self.dropout_in = dropout_in
        self.dropout_out = dropout_out
        self.bidirectional = bidirectional
        self.hidden_size = hidden_size
        self.residual = residual

        self.lstm = nn.ModuleList([
            LSTM(
                input_size=input_size if layer == 0 else (2 * hidden_size if self.bidirectional else hidden_size),
                hidden_size=hidden_size,
                bidirectional=bidirectional,
            )
            for layer in range(num_layers)
        ])
        self.left_pad = left_pad
        self.padding_value = padding_value

        self.output_units = hidden_size
        if bidirectional:
            self.output_units *= 2
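
The per-layer `input_size` expression above is the key detail: layer 0 consumes the raw feature size, while every later layer consumes the previous layer's output, which is 2 * hidden_size when the LSTMs are bidirectional. Below is a minimal sketch with plain `torch.nn.LSTM` (fairseq's `LSTM` helper wraps it with weight initialization); shapes are illustrative only.

import torch
import torch.nn as nn

input_size, hidden_size, num_layers, bidirectional = 83, 512, 3, True

layers = nn.ModuleList([
    nn.LSTM(
        input_size=input_size if layer == 0 else (2 * hidden_size if bidirectional else hidden_size),
        hidden_size=hidden_size,
        bidirectional=bidirectional,
    )
    for layer in range(num_layers)
])

x = torch.randn(20, 4, input_size)  # (seq_len, batch, features)
for lstm in layers:
    x, _ = lstm(x)                  # each layer's output feeds the next
print(x.shape)                      # torch.Size([20, 4, 1024]) == 2 * hidden_size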
Example No. 2
    def __init__(
        self, conv_layers_before=None, input_size=83, hidden_size=512,
        num_layers=1, dropout_in=0.1, dropout_out=0.1, bidirectional=False,
        residual=False, left_pad=False, padding_value=0., src_bucketed=False,
        max_source_positions=DEFAULT_MAX_SOURCE_POSITIONS,
    ):
        super().__init__(None)  # no src dictionary
        self.conv_layers_before = conv_layers_before
        self.num_layers = num_layers
        self.dropout_in_module = FairseqDropout(dropout_in, module_name=self.__class__.__name__)
        self.dropout_out_module = FairseqDropout(dropout_out, module_name=self.__class__.__name__)
        self.bidirectional = bidirectional
        self.hidden_size = hidden_size
        self.residual = residual
        self.max_source_positions = max_source_positions

        self.lstm = nn.ModuleList([
            LSTM(
                input_size=input_size if layer == 0 else (2 * hidden_size if self.bidirectional else hidden_size),
                hidden_size=hidden_size,
                bidirectional=bidirectional,
            )
            for layer in range(num_layers)
        ])
        self.left_pad = left_pad
        self.padding_value = padding_value
        self.src_bucketed = src_bucketed

        self.output_units = hidden_size
        if bidirectional:
            self.output_units *= 2
Example No. 3
    def __init__(
        self,
        dictionary,
        embed_dim=512,
        hidden_size=512,
        num_layers=1,
        dropout_in=0.1,
        dropout_out=0.1,
        bidirectional=False,
        left_pad_source=True,
        pretrained_embed=None,
        padding_idx=None,
        max_source_positions=DEFAULT_MAX_SOURCE_POSITIONS,
        rnn_type="gru"
    ):
        super().__init__(dictionary)
        self.num_layers = num_layers
        self.dropout_in_module = FairseqDropout(
            dropout_in, module_name=self.__class__.__name__
        )
        self.dropout_out_module = FairseqDropout(
            dropout_out, module_name=self.__class__.__name__
        )
        self.bidirectional = bidirectional
        self.hidden_size = hidden_size
        self.max_source_positions = max_source_positions

        num_embeddings = len(dictionary)
        self.padding_idx = padding_idx if padding_idx is not None else dictionary.pad()
        if pretrained_embed is None:
            self.embed_tokens = torch.nn.Embedding(num_embeddings, embed_dim, self.padding_idx)
        else:
            self.embed_tokens = pretrained_embed

        self.rnn_type = rnn_type
        if rnn_type == "gru":
            self.hidden = GRU(
                input_size=embed_dim,
                hidden_size=hidden_size,
                num_layers=num_layers,
                dropout=self.dropout_out_module.p if num_layers > 1 else 0.0,
                bidirectional=bidirectional,
            )
        elif rnn_type == "lstm":
            self.hidden = LSTM(
                input_size=embed_dim,
                hidden_size=hidden_size,
                num_layers=num_layers,
                dropout=self.dropout_out_module.p if num_layers > 1 else 0.0,
                bidirectional=bidirectional,
            )
        else:
            raise ValueError(f"unsupported rnn_type: {rnn_type}")

        self.left_pad_source = left_pad_source

        self.output_units = hidden_size
        if bidirectional:
            self.bidir_dense = torch.nn.Linear(2, 1)
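
Unlike the other encoders, this one keeps `output_units == hidden_size` even when bidirectional and instead declares `bidir_dense = Linear(2, 1)`. The forward pass is not shown, so the reshaping below is an assumption about how such a layer could mix the two directions back down to `hidden_size`.

import torch
import torch.nn as nn

hidden_size, seq_len, batch = 512, 20, 4
bidir_dense = nn.Linear(2, 1)

rnn_out = torch.randn(seq_len, batch, 2 * hidden_size)  # bidirectional GRU/LSTM output
h = rnn_out.view(seq_len, batch, 2, hidden_size)        # split forward/backward halves
mixed = bidir_dense(h.transpose(-1, -2)).squeeze(-1)    # learnable mix over the direction axis
print(mixed.shape)                                      # torch.Size([20, 4, 512])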
Example No. 4
    def __init__(
        self, dictionary, embed_dim, hidden_size=512,
        bidirectional=True, num_layers=2, no_token_rnn=False,
    ):
        super().__init__(dictionary)
        self.need_rnn = not no_token_rnn
        self.hidden_size = hidden_size
        self.bidirectional = bidirectional
        self.num_layers = num_layers

        self.rnn = LSTM(
            input_size=embed_dim,
            hidden_size=self.hidden_size,
            num_layers=self.num_layers,
            bidirectional=self.bidirectional,
        ) if self.need_rnn else None

        # When the token RNN is disabled, the classifier reads the raw embeddings instead.
        classifier_in = self.hidden_size if self.need_rnn else embed_dim
        self.classifier = nn.Linear(classifier_in, len(dictionary))
Example No. 5
    def __init__(
        self,
        dictionary,
        embed_dim=512,
        hidden_size=512,
        num_layers=1,
        dropout_in=0.1,
        dropout_out=0.1,
        bidirectional=False,
        left_pad=True,
        pretrained_embed=None,
        padding_value=0.,
    ):
        super().__init__(dictionary)
        self.num_layers = num_layers
        self.dropout_in = dropout_in
        self.dropout_out = dropout_out
        self.bidirectional = bidirectional
        self.hidden_size = hidden_size

        num_embeddings1 = len(dictionary[0])
        num_embeddings2 = len(dictionary[1])
        self.padding_idx_1 = dictionary[0].pad()
        self.padding_idx_2 = dictionary[1].pad()
        if pretrained_embed is None:
            self.embed_tokens_1 = Embedding(num_embeddings1, embed_dim,
                                            self.padding_idx_1)
            if bidirectional:
                self.embed_tokens_2 = Embedding(num_embeddings2, 2 * embed_dim,
                                                self.padding_idx_2)
            else:
                self.embed_tokens_2 = Embedding(num_embeddings2, embed_dim,
                                                self.padding_idx_2)
        else:
            self.embed_tokens_1, self.embed_tokens_2 = pretrained_embed

        self.lstm1 = LSTM(
            input_size=embed_dim,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=self.dropout_out if num_layers > 1 else 0.,
            bidirectional=bidirectional,
        )
        # self.lstm2 = LSTM(
        #     input_size=embed_dim,
        #     hidden_size=hidden_size,
        #     num_layers=num_layers,
        #     dropout=self.dropout_out if num_layers > 1 else 0.,
        #     bidirectional=bidirectional,
        # )
        # The second stream's embedding width must match embed_tokens_2 above.
        fconv_embed_dim = 2 * embed_dim if self.bidirectional else embed_dim
        self.fconv2 = FConvEncoder(
            dictionary[1],
            fconv_embed_dim,
            convolutions=[(512, 3)] * 15,
            dropout=dropout_in,
            left_pad=left_pad,
        )
        self.fconv2.num_attention_layers = 1
        self.left_pad = left_pad
        self.padding_value = padding_value

        self.output_units = hidden_size
        if bidirectional:
            self.output_units *= 2
Example No. 6
    def __init__(
        self,
        pre_encoder=None,
        input_size=83,
        hidden_size=512,
        num_layers=1,
        dropout_in=0.1,
        dropout_out=0.1,
        bidirectional=False,
        residual=False,
        left_pad=False,
        padding_value=0.0,
        src_bucketed=False,
        max_source_positions=DEFAULT_MAX_SOURCE_POSITIONS,
        multilayer_rnn_as_single_module=False,
    ):
        super().__init__(None)  # no src dictionary
        self.pre_encoder = pre_encoder
        self.num_layers = num_layers
        self.dropout_in_module = FairseqDropout(
            dropout_in * 1.0, module_name=self.__class__.__name__
        )
        self.dropout_out_module = FairseqDropout(
            dropout_out * 1.0, module_name=self.__class__.__name__
        )
        self.bidirectional = bidirectional
        self.hidden_size = hidden_size
        self.residual = residual
        self.max_source_positions = max_source_positions

        # enforce deterministic behavior (https://pytorch.org/docs/stable/generated/torch.nn.LSTM.html)
        os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":16:8"
        self.multilayer_rnn_as_single_module = multilayer_rnn_as_single_module
        if self.multilayer_rnn_as_single_module:
            self.lstm = LSTM(
                input_size=input_size,
                hidden_size=hidden_size,
                num_layers=num_layers,
                dropout=self.dropout_out_module.p if num_layers > 1 else 0.0,
                bidirectional=bidirectional,
            )
        else:
            self.lstm = nn.ModuleList(
                [
                    LSTM(
                        input_size=input_size
                        if layer == 0
                        else 2 * hidden_size
                        if self.bidirectional
                        else hidden_size,
                        hidden_size=hidden_size,
                        bidirectional=bidirectional,
                    )
                    for layer in range(num_layers)
                ]
            )
        self.left_pad = left_pad
        self.padding_value = padding_value
        self.src_bucketed = src_bucketed

        self.output_units = hidden_size
        if bidirectional:
            self.output_units *= 2
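
The `multilayer_rnn_as_single_module` flag trades flexibility for speed: a single multi-layer `nn.LSTM` gives cuDNN a fused stack with built-in inter-layer dropout, while the `ModuleList` of single-layer LSTMs lets the forward pass (not shown) insert dropout and residual connections between layers itself. A minimal sketch of the fused variant with plain `torch.nn.LSTM`:

import torch.nn as nn

input_size, hidden_size, num_layers, bidirectional, dropout_out = 83, 512, 3, True, 0.1

# Single fused module: cuDNN handles the stack and applies dropout between its layers.
fused = nn.LSTM(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    dropout=dropout_out if num_layers > 1 else 0.0,
    bidirectional=bidirectional,
)
# The ModuleList branch is the same pattern as in Example No. 1: one single-layer
# LSTM per layer, so per-layer dropout and residuals can be applied by hand.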
Example No. 7
    def __init__(self,
                 dictionary,
                 embed_tokens,
                 embed_dim=512,
                 num_layers=1,
                 dropout_in=0.1,
                 dropout_out=0.1,
                 bidirectional=False,
                 left_pad=False,
                 padding_value=0.,
                 adaptive_softmax=False,
                 adaptive_softmax_cutoff=[],
                 adaptive_softmax_dropout=0.1,
                 adaptive_softmax_factor=None):
        super(LSTMTaggerDecoder, self).__init__(dictionary=dictionary)

        if hasattr(embed_tokens, "embedded_dim"):
            self.in_embed_dim = embed_tokens.embedded_dim
        elif hasattr(embed_tokens, "embed_dim"):
            self.in_embed_dim = embed_tokens.embed_dim
        elif hasattr(embed_tokens, "embedding_dim"):
            self.in_embed_dim = embed_tokens.embedding_dim
        else:
            raise ValueError("embed_tokens must expose embedded_dim, embed_dim, or embedding_dim")
        self.output_units = self.embed_dim = embed_dim
        self.out_embed_dim = len(dictionary)

        self.num_layers = num_layers
        self.dropout_in = dropout_in
        self.dropout_out = dropout_out

        self.bidirectional = bidirectional
        # output_units is not doubled for the bidirectional case here;
        # the usual `self.output_units *= 2` is left disabled.

        self.padding_idx = dictionary.pad()
        self.padding_value = padding_value
        self.left_pad = left_pad

        self.embed_tokens = embed_tokens

        self.fc_in = self.fc_out1 = self.fc_out2 = None
        if self.in_embed_dim != self.embed_dim:
            self.fc_in = Linear(self.in_embed_dim, self.embed_dim)
        if self.output_units != self.embed_dim:
            self.fc_out1 = Linear(self.output_units, self.embed_dim)
        if self.embed_dim != self.out_embed_dim:
            self.fc_out2 = Linear(self.embed_dim, self.out_embed_dim)

        self.lstm = LSTM(
            input_size=embed_dim,
            hidden_size=embed_dim,
            num_layers=num_layers,
            dropout=self.dropout_out if num_layers > 1 else 0.,
            bidirectional=bidirectional,
        )

        self.adaptive_softmax = None

        if adaptive_softmax:
            self.adaptive_softmax = AdaptiveSoftmax(
                len(dictionary),
                self.embed_dim,
                adaptive_softmax_cutoff,
                dropout=adaptive_softmax_dropout,
                adaptive_inputs=None,
                factor=adaptive_softmax_factor,
                tie_proj=False,
            )
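
For reference, PyTorch ships a built-in counterpart to fairseq's `AdaptiveSoftmax`, `torch.nn.AdaptiveLogSoftmaxWithLoss`; the interface differs, and the vocabulary size and cutoffs below are purely illustrative.

import torch
import torch.nn as nn

vocab_size, embed_dim = 50000, 512
asm = nn.AdaptiveLogSoftmaxWithLoss(
    in_features=embed_dim,
    n_classes=vocab_size,
    cutoffs=[1000, 10000],           # frequent tokens go to the full head, rare ones to smaller tails
)

hidden = torch.randn(32, embed_dim)              # one decoder state per target token
target = torch.randint(0, vocab_size, (32,))
out = asm(hidden, target)                        # namedtuple with .output (target log-probs) and .loss
print(out.output.shape, float(out.loss))         # torch.Size([32]) and a scalar NLL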
Example No. 8
    def __init__(
        self,
        dictionary,
        rnn_type="lstm",
        embed_dim=512,
        hidden_size=512,
        out_embed_dim=512,
        num_layers=1,
        dropout_in=0.1,
        dropout_out=0.1,
        attention_type="luong-dot",
        encoder_output_units=512,
        pretrained_embed=None,
        share_input_output_embed=False,
        adaptive_softmax_cutoff=None,
        max_target_positions=DEFAULT_MAX_TARGET_POSITIONS,
        residuals=False,
    ):
        super().__init__(dictionary)
        self.dropout_in_module = FairseqDropout(
            dropout_in, module_name=self.__class__.__name__
        )
        self.dropout_out_module = FairseqDropout(
            dropout_out, module_name=self.__class__.__name__
        )
        self.hidden_size = hidden_size
        self.share_input_output_embed = share_input_output_embed
        self.need_attn = True
        self.max_target_positions = max_target_positions
        self.residuals = residuals
        self.num_layers = num_layers

        self.adaptive_softmax = None
        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        if pretrained_embed is None:
            self.embed_tokens = torch.nn.Embedding(num_embeddings, embed_dim, padding_idx)
        else:
            self.embed_tokens = pretrained_embed

        self.encoder_output_units = encoder_output_units
        if encoder_output_units != hidden_size and encoder_output_units != 0:
            self.encoder_hidden_proj = torch.nn.Linear(encoder_output_units, hidden_size)
            self.encoder_cell_proj = torch.nn.Linear(encoder_output_units, hidden_size)
        else:
            self.encoder_hidden_proj = self.encoder_cell_proj = None

        # input feeding is described in arxiv.org/abs/1508.04025
        input_feed_size = 0 if encoder_output_units == 0 else hidden_size
        # For Bahdanau, we compute the context on the input feed
        bahd_factor = (
            hidden_size
            if attention_type in ["bahdanau-dot", "bahdanau-concat", "bahdanau-general", "bahdanau"]
            else 0
        )
        self.rnn_type = rnn_type
        if rnn_type == "lstm":
            self.layers = LSTM(
                input_size=input_feed_size + embed_dim + bahd_factor,
                hidden_size=hidden_size,
                num_layers=num_layers
            )
        else:
            self.layers = GRU(
                input_size=input_feed_size + embed_dim + bahd_factor,
                hidden_size=hidden_size,
                num_layers=num_layers
            )

        if attention_type == "none":
            self.attention_type = "none"
            self.attention = None
        else:
            self.attention_type = attention_type
            self.attention = Attention(self.attention_type, hidden_size)

        if hidden_size != out_embed_dim:
            self.additional_fc = torch.nn.Linear(hidden_size, out_embed_dim)

        if adaptive_softmax_cutoff is not None:
            # setting adaptive_softmax dropout to dropout_out for now but can be redefined
            self.adaptive_softmax = AdaptiveSoftmax(
                num_embeddings,
                hidden_size,
                adaptive_softmax_cutoff,
                dropout=dropout_out,
            )
        elif not self.share_input_output_embed:
            self.fc_out = Linear(out_embed_dim, num_embeddings, dropout=dropout_out)
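
The RNN's `input_size` above is assembled from three pieces: the previous-token embedding, the input-feed vector (the previous attention output, per arxiv.org/abs/1508.04025), and, for the Bahdanau variants only, an extra context of width `hidden_size`. A hedged single-step sketch with plain `torch.nn.LSTM` and made-up tensors:

import torch
import torch.nn as nn

embed_dim, hidden_size, encoder_output_units = 512, 512, 512
attention_type = "bahdanau"

input_feed_size = 0 if encoder_output_units == 0 else hidden_size
bahd_factor = hidden_size if attention_type.startswith("bahdanau") else 0

rnn = nn.LSTM(
    input_size=input_feed_size + embed_dim + bahd_factor,
    hidden_size=hidden_size,
    num_layers=1,
)

# One decoding step: concatenate the pieces along the feature dimension.
tok_embed = torch.randn(1, 4, embed_dim)         # embedding of the previous target token
input_feed = torch.randn(1, 4, input_feed_size)  # previous attention output
bahd_ctx = torch.randn(1, 4, bahd_factor)        # width 0 when attention is not Bahdanau-style
step_in = torch.cat([tok_embed, input_feed, bahd_ctx], dim=-1)
out, _ = rnn(step_in)
print(out.shape)                                 # torch.Size([1, 4, 512])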