Example #1
    def __init__(self, cfg, encoder, decoder):
        super().__init__()

        self.encoder = encoder
        self.decoder = decoder

        check_type(self.encoder, FairseqEncoder)
        check_type(self.decoder, FairseqDecoder)

        self.proj_encoder = Linear(cfg.encoder.embed_dim, cfg.joint_dim)
        self.laynorm_proj_encoder = LayerNorm(cfg.joint_dim, export=cfg.export)
        self.proj_decoder = Linear(cfg.decoder.hidden_size, cfg.joint_dim)
        self.laynorm_proj_decoder = LayerNorm(cfg.joint_dim, export=cfg.export)
        assert hasattr(self.decoder, "embed_tokens")
        if cfg.share_decoder_input_output_embed:
            assert (
                cfg.joint_dim == cfg.decoder.embed_dim
            ), "joint_dim and decoder.embed_dim must be the same if the two embeddings are to be shared"
            self.fc_out = nn.Linear(
                self.decoder.embed_tokens.embedding_dim,
                self.decoder.embed_tokens.num_embeddings,
                bias=False,
            )
            self.fc_out.weight = self.decoder.embed_tokens.weight
        else:
            self.fc_out = nn.Linear(cfg.joint_dim,
                                    self.decoder.embed_tokens.num_embeddings,
                                    bias=False)
            nn.init.normal_(self.fc_out.weight,
                            mean=0,
                            std=cfg.joint_dim**-0.5)
            self.fc_out = nn.utils.weight_norm(self.fc_out, name="weight")

        self.cfg = cfg
        self.num_updates = 0
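
The tied branch above shares the decoder embedding matrix with the output projection, which is why joint_dim must equal decoder.embed_dim. A minimal standalone sketch (hypothetical sizes, not part of the original model) of that weight tying:

import torch
import torch.nn as nn

vocab_size, embed_dim = 1000, 512          # hypothetical sizes for illustration
embed_tokens = nn.Embedding(vocab_size, embed_dim)
fc_out = nn.Linear(embed_dim, vocab_size, bias=False)
fc_out.weight = embed_tokens.weight        # shared Parameter, not a copy

joint = torch.randn(8, embed_dim)          # joint features must match embed_dim
logits = fc_out(joint)                     # (8, vocab_size): scores against the shared embeddings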
Example #2
    def __init__(self,
                 dictionary,
                 lang_dictionary,
                 embedding,
                 lang_embedding,
                 embed_dim=512,
                 hidden_size=512,
                 out_embed_dim=512,
                 num_layers=1,
                 dropout_in=0.1,
                 dropout_out=0.1,
                 attention=True,
                 encoder_output_units=1024,
                 pretrained_embed=None,
                 share_input_output_embed=False,
                 adaptive_softmax_cutoff=None,
                 lang_embedding_size=32):
        super().__init__(dictionary)
        self.dropout_in = dropout_in
        self.dropout_out = dropout_out
        self.hidden_size = hidden_size
        self.share_input_output_embed = share_input_output_embed
        self.lang_embeddings_size = lang_embedding_size
        self.lang_dictionary = lang_dictionary
        self.embed_langs = lang_embedding

        self.adaptive_softmax = None
        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        self.embed_tokens = embedding

        self.encoder_output_units = encoder_output_units

        self.encoder_hidden_proj = Linear(encoder_output_units, hidden_size)
        self.encoder_cell_proj = Linear(encoder_output_units, hidden_size)

        self.layers = nn.ModuleList([
            LSTMCell(
                input_size=(hidden_size + embed_dim +
                            self.lang_embeddings_size +
                            self.encoder_output_units) if layer == 0 else hidden_size,
                hidden_size=hidden_size,
            ) for layer in range(num_layers)
        ])

        self.attention = None
        if hidden_size != out_embed_dim:
            self.additional_fc = Linear(hidden_size, out_embed_dim)
        if adaptive_softmax_cutoff is not None:
            # setting adaptive_softmax dropout to dropout_out for now but can be redefined
            self.adaptive_softmax = AdaptiveSoftmax(num_embeddings,
                                                    hidden_size,
                                                    adaptive_softmax_cutoff,
                                                    dropout=dropout_out)
        elif not self.share_input_output_embed:
            self.fc_out = Linear(out_embed_dim,
                                 num_embeddings,
                                 dropout=dropout_out)
Example #3
    def __init__(
        self, dictionary, embed_dim=512, hidden_size=512, out_embed_dim=512,
        num_layers=1, dropout_in=0.1, dropout_out=0.1, encoder_output_units=0,
        attn_type=None, attn_dim=0, need_attn=False, residual=False, pretrained_embed=None,
        share_input_output_embed=False, adaptive_softmax_cutoff=None,
    ):
        super().__init__(dictionary)
        self.dropout_in = dropout_in
        self.dropout_out = dropout_out
        self.hidden_size = hidden_size
        self.share_input_output_embed = share_input_output_embed
        if attn_type is None or attn_type.lower() == 'none':
            # no attention, no encoder output needed (language model case)
            need_attn = False
            encoder_output_units = 0
        self.need_attn = need_attn
        self.residual = residual

        self.adaptive_softmax = None
        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        if pretrained_embed is None:
            self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
        else:
            self.embed_tokens = pretrained_embed

        self.encoder_output_units = encoder_output_units

        self.layers = nn.ModuleList([
            LSTMCell(
                input_size=encoder_output_units + (embed_dim if layer == 0 else hidden_size),
                hidden_size=hidden_size,
            )
            for layer in range(num_layers)
        ])
        if attn_type is None or attn_type.lower() == 'none':
            self.attention = None
        elif attn_type.lower() == 'bahdanau':
            self.attention = speech_attention.BahdanauAttention(
                hidden_size, encoder_output_units, attn_dim,
            )
        elif attn_type.lower() == 'luong':
            self.attention = speech_attention.LuongAttention(
                hidden_size, encoder_output_units,
            )
        else:
            raise ValueError('unrecognized attention type.')
        if hidden_size + encoder_output_units != out_embed_dim:
            self.additional_fc = Linear(hidden_size + encoder_output_units, out_embed_dim)
        if adaptive_softmax_cutoff is not None:
            # setting adaptive_softmax dropout to dropout_out for now but can be redefined
            self.adaptive_softmax = AdaptiveSoftmax(num_embeddings, hidden_size, adaptive_softmax_cutoff,
                                                    dropout=dropout_out)
        elif not self.share_input_output_embed:
            self.fc_out = Linear(out_embed_dim, num_embeddings, dropout=dropout_out)
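
For reference, the two attention families selected above differ only in how the score between the decoder state (query) and an encoder output (key) is computed. A generic, hedged sketch of the scoring functions (not the espresso speech_attention implementation itself; names and tensor layout are assumptions):

import torch
import torch.nn as nn

class BahdanauScore(nn.Module):
    # additive attention: v^T tanh(W_q q + W_k k)
    def __init__(self, query_dim, key_dim, attn_dim):
        super().__init__()
        self.query_proj = nn.Linear(query_dim, attn_dim, bias=False)
        self.key_proj = nn.Linear(key_dim, attn_dim, bias=False)
        self.v = nn.Linear(attn_dim, 1, bias=False)

    def forward(self, query, keys):
        # query: bsz x query_dim, keys: srclen x bsz x key_dim -> scores: srclen x bsz
        return self.v(torch.tanh(self.query_proj(query).unsqueeze(0) + self.key_proj(keys))).squeeze(2)

class LuongScore(nn.Module):
    # multiplicative ("general") attention: q^T W k
    def __init__(self, query_dim, key_dim):
        super().__init__()
        self.proj = nn.Linear(query_dim, key_dim, bias=False)

    def forward(self, query, keys):
        return (keys * self.proj(query).unsqueeze(0)).sum(dim=2)  # srclen x bsz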
Example #4
    def __init__(
        self,
        dictionary: Dictionary,
        embed_dim: int = 512,
        hidden_size: int = 512,
        out_embed_dim: int = 512,
        num_layers: int = 1,
        dropout_in: float = 0.1,
        dropout_out: float = 0.1,
        attention: bool = True,
        encoder_embed_dim: int = 512,
        encoder_output_units: int = 512,
        pretrained_embed: Optional[nn.Embedding] = None,
        share_input_output_embed: bool = False,
        adaptive_softmax_cutoff: Optional[int] = None,
    ):
        super().__init__(dictionary)
        self.dropout_in = dropout_in
        self.dropout_out = dropout_out
        self.hidden_size = hidden_size
        self.share_input_output_embed = share_input_output_embed
        self.need_attn = True

        self.adaptive_softmax = None
        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        if pretrained_embed is None:
            self.embed_tokens = Embedding(num_embeddings, embed_dim,
                                          padding_idx)
        else:
            self.embed_tokens = pretrained_embed

        self.encoder_output_units = encoder_output_units

        self.layers = nn.ModuleList([
            LSTMCell(
                input_size=(hidden_size + embed_dim) if layer == 0 else hidden_size,
                hidden_size=hidden_size,
            ) for layer in range(num_layers)
        ])
        self.attention = AttentionLayer(hidden_size, encoder_output_units,
                                        hidden_size) if attention else None
        if hidden_size != out_embed_dim:
            self.additional_fc = Linear(hidden_size, out_embed_dim)
        if adaptive_softmax_cutoff is not None:
            # setting adaptive_softmax dropout to dropout_out for now but can be redefined
            self.adaptive_softmax = AdaptiveSoftmax(num_embeddings,
                                                    embed_dim,
                                                    adaptive_softmax_cutoff,
                                                    dropout=dropout_out)
        elif not self.share_input_output_embed:
            self.fc_out = Linear(out_embed_dim,
                                 num_embeddings,
                                 dropout=dropout_out)
Example #5
    def __init__(self,
                 input_embed_dim,
                 source_embed_dim,
                 output_embed_dim,
                 bias=False):
        super().__init__()

        self.input_proj = Linear(input_embed_dim, source_embed_dim, bias=bias)
        self.output_proj = Linear(input_embed_dim + source_embed_dim,
                                  output_embed_dim,
                                  bias=bias)
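
A hedged sketch of the forward pass this constructor implies, mirroring fairseq's LSTM-decoder AttentionLayer; the srclen x bsz x dim layout and the masking step are assumptions carried over from that implementation:

import torch
import torch.nn.functional as F

def attention_forward(self, input, source_hids, encoder_padding_mask=None):
    # input: bsz x input_embed_dim (current decoder state)
    # source_hids: srclen x bsz x source_embed_dim (encoder outputs)
    x = self.input_proj(input)                                     # bsz x source_embed_dim
    attn_scores = (source_hids * x.unsqueeze(0)).sum(dim=2)        # dot-product scores, srclen x bsz
    if encoder_padding_mask is not None:
        attn_scores = attn_scores.float().masked_fill_(
            encoder_padding_mask, float("-inf")).type_as(attn_scores)  # ignore padded source positions
    attn_scores = F.softmax(attn_scores, dim=0)                    # normalize over source length
    context = (attn_scores.unsqueeze(2) * source_hids).sum(dim=0)  # bsz x source_embed_dim
    out = torch.tanh(self.output_proj(torch.cat((context, input), dim=1)))
    return out, attn_scores                                        # out: bsz x output_embed_dim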
Example #6
    def __init__(self,
                 dictionary,
                 lang_dictionary,
                 embed_dim=512,
                 hidden_size=512,
                 out_embed_dim=512,
                 num_layers=1,
                 dropout_in=0.1,
                 dropout_out=0.1,
                 attention=True,
                 encoder_output_units=512,
                 pretrained_embed=None,
                 share_input_output_embed=False,
                 adaptive_softmax_cutoff=None,
                 lang_embedding_size=32):
        super().__init__(dictionary)
        self.dropout_in = dropout_in
        self.dropout_out = dropout_out
        self.hidden_size = hidden_size
        self.share_input_output_embed = share_input_output_embed
        self.lang_embedding_size = lang_embedding_size
        self.lang_dictionary = lang_dictionary
        self.embed_langs = nn.Embedding(len(lang_dictionary),
                                        lang_embedding_size)
        self.need_attn = False

        self.adaptive_softmax = None
        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)

        self.encoder_output_units = encoder_output_units
        self.encoder_hidden_proj = Linear(encoder_output_units, hidden_size)
        self.encoder_cell_proj = Linear(encoder_output_units, hidden_size)

        input_size = hidden_size + embed_dim + lang_embedding_size + encoder_output_units
        self.layers = nn.ModuleList([
            LSTMCell(
                input_size=input_size if layer == 0 else hidden_size,
                hidden_size=hidden_size,
            ) for layer in range(num_layers)
        ])

        self.attention = None
        if hidden_size != out_embed_dim:
            self.additional_fc = Linear(hidden_size, out_embed_dim)
        if not self.share_input_output_embed:
            self.fc_out = Linear(out_embed_dim,
                                 num_embeddings,
                                 dropout=dropout_out)
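
The input_size expression above implies the first LSTMCell consumes four concatenated feature blocks per step. A minimal sketch with hypothetical tensors (the concatenation order is an assumption; only the total width is dictated by the constructor):

import torch

bsz = 4
hidden_size, embed_dim, lang_embedding_size, encoder_output_units = 512, 512, 32, 512

input_feed = torch.zeros(bsz, hidden_size)            # previous hidden/attentional output
token_emb = torch.randn(bsz, embed_dim)               # current target token embedding
lang_emb = torch.randn(bsz, lang_embedding_size)      # target-language embedding
enc_out = torch.randn(bsz, encoder_output_units)      # encoder output fed at each step

first_layer_input = torch.cat((input_feed, token_emb, lang_emb, enc_out), dim=1)
assert first_layer_input.size(1) == hidden_size + embed_dim + lang_embedding_size + encoder_output_units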
Example #7
    def __init__(
        self, conv_layers_before=None, input_size=83, hidden_size=512,
        num_layers=1, dropout_in=0.1, dropout_out=0.1, bidirectional=False,
        residual=False, left_pad=False, padding_value=0.,
        num_targets=None, chunk_width=20, chunk_left_context=0, training_stage=True,
        max_source_positions=DEFAULT_MAX_SOURCE_POSITIONS,
    ):
        super().__init__(
            conv_layers_before=conv_layers_before, input_size=input_size, hidden_size=hidden_size,
            num_layers=num_layers, dropout_in=dropout_in, dropout_out=dropout_out,
            bidirectional=bidirectional, residual=residual, left_pad=left_pad,
            padding_value=padding_value, max_source_positions=max_source_positions,
        )
        receptive_field_radius = sum(conv.padding[0] for conv in conv_layers_before.convolutions) \
            if conv_layers_before is not None else 0
        assert chunk_width is None or chunk_width > 0
        assert (conv_layers_before is None and chunk_left_context >= 0) or \
            (conv_layers_before is not None and chunk_left_context >= receptive_field_radius)
        self.out_chunk_begin = self.output_lengths(chunk_left_context + 1) - 1
        self.out_chunk_end = self.output_lengths(chunk_left_context + chunk_width) \
            if chunk_width is not None else None
        self.training_stage = training_stage

        # only for encoder-only model
        self.fc_out = Linear(self.output_units, num_targets, dropout=dropout_out) \
            if num_targets is not None else None
Example #8
    def __init__(
        self, input_size, output_size, hidden_sizes=256, kernel_sizes=3, strides=1,
        dilations=3, num_layers=1, dropout_in=0.0, dropout_out=0.0, residual=False,
        chunk_width=None, chunk_left_context=0, training_stage=True,
    ):
        super().__init__(None)  # no src dictionary
        self.num_layers = num_layers
        if isinstance(hidden_sizes, int):
            hidden_sizes = [hidden_sizes] * num_layers
        else:
            assert len(hidden_sizes) == num_layers
        if isinstance(kernel_sizes, int):
            kernel_sizes = [kernel_sizes] * num_layers
        else:
            assert len(kernel_sizes) == num_layers
        if isinstance(strides, int):
            strides = [strides] * num_layers
        else:
            assert len(strides) == num_layers
        if isinstance(dilations, int):
            dilations = [dilations] * num_layers
        else:
            assert len(dilations) == num_layers
        self.dropout_in_module = FairseqDropout(dropout_in, module_name=self.__class__.__name__)
        self.dropout_out_module = FairseqDropout(dropout_out, module_name=self.__class__.__name__)
        self.residual = residual

        self.tdnn = nn.ModuleList([
            TdnnBNReLU(
                in_channels=input_size if layer == 0 else hidden_sizes[layer - 1],
                out_channels=hidden_sizes[layer], kernel_size=kernel_sizes[layer],
                stride=strides[layer], dilation=dilations[layer],
            )
            for layer in range(num_layers)
        ])

        receptive_field_radius = sum(layer.padding for layer in self.tdnn)
        assert chunk_width is None or (chunk_width > 0 and chunk_left_context >= receptive_field_radius)
        if (
            chunk_width is not None and chunk_width > 0
            and chunk_left_context > receptive_field_radius
        ):
            logger.warning("chunk_{{left,right}}_context can be reduced to {}".format(receptive_field_radius))
        self.out_chunk_begin = self.output_lengths(chunk_left_context + 1) - 1
        self.out_chunk_end = self.output_lengths(chunk_left_context + chunk_width) \
            if chunk_width is not None else None
        self.training_stage = training_stage

        self.fc_out = Linear(hidden_sizes[-1], output_size, dropout=self.dropout_out_module.p)
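
The four isinstance/assert blocks at the top of this constructor repeat one pattern: broadcast a scalar hyperparameter to a per-layer list, or validate a provided list's length. A hypothetical helper (not part of the original code) that captures it once:

def per_layer(value, num_layers, name="value"):
    """Broadcast an int to num_layers entries, or validate a provided sequence."""
    if isinstance(value, int):
        return [value] * num_layers
    assert len(value) == num_layers, f"{name} needs {num_layers} entries, got {len(value)}"
    return list(value)

# e.g. hidden_sizes = per_layer(hidden_sizes, num_layers, "hidden_sizes")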
Example #9
    def __init__(
        self,
        dictionary,
        embed_dim=512,
        hidden_size=512,
        out_embed_dim=512,
        num_layers=1,
        dropout_in=0.1,
        dropout_out=0.1,
        attention=True,
        encoder_embed_dim=512,
        encoder_output_units=512,
        pretrained_embed=None,
        share_input_output_embed=False,
        adaptive_softmax_cutoff=None,
    ):
        super().__init__(dictionary)
        self.dropout_in = dropout_in
        self.dropout_out = dropout_out
        self.hidden_size = hidden_size
        self.share_input_output_embed = share_input_output_embed
        self.need_attn = True

        self.adaptive_softmax = None
        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        if pretrained_embed is None:
            self.embed_tokens = Embedding(num_embeddings, embed_dim,
                                          padding_idx)
        else:
            self.embed_tokens = pretrained_embed

        self.encoder_output_units = encoder_output_units
        assert encoder_output_units == hidden_size, \
            'encoder_output_units ({}) != hidden_size ({})'.format(encoder_output_units, hidden_size)
        # TODO another Linear layer if not equal

        self.layers = nn.ModuleList([
            LSTMCell(
                input_size=(encoder_output_units + embed_dim) if layer == 0 else hidden_size,
                hidden_size=hidden_size,
            ) for layer in range(num_layers)
        ])
        self.attention_1 = AttentionLayer(encoder_output_units,
                                          hidden_size) if attention else None
        self.attention_2 = AttentionLayer(encoder_output_units,
                                          hidden_size) if attention else None
        # self.attention_combine_fc = Linear(2 * hidden_size, hidden_size)
        if hidden_size != out_embed_dim:
            self.additional_fc = Linear(hidden_size, out_embed_dim)
        if adaptive_softmax_cutoff is not None:
            # setting adaptive_softmax dropout to dropout_out for now but can be redefined
            self.adaptive_softmax = AdaptiveSoftmax(num_embeddings,
                                                    embed_dim,
                                                    adaptive_softmax_cutoff,
                                                    dropout=dropout_out)
        elif not self.share_input_output_embed:
            self.fc_out = Linear(out_embed_dim,
                                 num_embeddings,
                                 dropout=dropout_out)
Example #10
    def __init__(
        self,
        dictionary,
        embed_dim=512,
        hidden_size=512,
        out_embed_dim=512,
        num_layers=1,
        dropout_in=0.1,
        dropout_out=0.1,
        encoder_output_units=0,
        attn_type=None,
        attn_dim=0,
        need_attn=False,
        residual=False,
        pretrained_embed=None,
        share_input_output_embed=False,
        adaptive_softmax_cutoff=None,
        max_target_positions=DEFAULT_MAX_TARGET_POSITIONS,
        scheduled_sampling_rate_scheduler=None,
    ):
        super().__init__(dictionary)
        self.dropout_in_module = FairseqDropout(
            dropout_in * 1.0, module_name=self.__class__.__name__
        )
        self.dropout_out_module = FairseqDropout(
            dropout_out * 1.0, module_name=self.__class__.__name__
        )
        self.hidden_size = hidden_size
        self.share_input_output_embed = share_input_output_embed
        if attn_type is None or str(attn_type).lower() == "none":
            # no attention, no encoder output needed (language model case)
            need_attn = False
            encoder_output_units = 0
        self.need_attn = need_attn
        self.residual = residual
        self.max_target_positions = max_target_positions
        self.num_layers = num_layers

        self.adaptive_softmax = None
        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        if pretrained_embed is None:
            self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
        else:
            self.embed_tokens = pretrained_embed

        self.encoder_output_units = encoder_output_units

        self.layers = nn.ModuleList(
            [
                LSTMCell(
                    input_size=encoder_output_units
                    + (embed_dim if layer == 0 else hidden_size),
                    hidden_size=hidden_size,
                )
                for layer in range(num_layers)
            ]
        )

        if attn_type is None or str(attn_type).lower() == "none":
            self.attention = None
        elif str(attn_type).lower() == "bahdanau":
            self.attention = speech_attention.BahdanauAttention(
                hidden_size,
                encoder_output_units,
                attn_dim,
            )
        elif str(attn_type).lower() == "luong":
            self.attention = speech_attention.LuongAttention(
                hidden_size,
                encoder_output_units,
            )
        else:
            raise ValueError("unrecognized attention type.")

        if hidden_size + encoder_output_units != out_embed_dim:
            self.additional_fc = Linear(
                hidden_size + encoder_output_units, out_embed_dim
            )

        if adaptive_softmax_cutoff is not None:
            # setting adaptive_softmax dropout to dropout_out for now but can be redefined
            self.adaptive_softmax = AdaptiveSoftmax(
                num_embeddings,
                hidden_size,
                adaptive_softmax_cutoff,
                dropout=dropout_out,
            )
        elif not self.share_input_output_embed:
            self.fc_out = Linear(out_embed_dim, num_embeddings, dropout=dropout_out)

        self.scheduled_sampling_rate_scheduler = scheduled_sampling_rate_scheduler
Example #11
    def __init__(self,
                 dictionary,
                 embed_dim=512,
                 hidden_size=512,
                 out_embed_dim=512,
                 num_layers=1,
                 dropout_in=0.1,
                 dropout_out=0.1,
                 attention=True,
                 encoder_output_units=512,
                 pretrained_embed=None,
                 share_input_output_embed=False,
                 adaptive_softmax_cutoff=None,
                 use_scratchpad=False,
                 residual=False):
        super().__init__(dictionary)
        self.dropout_in = dropout_in
        self.dropout_out = dropout_out
        self.hidden_size = hidden_size
        self.share_input_output_embed = share_input_output_embed
        self.need_attn = True
        self.use_scratchpad = use_scratchpad
        self.residual = residual

        self.adaptive_softmax = None
        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        if pretrained_embed is None:
            self.embed_tokens = Embedding(num_embeddings, embed_dim,
                                          padding_idx)
        else:
            self.embed_tokens = pretrained_embed

        self.encoder_output_units = encoder_output_units
        if encoder_output_units != hidden_size:
            self.encoder_hidden_proj = Linear(encoder_output_units,
                                              hidden_size)
            self.encoder_cell_proj = Linear(encoder_output_units, hidden_size)
        else:
            self.encoder_hidden_proj = self.encoder_cell_proj = None
        self.layers = nn.ModuleList([
            LSTMCell(
                input_size=(hidden_size + embed_dim) if layer == 0 else hidden_size,
                hidden_size=hidden_size,
            ) for layer in range(num_layers)
        ])
        if attention:
            # TODO make bias configurable
            self.attention = AttentionLayer(hidden_size,
                                            encoder_output_units,
                                            hidden_size,
                                            bias=False)
        else:
            self.attention = None
        if hidden_size != out_embed_dim:
            self.additional_fc = Linear(hidden_size, out_embed_dim)
        if adaptive_softmax_cutoff is not None:
            # setting adaptive_softmax dropout to dropout_out for now but can be redefined
            self.adaptive_softmax = AdaptiveSoftmax(num_embeddings,
                                                    embed_dim,
                                                    adaptive_softmax_cutoff,
                                                    dropout=dropout_out)
        elif not self.share_input_output_embed:
            self.fc_out = Linear(out_embed_dim,
                                 num_embeddings,
                                 dropout=dropout_out)
        #EDITED
        if self.use_scratchpad:
            self.attentive_writer = AttentiveWriter(hidden_size,
                                                    encoder_output_units,
                                                    encoder_output_units)
Example #12
    def __init__(self,
                 dictionary,
                 embed_tokens,
                 embed_dim=512,
                 num_layers=1,
                 dropout_in=0.1,
                 dropout_out=0.1,
                 bidirectional=False,
                 left_pad=False,
                 padding_value=0.,
                 adaptive_softmax=False,
                 adaptive_softmax_cutoff=[],
                 adaptive_softmax_dropout=0.1,
                 adaptive_softmax_factor=None):
        super(LSTMTaggerDecoder, self).__init__(dictionary=dictionary)

        if hasattr(embed_tokens, "embedded_dim"):
            self.in_embed_dim = embed_tokens.embedded_dim
        elif hasattr(embed_tokens, "embed_dim"):
            self.in_embed_dim = embed_tokens.embed_dim
        elif hasattr(embed_tokens, "embedding_dim"):
            self.in_embed_dim = embed_tokens.embedding_dim
        else:
            raise ValueError("embed_tokens exposes no known embedding dimension attribute")
        self.output_units = self.embed_dim = embed_dim
        self.out_embed_dim = len(dictionary)

        self.num_layers = num_layers
        self.dropout_in = dropout_in
        self.dropout_out = dropout_out

        self.bidirectional = bidirectional
        # NOTE: output_units is not doubled for the bidirectional case; the
        # original doubling (`self.output_units *= 2`) is left disabled.

        self.padding_idx = dictionary.pad()
        self.padding_value = padding_value
        self.left_pad = left_pad

        self.embed_tokens = embed_tokens

        self.fc_in = self.fc_out1 = self.fc_out2 = None
        if self.in_embed_dim != self.embed_dim:
            self.fc_in = Linear(self.in_embed_dim, self.embed_dim)
        if self.output_units != self.embed_dim:
            self.fc_out1 = Linear(self.output_units, self.embed_dim)
        if self.embed_dim != self.out_embed_dim:
            self.fc_out2 = Linear(self.embed_dim, self.out_embed_dim)

        self.lstm = LSTM(
            input_size=embed_dim,
            hidden_size=embed_dim,
            num_layers=num_layers,
            dropout=self.dropout_out if num_layers > 1 else 0.,
            bidirectional=bidirectional,
        )

        self.adaptive_softmax = None

        if adaptive_softmax:
            self.adaptive_softmax = AdaptiveSoftmax(
                len(dictionary),
                self.embed_dim,
                adaptive_softmax_cutoff,
                dropout=adaptive_softmax_dropout,
                adaptive_inputs=None,
                factor=adaptive_softmax_factor,
                tie_proj=False,
            )
Example #13
    def __init__(
        self,
        dictionary,
        rnn_type="lstm",
        embed_dim=512,
        hidden_size=512,
        out_embed_dim=512,
        num_layers=1,
        dropout_in=0.1,
        dropout_out=0.1,
        attention_type="luong-dot",
        encoder_output_units=512,
        pretrained_embed=None,
        share_input_output_embed=False,
        adaptive_softmax_cutoff=None,
        max_target_positions=DEFAULT_MAX_TARGET_POSITIONS,
        residuals=False,
    ):
        super().__init__(dictionary)
        self.dropout_in_module = FairseqDropout(
            dropout_in, module_name=self.__class__.__name__
        )
        self.dropout_out_module = FairseqDropout(
            dropout_out, module_name=self.__class__.__name__
        )
        self.hidden_size = hidden_size
        self.share_input_output_embed = share_input_output_embed
        self.need_attn = True
        self.max_target_positions = max_target_positions
        self.residuals = residuals
        self.num_layers = num_layers

        self.adaptive_softmax = None
        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        if pretrained_embed is None:
            self.embed_tokens = torch.nn.Embedding(num_embeddings, embed_dim, padding_idx)
        else:
            self.embed_tokens = pretrained_embed

        self.encoder_output_units = encoder_output_units
        if encoder_output_units != hidden_size and encoder_output_units != 0:
            self.encoder_hidden_proj = torch.nn.Linear(encoder_output_units, hidden_size)
            self.encoder_cell_proj = torch.nn.Linear(encoder_output_units, hidden_size)
        else:
            self.encoder_hidden_proj = self.encoder_cell_proj = None

        # input feeding is described in arxiv.org/abs/1508.04025
        input_feed_size = 0 if encoder_output_units == 0 else hidden_size
        # For Bahdanau, we compute the context on the input feed
        bahd_factor = hidden_size \
            if attention_type in ["bahdanau-dot", "bahdanau-concat", "bahdanau-general", "bahdanau"] \
            else 0
        self.rnn_type = rnn_type
        if rnn_type == "lstm":
            self.layers = LSTM(
                input_size=input_feed_size + embed_dim + bahd_factor,
                hidden_size=hidden_size,
                num_layers=num_layers
            )
        else:
            self.layers = GRU(
                input_size=input_feed_size + embed_dim + bahd_factor,
                hidden_size=hidden_size,
                num_layers=num_layers
            )

        if attention_type == "none":
            self.attention_type = "none"
            self.attention = None
        else:
            self.attention_type = attention_type
            self.attention = Attention(self.attention_type, hidden_size)

        if hidden_size != out_embed_dim:
            self.additional_fc = torch.nn.Linear(hidden_size, out_embed_dim)

        if adaptive_softmax_cutoff is not None:
            # setting adaptive_softmax dropout to dropout_out for now but can be redefined
            self.adaptive_softmax = AdaptiveSoftmax(
                num_embeddings,
                hidden_size,
                adaptive_softmax_cutoff,
                dropout=dropout_out,
            )
        elif not self.share_input_output_embed:
            self.fc_out = Linear(out_embed_dim, num_embeddings, dropout=dropout_out)
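
As a rough illustration of the input feeding referenced in the comment above (arxiv.org/abs/1508.04025): the attentional output of step t-1 is concatenated with the current token embedding before entering the RNN. A self-contained sketch with hypothetical modules standing in for the real attention:

import torch
import torch.nn as nn

bsz, embed_dim, hidden_size = 4, 512, 512                  # hypothetical sizes
cell = nn.LSTMCell(embed_dim + hidden_size, hidden_size)   # input feed widens the RNN input
combine = nn.Linear(2 * hidden_size, hidden_size)          # fuses hidden state and context

h = c = torch.zeros(bsz, hidden_size)
input_feed = torch.zeros(bsz, hidden_size)                 # attentional output of the previous step
for token_emb in torch.randn(3, bsz, embed_dim):           # three fake decoding steps
    h, c = cell(torch.cat((token_emb, input_feed), dim=1), (h, c))
    context = torch.randn(bsz, hidden_size)                # stand-in for the attention context
    input_feed = torch.tanh(combine(torch.cat((h, context), dim=1)))  # fed to the next step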
Example #14
    def __init__(
        self,
        rnn_type: Union[str, Namespace],
        dictionary,
        embed_dim=512,
        hidden_size=512,
        out_embed_dim=512,
        num_layers=1,
        dropout_in=0.1,
        dropout_out=0.1,
        attention=True,
        attention_bias=False,  # todo
        encoder_output_units=512,
        pretrained_embed=None,
        share_input_output_embed=False,
        adaptive_softmax_cutoff=None,
        max_target_positions=DEFAULT_MAX_TARGET_POSITIONS,
        residuals=False,
    ):
        super().__init__(dictionary)
        rnn_type = rnn_type.rnn_type if isinstance(rnn_type,
                                                   Namespace) else rnn_type
        self.rnn_type = rnn_type.lower().strip()
        self.is_lstm = self.rnn_type == 'lstm'  # convenience flag for later checks that handle the LSTM cell state
        self.dropout_in_module = FairseqDropout(
            dropout_in, module_name=self.__class__.__name__)
        self.dropout_out_module = FairseqDropout(
            dropout_out, module_name=self.__class__.__name__)
        self.hidden_size = hidden_size
        self.share_input_output_embed = share_input_output_embed
        self.need_attn = True
        self.max_target_positions = max_target_positions
        self.residuals = residuals
        self.num_layers = num_layers

        self.adaptive_softmax = None
        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        if pretrained_embed is None:
            self.embed_tokens = JqEmbedding(num_embeddings, embed_dim,
                                            padding_idx)
        else:
            self.embed_tokens = pretrained_embed

        self.encoder_output_units = encoder_output_units
        if encoder_output_units != hidden_size and encoder_output_units != 0:
            self.encoder_hidden_proj = Linear(encoder_output_units,
                                              hidden_size)
            self.encoder_cell_proj = Linear(
                encoder_output_units, hidden_size) if self.is_lstm else None
        else:
            self.encoder_hidden_proj = self.encoder_cell_proj = None

        # disable input feeding if there is no encoder
        # input feeding is described in arxiv.org/abs/1508.04025
        input_feed_size = 0 if encoder_output_units == 0 else hidden_size

        _, JQRNNCell = get_rnn_cell(self.rnn_type)  # the returned values are constructors
        # the first value (_) is JQRNN, unused here

        self.layers = nn.ModuleList([
            JQRNNCell(
                input_size=(input_feed_size + embed_dim) if layer == 0 else hidden_size,
                hidden_size=hidden_size,
            ) for layer in range(num_layers)
        ])

        if attention:
            # TODO make bias configurable: Done
            self.attention = AttentionLayer(hidden_size,
                                            encoder_output_units,
                                            hidden_size,
                                            bias=attention_bias)
        else:
            self.attention = None

        if hidden_size != out_embed_dim:
            self.additional_fc = Linear(hidden_size, out_embed_dim)

        if adaptive_softmax_cutoff is not None:
            # setting adaptive_softmax dropout to dropout_out for now but can be redefined
            self.adaptive_softmax = AdaptiveSoftmax(
                num_embeddings,
                hidden_size,
                adaptive_softmax_cutoff,
                dropout=dropout_out,
            )
        elif not self.share_input_output_embed:
            self.fc_out = Linear(out_embed_dim,
                                 num_embeddings,
                                 dropout=dropout_out)