def __init__(self,
                 dictionary,
                 lang_dictionary,
                 embedding,
                 lang_embedding,
                 embed_dim=512,
                 hidden_size=512,
                 out_embed_dim=512,
                 num_layers=1,
                 dropout_in=0.1,
                 dropout_out=0.1,
                 attention=True,
                 encoder_output_units=1024,
                 pretrained_embed=None,
                 share_input_output_embed=False,
                 adaptive_softmax_cutoff=None,
                 lang_embedding_size=32):
        """Multilingual LSTM decoder using externally supplied embeddings.

        ``embedding`` and ``lang_embedding`` are pre-built embedding modules
        for target tokens and language ids; they are stored directly as
        ``self.embed_tokens`` / ``self.embed_langs``.  The first LSTM layer
        consumes the concatenation of output feed, token embedding, language
        embedding and encoder context (see ``input_size`` below).

        NOTE(review): ``attention`` and ``pretrained_embed`` are accepted but
        unused here (``self.attention`` is hard-wired to ``None``); confirm
        against the class's forward() before relying on them.
        """
        super().__init__(dictionary)
        self.dropout_in = dropout_in
        self.dropout_out = dropout_out
        self.hidden_size = hidden_size
        self.share_input_output_embed = share_input_output_embed
        # NOTE(review): attribute name differs from the parameter name
        # (``lang_embeddings_size`` vs ``lang_embedding_size``); kept as-is
        # because sibling methods may read it under this spelling.
        self.lang_embeddings_size = lang_embedding_size
        self.lang_dictionary = lang_dictionary
        self.embed_langs = lang_embedding

        self.adaptive_softmax = None
        num_embeddings = len(dictionary)
        # padding_idx is computed but unused here (the embedding is external)
        padding_idx = dictionary.pad()
        self.embed_tokens = embedding

        self.encoder_output_units = encoder_output_units

        # project encoder final hidden/cell states down to the decoder size
        self.encoder_hidden_proj = Linear(encoder_output_units, hidden_size)
        self.encoder_cell_proj = Linear(encoder_output_units, hidden_size)

        self.layers = nn.ModuleList([
            LSTMCell(
                # layer 0: output feed + token embedding + language embedding
                # + encoder context; deeper layers consume only the hidden
                # state of the layer below
                input_size=hidden_size + embed_dim +
                self.lang_embeddings_size +
                self.encoder_output_units if layer == 0 else hidden_size,
                hidden_size=hidden_size,
            ) for layer in range(num_layers)
        ])

        self.attention = None
        if hidden_size != out_embed_dim:
            self.additional_fc = Linear(hidden_size, out_embed_dim)
        if adaptive_softmax_cutoff is not None:
            # setting adaptive_softmax dropout to dropout_out for now but can be redefined
            self.adaptive_softmax = AdaptiveSoftmax(num_embeddings,
                                                    hidden_size,
                                                    adaptive_softmax_cutoff,
                                                    dropout=dropout_out)
        elif not self.share_input_output_embed:
            self.fc_out = Linear(out_embed_dim,
                                 num_embeddings,
                                 dropout=dropout_out)
# ===== Example 2 (原文: 示例#2, score: 0) =====
    def __init__(
        self, dictionary, embed_dim=512, hidden_size=512, out_embed_dim=512,
        num_layers=1, dropout_in=0.1, dropout_out=0.1, encoder_output_units=0,
        attn_type=None, attn_dim=0, need_attn=False, residual=False, pretrained_embed=None,
        share_input_output_embed=False, adaptive_softmax_cutoff=None,
    ):
        """LSTM decoder with selectable Bahdanau/Luong attention.

        When ``attn_type`` is absent or ``'none'`` the decoder is run in
        language-model mode: attention is disabled and no encoder features
        are consumed.
        """
        super().__init__(dictionary)
        attn_kind = None if attn_type is None else attn_type.lower()
        if attn_kind is None or attn_kind == 'none':
            # no attention, no encoder output needed (language model case)
            need_attn = False
            encoder_output_units = 0

        self.dropout_in = dropout_in
        self.dropout_out = dropout_out
        self.hidden_size = hidden_size
        self.share_input_output_embed = share_input_output_embed
        self.need_attn = need_attn
        self.residual = residual
        self.adaptive_softmax = None

        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        if pretrained_embed is not None:
            self.embed_tokens = pretrained_embed
        else:
            self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)

        self.encoder_output_units = encoder_output_units

        # layer 0 consumes the token embedding (plus encoder features);
        # deeper layers consume the hidden state of the layer below
        cells = []
        for depth in range(num_layers):
            bottom = embed_dim if depth == 0 else hidden_size
            cells.append(LSTMCell(
                input_size=encoder_output_units + bottom,
                hidden_size=hidden_size,
            ))
        self.layers = nn.ModuleList(cells)

        if attn_kind is None or attn_kind == 'none':
            self.attention = None
        elif attn_kind == 'bahdanau':
            self.attention = speech_attention.BahdanauAttention(
                hidden_size, encoder_output_units, attn_dim,
            )
        elif attn_kind == 'luong':
            self.attention = speech_attention.LuongAttention(
                hidden_size, encoder_output_units,
            )
        else:
            raise ValueError('unrecognized attention type.')

        if hidden_size + encoder_output_units != out_embed_dim:
            self.additional_fc = Linear(
                hidden_size + encoder_output_units, out_embed_dim)

        if adaptive_softmax_cutoff is not None:
            # setting adaptive_softmax dropout to dropout_out for now but can be redefined
            self.adaptive_softmax = AdaptiveSoftmax(
                num_embeddings, hidden_size, adaptive_softmax_cutoff,
                dropout=dropout_out)
        elif not self.share_input_output_embed:
            self.fc_out = Linear(out_embed_dim, num_embeddings, dropout=dropout_out)
# ===== Example 3 (原文: 示例#3, score: 0) =====
    def __init__(
        self,
        dictionary: Dictionary,
        embed_dim: int = 512,
        hidden_size: int = 512,
        out_embed_dim: int = 512,
        num_layers: int = 1,
        dropout_in: float = 0.1,
        dropout_out: float = 0.1,
        attention: bool = True,
        encoder_embed_dim: int = 512,
        encoder_output_units: int = 512,
        pretrained_embed: Optional[nn.Embedding] = None,
        share_input_output_embed: bool = False,
        adaptive_softmax_cutoff: Optional[int] = None,
    ):
        """Build an attentional LSTM decoder over ``dictionary``."""
        super().__init__(dictionary)
        self.dropout_in = dropout_in
        self.dropout_out = dropout_out
        self.hidden_size = hidden_size
        self.share_input_output_embed = share_input_output_embed
        self.need_attn = True
        self.adaptive_softmax = None

        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        if pretrained_embed is not None:
            self.embed_tokens = pretrained_embed
        else:
            self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)

        self.encoder_output_units = encoder_output_units

        # layer 0 takes the output feed concatenated with the token
        # embedding; later layers take the hidden state of the layer below
        self.layers = nn.ModuleList([
            LSTMCell(
                input_size=(hidden_size + embed_dim) if depth == 0 else hidden_size,
                hidden_size=hidden_size,
            )
            for depth in range(num_layers)
        ])

        if attention:
            self.attention = AttentionLayer(
                hidden_size, encoder_output_units, hidden_size)
        else:
            self.attention = None

        if hidden_size != out_embed_dim:
            self.additional_fc = Linear(hidden_size, out_embed_dim)

        if adaptive_softmax_cutoff is not None:
            # setting adaptive_softmax dropout to dropout_out for now but can be redefined
            self.adaptive_softmax = AdaptiveSoftmax(
                num_embeddings, embed_dim, adaptive_softmax_cutoff,
                dropout=dropout_out)
        elif not self.share_input_output_embed:
            self.fc_out = Linear(
                out_embed_dim, num_embeddings, dropout=dropout_out)
# ===== Example 4 (原文: 示例#4, score: 0) =====
    def __init__(self,
                 dictionary,
                 lang_dictionary,
                 embed_dim=512,
                 hidden_size=512,
                 out_embed_dim=512,
                 num_layers=1,
                 dropout_in=0.1,
                 dropout_out=0.1,
                 attention=True,
                 encoder_output_units=512,
                 pretrained_embed=None,
                 share_input_output_embed=False,
                 adaptive_softmax_cutoff=None,
                 lang_embedding_size=32):
        """Multilingual LSTM decoder without attention.

        A learned language embedding is concatenated with the token
        embedding, the output feed and the encoder context before entering
        the first LSTM layer.
        """
        super().__init__(dictionary)
        self.dropout_in = dropout_in
        self.dropout_out = dropout_out
        self.hidden_size = hidden_size
        self.share_input_output_embed = share_input_output_embed
        self.lang_embedding_size = lang_embedding_size
        self.lang_dictionary = lang_dictionary
        # one embedding row per language id
        self.embed_langs = nn.Embedding(len(lang_dictionary),
                                        lang_embedding_size)
        self.need_attn = False
        self.adaptive_softmax = None

        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)

        self.encoder_output_units = encoder_output_units
        # project encoder final hidden/cell states into the decoder dimension
        self.encoder_hidden_proj = Linear(encoder_output_units, hidden_size)
        self.encoder_cell_proj = Linear(encoder_output_units, hidden_size)

        # layer 0: output feed + token emb + language emb + encoder context;
        # deeper layers consume only the hidden state of the layer below
        first_input = (hidden_size + embed_dim
                       + lang_embedding_size + encoder_output_units)
        self.layers = nn.ModuleList([
            LSTMCell(
                input_size=first_input if depth == 0 else hidden_size,
                hidden_size=hidden_size,
            ) for depth in range(num_layers)
        ])

        self.attention = None
        if hidden_size != out_embed_dim:
            self.additional_fc = Linear(hidden_size, out_embed_dim)
        if not self.share_input_output_embed:
            self.fc_out = Linear(out_embed_dim,
                                 num_embeddings,
                                 dropout=dropout_out)
# ===== Example 5 (原文: 示例#5, score: 0) =====
    def __init__(
        self,
        dictionary,
        embed_dim=512,
        hidden_size=512,
        out_embed_dim=512,
        num_layers=1,
        dropout_in=0.1,
        dropout_out=0.1,
        attention=True,
        encoder_embed_dim=512,
        encoder_output_units=512,
        pretrained_embed=None,
        share_input_output_embed=False,
        adaptive_softmax_cutoff=None,
    ):
        """LSTM decoder with two parallel attention layers.

        ``attention_1`` and ``attention_2`` are built with identical shapes;
        presumably they attend over two encoder memories — confirm against
        the forward() implementation.  Requires
        ``encoder_output_units == hidden_size`` (asserted below); projecting
        unequal sizes is still a TODO.
        """
        super().__init__(dictionary)
        self.dropout_in = dropout_in
        self.dropout_out = dropout_out
        self.hidden_size = hidden_size
        self.share_input_output_embed = share_input_output_embed
        self.need_attn = True

        self.adaptive_softmax = None
        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        # use the supplied embedding table when given, otherwise build one
        if pretrained_embed is None:
            self.embed_tokens = Embedding(num_embeddings, embed_dim,
                                          padding_idx)
        else:
            self.embed_tokens = pretrained_embed

        self.encoder_output_units = encoder_output_units
        assert encoder_output_units == hidden_size, \
            'encoder_output_units ({}) != hidden_size ({})'.format(encoder_output_units, hidden_size)
        # TODO another Linear layer if not equal

        # layer 0 input: output feed (== encoder_output_units per the assert)
        # plus token embedding; deeper layers take only the hidden state of
        # the layer below
        self.layers = nn.ModuleList([
            LSTMCell(
                input_size=encoder_output_units +
                embed_dim if layer == 0 else hidden_size,
                hidden_size=hidden_size,
            ) for layer in range(num_layers)
        ])
        self.attention_1 = AttentionLayer(encoder_output_units,
                                          hidden_size) if attention else None
        self.attention_2 = AttentionLayer(encoder_output_units,
                                          hidden_size) if attention else None
        # self.attention_combine_fc = Linear(2 * hidden_size, hidden_size)
        if hidden_size != out_embed_dim:
            self.additional_fc = Linear(hidden_size, out_embed_dim)
        if adaptive_softmax_cutoff is not None:
            # setting adaptive_softmax dropout to dropout_out for now but can be redefined
            self.adaptive_softmax = AdaptiveSoftmax(num_embeddings,
                                                    embed_dim,
                                                    adaptive_softmax_cutoff,
                                                    dropout=dropout_out)
        elif not self.share_input_output_embed:
            self.fc_out = Linear(out_embed_dim,
                                 num_embeddings,
                                 dropout=dropout_out)
# ===== Example 6 (原文: 示例#6, score: 0) =====
    def __init__(
        self,
        dictionary,
        embed_dim=512,
        hidden_size=512,
        out_embed_dim=512,
        num_layers=1,
        dropout_in=0.1,
        dropout_out=0.1,
        encoder_output_units=0,
        attn_type=None,
        attn_dim=0,
        need_attn=False,
        residual=False,
        pretrained_embed=None,
        share_input_output_embed=False,
        adaptive_softmax_cutoff=None,
        max_target_positions=DEFAULT_MAX_TARGET_POSITIONS,
        scheduled_sampling_rate_scheduler=None,
    ):
        """Speech LSTM decoder with optional attention and scheduled sampling.

        Dropout is applied through :class:`FairseqDropout` modules; an
        ``attn_type`` of ``None``/``"none"`` switches the decoder into
        language-model mode (no attention, no encoder features).  The
        scheduled-sampling rate scheduler is only stored here; its use is
        presumably in forward() — confirm there.
        """
        super().__init__(dictionary)
        # ``* 1.0`` coerces the configured rate to float for FairseqDropout
        self.dropout_in_module = FairseqDropout(
            dropout_in * 1.0, module_name=self.__class__.__name__
        )
        self.dropout_out_module = FairseqDropout(
            dropout_out * 1.0, module_name=self.__class__.__name__
        )
        self.hidden_size = hidden_size
        self.share_input_output_embed = share_input_output_embed
        if attn_type is None or str(attn_type).lower() == "none":
            # no attention, no encoder output needed (language model case)
            need_attn = False
            encoder_output_units = 0
        self.need_attn = need_attn
        self.residual = residual
        self.max_target_positions = max_target_positions
        self.num_layers = num_layers

        self.adaptive_softmax = None
        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        # use the supplied embedding table when given, otherwise build one
        if pretrained_embed is None:
            self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
        else:
            self.embed_tokens = pretrained_embed

        self.encoder_output_units = encoder_output_units

        # layer 0 consumes the token embedding (plus encoder features);
        # deeper layers consume the hidden state of the layer below
        self.layers = nn.ModuleList(
            [
                LSTMCell(
                    input_size=encoder_output_units
                    + (embed_dim if layer == 0 else hidden_size),
                    hidden_size=hidden_size,
                )
                for layer in range(num_layers)
            ]
        )

        if attn_type is None or str(attn_type).lower() == "none":
            self.attention = None
        elif str(attn_type).lower() == "bahdanau":
            self.attention = speech_attention.BahdanauAttention(
                hidden_size,
                encoder_output_units,
                attn_dim,
            )
        elif str(attn_type).lower() == "luong":
            self.attention = speech_attention.LuongAttention(
                hidden_size,
                encoder_output_units,
            )
        else:
            raise ValueError("unrecognized attention type.")

        # bridge hidden(+context) into the output-embedding dimension
        if hidden_size + encoder_output_units != out_embed_dim:
            self.additional_fc = Linear(
                hidden_size + encoder_output_units, out_embed_dim
            )

        if adaptive_softmax_cutoff is not None:
            # setting adaptive_softmax dropout to dropout_out for now but can be redefined
            self.adaptive_softmax = AdaptiveSoftmax(
                num_embeddings,
                hidden_size,
                adaptive_softmax_cutoff,
                dropout=dropout_out,
            )
        elif not self.share_input_output_embed:
            self.fc_out = Linear(out_embed_dim, num_embeddings, dropout=dropout_out)

        self.scheduled_sampling_rate_scheduler = scheduled_sampling_rate_scheduler
# ===== Example 7 (原文: 示例#7, score: 0) =====
    def __init__(self,
                 dictionary,
                 embed_dim=512,
                 hidden_size=512,
                 out_embed_dim=512,
                 num_layers=1,
                 dropout_in=0.1,
                 dropout_out=0.1,
                 attention=True,
                 encoder_output_units=512,
                 pretrained_embed=None,
                 share_input_output_embed=False,
                 adaptive_softmax_cutoff=None,
                 use_scratchpad=False,
                 residual=False):
        """Attentional LSTM decoder with an optional scratchpad memory
        (``AttentiveWriter``) over the encoder outputs."""
        super().__init__(dictionary)
        self.dropout_in = dropout_in
        self.dropout_out = dropout_out
        self.hidden_size = hidden_size
        self.share_input_output_embed = share_input_output_embed
        self.need_attn = True
        self.use_scratchpad = use_scratchpad
        self.residual = residual
        self.adaptive_softmax = None

        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        if pretrained_embed is not None:
            self.embed_tokens = pretrained_embed
        else:
            self.embed_tokens = Embedding(num_embeddings, embed_dim,
                                          padding_idx)

        self.encoder_output_units = encoder_output_units
        if encoder_output_units == hidden_size:
            # dimensions already agree: no projection of encoder states needed
            self.encoder_hidden_proj = self.encoder_cell_proj = None
        else:
            self.encoder_hidden_proj = Linear(encoder_output_units,
                                              hidden_size)
            self.encoder_cell_proj = Linear(encoder_output_units, hidden_size)

        # layer 0 consumes output feed + token embedding; deeper layers
        # consume only the hidden state of the layer below
        self.layers = nn.ModuleList([
            LSTMCell(
                input_size=(hidden_size + embed_dim) if depth == 0 else hidden_size,
                hidden_size=hidden_size,
            ) for depth in range(num_layers)
        ])

        if attention:
            # TODO make bias configurable
            self.attention = AttentionLayer(hidden_size,
                                            encoder_output_units,
                                            hidden_size,
                                            bias=False)
        else:
            self.attention = None

        if hidden_size != out_embed_dim:
            self.additional_fc = Linear(hidden_size, out_embed_dim)

        if adaptive_softmax_cutoff is not None:
            # setting adaptive_softmax dropout to dropout_out for now but can be redefined
            self.adaptive_softmax = AdaptiveSoftmax(num_embeddings,
                                                    embed_dim,
                                                    adaptive_softmax_cutoff,
                                                    dropout=dropout_out)
        elif not self.share_input_output_embed:
            self.fc_out = Linear(out_embed_dim,
                                 num_embeddings,
                                 dropout=dropout_out)

        # optional scratchpad: a learned writer over the encoder memory
        if self.use_scratchpad:
            self.attentive_writer = AttentiveWriter(hidden_size,
                                                    encoder_output_units,
                                                    encoder_output_units)