Example #1
def rnn_decoder(decoder_params):
    decoder_embedding_layer = DropoutEmbeddings(
        ntokens=decoder_params.ntokens,
        emb_size=decoder_params.emb_size,
    )

    if decoder_params.attention:
        # the attention decoder needs double the input_size to accommodate the attention concatenation
        decoder_rnn = RNNLayers(input_size=decoder_params.emb_size * 2,
                                output_size=decoder_params.emb_size,
                                nhid=decoder_params.nhid,
                                bidir=False,
                                nlayers=decoder_params.nlayers,
                                cell_type="gru")
        projection_layer = AttentionProjection(
            output_size=decoder_params.ntokens,
            input_size=decoder_params.emb_size,
            att_nhid=decoder_params.att_hid,
            tie_encoder=None,
            dropout=0.0)
        decoder = AttentionDecoder(decoder_layer=decoder_rnn,
                                   embedding_layer=decoder_embedding_layer,
                                   projection_layer=projection_layer,
                                   pad_token=1,
                                   eos_token=2,
                                   max_tokens=decoder_params.max_tokens)

    else:

        decoder_rnn = RNNLayers(input_size=decoder_params.emb_size,
                                output_size=decoder_params.emb_size,
                                nhid=decoder_params.nhid,
                                bidir=False,
                                nlayers=decoder_params.nlayers,
                                cell_type="gru")
        projection_layer = Projection(output_size=decoder_params.ntokens,
                                      input_size=decoder_params.emb_size,
                                      dropout=0.0,
                                      tie_encoder=None)
        decoder = Decoder(
            decoder_layer=decoder_rnn,
            projection_layer=projection_layer,
            embedding_layer=decoder_embedding_layer,
            pad_token=0,
            eos_token=1,
            max_tokens=decoder_params.max_tokens,
        )
    decoder = to_gpu(decoder)
    decoder.reset(decoder_params.batch_size)
    return decoder, decoder_params
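A minimal sketch of how the fixture above might be driven; the attribute names mirror those read by rnn_decoder, the concrete sizes are illustrative, and the quicknlp-style helpers used above (DropoutEmbeddings, RNNLayers, to_gpu, ...) are assumed to be importable.

from types import SimpleNamespace

# Hypothetical hyperparameter bundle; every attribute below is accessed by rnn_decoder above.
decoder_params = SimpleNamespace(
    ntokens=100,     # vocabulary size
    emb_size=300,    # embedding width
    nhid=512,        # RNN hidden size
    nlayers=2,       # number of stacked GRU layers
    att_hid=256,     # attention hidden size (only read when attention=True)
    attention=True,  # selects the AttentionDecoder branch
    max_tokens=50,   # decoding step limit
    batch_size=4,    # used by decoder.reset(...)
)

decoder, params = rnn_decoder(decoder_params)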
Example #2
def test_attention_projection(attention_projection_setup):
    encoder_outputs, decoder_output, params = attention_projection_setup
    module = to_gpu(AttentionProjection(**params))
    # When I reset the module
    module.reset(keys=encoder_outputs)
    # the attention output will be a zero array with the same shape as the input
    assert to_np(module.get_attention_output(decoder_output)).sum() == 0
    assert module.get_attention_output(decoder_output) is not module._attention_output
    # When I pass the decoder output as input
    results = module(decoder_output)
    assert_dims(results, [1, 2, params['n_out']])
    # the new attention_output is calculated by the attention module and is no longer zero
    assert to_np(module.get_attention_output(decoder_output)).sum() != 0
    assert module.get_attention_output(decoder_output) is module._attention_output
    assert_dims(module._attention_output, [2, params['n_in']])
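The attention_projection_setup fixture is not shown; a hypothetical version consistent with the shapes asserted above (one decoding step, batch of two) might look like the sketch below. The keyword names in params follow the AttentionProjection call in Example #3 and are an assumption here.

import pytest
import torch

@pytest.fixture
def attention_projection_setup():
    # Illustrative sizes: sequence length 3, batch 2, feature width 8, vocabulary 10.
    params = {"n_in": 8, "n_out": 10, "att_nhid": 16, "dropout": 0.0, "tie_encoder": None}
    encoder_outputs = torch.randn(3, 2, params["n_in"])  # keys the attention attends over
    decoder_output = torch.randn(1, 2, params["n_in"])   # a single decoder step
    return encoder_outputs, decoder_output, params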
Example #3
    def __init__(self, ntoken: HParam, emb_sz: HParam, nhid: HParam, nlayers: HParam, att_nhid: int, pad_token: int,
                 eos_token: int, max_tokens: int = 50, share_embedding_layer: bool = False, tie_decoder: bool = True,
                 bidir: bool = False, **kwargs):
        """

        Args:
            ntoken (Union[List[int],int]): Number of tokens for the encoder and the decoder
            emb_sz (Union[List[int],int]): Embedding size for the encoder and decoder embeddings
            nhid (Union[List[int],int]): Number of hidden dims for the encoder and the decoder
            nlayers (Union[List[int],int]): Number of layers for the encoder and the decoder
            att_nhid (int): Number of hidden dims for the attention Module
            pad_token (int): The index of the token used for padding
            eos_token (int): The index of the token used for eos
            max_tokens (int): The maximum number of steps the decoder iterates before stopping
            share_embedding_layer (bool): if True the encoder and the decoder share their embedding layer
            tie_decoder (bool): if True the decoder's output projection is tied to its embedding weights
            bidir (bool): if True use a bidirectional encoder
            **kwargs: Extra embeddings that will be passed to the encoder and the decoder
        """
        super(Seq2Seq, self).__init__()
        # allow for the same or different parameters between encoder and decoder
        ntoken, emb_sz, nhid, nlayers = get_list(ntoken, 2), get_list(emb_sz, 2), \
                                        get_list(nhid, 2), get_list(nlayers, 2)
        if "dropoutd" in kwargs:
            dropoutd = kwargs.pop("dropoutd")
        else:
            dropoutd = 0.5
        self.encoder = EmbeddingRNNEncoder(ntoken=ntoken[0], emb_sz=emb_sz[0], nhid=nhid[0], nlayers=nlayers[0],
                                           pad_token=pad_token, bidir=bidir, **kwargs)

        self.decoder = RNNAttentionDecoder(ntoken=ntoken[-1], emb_sz=emb_sz[-1], nhid=nhid[-1], nlayers=nlayers[-1],
                                           pad_token=pad_token, eos_token=eos_token, max_tokens=max_tokens,
                                           # Share the embedding layer between encoder and decoder
                                           embedding_layer=self.encoder.encoder_with_dropout.embed if share_embedding_layer else None,
                                           # potentially tie the output projection with the decoder embedding
                                           **kwargs
                                           )
        enc = self.decoder.encoder if tie_decoder else None
        self.decoder.projection_layer = AttentionProjection(n_out=ntoken[-1],
                                                            n_in=emb_sz[-1],
                                                            dropout=dropoutd,
                                                            att_nhid=att_nhid,
                                                            tie_encoder=enc
                                                            )
        self.nlayers = nlayers
        self.nhid = nhid
        self.emb_sz = emb_sz
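A hedged usage sketch based only on the signature and docstring above: each HParam argument can be a single int (shared by encoder and decoder) or a two-element [encoder, decoder] list; the sizes and token indices below are illustrative.

# Same sizes on both sides
model = Seq2Seq(ntoken=10000, emb_sz=300, nhid=512, nlayers=2,
                att_nhid=256, pad_token=1, eos_token=2)

# Different vocabulary sizes for encoder and decoder, passed as [encoder, decoder] lists
model = Seq2Seq(ntoken=[10000, 8000], emb_sz=300, nhid=[512, 512], nlayers=2,
                att_nhid=256, pad_token=1, eos_token=2,
                share_embedding_layer=False, tie_decoder=True, bidir=True)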
Example #4
def rnn_decoder(decoder_params):
    if decoder_params.attention:
        decoder = RNNAttentionDecoder(cell_type="gru", ntoken=decoder_params.ntokens,
                                      emb_sz=decoder_params.emb_size, nhid=decoder_params.nhid,
                                      nlayers=decoder_params.nlayers,
                                      pad_token=1, eos_token=2,
                                      max_tokens=decoder_params.max_tokens)
        decoder.projection_layer = AttentionProjection(n_out=decoder_params.ntokens,
                                                       n_in=decoder_params.emb_size,
                                                       att_nhid=decoder_params.att_hid,
                                                       tie_encoder=None,
                                                       dropout=0.0)

    else:
        decoder = EmbeddingRNNDecoder(cell_type="gru", ntoken=decoder_params.ntokens,
                                      emb_sz=decoder_params.emb_size, nhid=decoder_params.nhid,
                                      nlayers=decoder_params.nlayers,
                                      pad_token=1, eos_token=2,
                                      max_tokens=decoder_params.max_tokens)
        decoder.projection_layer = Projection(n_out=decoder_params.ntokens,
                                              n_in=decoder_params.emb_size, tie_encoder=None, dropout=0.0)
    decoder = to_gpu(decoder)
    decoder.reset(decoder_params.batch_size)
    return decoder, decoder_params
Example #5
    def __init__(self,
                 ntoken: HParam,
                 emb_sz: HParam,
                 nhid: HParam,
                 nlayers: HParam,
                 att_nhid: int,
                 pad_token: int,
                 eos_token: int,
                 max_tokens: int = 50,
                 share_embedding_layer: bool = False,
                 tie_decoder: bool = True,
                 bidir: bool = False,
                 **kwargs):
        """

        Args:
            ntoken (Union[List[int],int]): Number of tokens for the encoder and the decoder
            emb_sz (Union[List[int],int]): Embedding size for the encoder and decoder embeddings
            nhid (Union[List[int],int]): Number of hidden dims for the encoder and the decoder
            nlayers (Union[List[int],int]): Number of layers for the encoder and the decoder
            att_nhid (int): Number of hidden dims for the attention Module
            pad_token (int): The index of the token used for padding
            eos_token (int): The index of the token used for eos
            max_tokens (int): The maximum number of steps the decoder iterates before stopping
            share_embedding_layer (bool): if True the encoder and the decoder share their embedding layer
            tie_decoder (bool): if True the decoder's output projection is tied to its embedding weights
            bidir (bool): if True use a bidirectional encoder
            **kwargs: Extra embeddings that will be passed to the encoder and the decoder
        """
        super().__init__()
        # allow for the same or different parameters between encoder and decoder
        ntoken, emb_sz, nhid, nlayers = get_list(ntoken, 2), get_list(emb_sz, 2), \
                                        get_list(nhid, 2), get_list(nlayers, 2)
        dropoutd = get_kwarg(kwargs, name="dropoutd",
                             default_value=0.5)  # output dropout
        dropoute = get_kwarg(kwargs, name="dropout_e",
                             default_value=0.1)  # encoder embedding dropout
        dropoute = get_list(dropoute, 2)
        dropouti = get_kwarg(kwargs, name="dropout_i",
                             default_value=0.65)  # input dropout
        dropouti = get_list(dropouti, 2)
        dropouth = get_kwarg(kwargs, name="dropout_h",
                             default_value=0.3)  # RNN output layers dropout
        dropouth = get_list(dropouth, 2)
        wdrop = get_kwarg(kwargs, name="wdrop",
                          default_value=0.5)  # RNN weights dropout
        wdrop = get_list(wdrop, 2)
        cell_type = get_kwarg(kwargs, name="cell_type", default_value="lstm")

        self.nlayers = nlayers
        self.nhid = nhid
        self.emb_sz = emb_sz
        self.pr_force = 1.0

        encoder_embedding_layer = DropoutEmbeddings(ntokens=ntoken[0],
                                                    emb_size=emb_sz[0],
                                                    dropoute=dropoute[0],
                                                    dropouti=dropouti[0])

        encoder_rnn = RNNLayers(
            input_size=emb_sz[0],
            output_size=kwargs.get("output_size", emb_sz[0]),
            nhid=nhid[0],
            bidir=bidir,
            dropouth=dropouth[0],
            wdrop=wdrop[0],
            nlayers=nlayers[0],
            cell_type=cell_type,
        )
        self.encoder = Encoder(embedding_layer=encoder_embedding_layer,
                               encoder_layer=encoder_rnn)

        if share_embedding_layer:
            decoder_embedding_layer = encoder_embedding_layer
        else:
            decoder_embedding_layer = DropoutEmbeddings(ntokens=ntoken[-1],
                                                        emb_size=emb_sz[-1],
                                                        dropoute=dropoute[1],
                                                        dropouti=dropouti[1])

        decoder_rnn = RNNLayers(input_size=kwargs.get("input_size",
                                                      emb_sz[-1] * 2),
                                output_size=kwargs.get("output_size",
                                                       emb_sz[-1]),
                                nhid=nhid[-1],
                                bidir=False,
                                dropouth=dropouth[1],
                                wdrop=wdrop[1],
                                nlayers=nlayers[-1],
                                cell_type=cell_type)

        projection_layer = AttentionProjection(
            output_size=ntoken[-1],
            input_size=emb_sz[-1],
            dropout=dropoutd,
            att_nhid=att_nhid,
            tie_encoder=decoder_embedding_layer if tie_decoder else None)
        self.decoder = AttentionDecoder(
            decoder_layer=decoder_rnn,
            projection_layer=projection_layer,
            embedding_layer=decoder_embedding_layer,
            pad_token=pad_token,
            eos_token=eos_token,
            max_tokens=max_tokens,
        )
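The class that owns this __init__ is not named in the snippet; assuming a Seq2Seq-style attention model (Seq2SeqAttentionModel below is a placeholder name), the optional dropout and cell-type kwargs consumed by get_kwarg could be supplied as in this sketch, with the defaults noted in the comments above applying when they are omitted.

model = Seq2SeqAttentionModel(   # placeholder name for the class defined above
    ntoken=[10000, 8000], emb_sz=300, nhid=512, nlayers=2,
    att_nhid=256, pad_token=1, eos_token=2, max_tokens=50,
    share_embedding_layer=False, tie_decoder=True, bidir=True,
    # optional kwargs read via get_kwarg; each falls back to the default shown in __init__
    dropoutd=0.5, dropout_e=0.1, dropout_i=0.65, dropout_h=0.3, wdrop=0.5,
    cell_type="gru",
)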