Example No. 1
 def _get_embed(self, embed, vocab_size, embed_size, initializer, dropout,
                prefix):
     """ Construct an embedding block. """
     if embed is None:
         assert embed_size is not None, '"embed_size" cannot be None if "word_embed" or ' \
                                        'token_type_embed is not given.'
         with self.name_scope():
             embed = nn.HybridSequential(prefix=prefix)
             with embed.name_scope():
                 embed.add(
                     nn.Embedding(input_dim=vocab_size,
                                  output_dim=embed_size,
                                  weight_initializer=initializer))
                 if dropout:
                     embed.add(nn.Dropout(rate=dropout))
     assert isinstance(embed, Block)
     return embed
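For reference, here is a minimal usage sketch of the same pattern; the vocabulary size, embedding size, and dropout rate are illustrative values, not taken from the example:

import mxnet as mx
from mxnet.gluon import nn

# Build a standalone embedding block: token ids -> (dropout-regularized) vectors.
embed = nn.HybridSequential(prefix='word_embed_')
with embed.name_scope():
    embed.add(nn.Embedding(input_dim=1000, output_dim=64,
                           weight_initializer=mx.init.Uniform(0.1)))
    embed.add(nn.Dropout(rate=0.1))
embed.initialize()

tokens = mx.nd.array([[4, 7, 2]])   # shape (batch_size, seq_len) of token indices
print(embed(tokens).shape)          # -> (1, 3, 64)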
Example No. 2
 def __init__(self, vocab_size, embedding_dim, num_classes, **kwargs):
     super(FastTextClassificationModel, self).__init__(**kwargs)
     with self.name_scope():
         self.vocab_size = vocab_size
         self.embedding_dim = embedding_dim
         self.embedding = nn.Embedding(
             self.vocab_size,
             self.embedding_dim,
             weight_initializer=mx.init.Xavier(),
             dtype='float32')
         num_output_units = num_classes
         if num_classes == 2:
             num_output_units = 1
         logging.info('Number of output units in the last layer: %s',
                      num_output_units)
         self.agg_layer = MeanPoolingLayer()
         self.dense = nn.Dense(num_output_units)
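The single output unit in the binary case pairs naturally with a sigmoid-based loss, while the multi-class case keeps num_classes units with a softmax loss; the pairing below is an assumption for illustration and is not shown in the snippet above:

from mxnet import gluon

# num_output_units == 1           -> one logit scored with sigmoid cross-entropy
binary_loss = gluon.loss.SigmoidBinaryCrossEntropyLoss()
# num_output_units == num_classes -> softmax cross-entropy over all classes
multiclass_loss = gluon.loss.SoftmaxCrossEntropyLoss()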
Example No. 3
    def __init__(self,
                 mode,
                 vocab_size,
                 embed_dim,
                 hidden_dim,
                 num_layers,
                 dropout=0.5,
                 **kwargs):
        super(RNNModel, self).__init__(**kwargs)
        with self.name_scope():
            self.drop = nn.Dropout(dropout)
            self.encoder = nn.Embedding(
                vocab_size, embed_dim, weight_initializer=mx.init.Uniform(0.1))

            if mode == 'rnn_relu':
                self.rnn = rnn.RNN(hidden_dim,
                                   num_layers,
                                   activation='relu',
                                   dropout=dropout,
                                   input_size=embed_dim)

            elif mode == 'rnn_tanh':
                self.rnn = rnn.RNN(hidden_dim,
                                   num_layers,
                                   activation='tanh',
                                   dropout=dropout,
                                   input_size=embed_dim)

            elif mode == 'lstm':
                self.rnn = rnn.LSTM(hidden_dim,
                                    num_layers,
                                    dropout=dropout,
                                    input_size=embed_dim)

            elif mode == 'gru':
                self.rnn = rnn.GRU(hidden_dim,
                                   num_layers,
                                   dropout=dropout,
                                   input_size=embed_dim)

            else:
                raise ValueError(
                    "Invalid mode %s. Options are rnn_relu, rnn_tanh, lstm and gru"
                    % mode)

            self.decoder = nn.Dense(vocab_size, in_units=hidden_dim)
            self.hidden_dim = hidden_dim
Example No. 4
    def __init__(self, nwords, nword_dims, word_hidden_size, word_nlayers,
                 sentence_hidden_size, sentence_nlayers, ndoc_dims, **kwargs):
        super(Encoder, self).__init__(**kwargs)

        with self.name_scope():
            self.embedding = nn.Embedding(nwords, nword_dims)
            self.word_rnn = gluon.rnn.LSTM(word_hidden_size,
                                           num_layers=word_nlayers,
                                           layout='NTC',
                                           bidirectional=True)
            self.sentence_rnn = gluon.rnn.LSTM(sentence_hidden_size,
                                               num_layers=sentence_nlayers,
                                               layout='NTC',
                                               bidirectional=True)
            self.fully_encoder = nn.Dense(ndoc_dims,
                                          activation='tanh',
                                          flatten=False)
Example No. 5
    def __init__(self,
                 mode,
                 vocab_size,
                 num_embed,
                 num_hidden,
                 num_layers,
                 dropout=0.5,
                 tie_weights=False,
                 **kwargs):
        super(RNNModel, self).__init__(**kwargs)
        with self.name_scope():
            self.drop = nn.Dropout(dropout)
            self.encoder = nn.Embedding(vocab_size, num_embed)
            if mode == 'rnn_relu':
                self.rnn = rnn.RNN(num_hidden,
                                   num_layers,
                                   activation='relu',
                                   dropout=dropout,
                                   input_size=num_embed)
            elif mode == 'rnn_tanh':
                self.rnn = rnn.RNN(num_hidden,
                                   num_layers,
                                   activation='tanh',
                                   dropout=dropout,
                                   input_size=num_embed)
            elif mode == 'lstm':
                self.rnn = rnn.LSTM(num_hidden,
                                    num_layers,
                                    dropout=dropout,
                                    input_size=num_embed)
            elif mode == 'gru':
                self.rnn = rnn.GRU(num_hidden,
                                   num_layers,
                                   dropout=dropout,
                                   input_size=num_embed)
            else:
                raise ValueError("Invalid mode %s. Options are rnn_relu, "
                                 "rnn_tanh, lstm, and gru" % mode)

            if tie_weights:
                self.decoder = nn.Dense(vocab_size,
                                        in_units=num_hidden,
                                        params=self.encoder.params)
            else:
                self.decoder = nn.Dense(vocab_size, in_units=num_hidden)

            self.num_hidden = num_hidden
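Because the constructor above ties the decoder to the embedding via params=self.encoder.params, here is a brief sketch of what that sharing means in practice; the hyperparameter values are illustrative, and the class is assumed to define its forward pass elsewhere:

import mxnet as mx

model = RNNModel(mode='lstm', vocab_size=10000, num_embed=200,
                 num_hidden=200, num_layers=2, tie_weights=True)
model.initialize(mx.init.Xavier())

# With tie_weights=True the Dense decoder reuses the embedding matrix, so both
# layers hold the very same Parameter object (this also requires
# num_embed == num_hidden so the shapes line up).
print(model.decoder.weight is model.encoder.weight)   # True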
Example No. 6
 def __init__(self,
              vocab_size,
              hidden_size,
              prefix=None,
              params=None,
              use_tuple=False):
     super(RNNDecoder2, self).__init__(prefix=prefix, params=params)
     self._vocab_size = vocab_size
     self._use_tuple = use_tuple
     with self.name_scope():
         self._embed = nn.Embedding(input_dim=vocab_size,
                                    output_dim=hidden_size)
         self._rnn1 = rnn.RNNCell(input_size=hidden_size,
                                  hidden_size=hidden_size)
         self._rnn2 = rnn.RNNCell(input_size=hidden_size,
                                  hidden_size=hidden_size)
         self._map_to_vocab = nn.Dense(vocab_size, in_units=hidden_size)
Example No. 7
def net_define_eu():
    net = nn.Sequential()
    with net.name_scope():
        net.add(nn.Embedding(config.MAX_WORDS, config.EMBEDDING_DIM))
        net.add(rnn.GRU(128, layout='NTC', bidirectional=True, num_layers=1, dropout=0.2))
        net.add(transpose(axes=(0, 2, 1)))
        net.add(nn.GlobalMaxPool1D())
        # net.add(FeatureBlock1())
        net.add(extendDim(axes=3))
        net.add(PrimeConvCap(16, 32, kernel_size=(1, 1), padding=(0, 0), strides=(1, 1)))
        net.add(CapFullyNGBlock(16, num_cap=12, input_units=32, units=16, route_num=3))
        net.add(nn.Dropout(0.2))
        net.add(nn.Dense(6, activation='sigmoid'))
    net.initialize(init=init.Xavier())
    return net
Example No. 8
    def __init__(self, hidden_dim, output_dim, num_layers, max_seq_len,
                 drop_prob, alignment_dim, encoder_hidden_dim, **kwargs):
        super(Decoder, self).__init__(**kwargs)
        self.max_seq_len = max_seq_len
        self.encoder_hidden_dim = encoder_hidden_dim
        self.hidden_size = hidden_dim
        self.num_layers = num_layers
        with self.name_scope():
            # hidden_dim is decoder_hidden_dim
            # output_dim is len(output_vocab)
            self.embedding = nn.Embedding(output_dim, hidden_dim)
            self.dropout = nn.Dropout(drop_prob)

            # Attention mechanism.
            self.attention = nn.Sequential()
            with self.attention.name_scope():
                # the layer output (*V, in_units) -> (*V, alignment_dim)
                # namely (*V, hidden_dim + encoder_hidden_dim) -> (*V, alignment_dim)
                self.attention.add(
                    # alignment_dim is 25
                    nn.Dense(alignment_dim,
                             in_units=hidden_dim + encoder_hidden_dim,
                             activation="tanh",
                             flatten=False))

                # the layer output (*V, alignment_dim) -> (*V, 1)
                # activation = None
                self.attention.add(
                    nn.Dense(1, in_units=alignment_dim, flatten=False))

            self.rnn = rnn.GRU(hidden_dim,
                               num_layers,
                               dropout=drop_prob,
                               input_size=hidden_dim)

            # the layer output (*V, hidden_dim) -> (*V, output_dim)
            # activation = None
            self.out = nn.Dense(output_dim, in_units=hidden_dim, flatten=False)

            # the layer output (*V, hidden_dim + encoder_hidden_dim) -> (*V, hidden_dim)
            # activation = None
            self.rnn_concat_input = nn.Dense(hidden_dim,
                                             in_units=hidden_dim +
                                             encoder_hidden_dim,
                                             flatten=False)
Example No. 9
 def __init__(self,
              vocab_size,
              num_hiddens,
              ffn_num_hiddens,
              num_heads,
              num_layers,
              dropout,
              use_bias=False,
              **kwargs):
     super(TransformerEncoder, self).__init__(**kwargs)
     self.num_hiddens = num_hiddens
     self.embedding = nn.Embedding(vocab_size, num_hiddens)
     self.pos_encoding = PositionalEncoding(num_hiddens, dropout)
     self.blks = nn.Sequential()
     for _ in range(num_layers):
         self.blks.add(
             EncoderBlock(num_hiddens, ffn_num_hiddens, num_heads, dropout,
                          use_bias))
Example No. 10
def SequentialTextCNN(config):

    net = nn.Sequential()
    with net.name_scope():
        net.add(
            nn.Embedding(input_dim=config['vocab_size'],
                         output_dim=config['embedding_dim']))
        net.add(nn.Lambda(lambda x: x.transpose((0, 2, 1))))
        net.add(
            nn.Conv1D(channels=config['feature_map'],
                      kernel_size=config['kernel_size'][0],
                      strides=1))
        net.add(nn.BatchNorm(axis=1))
        net.add(nn.Activation('relu'))
        net.add(nn.GlobalMaxPool1D())
        net.add(nn.Dropout(rate=config['dropout_rate']))
        net.add(nn.Dense(units=2))
    return net
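Since this builder uses only stock Gluon layers, it can be exercised end to end; the config values below are placeholders rather than values from the source:

import mxnet as mx

config = {'vocab_size': 5000, 'embedding_dim': 128, 'feature_map': 100,
          'kernel_size': [3, 4, 5], 'dropout_rate': 0.5}
net = SequentialTextCNN(config)
net.initialize(mx.init.Xavier())

# A batch of 4 sequences of 50 token ids -> 2 class scores per sequence.
x = mx.nd.random.randint(0, config['vocab_size'], shape=(4, 50)).astype('float32')
print(net(x).shape)   # -> (4, 2)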
Example No. 11
 def __init__(self, vocab_size, word_embed_size, hidden_size,
              dropout=0., intra_attention=False, **kwargs):
     super(NLIModel, self).__init__(**kwargs)
     self.word_embed_size = word_embed_size
     self.hidden_size = hidden_size
     self.use_intra_attention = intra_attention
     with self.name_scope():
         self.dropout_layer = nn.Dropout(dropout)
         self.word_emb = nn.Embedding(vocab_size, word_embed_size)
         self.lin_proj = nn.Dense(hidden_size, in_units=word_embed_size,
                                  flatten=False, use_bias=False)
         if self.use_intra_attention:
             self.intra_attention = IntraSentenceAttention(hidden_size, hidden_size, dropout)
             input_size = hidden_size * 2
         else:
             self.intra_attention = None
             input_size = hidden_size
         self.model = DecomposableAttention(input_size, hidden_size, 3, dropout)
Example No. 12
    def __init__(self, vocab_size, embedding_dim, hidden_dim, batch_size,
                 **kwargs):
        super(AttentionDecoder, self).__init__(**kwargs)
        with self.name_scope():
            self.hidden_dim = hidden_dim
            self.batch_size = batch_size

            self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
            self.gru = rnn.GRU(hidden_dim,
                               input_size=embedding_dim,
                               layout="NTC")
            # in_units is hidden_dim * 2 because the GRU hidden state of each
            # step is concatenated with the context vector computed by the
            # attention layer, doubling its length.
            # self.hidden2linear = nn.Dense(vocab_size, in_units=hidden_dim * 2)
            self.hidden2linear = nn.Dense(vocab_size,
                                          in_units=hidden_dim * 2,
                                          flatten=False)
Example No. 13
 def _get_embedding(self):
     prefix = 'embedding0_'
     if self._sparse_weight:
         embedding = nn.Sequential(prefix=prefix)
     else:
         embedding = nn.HybridSequential(prefix=prefix)
     with embedding.name_scope():
         if self._sparse_weight:
             # sparse embedding has both sparse weight and sparse grad
             embed = contrib.nn.SparseEmbedding(self._vocab_size, self._embed_size,
                                                prefix=prefix)
         else:
             embed = nn.Embedding(self._vocab_size, self._embed_size, prefix=prefix,
                                  sparse_grad=self._sparse_grad)
         embedding.add(embed)
         if self._embed_dropout:
             embedding.add(nn.Dropout(self._embed_dropout))
     return embedding
Example No. 14
def net_define():
    net = nn.Sequential()
    with net.name_scope():
        net.add(nn.Embedding(config.MAX_WORDS, config.EMBEDDING_DIM))
        net.add(rnn.GRU(128, layout='NTC', bidirectional=True, num_layers=2, dropout=0.2))
        net.add(transpose(axes=(0, 2, 1)))
        # net.add(nn.MaxPool2D(pool_size=(config.MAX_LENGTH, 1)))
        # net.add(nn.Conv2D(128, kernel_size=(101, 1), padding=(50, 0), groups=128, activation='relu'))
        net.add(PrimeConvCap(8, 32, kernel_size=(1, 1), padding=(0, 0)))
        # net.add(AdvConvCap(8, 32, 8, 32, kernel_size=(1, 1), padding=(0, 0)))
        net.add(CapFullyBlock(8 * config.MAX_LENGTH // 2, num_cap=12, input_units=32, units=16, route_num=5))
        # net.add(CapFullyBlock(8*(config.MAX_LENGTH-8), num_cap=12, input_units=32, units=16, route_num=5))
        # net.add(CapFullyBlock(8, num_cap=12, input_units=32, units=16, route_num=5))
        net.add(nn.Dropout(0.2))
        # net.add(LengthBlock())
        net.add(nn.Dense(6, activation='sigmoid'))
    net.initialize(init=init.Xavier())
    return net
Example No. 15
    def __init__(self, n_hidden, vocab_size, embed_dim, max_seq_length,
                 **kwargs):
        super(korean_autospacing_base, self).__init__(**kwargs)
        # input sequence length
        self.in_seq_len = max_seq_length
        # output sequence length
        self.out_seq_len = max_seq_length
        # number of GRU hidden units
        self.n_hidden = n_hidden
        # number of unique characters (vocabulary size)
        self.vocab_size = vocab_size
        # max_seq_length
        self.max_seq_length = max_seq_length
        # embedding dimension
        self.embed_dim = embed_dim

        with self.name_scope():
            self.embedding = nn.Embedding(input_dim=self.vocab_size,
                                          output_dim=self.embed_dim)

            self.conv_unigram = nn.Conv2D(channels=128,
                                          kernel_size=(1, self.embed_dim))

            self.conv_bigram = nn.Conv2D(channels=256,
                                         kernel_size=(2, self.embed_dim),
                                         padding=(1, 0))

            self.conv_trigram = nn.Conv2D(channels=128,
                                          kernel_size=(3, self.embed_dim),
                                          padding=(1, 0))

            self.conv_forthgram = nn.Conv2D(channels=64,
                                            kernel_size=(4, self.embed_dim),
                                            padding=(2, 0))

            self.conv_fifthgram = nn.Conv2D(channels=32,
                                            kernel_size=(5, self.embed_dim),
                                            padding=(2, 0))

            self.bi_gru = rnn.GRU(hidden_size=self.n_hidden,
                                  layout='NTC',
                                  bidirectional=True)
            self.dense_sh = nn.Dense(100, activation='relu', flatten=False)
            self.dense = nn.Dense(1, activation='sigmoid', flatten=False)
Example No. 16
    def __init__(self, config, **kwargs):
        super(TextCNN, self).__init__(**kwargs)

        V = config.vocab_size
        E = config.embedding_dim
        Nf = config.num_filters
        Ks = config.kernel_sizes
        C = config.num_classes
        Dr = config.dropout_prob

        with self.name_scope():
            self.embedding = nn.Embedding(V, E)  # embedding layer

            # three different convolutional layers
            self.conv1 = Conv_Max_Pooling(Nf, Ks[0])
            self.conv2 = Conv_Max_Pooling(Nf, Ks[1])
            self.conv3 = Conv_Max_Pooling(Nf, Ks[2])
            self.dropout = nn.Dropout(Dr)  # a dropout layer
            self.fc1 = nn.Dense(C)  # a dense layer for classification
Example No. 17
 def __init__(self,
              n_classes=2,
              kernel_size=8,
              embed_size=300,
              dropout=0.5,
              seq_len=500,
              vocab_size=7500,
              **kwargs):
     super(CNN, self).__init__(**kwargs)
     self.dropout = False
     with self.name_scope():
         self.encoder = nn.Embedding(vocab_size, embed_size, 
                     weight_initializer = mx.init.Uniform(0.1))
         self.conv = nn.Conv2D(embed_size, (kernel_size, embed_size),1)
         self.act = nn.Activation('relu')
         self.pool = nn.MaxPool2D((seq_len-kernel_size+1, 1))
         if dropout > 0.0:
             self.dropout = True
             self.drop = nn.Dropout(dropout)
         self.decoder = nn.Dense(n_classes)
Example No. 18
def load_model():
    num_classes = 2
    num_hidden = 25
    num_embed = 300
    learning_rate = .01
    epochs = 200
    batch_size = 20
    voca_size = 10000
    context = mx.gpu()

    model_params_filename = "lstm_net.params_epoch4"
    model = nn.Sequential()
    with model.name_scope():
        model.add(nn.Embedding(voca_size, num_embed))
        model.add(rnn.LSTM(num_hidden, layout='NTC', dropout=0.7, bidirectional=False))
        model.add(nn.Dense(num_classes))

    model.load_params(model_params_filename, context)
    return model
Example No. 19
 def __init__(self,
              mode,
              vocab_size,
              embed_size,
              num_hiddens,
              num_layers,
              drop_prob=0.5,
              **kwargs):
     super(RNNModel, self).__init__(**kwargs)
     with self.name_scope():
         self.dropout = nn.Dropout(drop_prob)
         # Map word indices to word vectors. These vectors are also model parameters.
         self.embedding = nn.Embedding(vocab_size,
                                       embed_size,
                                       weight_initializer=init.Uniform(0.1))
         if mode == 'rnn_relu':
             self.rnn = rnn.RNN(num_hiddens,
                                num_layers,
                                activation='relu',
                                dropout=drop_prob,
                                input_size=embed_size)
         elif mode == 'rnn_tanh':
             self.rnn = rnn.RNN(num_hiddens,
                                num_layers,
                                activation='tanh',
                                dropout=drop_prob,
                                input_size=embed_size)
         elif mode == 'lstm':
             self.rnn = rnn.LSTM(num_hiddens,
                                 num_layers,
                                 dropout=drop_prob,
                                 input_size=embed_size)
         elif mode == 'gru':
             self.rnn = rnn.GRU(num_hiddens,
                                num_layers,
                                dropout=drop_prob,
                                input_size=embed_size)
         else:
             raise ValueError('Invalid mode %s. Options are rnn_relu, '
                              'rnn_tanh, lstm, and gru' % mode)
         self.dense = nn.Dense(vocab_size, in_units=num_hiddens)
         self.num_hiddens = num_hiddens
Example No. 20
    def __init__(self, token_to_idx, subword_function, embedding_size,
                 weight_initializer=None, sparse_grad=True, dtype='float32',
                 **kwargs):
        super(FasttextEmbeddingModel,
              self).__init__(embedding_size=embedding_size, **kwargs)
        self.token_to_idx = token_to_idx
        self.subword_function = subword_function
        self.weight_initializer = weight_initializer
        self.sparse_grad = sparse_grad
        self.dtype = dtype

        with self.name_scope():
            self.embedding = nn.Embedding(
                len(token_to_idx), embedding_size,
                weight_initializer=weight_initializer, sparse_grad=sparse_grad,
                dtype=dtype)
            self.subword_embedding = _MaskedSumEmbedding(
                len(subword_function), embedding_size,
                weight_initializer=weight_initializer, sparse_grad=sparse_grad,
                dtype=dtype)
Example No. 21
    def __init__(self, vocab_size, tag2idx, embedding_dim, hidden_dim):
        super(BiLSTM_CRF, self).__init__()
        with self.name_scope():
            self.embedding_dim = embedding_dim
            self.hidden_dim = hidden_dim
            self.vocab_size = vocab_size
            self.tag2idx = tag2idx
            self.tagset_size = len(tag2idx)

            self.word_embeds = nn.Embedding(vocab_size, embedding_dim)
            self.lstm = rnn.LSTM(hidden_dim // 2, num_layers=1, bidirectional=True)

            # Maps the output of the LSTM into tag space.
            self.hidden2tag = nn.Dense(self.tagset_size)

            # Matrix of transition parameters.  Entry i,j is the score of
            # transitioning *to* i *from* j.
            self.transitions = nd.random_normal(shape=(self.tagset_size, self.tagset_size))

            self.hidden = self.init_hidden()
Example No. 22
    def __init__(self, nwords, nword_dims, nhiddens, nlayers, natt_units, natt_hops, nfc, nclass,
                 drop_prob, pool_way, nprune_p=None, nprune_q=None, **kwargs):
        super(SelfAttentiveBiLSTM, self).__init__(**kwargs)
        with self.name_scope():
            self.embedding_layer = nn.Embedding(nwords, nword_dims)
            self.bilstm = rnn.LSTM(
                nhiddens, num_layers=nlayers, dropout=drop_prob, bidirectional=True)
            self.att_encoder = SelfAttention(natt_units, natt_hops)
            self.dense = nn.Dense(nfc, activation='tanh')
            self.output_layer = nn.Dense(nclass)

            self.dense_p, self.dense_q = None, None
            if all([nprune_p, nprune_q]):
                self.dense_p = nn.Dense(
                    nprune_p, activation='tanh', flatten=False)
                self.dense_q = nn.Dense(
                    nprune_q, activation='tanh', flatten=False)

            self.drop_prob = drop_prob
            self.pool_way = pool_way
Example No. 23
 def __init__(self, vocab_size, emb_size, hidden_size, num_layers=2, dropout=.3, \
              bidir=True, latent_size=64, **kwargs):
     '''
     init this class, create relevant rnns
     '''
     super(VAEEncoder, self).__init__(**kwargs)
     with self.name_scope():
         self.hidden_size = hidden_size
         self.hidden_factor = (2 if bidir else 1) * num_layers
         self.embedding_layer = nn.Embedding(vocab_size, emb_size)
         self.original_encoder = rnn.LSTM(hidden_size=hidden_size, num_layers=num_layers, \
                                          dropout=dropout, bidirectional=bidir, \
                                          prefix='VAEEncoder_org_encoder')
         self.paraphrase_encoder = rnn.LSTM(hidden_size=hidden_size, num_layers=num_layers, \
                                            dropout=dropout, bidirectional=bidir, \
                                            prefix='VAEEncoder_prp_encoder')
         # dense layers that compute mu and log-variance for sampling; RNNs are used
         # above because the input length is variable
         self.output_mu = nn.Dense(units=latent_size)
         self.output_sg = nn.Dense(units=latent_size)
Example No. 24
def test_summary():
    net = gluon.model_zoo.vision.resnet50_v1()
    net.initialize()
    net.summary(mx.nd.ones((32, 3, 224, 224)))

    net2 = nn.Sequential()
    with net2.name_scope():
        net2.add(nn.Embedding(10, 20))
        net2.add(gluon.rnn.LSTM(30))
        net2.add(nn.Dense(40, flatten=False))
    net2.initialize()
    net2.summary(mx.nd.ones((80, 32)))

    net3 = gluon.rnn.LSTM(30)
    net3.initialize()
    begin_state = net3.begin_state(32)
    net3.summary(mx.nd.ones((80, 32, 5)), begin_state)

    net.hybridize()
    assert_raises(AssertionError, net.summary, mx.nd.ones((32, 3, 224, 224)))
Example No. 25
    def __init__(self,
                 mode,
                 vocab_size,
                 num_embed,
                 num_hidden,
                 num_layers,
                 dropout=0.5,
                 **kwargs):
        super(GluonRNNModel, self).__init__(**kwargs)
        with self.name_scope():
            self.drop = nn.Dropout(dropout)
            self.encoder = nn.Embedding(
                vocab_size, num_embed, weight_initializer=mx.init.Uniform(0.1))

            if mode == 'lstm':
                # We create an LSTM layer with the given number of hidden units and layers;
                # in our example num_hidden is 1000 and the number of layers is 2.
                # The input is passed to the LSTM only during the forward pass (see the forward function below).
                self.rnn = rnn.LSTM(num_hidden,
                                    num_layers,
                                    dropout=dropout,
                                    input_size=num_embed)
            elif mode == 'gru':
                # We create a GRU layer with the given number of hidden units and layers;
                # in our example num_hidden is 1000 and the number of layers is 2.
                # The input is passed to the GRU only during the forward pass (see the forward function below).
                self.rnn = rnn.GRU(num_hidden,
                                   num_layers,
                                   dropout=dropout,
                                   input_size=num_embed)
            else:
                # We create a vanilla RNN layer with the given number of hidden units and layers;
                # in our example num_hidden is 1000 and the number of layers is 2.
                # The input is passed to the vanilla RNN only during the forward pass (see the forward function below).
                self.rnn = rnn.RNN(num_hidden,
                                   num_layers,
                                   activation='relu',
                                   dropout=dropout,
                                   input_size=num_embed)
            self.decoder = nn.Dense(vocab_size, in_units=num_hidden)
            self.num_hidden = num_hidden
Example No. 26
    def __init__(self, vocab_size, num_layers=2, units=128, hidden_size=2048, num_heads=4,
                 activation='approx_gelu', two_stream: bool = False, scaled=True, dropout=0.0,
                 attention_dropout=0.0, use_residual=True, clamp_len: typing.Optional[int] = None,
                 use_decoder=True, tie_decoder_weight=True, weight_initializer=None,
                 bias_initializer='zeros', prefix=None, params=None):
        super().__init__(prefix=prefix, params=params)
        assert units % num_heads == 0, 'In TransformerDecoder, units must be divisible ' \
                                       'by the number of heads. Received units={}, ' \
                                       'num_heads={}'.format(units, num_heads)

        self._num_layers = num_layers
        self._units = units
        self._hidden_size = hidden_size
        self._num_heads = num_heads
        self._two_stream = two_stream
        assert not two_stream, 'Not yet implemented.'
        self._dropout = dropout
        self._use_residual = use_residual
        self._clamp_len = clamp_len
        with self.name_scope():
            self.word_embed = nn.Embedding(vocab_size, units)
            self.mask_embed = self.params.get('mask_embed', shape=(1, 1, units))
            self.pos_embed = PositionalEmbedding(units)
            if dropout:
                self.dropout_layer = nn.Dropout(rate=dropout)

            self.transformer_cells = nn.HybridSequential()
            for i in range(num_layers):
                attention_cell = RelativeSegmentEmbeddingPositionalEmbeddingMultiHeadAttentionCell(
                    d_head=units // num_heads, num_heads=num_heads, scaled=scaled,
                    dropout=attention_dropout)
                self.transformer_cells.add(
                    XLNetCell(attention_cell=attention_cell, units=units, hidden_size=hidden_size,
                              num_heads=num_heads, activation=activation,
                              weight_initializer=weight_initializer,
                              bias_initializer=bias_initializer, dropout=dropout, scaled=scaled,
                              use_residual=use_residual, prefix='transformer%d_' % i))
            if use_decoder:
                self.decoder = nn.Dense(
                    vocab_size, flatten=False,
                    params=self.word_embed.params if tie_decoder_weight else None)
Example No. 27
    def __init__(self, vocab_size, word_vocab_size, num_embed, ker_width,
                 num_filters, word_len, hw_layers, lstm_layers, lstm_units,
                 dropout, **kwargs):
        super(LSTMCharWord, self).__init__(**kwargs)
        with self.name_scope():
            self.features = nn.Sequential()
            self.features.add(nn.Embedding(vocab_size, num_embed))

            hw_units = num_embed
            if (ker_width is not None) and (num_filters is not None):
                self.features.add(CharCNN(ker_width, num_embed, num_filters, word_len))
                hw_units = sum(num_filters)

            if hw_layers is not None:
                for _ in range(hw_layers):
                    self.features.add(HighwayLayer(hw_units))

            self.rnn = rnn.LSTM(lstm_units, num_layers=lstm_layers, layout='NTC', dropout=dropout,
                                input_size=hw_units)

            self.drop = nn.Dropout(dropout)

            self.decoder = Decoder(word_vocab_size, lstm_units)
Example No. 28
 def __init__(self,
              vocab_size,
              emb_size,
              hidden_size,
              num_layers=2,
              dropout=.3,
              bidir=True,
              **kwargs):
     '''
     init this class, create relevant rnns, note: we will share the original sentence encoder
     between VAE encoder and VAE decoder
     '''
     super(VAEDecoder, self).__init__(**kwargs)
     with self.name_scope():
         self.embedding_layer = nn.Embedding(vocab_size, emb_size)
         self.paraphrase_decoder = rnn.LSTM(hidden_size=hidden_size, num_layers=num_layers, \
                                            dropout=dropout, bidirectional=bidir, \
                                            prefix='VAEDecoder_prp_decoder')
         # `output_size` should be set equal to the vocab size (a probability distribution
         # over all words in the vocabulary)
         self.dense_output = nn.Dense(vocab_size, activation='tanh')
Example No. 29
 def __init__(self, 
              mode,
              vocab_size,
              num_embed,
              num_hidden,
              num_layers,
              dropout_rnn=0.25,
              dropout=0.25,
              **kwargs):
     super(rnn_net, self).__init__(**kwargs)
     with self.name_scope():
         self.drop = nn.Dropout(dropout_rnn)
         self.mlp = nn.Dense(units=num_hidden, activation='relu')
         self.encoder = nn.Embedding(vocab_size, num_embed, weight_initializer=mx.init.Uniform(0.1))
         if mode == 'lstm':
             self._rnn = rnn.LSTM(num_hidden, num_layers, layout='NTC', dropout=dropout_rnn, input_size=num_embed)
         elif mode == 'gru':
             self._rnn = rnn.GRU(num_hidden, num_layers, layout='NTC', dropout=dropout_rnn, input_size=num_embed)
         else:
             self._rnn = rnn.RNN(num_hidden, num_layers, layout='NTC', activation='relu', dropout=dropout_rnn, input_size=num_embed)
         self.num_hidden = num_hidden
Example No. 30
    def __init__(self,
                 num_tokens,
                 embedding_size,
                 weight_initializer=None,
                 sparse_grad=True,
                 dtype='float32',
                 **kwargs):
        super(_MaskedSumEmbedding, self).__init__(**kwargs)
        self.num_tokens = num_tokens
        self.embedding_size = embedding_size
        self.weight_initializer = weight_initializer
        self.sparse_grad = sparse_grad
        self.dtype = dtype

        with self.name_scope():
            self.embedding = nn.Embedding(
                num_tokens,
                embedding_size,
                weight_initializer=weight_initializer,
                sparse_grad=sparse_grad,
                dtype=dtype)
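Several of the examples above (Nos. 13, 20, and 30) pass sparse_grad=True to nn.Embedding; the short sketch below illustrates the effect, with illustrative sizes that are not from the source:

import mxnet as mx
from mxnet.gluon import nn

embed = nn.Embedding(1000, 16, sparse_grad=True)
embed.initialize()

with mx.autograd.record():
    out = embed(mx.nd.array([[1, 2, 3]]))
out.backward()

# Only the rows that were looked up carry gradient, stored as a row_sparse array.
print(embed.weight.grad().stype)   # -> 'row_sparse'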