Example #1
import numpy as np
from common.time_layers import TimeEmbedding, TimeLSTM  # Time* layers assumed from the book's repo

class Encoder:
    def __init__(self, vocab_size, wordvec_size, hidden_size):
        V = vocab_size
        D = wordvec_size
        H = hidden_size
        rn = np.random.randn
        embed_w = (rn(V, D) / 100).astype('f')
        lstm_Wx = (rn(D, 4 * H) / np.sqrt(D)).astype('f')
        lstm_Wh = (rn(H, 4 * H) / np.sqrt(H)).astype('f')
        lstm_b = np.zeros(4 * H).astype('f')
        self.embed = TimeEmbedding(embed_w)
        self.lstm = TimeLSTM(lstm_Wx, lstm_Wh, lstm_b, stateful=False)
        self.params = self.embed.params + self.lstm.params
        self.grads = self.embed.grads + self.lstm.grads
        self.hs = None

    def forward(self, xs):
        xs = self.embed.forward(xs)
        hs = self.lstm.forward(xs)
        self.hs = hs
        return hs[:, -1, :]

    def backward(self, dh):
        dhs = np.zeros_like(self.hs)
        dhs[:, -1, :] = dh
        dout = self.lstm.backward(dhs)
        dout = self.embed.backward(dout)
        return dout
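
A quick shape check (illustrative sizes), assuming the Time* layers imported above are available: the encoder maps a (batch, time) array of token IDs to the hidden state of the last step, and backward routes the gradient of that state back through time.

enc = Encoder(vocab_size=100, wordvec_size=16, hidden_size=32)
xs = np.random.randint(0, 100, size=(4, 7))  # (batch, time) token IDs
h = enc.forward(xs)                          # (4, 32): last hidden state
enc.backward(np.ones_like(h))                # gradients accumulate in enc.grads
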
Example #2
 def __init__(self, vocab_size, wordvec_size, hidden_size):
     V = vocab_size
     D = wordvec_size
     H = hidden_size
     rn = np.random.randn
     embed_w = (rn(V, D) / 100).astype('f')
     lstm_Wx = (rn(D, 4 * H) / np.sqrt(D)).astype('f')
     lstm_Wh = (rn(H, 4 * H) / np.sqrt(H)).astype('f')
     lstm_b = np.zeros(4 * H).astype('f')
     self.embed = TimeEmbedding(embed_w)
     self.lstm = TimeLSTM(lstm_Wx, lstm_Wh, lstm_b, stateful=False)
     self.params = self.embed.params + self.lstm.params
     self.grads = self.embed.grads + self.lstm.grads
     self.hs = None
Example #3
 def __init__(self, vocab_size, wordvec_size, hidden_size):
     V  = vocab_size
     D  = wordvec_size
     H  = hidden_size
     rn = np.random.randn
     # Initialise weights
     embed_W  = (rn(V, D) / 100).astype('f')
     rnn_Wx   = (rn(D, H) / np.sqrt(D)).astype('f')
     rnn_Wh   = (rn(H, H) / np.sqrt(H)).astype('f')
     rnn_b    = np.zeros(H).astype('f')
     affine_W = (rn(H, V) / np.sqrt(H)).astype('f')
     affine_b = np.zeros(V).astype('f')
     # Generate layers
     self.layers = [
         TimeEmbedding(embed_W),
         TimeRNN(rnn_Wx, rnn_Wh, rnn_b, stateful=True),
         TimeAffine(affine_W, affine_b)
     ]
     self.loss_layer = TimeSoftmaxWithLoss()
     self.rnn_layer  = self.layers[1]
     # Collect all weights and gradients into single lists
     self.params = []
     self.grads  = []
     for layer in self.layers:
         self.params += layer.params
         self.grads  += layer.grads
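
This constructor is typically paired with a forward pass that simply chains the layers into the loss layer; a minimal sketch in the style of the book's SimpleRnnlm (assumed, since this example shows only __init__):

 def forward(self, xs, ts):
     for layer in self.layers:
         xs = layer.forward(xs)
     loss = self.loss_layer.forward(xs, ts)
     return loss

 def reset_state(self):
     self.rnn_layer.reset_state()
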
Example #4
 def __init__(self, vocab_size=10000, wordvec_size=650, hidden_size=650, dropout_ratio=0.5):
     V  = vocab_size
     D  = wordvec_size
     H  = hidden_size
     rn = np.random.randn
     # Initialise weights
     embed_W  = (rn(V, D) / 100).astype('f')
     lstm_Wx1 = (rn(D, 4 * H) / np.sqrt(D)).astype('f')
     lstm_Wh1 = (rn(H, 4 * H) / np.sqrt(H)).astype('f')
     lstm_b1  = np.zeros(4 * H).astype('f')
     lstm_Wx2 = (rn(H, 4 * H) / np.sqrt(H)).astype('f')  # second LSTM consumes the first LSTM's H-dim output
     lstm_Wh2 = (rn(H, 4 * H) / np.sqrt(H)).astype('f')
     lstm_b2  = np.zeros(4 * H).astype('f')
     affine_b = np.zeros(V).astype('f')
     # Generate layers
     self.layers = [
         TimeEmbedding(embed_W),
         TimeDropout(dropout_ratio),
         TimeLSTM(lstm_Wx1, lstm_Wh1, lstm_b1, stateful=True),
         TimeDropout(dropout_ratio),
         TimeLSTM(lstm_Wx2, lstm_Wh2, lstm_b2, stateful=True),
         TimeDropout(dropout_ratio),
         TimeAffine(embed_W.T, affine_b)  # weight tying: embedding matrix reused as output weights (needs D == H)
     ]
     self.loss_layer     = TimeSoftmaxWithLoss()
     self.softmax        = Softmax()
     self.lstm_layers    = [self.layers[2], self.layers[4]]
     self.dropout_layers = [self.layers[1], self.layers[3], self.layers[5]]
     # Collect all weights and gradients into single lists
     self.params = []
     self.grads  = []
     for layer in self.layers:
         self.params += layer.params
         self.grads  += layer.grads
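
Note the weight tying in the final TimeAffine above: the transposed embedding matrix doubles as the output projection, which is why this model needs wordvec_size == hidden_size. The shape logic can be checked with plain numpy (illustrative sizes):

import numpy as np
V, D, H = 10000, 650, 650                  # tying requires D == H
embed_W = np.random.randn(V, D).astype('f')
h = np.random.randn(2, H).astype('f')      # a batch of two hidden states
scores = h.dot(embed_W.T)                  # (2, V): embedding reused as output weights
assert scores.shape == (2, V)
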
Example #5
import numpy as np
from common.time_layers import TimeEmbedding, TimeLSTM, TimeAffine  # assumed module path

class Decoder:
    def __init__(self, vocab_size, wordvec_size, hidden_size):
        V = vocab_size
        D = wordvec_size
        H = hidden_size
        rn = np.random.randn
        embed_w = (rn(V, D) / 100).astype('f')
        lstm_Wx = (rn(D, 4 * H) / np.sqrt(D)).astype('f')
        lstm_Wh = (rn(H, 4 * H) / np.sqrt(H)).astype('f')
        lstm_b = np.zeros(4 * H).astype('f')
        affine_W = (rn(H, V) / np.sqrt(H)).astype('f')
        affine_b = np.zeros(V).astype('f')
        self.embed = TimeEmbedding(embed_w)
        self.lstm = TimeLSTM(lstm_Wx, lstm_Wh, lstm_b, stateful=True)
        self.affine = TimeAffine(affine_W, affine_b)
        self.params = []
        self.grads = []
        for layer in (self.embed, self.lstm, self.affine):
            self.params += layer.params
            self.grads += layer.grads

    def forward(self, xs, h):
        self.lstm.set_state(h)
        out = self.embed.forward(xs)
        out = self.lstm.forward(out)
        score = self.affine.forward(out)
        return score

    def backward(self, dscore):
        dout = self.affine.backward(dscore)
        dout = self.lstm.backward(dout)
        dout = self.embed.backward(dout)
        dh = self.lstm.dh
        return dh

    def generate(self, h, start_id, sample_size):
        sampled = []
        sample_id = start_id
        self.lstm.set_state(h)
        for _ in range(sample_size):
            x = np.array(sample_id).reshape((1, 1))
            out = self.embed.forward(x)
            out = self.lstm.forward(out)
            score = self.affine.forward(out)
            sample_id = np.argmax(score.flatten())
            sampled.append(int(sample_id))
        return sampled
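
Encoder (Example #1) and Decoder are normally composed into a single seq2seq model; a minimal sketch following the book's Seq2seq class, with TimeSoftmaxWithLoss assumed from the same layers module:

class Seq2seq:
    def __init__(self, vocab_size, wordvec_size, hidden_size):
        self.encoder = Encoder(vocab_size, wordvec_size, hidden_size)
        self.decoder = Decoder(vocab_size, wordvec_size, hidden_size)
        self.softmax = TimeSoftmaxWithLoss()
        self.params = self.encoder.params + self.decoder.params
        self.grads = self.encoder.grads + self.decoder.grads

    def forward(self, xs, ts):
        decoder_xs, decoder_ts = ts[:, :-1], ts[:, 1:]  # teacher forcing: shift targets by one
        h = self.encoder.forward(xs)
        score = self.decoder.forward(decoder_xs, h)
        return self.softmax.forward(score, decoder_ts)

    def backward(self, dout=1):
        dout = self.softmax.backward(dout)
        dh = self.decoder.backward(dout)
        return self.encoder.backward(dh)
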
Example #6
 def __init__(self, vocab_size, wordvec_size, hidden_size):
     V = vocab_size
     D = wordvec_size
     H = hidden_size
     rn = np.random.randn
     embed_w = (rn(V, D) / 100).astype('f')
     lstm_Wx = (rn(D, 4 * H) / np.sqrt(D)).astype('f')
     lstm_Wh = (rn(H, 4 * H) / np.sqrt(H)).astype('f')
     lstm_b = np.zeros(4 * H).astype('f')
     affine_W = (rn(H, V) / np.sqrt(H)).astype('f')
     affine_b = np.zeros(V).astype('f')
     self.embed = TimeEmbedding(embed_w)
     self.lstm = TimeLSTM(lstm_Wx, lstm_Wh, lstm_b, stateful=True)
     self.affine = TimeAffine(affine_W, affine_b)
     self.params = []
     self.grads = []
     for layer in (self.embed, self.lstm, self.affine):
         self.params += layer.params
         self.grads += layer.grads
Example #7
 def __init__(self, **kwargs):
     super(ModelTest, self).__init__()
     self.time_emb = TimeEmbedding(20, 64)
Example #8
import numpy as np
from common.time_layers import TimeEmbedding, TimeLSTM, TimeAffine  # assumed module path

class PeekyDecoder:
    def __init__(self, vocab_size, wordvec_size, hidden_size):
        V = vocab_size
        D = wordvec_size
        H = hidden_size
        rn = np.random.randn
        embed_w = (rn(V, D) / 100).astype('f')
        lstm_Wx = (rn(H + D, 4 * H) / np.sqrt(H + D)).astype('f')
        lstm_Wh = (rn(H, 4 * H) / np.sqrt(H)).astype('f')
        lstm_b = np.zeros(4 * H).astype('f')
        affine_W = (rn(H + H, V) / np.sqrt(H + H)).astype('f')
        affine_b = np.zeros(V).astype('f')
        self.embed = TimeEmbedding(embed_w)
        self.lstm = TimeLSTM(lstm_Wx, lstm_Wh, lstm_b, stateful=True)
        self.affine = TimeAffine(affine_W, affine_b)
        self.params = []
        self.grads = []
        for layer in (self.embed, self.lstm, self.affine):
            self.params += layer.params
            self.grads += layer.grads
        self.cache = None

    def forward(self, xs, h):
        N, T = xs.shape
        N, H = h.shape
        self.lstm.set_state(h)
        out = self.embed.forward(xs)
        hs = np.repeat(h, T, axis=0).reshape(N, T, H)
        out = np.concatenate((hs, out), axis=2)
        out = self.lstm.forward(out)
        out = np.concatenate((hs, out), axis=2)
        score = self.affine.forward(out)
        self.cache = H
        return score

    def backward(self, dscore):
        H = self.cache
        dout = self.affine.backward(dscore)
        dout, dhs0 = dout[:, :, H:], dout[:, :, :H]  # split off the peeked h before dout is overwritten
        dout = self.lstm.backward(dout)
        dembed, dhs1 = dout[:, :, H:], dout[:, :, :H]
        self.embed.backward(dembed)
        dhs = dhs0 + dhs1
        dh = self.lstm.dh + np.sum(dhs, axis=1)
        return dh

    def generate(self, h, start_id, sample_size):
        sampled = []
        char_id = start_id
        self.lstm.set_state(h)
        H = h.shape[1]
        peeky_h = h.reshape(1, 1, H)
        for _ in range(sample_size):
            x = np.array([char_id]).reshape((1, 1))
            out = self.embed.forward(x)
            out = np.concatenate((peeky_h, out), axis=2)
            out = self.lstm.forward(out)
            out = np.concatenate((peeky_h, out), axis=2)
            score = self.affine.forward(out)
            char_id = int(np.argmax(score.flatten()))
            sampled.append(char_id)
        return sampled
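
The "peeky" trick hinges on broadcasting the encoder state h to every time step before concatenation. The repeat-and-reshape used in forward can be verified in isolation with plain numpy:

import numpy as np
N, T, H = 2, 5, 4
h = np.random.randn(N, H)
hs = np.repeat(h, T, axis=0).reshape(N, T, H)  # copy each row of h across the time axis
assert all((hs[:, t, :] == h).all() for t in range(T))
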
Example #9
    def __init__(self,
                 conf,
                 char_embed_matrix=None,
                 term_embed_matrix=None,
                 name='hybridconvmodel.h5',
                 train_embed=False,
                 train_top=True):
        self.batch_size = conf.batch_size
        self.MAX_LEN = conf.MAX_LEN
        self.PE = conf.PE
        self.name = name
        # char branch
        input = Input(shape=(conf.MAX_LEN, ), dtype='int32')
        topic_in = Input(shape=(20, ), dtype='float32')
        if char_embed_matrix is None:
            x = Embedding(conf.V, 32)(input)
        else:
            embed1 = Embedding(char_embed_matrix.shape[0],
                               char_embed_matrix.shape[1],
                               weights=[char_embed_matrix],
                               trainable=train_embed)
            x = embed1(input)
            xt_repeat = RepeatVector(conf.MAX_LEN)(topic_in)
            x = Concatenate()([x, xt_repeat])
        if self.PE:
            e_input = Input(shape=(conf.MAX_LEN, ),
                            dtype='int32',
                            name='PE_in')
            ex = Embedding(self.MAX_LEN, 32, name='PE')(e_input)
            if conf.CPE:
                ex2 = TimeEmbedding()(e_input)
                x = Concatenate()([x, ex, ex2])
            else:
                x = Concatenate()([x, ex])

        hs_char = self.feed_forward(x, train_top)

        input_term = Input(shape=(conf.MAX_LEN_TERM, ), dtype='int32')
        if term_embed_matrix is None:
            xterm = Embedding(conf.V, 32)(input_term)
        else:
            embed1 = Embedding(term_embed_matrix.shape[0],
                               term_embed_matrix.shape[1],
                               weights=[term_embed_matrix],
                               trainable=train_embed)
            xterm = embed1(input_term)
            xt_repeat = RepeatVector(conf.MAX_LEN_TERM)(topic_in)
            xterm = Concatenate()([xterm, xt_repeat])
            # xterm = Dense(64, activation='relu')(xterm)
        if conf.PE:
            eterm_input = Input(shape=(conf.MAX_LEN_TERM, ),
                                dtype='int32',
                                name='PE_term_in')
            ex_term = Embedding(conf.MAX_LEN_TERM, 32,
                                name='PEterm')(eterm_input)
            if conf.CPE:
                ex_term2 = TimeEmbedding()(eterm_input)
                xterm = Concatenate()([xterm, ex_term, ex_term2])
            else:
                xterm = Concatenate()([xterm, ex_term])
        hs_term = self.feed_forward(xterm, train_top)

        # l1_weight = 5e-6, kernel_regularizer=l1(l1_weight)
        input_feat = Input(shape=(conf.NUM_FEAT, ), dtype='float32')
        hfeat = Dense(8, activation='relu', trainable=train_top)(input_feat)
        if conf.use_tfidf:
            l1_weight = 5e-6
            NV = 10000
            ds_dim = 128
            tfidf_in = Input(shape=(NV, ), dtype='float32')
            term_tfidf_in = Input(shape=(NV, ), dtype='float32')
            htfidf = Dense(ds_dim,
                           activation='relu',
                           trainable=train_top,
                           kernel_regularizer=l1(l1_weight))(tfidf_in)
            hterm_tfidf = Dense(
                ds_dim,
                activation='relu',
                trainable=train_top,
                kernel_regularizer=l1(l1_weight))(term_tfidf_in)
            hs = Concatenate()(
                [hs_char, hs_term, hfeat, topic_in, htfidf, hterm_tfidf])
            z = Dense(128, activation='relu', trainable=train_top)(hs)
        else:
            hs = Concatenate()([hs_char, hs_term, hfeat, topic_in])
            # hs = BatchNormalization()(hs)
            z = Dense(128, activation='relu', trainable=train_top)(hs)
        # z = BatchNormalization()(z)
        z = Dense(conf.C, activation='softmax', trainable=train_top)(z)
        if self.PE:
            if conf.use_tfidf:
                model = Model([
                    input, e_input, input_term, eterm_input, input_feat,
                    topic_in, tfidf_in, term_tfidf_in
                ], z)
            else:
                model = Model([
                    input, e_input, input_term, eterm_input, input_feat,
                    topic_in
                ], z)
        else:
            model = Model([input, input_term, input_feat, topic_in], z)
        # opt = Adagrad(lr=lr)
        opt = Adam(lr=conf.lr)
        model.compile(opt, 'categorical_crossentropy', metrics=['acc'])
        self.model = model
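
Stripped of the PE and tf-idf options, the model above is a multi-branch functional graph: chars and terms are embedded separately, each branch is squeezed to a vector, then concatenated with dense features and classified. A self-contained miniature of that pattern (tf.keras, illustrative sizes; GlobalMaxPooling1D stands in for the class's feed_forward branch, which is not shown here):

import numpy as np
from tensorflow.keras.layers import Input, Embedding, GlobalMaxPooling1D, Dense, Concatenate
from tensorflow.keras.models import Model

char_in = Input(shape=(50,), dtype='int32')
term_in = Input(shape=(30,), dtype='int32')
feat_in = Input(shape=(8,), dtype='float32')

h_char = GlobalMaxPooling1D()(Embedding(5000, 32)(char_in))
h_term = GlobalMaxPooling1D()(Embedding(5000, 32)(term_in))
h_feat = Dense(8, activation='relu')(feat_in)

z = Dense(64, activation='relu')(Concatenate()([h_char, h_term, h_feat]))
out = Dense(4, activation='softmax')(z)

model = Model([char_in, term_in, feat_in], out)
model.compile('adam', 'categorical_crossentropy', metrics=['acc'])

# Dummy batch to confirm the wiring:
x = [np.random.randint(0, 5000, (2, 50)),
     np.random.randint(0, 5000, (2, 30)),
     np.random.rand(2, 8).astype('float32')]
print(model.predict(x).shape)  # (2, 4)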