Пример #1
0
class Encoder:
    """Sequence encoder: an embedding layer feeding a non-stateful LSTM.

    ``forward`` hands back only the last time step of the LSTM output, and
    ``backward`` injects the incoming gradient at that same time step.
    """

    def __init__(self, vocab_size, wordvec_size, hidden_size):
        """Create both layers and gather their parameters and gradients.

        Args:
            vocab_size: Number of rows of the embedding matrix.
            wordvec_size: Number of embedding columns, also the row count of
                the LSTM input weights.
            hidden_size: Row count of the recurrent weights; the LSTM weight
                matrices and bias are ``4 * hidden_size`` wide.
        """
        gate_width = 4 * hidden_size
        randn = np.random.randn

        # The random draws stay in the order embedding -> Wx -> Wh so that a
        # seeded generator yields the same initial weights as before.
        embedding = (randn(vocab_size, wordvec_size) / 100).astype('f')
        input_weights = (
            randn(wordvec_size, gate_width) / np.sqrt(wordvec_size)
        ).astype('f')
        recurrent_weights = (
            randn(hidden_size, gate_width) / np.sqrt(hidden_size)
        ).astype('f')
        bias = np.zeros(gate_width).astype('f')

        self.embed = TimeEmbedding(embedding)
        self.lstm = TimeLSTM(input_weights, recurrent_weights, bias, stateful=False)

        self.params = self.embed.params + self.lstm.params
        self.grads = self.embed.grads + self.lstm.grads
        # Full LSTM output of the latest forward pass; backward needs its shape.
        self.hs = None

    def forward(self, xs):
        """Run embedding then LSTM on ``xs``; return the last time step.

        The complete LSTM output is stored in ``self.hs``.
        """
        embedded = self.embed.forward(xs)
        self.hs = self.lstm.forward(embedded)
        return self.hs[:, -1, :]

    def backward(self, dh):
        """Back-propagate ``dh``, the gradient of the value ``forward`` returned.

        Returns whatever the embedding layer's ``backward`` returns.
        """
        # Only the final time step was exposed by forward, so every other
        # time step receives a zero upstream gradient.
        dhs = np.zeros_like(self.hs)
        dhs[:, -1, :] = dh
        return self.embed.backward(self.lstm.backward(dhs))
Пример #2
0
 def __init__(self, vocab_size=10000, wordvec_size=650, hidden_size=650, dropout_ratio=0.5):
     """Build a two-layer LSTM language model with dropout between layers.

     Args:
         vocab_size: Number of rows of the embedding matrix and length of
             the output bias.
         wordvec_size: Number of embedding columns; input width of the
             first LSTM.
         hidden_size: Hidden width of both LSTM layers.
         dropout_ratio: Value passed to each of the three TimeDropout layers.
     """
     V  = vocab_size
     D  = wordvec_size
     H  = hidden_size
     rn = np.random.randn
     # Initialise the weights.
     embed_W  = (rn(V, D) / 100).astype('f')
     lstm_Wx1 = (rn(D, 4 * H) / np.sqrt(D)).astype('f')
     lstm_Wh1 = (rn(H, 4 * H) / np.sqrt(H)).astype('f')
     lstm_b1  = np.zeros(4 * H).astype('f')
     # The second LSTM consumes the first LSTM's output, which is H wide,
     # so its input weights need H rows (previously D, which only worked
     # because the defaults have D == H).
     lstm_Wx2 = (rn(H, 4 * H) / np.sqrt(H)).astype('f')
     lstm_Wh2 = (rn(H, 4 * H) / np.sqrt(H)).astype('f')
     lstm_b2  = np.zeros(4 * H).astype('f')
     affine_b = np.zeros(V).astype('f')
     # Build the layer stack. The affine layer receives embed_W.T, a
     # transposed view of the embedding matrix, so both layers share the
     # same underlying weights.
     # NOTE(review): the shared (D, V) weight presumably requires
     # wordvec_size == hidden_size - confirm against TimeAffine.
     self.layers = [
         TimeEmbedding(embed_W),
         TimeDropout(dropout_ratio),
         TimeLSTM(lstm_Wx1, lstm_Wh1, lstm_b1, stateful=True),
         TimeDropout(dropout_ratio),
         TimeLSTM(lstm_Wx2, lstm_Wh2, lstm_b2, stateful=True),
         TimeDropout(dropout_ratio),
         TimeAffine(embed_W.T, affine_b)
     ]
     self.loss_layer     = TimeSoftmaxWithLoss()
     self.softmax        = Softmax()
     # Shortcuts to the layers of each kind inside self.layers.
     self.lstm_layers    = [self.layers[2], self.layers[4]]
     self.dropout_layers = [self.layers[1], self.layers[3], self.layers[5]]
     # Collect every layer's weights and gradients into one flat list each.
     self.params = []
     self.grads  = []
     for layer in self.layers:
         self.params += layer.params
         self.grads  += layer.grads
Пример #3
0
 def __init__(self, vocab_size, wordvec_size, hidden_size):
     """Set up an embedding layer and a non-stateful LSTM layer.

     Args:
         vocab_size: Number of rows of the embedding matrix.
         wordvec_size: Number of embedding columns and row count of the
             LSTM input weights.
         hidden_size: Row count of the recurrent weights; weight matrices
             and bias are ``4 * hidden_size`` wide.
     """
     gate_width = 4 * hidden_size
     randn = np.random.randn

     # Draw order (embedding, input weights, recurrent weights) is kept so
     # seeded initialisation is unchanged.
     embedding = (randn(vocab_size, wordvec_size) / 100).astype('f')
     input_weights = (
         randn(wordvec_size, gate_width) / np.sqrt(wordvec_size)
     ).astype('f')
     recurrent_weights = (
         randn(hidden_size, gate_width) / np.sqrt(hidden_size)
     ).astype('f')
     bias = np.zeros(gate_width).astype('f')

     self.embed = TimeEmbedding(embedding)
     self.lstm = TimeLSTM(input_weights, recurrent_weights, bias, stateful=False)

     self.params = self.embed.params + self.lstm.params
     self.grads = self.embed.grads + self.lstm.grads
     # No forward pass has run yet.
     self.hs = None
Пример #4
0
 def __init__(self, vocab_size, wordvec_size, hidden_size):
     """Build an embedding -> stateful LSTM -> affine stack plus a loss layer.

     Args:
         vocab_size: Number of embedding rows and of affine output columns.
         wordvec_size: Number of embedding columns / LSTM input-weight rows.
         hidden_size: Row count of the recurrent and affine weights.
     """
     gate_width = 4 * hidden_size

     def scaled_randn(rows, cols):
         # Standard-normal matrix divided by sqrt(rows), stored as float32.
         return (np.random.randn(rows, cols) / np.sqrt(rows)).astype('f')

     # Random draws happen in the order embedding, Wx, Wh, affine W so that
     # seeded initialisation is unchanged.
     embedding = (np.random.randn(vocab_size, wordvec_size) / 100).astype('f')
     input_weights = scaled_randn(wordvec_size, gate_width)
     recurrent_weights = scaled_randn(hidden_size, gate_width)
     lstm_bias = np.zeros(gate_width).astype('f')
     output_weights = scaled_randn(hidden_size, vocab_size)
     output_bias = np.zeros(vocab_size).astype('f')

     embed_layer = TimeEmbedding(embedding)
     lstm_layer = TimeLSTM(input_weights, recurrent_weights, lstm_bias, stateful=True)
     affine_layer = TimeAffine(output_weights, output_bias)

     self.layers = [embed_layer, lstm_layer, affine_layer]
     self.loss_layer = TimeSoftmaxWithLoss()
     # Same object as self.layers[1].
     self.lstm_layer = lstm_layer

     # Flatten every layer's weights and gradients into one list each.
     self.params = []
     self.grads = []
     for layer in self.layers:
         self.params.extend(layer.params)
         self.grads.extend(layer.grads)
Пример #5
0
 def __init__(self, vocab_size, wordvec_size, hidden_size):
     """Create embedding, stateful LSTM and affine layers and pool their weights.

     Args:
         vocab_size: Number of embedding rows and of affine output columns.
         wordvec_size: Number of embedding columns / LSTM input-weight rows.
         hidden_size: Row count of the recurrent and affine weights.
     """
     V, D, H = vocab_size, wordvec_size, hidden_size
     gates = 4 * H
     randn = np.random.randn

     # Keep the draw order: embedding, LSTM Wx, LSTM Wh, affine W.
     embedding = (randn(V, D) / 100).astype('f')
     wx = (randn(D, gates) / np.sqrt(D)).astype('f')
     wh = (randn(H, gates) / np.sqrt(H)).astype('f')
     lstm_bias = np.zeros(gates).astype('f')
     out_w = (randn(H, V) / np.sqrt(H)).astype('f')
     out_b = np.zeros(V).astype('f')

     self.embed = TimeEmbedding(embedding)
     self.lstm = TimeLSTM(wx, wh, lstm_bias, stateful=True)
     self.affine = TimeAffine(out_w, out_b)

     # One flat list of weights and one of gradients, in layer order.
     stack = (self.embed, self.lstm, self.affine)
     self.params = [param for layer in stack for param in layer.params]
     self.grads = [grad for layer in stack for grad in layer.grads]
Пример #6
0
class Decoder:
    """Sequence decoder: embedding -> stateful LSTM -> affine scoring layer."""

    def __init__(self, vocab_size, wordvec_size, hidden_size):
        """Create the three layers and pool their parameters and gradients.

        Args:
            vocab_size: Number of embedding rows and of affine output columns.
            wordvec_size: Number of embedding columns / LSTM input-weight rows.
            hidden_size: Row count of the recurrent and affine weights.
        """
        gate_width = 4 * hidden_size
        randn = np.random.randn

        # Draw order (embedding, Wx, Wh, affine W) is preserved so seeded
        # initialisation is unchanged.
        embedding = (randn(vocab_size, wordvec_size) / 100).astype('f')
        input_weights = (
            randn(wordvec_size, gate_width) / np.sqrt(wordvec_size)
        ).astype('f')
        recurrent_weights = (
            randn(hidden_size, gate_width) / np.sqrt(hidden_size)
        ).astype('f')
        lstm_bias = np.zeros(gate_width).astype('f')
        output_weights = (
            randn(hidden_size, vocab_size) / np.sqrt(hidden_size)
        ).astype('f')
        output_bias = np.zeros(vocab_size).astype('f')

        self.embed = TimeEmbedding(embedding)
        self.lstm = TimeLSTM(input_weights, recurrent_weights, lstm_bias, stateful=True)
        self.affine = TimeAffine(output_weights, output_bias)

        self.params = []
        self.grads = []
        for layer in (self.embed, self.lstm, self.affine):
            self.params.extend(layer.params)
            self.grads.extend(layer.grads)

    def forward(self, xs, h):
        """Seed the LSTM with state ``h`` and return the scores for ``xs``."""
        self.lstm.set_state(h)
        embedded = self.embed.forward(xs)
        hidden = self.lstm.forward(embedded)
        return self.affine.forward(hidden)

    def backward(self, dscore):
        """Back-propagate ``dscore`` through all layers.

        Returns ``self.lstm.dh`` as read after the backward pass.
        """
        grad = dscore
        for layer in (self.affine, self.lstm, self.embed):
            grad = layer.backward(grad)
        return self.lstm.dh

    def generate(self, h, start_id, sample_size):
        """Greedily produce ``sample_size`` ids, starting from ``start_id``.

        The LSTM state is set to ``h`` first; each step feeds the previously
        chosen id back in as a (1, 1) array and picks the arg-max score.
        Returns a list of Python ints.
        """
        self.lstm.set_state(h)
        current = start_id
        produced = []
        for _ in range(sample_size):
            step_input = np.array(current).reshape((1, 1))
            hidden = self.lstm.forward(self.embed.forward(step_input))
            scores = self.affine.forward(hidden)
            current = np.argmax(scores.flatten())
            produced.append(int(current))
        return produced
 def setUp(self):
     """Create the TimeLSTM under test and a fixed input batch.

     All values are hard-coded literals, so the fixture is identical on
     every run.
     """
     # Input weights: 3 rows x 12 columns.
     # NOTE(review): 12 is presumably 4 gates x hidden size 3 - confirm
     # against TimeLSTM.
     Wx = np.array([
         [
             9.72009451e-01, -4.97642862e-01,  6.45448952e-01,
             8.10387855e-01,  1.13757673e+00, -5.27114694e-01,
             -9.08624540e-01, 1.61896844e+00, -1.16690977e+00,
             3.93476226e-01, -6.04018422e-01,  5.67830817e-01
         ],
         [
              6.68131790e-01,  6.40157016e-01,  6.90200961e-01,
             -1.39750585e+00, -4.89624070e-01,  8.99789953e-01,
              3.97067428e-04,  1.47459503e+00, -4.95030269e-01,
             -9.22541855e-01, -1.57352198e-01, -1.67160494e+00
         ],
         [
              6.93508859e-01, -9.23177216e-01, -4.83511551e-01,
             -1.18675890e+00, -7.35505045e-01, -1.61403611e+00,
             -2.76067694e-02, -2.48294747e-01,  1.14474446e+00,
              1.86354309e-01, -1.73018002e+00, -4.82520536e-03
         ]
     ])
     # Recurrent weights: 3 rows x 12 columns.
     Wh = np.array([
         [
             -0.88480318,  1.09509583,  0.55657863,
             -0.35096014,  0.18572107,  0.83823659,
             -0.44063768, -0.80897913,  0.35752315,
              1.65812611,  1.40425671,  1.52519905
         ],
         [
             -0.22279229,  1.16363656, -0.47632291,
             -0.16436909, -2.16120359,  0.28362134,
              0.01817155,  0.04836914, -0.30831619,
             -2.0992645 , -0.07302497, -0.72868125
         ],
         [
             -1.40551611,  2.12755955,  1.76232202,
              2.15703084, -1.87387492,  1.22755896,
             -0.84271588,  1.07860737, -0.35473314,
             -0.86293879,  1.67287773,  0.41575087
         ]
     ])
     # Bias: 12 values.
     b  = np.array([
         0.46861655,  0.15954682,  0.38782221,
         1.00791178, -0.38322573,  0.83138721,
         0.98675017, -0.83388618,  1.14392808,
         0.37846653,  0.47617248,  -1.8035631
     ])
     # Constructed with TimeLSTM's default for any further arguments.
     self.time_lstm = TimeLSTM(Wx, Wh, b)
     # Input of shape (3, 12, 3).
     # NOTE(review): presumably (batch, time steps, input width) - confirm
     # against TimeLSTM.forward.
     self.xs = np.array([
         [
             [ 0.71755849,  0.60697008, -0.62888378],
             [-0.49626568, -0.4748135 ,  1.75968249],
             [-0.10438423,  0.28487314,  0.63474513],
             [-0.9923244 ,  0.45072551, -1.64868359],
             [ 1.46760434,  0.35565694, -0.66870418],
             [ 0.35348356, -0.93987496, -2.87130379],
             [-0.29246176, -1.37729218, -0.67958982],
             [-1.4001965 ,  0.55946231,  0.69675162],
             [-1.79238525,  1.57951988,  1.19779083],
             [ 0.87291494,  0.78168426, -0.78577742],
             [ 0.07307044, -1.61895973,  0.9379243 ],
             [ 1.99015425,  0.68183783,  1.77750001]
         ],
         [
             [-0.17596061,  0.81663486,  0.04359994],
             [-0.92350641,  0.45340969,  1.43348315],
             [ 2.33693572,  1.7515518 , -1.3666055 ],
             [ 0.41772987,  0.24850998,  0.03112925],
             [-0.22745121, -0.22542461, -0.31234374],
             [-0.49538611, -0.44607479, -0.0899601 ],
             [-0.94219443,  1.00697691, -0.15114066],
             [-0.08454425, -0.13979634, -0.3159493 ],
             [-0.73268381, -0.15586239,  2.03090773],
             [-1.20719972,  0.2390794 ,  1.85456855],
             [ 0.67798489, -1.25981493,  0.7688309 ],
             [-1.02414315,  1.65732408, -0.29402155]
         ],
         [
             [ 0.07146087, -0.62179875,  0.3156025 ],
             [ 0.44587887,  0.71639604,  0.20851427],
             [ 1.06707963,  1.05846152,  1.49543497],
             [ 0.30010103,  1.20631821,  0.39232967],
             [-0.25315554, -0.32391953, -0.75328256],
             [-0.62199252, -1.39301922,  1.83188775],
             [-0.89011615, -0.5340496 ,  0.93040961],
             [ 0.42915033, -0.17455902,  0.29048757],
             [-1.15432513, -0.29427616, -0.37391368],
             [ 0.97202347, -2.12827099,  1.22032467],
             [ 1.48975681, -1.05964565, -0.64436522],
             [-0.81431589, -0.93004337, -0.10522209]
         ]
     ])
class TestTimeLSTM(unittest.TestCase):
    """Regression tests for TimeLSTM against hard-coded weights and outputs.

    Covers state handling (set_state / reset_state), the forward output and
    the input gradient returned by backward.
    """

    def setUp(self):
        """Create the TimeLSTM under test and a fixed input batch.

        All values are hard-coded literals, so the fixture is identical on
        every run.
        """
        # Input weights: 3 rows x 12 columns.
        # NOTE(review): 12 is presumably 4 gates x hidden size 3 - confirm
        # against TimeLSTM.
        Wx = np.array([
            [
                9.72009451e-01, -4.97642862e-01,  6.45448952e-01,
                8.10387855e-01,  1.13757673e+00, -5.27114694e-01,
                -9.08624540e-01, 1.61896844e+00, -1.16690977e+00,
                3.93476226e-01, -6.04018422e-01,  5.67830817e-01
            ],
            [
                 6.68131790e-01,  6.40157016e-01,  6.90200961e-01,
                -1.39750585e+00, -4.89624070e-01,  8.99789953e-01,
                 3.97067428e-04,  1.47459503e+00, -4.95030269e-01,
                -9.22541855e-01, -1.57352198e-01, -1.67160494e+00
            ],
            [
                 6.93508859e-01, -9.23177216e-01, -4.83511551e-01,
                -1.18675890e+00, -7.35505045e-01, -1.61403611e+00,
                -2.76067694e-02, -2.48294747e-01,  1.14474446e+00,
                 1.86354309e-01, -1.73018002e+00, -4.82520536e-03
            ]
        ])
        # Recurrent weights: 3 rows x 12 columns.
        Wh = np.array([
            [
                -0.88480318,  1.09509583,  0.55657863,
                -0.35096014,  0.18572107,  0.83823659,
                -0.44063768, -0.80897913,  0.35752315,
                 1.65812611,  1.40425671,  1.52519905
            ],
            [
                -0.22279229,  1.16363656, -0.47632291,
                -0.16436909, -2.16120359,  0.28362134,
                 0.01817155,  0.04836914, -0.30831619,
                -2.0992645 , -0.07302497, -0.72868125
            ],
            [
                -1.40551611,  2.12755955,  1.76232202,
                 2.15703084, -1.87387492,  1.22755896,
                -0.84271588,  1.07860737, -0.35473314,
                -0.86293879,  1.67287773,  0.41575087
            ]
        ])
        # Bias: 12 values.
        b  = np.array([
            0.46861655,  0.15954682,  0.38782221,
            1.00791178, -0.38322573,  0.83138721,
            0.98675017, -0.83388618,  1.14392808,
            0.37846653,  0.47617248,  -1.8035631
        ])
        # Constructed with TimeLSTM's default for any further arguments.
        self.time_lstm = TimeLSTM(Wx, Wh, b)
        # Input of shape (3, 12, 3).
        # NOTE(review): presumably (batch, time steps, input width) - confirm
        # against TimeLSTM.forward.
        self.xs = np.array([
            [
                [ 0.71755849,  0.60697008, -0.62888378],
                [-0.49626568, -0.4748135 ,  1.75968249],
                [-0.10438423,  0.28487314,  0.63474513],
                [-0.9923244 ,  0.45072551, -1.64868359],
                [ 1.46760434,  0.35565694, -0.66870418],
                [ 0.35348356, -0.93987496, -2.87130379],
                [-0.29246176, -1.37729218, -0.67958982],
                [-1.4001965 ,  0.55946231,  0.69675162],
                [-1.79238525,  1.57951988,  1.19779083],
                [ 0.87291494,  0.78168426, -0.78577742],
                [ 0.07307044, -1.61895973,  0.9379243 ],
                [ 1.99015425,  0.68183783,  1.77750001]
            ],
            [
                [-0.17596061,  0.81663486,  0.04359994],
                [-0.92350641,  0.45340969,  1.43348315],
                [ 2.33693572,  1.7515518 , -1.3666055 ],
                [ 0.41772987,  0.24850998,  0.03112925],
                [-0.22745121, -0.22542461, -0.31234374],
                [-0.49538611, -0.44607479, -0.0899601 ],
                [-0.94219443,  1.00697691, -0.15114066],
                [-0.08454425, -0.13979634, -0.3159493 ],
                [-0.73268381, -0.15586239,  2.03090773],
                [-1.20719972,  0.2390794 ,  1.85456855],
                [ 0.67798489, -1.25981493,  0.7688309 ],
                [-1.02414315,  1.65732408, -0.29402155]
            ],
            [
                [ 0.07146087, -0.62179875,  0.3156025 ],
                [ 0.44587887,  0.71639604,  0.20851427],
                [ 1.06707963,  1.05846152,  1.49543497],
                [ 0.30010103,  1.20631821,  0.39232967],
                [-0.25315554, -0.32391953, -0.75328256],
                [-0.62199252, -1.39301922,  1.83188775],
                [-0.89011615, -0.5340496 ,  0.93040961],
                [ 0.42915033, -0.17455902,  0.29048757],
                [-1.15432513, -0.29427616, -0.37391368],
                [ 0.97202347, -2.12827099,  1.22032467],
                [ 1.48975681, -1.05964565, -0.64436522],
                [-0.81431589, -0.93004337, -0.10522209]
            ]
        ])

    def test_state(self):
        # set_state must store the given array as .h, and reset_state must
        # clear both .h and .c back to None.
        h = np.random.randn(7, 7)
        self.time_lstm.set_state(h)
        assert_array_equal(h, self.time_lstm.h)
        self.time_lstm.reset_state()
        self.assertEqual(None, self.time_lstm.h)
        self.assertEqual(None, self.time_lstm.c)

    def test_forward(self):
        # The forward output for the fixed input must match the recorded
        # (3, 12, 3) reference values.
        hs = self.time_lstm.forward(self.xs)
        assert_almost_equal(np.array([
            [
                [ 0.5301528 ,  0.4281083 ,  0.31726667],
                [-0.15125458,  0.11109135, -0.81322926],
                [-0.9154287 ,  0.03073275, -0.7118544 ],
                [ 0.01903052,  0.10519677,  0.51448697],
                [ 0.31846583,  0.43916756,  0.15142874],
                [ 0.6307919 ,  0.56641424,  0.12767577],
                [ 0.74492615,  0.41010588,  0.781526  ],
                [-0.21825846,  0.2604803 ,  1.3048488 ],
                [-0.7141188 , -0.24568337,  1.0162368 ],
                [ 0.40743738, -0.7485301 ,  0.15660143],
                [ 0.6559933 , -0.04994468, -0.91404927],
                [ 0.16290103,  0.7862598 , -1.0801914 ]
            ],
            [
                [-0.24008924, -0.39829323,  0.67160046],
                [-0.40160158, -0.37408042, -0.06064677],
                [ 0.12641826,  0.9246878 ,  0.01439308],
                [ 0.45478454, -0.16197163,  0.47862712],
                [ 0.6539599 , -0.21339986,  0.7782586 ],
                [ 0.66772085, -0.29024592,  1.0482595 ],
                [ 0.58890504, -0.7002888 ,  1.2171594 ],
                [ 0.4728623 , -0.85064125,  0.78665596],
                [ 0.09176779, -0.38400438, -0.5185094 ],
                [-0.8482741 , -0.03784448, -0.98402894],
                [-0.09456029,  0.10074405, -0.8739411 ],
                [-1.0006421 , -0.1893259 ,  0.72118574]],
            [
                [ 0.6532587 , -0.03446204, -0.22798873],
                [ 0.00191322,  0.11583798,  0.51861256],
                [-0.10508967, -0.8653865 , -0.27910316],
                [-0.6636366 ,  0.6512917 ,  0.29458234],
                [ 0.60504967, -0.2187419 ,  0.5291746 ],
                [ 0.59398663, -0.07645915, -0.9416629 ],
                [-0.5550622 , -0.00184366, -0.91676426],
                [-0.84544504,  0.24613576, -0.74686164],
                [-0.6791576 , -0.01744348,  0.19078271],
                [ 0.4424824 ,  0.04942419, -0.8943097 ],
                [ 0.83585304,  0.24170044, -0.44091615],
                [ 0.53841674,  0.06517641,  0.5635664 ]
            ]
        ]), hs)

    def test_backward(self):
        # The forward output is reused as the upstream gradient, so the
        # expected values below are only valid for that exact choice.
        dhs = self.time_lstm.forward(self.xs)
        dxs = self.time_lstm.backward(dhs)
        assert_almost_equal(np.array([
            [
                [ 0.13398582, -0.14692113, -0.14036195],
                [ 0.07588957, -0.23084015,  0.13396417],
                [ 0.01643903, -0.13937211,  0.0455663 ],
                [ 0.01059901, -0.25425726, -0.11526247],
                [ 0.41907835, -0.21496703, -0.29487285],
                [ 0.10036876, -0.02304271, -0.00336768],
                [-0.03362638, -0.25355554, -0.00466897],
                [ 0.0426382 , -0.17100458,  0.0088986 ],
                [ 0.15800337, -0.03393397, -0.05257138],
                [-0.38284487,  0.12754066,  0.06260979],
                [-0.11011545, -0.14179969,  0.05320745],
                [ 0.11040838, -0.26075205,  0.00355089]
            ],
            [
                [-0.00721848,  0.10863397, -0.02290008],
                [ 0.02448554,  0.42779642, -0.6901682 ],
                [-0.13468881, -0.00852101, -0.24757166],
                [ 0.46165678,  0.28746065, -0.08837437],
                [-0.061256  , -0.01779122,  0.16727453],
                [ 0.21336646, -0.27075866,  0.00212137],
                [ 0.4284778 , -0.04371689, -0.09592394],
                [-0.03456894, -0.07942928,  0.07711951],
                [ 0.06563383, -0.18600363, -0.01489557],
                [ 0.031198  , -0.13482623,  0.01405822],
                [ 0.05913948, -0.24915774,  0.00096152],
                [ 0.04424267, -0.10381597,  0.01898143]
            ],
            [
                [ 0.06298973, -0.22444938,  0.13032934],
                [-0.04759881,  0.0338732 ,  0.05039414],
                [-0.105564  ,  0.07625255,  0.03889947],
                [-0.146558  , -0.05223962,  0.02283608],
                [ 0.11453921,  0.03403844, -0.05411878],
                [ 0.07570819, -0.3297297 , -0.05588694],
                [ 0.06549456, -0.29991058,  0.0459515 ],
                [ 0.05651642, -0.13871697,  0.01838844],
                [ 0.33854613, -0.6947379 , -0.37949783],
                [-0.14612219, -0.2425579 ,  0.01918346],
                [-0.07906907,  0.05820642, -0.09164417],
                [ 0.19223884, -0.36523458, -0.4367506 ]
            ]
        ]), dxs)
Пример #9
0
class PeekyDecoder:
    """Decoder whose LSTM and affine layers both also see the initial state.

    The state ``h`` handed to ``forward`` / ``generate`` is concatenated in
    front of the LSTM input and in front of the affine input at every time
    step, in addition to being used as the LSTM's initial state.
    """

    def __init__(self, vocab_size, wordvec_size, hidden_size):
        """Create the layers and pool their parameters and gradients.

        Args:
            vocab_size: Number of embedding rows and of affine output columns.
            wordvec_size: Number of embedding columns.
            hidden_size: LSTM hidden width. The LSTM input weights have
                ``hidden_size + wordvec_size`` rows and the affine weights
                ``2 * hidden_size`` rows to make room for the concatenated
                state.
        """
        V = vocab_size
        D = wordvec_size
        H = hidden_size
        rn = np.random.randn
        embed_w = (rn(V, D) / 100).astype('f')
        lstm_Wx = (rn(H + D, 4 * H) / np.sqrt(H + D)).astype('f')
        lstm_Wh = (rn(H, 4 * H) / np.sqrt(H)).astype('f')
        lstm_b = np.zeros(4 * H).astype('f')
        affine_W = (rn(H + H, V) / np.sqrt(H + H)).astype('f')
        affine_b = np.zeros(V).astype('f')
        self.embed = TimeEmbedding(embed_w)
        self.lstm = TimeLSTM(lstm_Wx, lstm_Wh, lstm_b, stateful=True)
        self.affine = TimeAffine(affine_W, affine_b)
        self.params = []
        self.grads = []
        for layer in (self.embed, self.lstm, self.affine):
            self.params += layer.params
            self.grads += layer.grads
        # Hidden width seen by the latest forward pass; backward needs it
        # to split the concatenated gradients.
        self.cache = None

    def forward(self, xs, h):
        """Return the scores for ``xs`` given the initial state ``h``.

        Args:
            xs: 2-D array, unpacked as ``(N, T)``.
            h: 2-D array, unpacked as ``(N, H)``.
        """
        N, T = xs.shape
        N, H = h.shape
        self.lstm.set_state(h)
        out = self.embed.forward(xs)
        # Repeat each row of h T times so that hs[n, t] == h[n].
        hs = np.repeat(h, T, axis=0).reshape(N, T, H)
        out = np.concatenate((hs, out), axis=2)
        out = self.lstm.forward(out)
        out = np.concatenate((hs, out), axis=2)
        score = self.affine.forward(out)
        self.cache = H
        return score

    def backward(self, dscore):
        """Back-propagate ``dscore`` and return the gradient for ``h``.

        The result sums three contributions: the LSTM's own ``dh`` and the
        gradients of the two concatenated copies of ``h`` (affine input and
        LSTM input), each summed over the time axis.
        """
        H = self.cache
        dout = self.affine.backward(dscore)
        # Take the peeked-state part BEFORE narrowing dout; slicing after
        # the reassignment would read the LSTM-output gradient instead.
        dhs0 = dout[:, :, :H]
        dout = dout[:, :, H:]
        dout = self.lstm.backward(dout)
        dhs1 = dout[:, :, :H]
        dembed = dout[:, :, H:]
        self.embed.backward(dembed)
        dhs = dhs0 + dhs1
        dh = self.lstm.dh + np.sum(dhs, axis=1)
        return dh

    def generate(self, h, start_id, sample_size):
        """Greedily produce ``sample_size`` ids, starting from ``start_id``.

        ``h`` is reshaped to ``(1, 1, H)``, so it must hold a single row.
        Returns a list of Python ints.
        """
        sampled = []
        char_id = start_id
        self.lstm.set_state(h)
        H = h.shape[1]
        peeky_h = h.reshape(1, 1, H)
        for _ in range(sample_size):
            x = np.array([char_id]).reshape((1, 1))
            out = self.embed.forward(x)
            out = np.concatenate((peeky_h, out), axis=2)
            out = self.lstm.forward(out)
            out = np.concatenate((peeky_h, out), axis=2)
            score = self.affine.forward(out)
            char_id = np.argmax(score.flatten())
            # Store plain ints, as Decoder.generate does, rather than
            # NumPy integer scalars.
            sampled.append(int(char_id))
        return sampled