Пример #1
0
    def check_forward(self, c_prev1_data, c_prev2_data, x1_data, x2_data):
        """Check ``functions.slstm`` against a NumPy reference computation.

        The four ``*_data`` arguments are wrapped in ``chainer.Variable`` and
        passed to ``functions.slstm``.  The expected values are computed from
        the fixture attributes ``self.x1``, ``self.x2``, ``self.c_prev1`` and
        ``self.c_prev2`` rather than from the arguments.  Both outputs must
        have dtype ``numpy.float32`` and match the reference.
        """
        variables = [
            chainer.Variable(data)
            for data in (c_prev1_data, c_prev2_data, x1_data, x2_data)
        ]
        c, h = functions.slstm(*variables)
        for out in (c, h):
            self.assertEqual(out.data.dtype, numpy.float32)

        # Reference computation: gate k is read from columns k and k + 4,
        # in the order a, i, f, o.
        a1, i1, f1, o1 = [self.x1[:, [k, k + 4]] for k in range(4)]
        a2, i2, f2, o2 = [self.x2[:, [k, k + 4]] for k in range(4)]

        # Accumulate the four terms left to right so the floating-point
        # summation order stays fixed.
        c_expect = _sigmoid(i1) * numpy.tanh(a1)
        c_expect = c_expect + _sigmoid(i2) * numpy.tanh(a2)
        c_expect = c_expect + _sigmoid(f1) * self.c_prev1
        c_expect = c_expect + _sigmoid(f2) * self.c_prev2
        h_expect = _sigmoid(o1 + o2) * numpy.tanh(c_expect)

        for expect, out in ((c_expect, c), (h_expect, h)):
            gradient_check.assert_allclose(expect, out.data)
Пример #2
0
    def check_forward(self, c_prev1_data, c_prev2_data, x1_data, x2_data):
        """Compare ``functions.slstm`` with a NumPy reference and print the result.

        The ``*_data`` arguments are wrapped in ``chainer.Variable`` and fed
        to ``functions.slstm``; the reference is computed from ``self.x1``,
        ``self.x2``, ``self.c_prev1`` and ``self.c_prev2``.  The outcome of
        each ``numpy.allclose`` comparison is only printed -- this method does
        not raise when the values disagree.
        """
        c, h = functions.slstm(
            chainer.Variable(c_prev1_data),
            chainer.Variable(c_prev2_data),
            chainer.Variable(x1_data),
            chainer.Variable(x2_data))

        def gate(x, k):
            # Gate k is read from columns k and k + 4 of the input.
            return x[:, [k, k + 4]]

        # Reference computation; gates are taken in the order a, i, f, o.
        a1, i1, f1, o1 = gate(self.x1, 0), gate(self.x1, 1), \
            gate(self.x1, 2), gate(self.x1, 3)
        a2, i2, f2, o2 = gate(self.x2, 0), gate(self.x2, 1), \
            gate(self.x2, 2), gate(self.x2, 3)

        from_input1 = _sigmoid(i1) * numpy.tanh(a1)
        from_input2 = _sigmoid(i2) * numpy.tanh(a2)
        # Keep the original left-to-right summation order.
        c_expect = ((from_input1 + from_input2)
                    + _sigmoid(f1) * self.c_prev1) \
            + _sigmoid(f2) * self.c_prev2
        h_expect = _sigmoid(o1 + o2) * numpy.tanh(c_expect)

        print("state      = ", numpy.allclose(c_expect, c.data))
        print("hidden     = ", numpy.allclose(h_expect, h.data))
Пример #3
0
    def __call__(self, tree):
        """Compose the representation of ``tree`` bottom-up and return its cell.

        Chains of single-child nodes are skipped first, so the work is done on
        the first node that is a leaf or has more than one child.  The
        resulting vector is stored in ``data['vector']`` of that node.

        * Leaf: the word is looked up with ``self.get_word_vec`` and projected
          with ``self.embed2hidden``.  When ``self.is_leaf_as_chunk`` is set,
          the word is split on ``'/'`` and the projected vectors of the pieces
          are averaged.  The returned cell is a zero ``Variable`` of shape
          ``(1, self.mem_units)``.
        * Internal node: both children are processed recursively, their
          vectors and attention vectors are concatenated, and the gate
          pre-activations are fed to ``F.slstm`` together with the children's
          cells.

        Args:
            tree: Node exposing ``children``, ``is_leaf()``, ``is_root()``,
                ``get_word()`` and a ``data`` mapping.  Internal nodes are
                expected to have exactly two children after unary skipping.

        Returns:
            The memory cell ``c`` for the processed node.

        Raises:
            ValueError: If ``self.comp_type`` is neither
                ``Composition.tree_attention_lstm`` nor
                ``Composition.attention_slstm`` when an internal node is
                composed.
        """
        # Skip nodes that have exactly one child.
        while len(tree.children) == 1 and not tree.is_leaf():
            tree = tree.children[0]
        if tree.is_leaf():
            word = tree.get_word()
            if self.is_leaf_as_chunk:
                # Average the projected embeddings of the '/'-separated parts.
                tokens = word.split('/')
                vector = None
                for tok in tokens:
                    embed = self.get_word_vec(tok)
                    if vector is None:
                        vector = self.embed2hidden(embed)
                    else:
                        vector += self.embed2hidden(embed)
                vector /= len(tokens)
            else:
                embed = self.get_word_vec(word)
                vector = self.embed2hidden(embed)
            c = Variable(np.zeros((1, self.mem_units), dtype=np.float32))
        else:
            left_tree, right_tree = tree.children
            leftc = self(left_tree)
            rightc = self(right_tree)
            # The recursive calls stored each child's vector on the node
            # reached after skipping unary chains, so follow the same chain
            # here to find it.
            while len(left_tree.children) == 1 and not left_tree.is_leaf():
                left_tree = left_tree.children[0]
            while len(right_tree.children) == 1 and not right_tree.is_leaf():
                right_tree = right_tree.children[0]
            left_vec = left_tree.data['vector']
            right_vec = right_tree.data['vector']

            # Composition by tree LSTM.
            left_attention_vec = self.calc_attention(left_tree)
            right_attention_vec = self.calc_attention(right_tree)
            concat = F.concat(
                (left_vec, right_vec, left_attention_vec, right_attention_vec))
            u_l = self.updatel(concat)
            u_r = self.updater(concat)
            i_l = self.inputl(concat)
            i_r = self.inputr(concat)
            if self.comp_type == Composition.tree_attention_lstm:
                concatl = F.concat((left_vec, left_attention_vec))
                concatr = F.concat((right_vec, right_attention_vec))
                # NOTE(review): the left forget gate is computed from the
                # right child's features and vice versa -- confirm that this
                # crossing is intentional.
                f_l = self.forgetl(concatr)
                f_r = self.forgetr(concatl)
            elif self.comp_type == Composition.attention_slstm:
                f_l = self.forgetl(concat)
                f_r = self.forgetr(concat)
            else:
                # Without this branch f_l / f_r would be unbound below and
                # the failure would surface as an opaque UnboundLocalError.
                raise ValueError(
                    'unsupported composition type: {!r}'.format(
                        self.comp_type))
            o_l = self.outputl(concat)
            o_r = self.outputr(concat)
            l_v = F.concat((u_l, i_l, f_l, o_l))
            r_v = F.concat((u_r, i_r, f_r, o_r))
            c, vector = F.slstm(leftc, rightc, l_v, r_v)

        tree.data['vector'] = vector
        if tree.is_root():
            # Called for its side effects only; the result is discarded.
            self.calc_attention(tree)
        return c
Пример #4
0
    def check_forward(self, inputs, backend_config):
        """Check ``functions.slstm`` against ``self.forward_cpu``.

        The expected outputs are computed by ``self.forward_cpu(inputs)``
        before the inputs are (optionally) transferred with ``cuda.to_gpu``.
        The function is then run inside ``backend_config``; both outputs must
        have dtype ``self.dtype`` and match the expected values within
        ``self.check_forward_options``.
        """
        # The unpacking only enforces that exactly four arrays were given.
        c_prev1, c_prev2, x1, x2 = inputs
        c_expect, h_expect = self.forward_cpu(inputs)

        arrays = cuda.to_gpu(inputs) if backend_config.use_cuda else inputs
        variables = [chainer.Variable(array) for array in arrays]

        with backend_config:
            c, h = functions.slstm(*variables)
            for out in (c, h):
                assert out.data.dtype == self.dtype

        for expect, out in ((c_expect, c), (h_expect, h)):
            testing.assert_allclose(
                expect, out.data, **self.check_forward_options)
Пример #5
0
    def check_forward(self, inputs, backend_config):
        """Run ``functions.slstm`` under ``backend_config`` and verify it.

        Reference outputs come from ``self.forward_cpu(inputs)``.  When
        ``backend_config.use_cuda`` is set the inputs are moved with
        ``cuda.to_gpu`` first.  The outputs are checked for dtype
        ``self.dtype`` and compared with the reference using the keyword
        options in ``self.check_forward_options``.
        """
        # Unpacking is kept so a wrong number of inputs fails right here.
        c_prev1, c_prev2, x1, x2 = inputs
        expected_c, expected_h = self.forward_cpu(inputs)

        if backend_config.use_cuda:
            inputs = cuda.to_gpu(inputs)
        wrapped = tuple(chainer.Variable(xx) for xx in inputs)

        with backend_config:
            actual_c, actual_h = functions.slstm(*wrapped)
            assert actual_c.data.dtype == self.dtype
            assert actual_h.data.dtype == self.dtype

        options = self.check_forward_options
        testing.assert_allclose(expected_c, actual_c.data, **options)
        testing.assert_allclose(expected_h, actual_h.data, **options)
 def expr_for_tree(self, tree, decorate=False):
     """Recursively build the ``(c, h)`` pair for ``tree``.

     * Leaf: returns ``zeros((1, self.hdim))`` together with the embedding of
       the label's index in ``self.w2i`` (index 0 when the label is missing).
     * One child: the child must be a leaf; its pair is passed through
       ``self.WU`` and ``F.lstm``.
     * Two children: both are processed recursively and merged with
       ``F.slstm`` after applying ``self.W1`` / ``self.W2``.

     When ``decorate`` is true, the resulting pair of every non-leaf node is
     also stored on that node as ``tree._e``.
     """
     if tree.isleaf():
         initial_cell = zeros((1, self.hdim))
         word_id = self.w2i.get(tree.label, 0)
         return initial_cell, self.embed(makevar(word_id))

     if len(tree.children) == 1:
         only_child = tree.children[0]
         assert (only_child.isleaf())
         # The child is a leaf, so ``decorate`` would have no effect on it.
         child_c, child_e = self.expr_for_tree(only_child)
         c, h = F.lstm(child_c, self.WU(child_e))
     else:
         assert (len(tree.children) == 2), tree.children[0]
         left_c, left_e = self.expr_for_tree(tree.children[0], decorate)
         right_c, right_e = self.expr_for_tree(tree.children[1], decorate)
         c, h = F.slstm(left_c, right_c, self.W1(left_e), self.W2(right_e))

     state = (c, h)
     if decorate:
         tree._e = state
     return state
Пример #7
0
 def forward(self, inputs, device):
     """Apply ``functions.slstm`` to the four entries of ``inputs``.

     ``inputs`` must unpack into exactly four items, which are passed to
     ``functions.slstm`` in order.  ``device`` is accepted but not used here.
     """
     c_first, c_second, x_first, x_second = inputs
     return functions.slstm(c_first, c_second, x_first, x_second)
Пример #8
0
    def __call__(self, train, x_batch, y_batch=None):
        """Run the forward pass and return predictions, loss and raw scores.

        Steps, as implemented below:

        1. Every position of the input is passed through ``h_lstm`` and
           stored as a leaf of an array-embedded binary tree (the leaf for
           position ``p`` lives at index ``x_len - 1 + p``).
        2. Sibling pairs ``(s, s + 1)`` are merged with ``F.slstm`` into their
           parent at index ``(s - 1) // 2``, from the deepest level upwards.
        3. Every node output is split into two halves along the batch axis.
           Each half attends over the node outputs of the other half via
           ``__attend_fast`` and ``m_lstm``.
        4. The two attended summaries are concatenated and classified by
           ``h_l1`` and ``l_y``.

        NOTE(review): the index layout appears to assume that ``x_len`` is a
        power of two and at least 2, and that the batch holds two equally
        sized groups stacked along axis 0 -- confirm against the caller.

        Args:
            train: Truthy in training mode; enables dropout and the loss.
            x_batch: Batch of equal-length input sequences.
            y_batch: Labels; only read when ``train`` is truthy.

        Returns:
            Tuple ``(preds, accum_loss, y)``: the argmax over axis 1 of
            ``y.data`` as a list, the softmax cross entropy (``None`` when not
            training) and the output of ``l_y``.
        """
        model = self
        n_units = self.__n_units
        mod = self.__mod
        gpu = self.__gpu
        batch_size = len(x_batch)
        # Floor division keeps this an int on Python 3 as well; it is used
        # as a reshape dimension below.
        half_batch = batch_size // 2
        x_len = len(x_batch[0])
        depth = int(log(x_len, 2)) + 1

        self.reset_state()

        n_nodes = 2**depth - 1
        list_a = [[] for _ in range(n_nodes)]
        list_c = [[] for _ in range(n_nodes)]
        for pos in range(x_len):
            x_data = mod.array([x_batch[k][pos] for k in range(batch_size)])
            x_data = Variable(x_data, volatile=not train)
            x_data = model.h_lstm(F.dropout(x_data, ratio=0.2, train=train))
            list_a[x_len - 1 + pos] = x_data
            list_c[x_len - 1 + pos] = model.h_lstm.c

        # Merge sibling pairs level by level, deepest level first.
        for d in reversed(range(1, depth)):
            for s in range(2**d - 1, 2**(d + 1) - 1, 2):
                left = model.h_x(F.dropout(list_a[s], ratio=0.2, train=train))
                right = model.h_h(
                    F.dropout(list_a[s + 1], ratio=0.2, train=train))
                c, h = F.slstm(list_c[s], list_c[s + 1], left, right)
                # Must be an int: true division would produce a float index.
                parent = (s - 1) // 2
                list_a[parent] = h
                list_c[parent] = c

        # Split every node output into its two batch halves.
        list_p = []
        list_h = []
        for a in list_a:
            n_hs = F.split_axis(a, 2, axis=0)
            list_p.append(n_hs[0])
            list_h.append(n_hs[1])

        # First direction: nodes of the second half attend over the first.
        list_pq = F.concat(
            [F.reshape(h, (half_batch, 1, n_units)) for h in list_p],
            axis=1)
        list_aoa = []
        for d in reversed(range(1, depth)):
            for s in range(2**d - 1, 2**(d + 1) - 1, 1):
                a = self.__attend_fast(list_pq, list_h[s], half_batch, train)
                hs = model.m_lstm(F.dropout(a, ratio=0.2, train=train))
                list_aoa.append(hs)
        # The last m_lstm output is the query; the earlier ones are attended.
        list_aoa = F.concat(
            [F.reshape(h, (half_batch, 1, n_units)) for h in list_aoa[:-1]],
            axis=1)
        hs = self.__attend_fast(list_aoa, hs, half_batch, train)

        model.m_lstm.reset_state()

        # Second direction: nodes of the first half attend over the second.
        list_pq = F.concat(
            [F.reshape(h, (half_batch, 1, n_units)) for h in list_h],
            axis=1)
        list_aoa = []
        for d in reversed(range(1, depth)):
            for s in range(2**d - 1, 2**(d + 1) - 1, 1):
                a = self.__attend_fast(list_pq, list_p[s], half_batch, train)
                hs1 = model.m_lstm(F.dropout(a, ratio=0.2, train=train))
                list_aoa.append(hs1)
        list_aoa = F.concat(
            [F.reshape(h, (half_batch, 1, n_units)) for h in list_aoa[:-1]],
            axis=1)
        hs1 = self.__attend_fast(list_aoa, hs1, half_batch, train)

        hs = F.relu(model.h_l1(F.concat([hs, hs1], axis=1)))
        y = model.l_y(F.dropout(hs, ratio=0.2, train=train))
        preds = mod.argmax(y.data, 1).tolist()

        accum_loss = None
        if train:
            if gpu >= 0:
                y_batch = cuda.to_gpu(y_batch)
            lbl = Variable(y_batch, volatile=not train)
            accum_loss = F.softmax_cross_entropy(y, lbl)

        return preds, accum_loss, y