Example #1
    def test_bprop(self):
        """
        compare `bprop` results for cpu and gpu backends
        """
        r = []
        for i in xrange(self.N):
            batch_size, x_dim, output_dim = self.rng.random_integers(2000, size=3)
            x = self.rng.rand(batch_size, x_dim).astype(np.float32)
            W = self.get_orthogonal_matrix(x_dim, output_dim)
            b = self.rng.rand(1, output_dim).astype(np.float32) if self.rng.randint(2) else None
            device_id = 0

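            # GPU backend: run the forward pass, seed dL/doutput with random values, then bprop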
            state = self.rng.get_state()
            quagga.processor_type = 'gpu'
            context = Context()
            x_gpu = Connector(Matrix.from_npa(x), device_id)
            W_gpu = Connector(Matrix.from_npa(W), device_id)
            b_gpu = Connector(Matrix.from_npa(b), device_id) if b is not None else b
            dot_block_gpu = DotBlock(W_gpu, b_gpu, x_gpu)
            x_gpu.fprop()
            W_gpu.fprop()
            if b_gpu:
                b_gpu.fprop()
            dot_block_gpu.fprop()
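            # obtain a gradient buffer for the block output and fill it with random values to drive bprop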
            _, dL_doutput = dot_block_gpu.output.register_usage(device_id, device_id)
            random_matrix = self.rng.rand(dL_doutput.nrows, dL_doutput.ncols)
            dL_doutput.assign(context, Matrix.from_npa(random_matrix, 'float'))
            dot_block_gpu.bprop()
            if b is not None:
                dL_db_gpu = b_gpu.backward_matrix.to_host()
            dL_dW_gpu = W_gpu.backward_matrix.to_host()
            dL_dx_gpu = x_gpu.backward_matrix.to_host()

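            # CPU backend: replay the same RNG state and repeat the computation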
            self.rng.set_state(state)
            quagga.processor_type = 'cpu'
            context = Context()
            x_cpu = Connector(Matrix.from_npa(x), device_id)
            W_cpu = Connector(Matrix.from_npa(W), device_id)
            b_cpu = Connector(Matrix.from_npa(b), device_id) if b is not None else b
            dot_block_cpu = DotBlock(W_cpu, b_cpu, x_cpu)
            x_cpu.fprop()
            W_cpu.fprop()
            if b_cpu:
                b_cpu.fprop()
            dot_block_cpu.fprop()
            _, dL_doutput = dot_block_cpu.output.register_usage(device_id, device_id)
            random_matrix = self.rng.rand(dL_doutput.nrows, dL_doutput.ncols)
            dL_doutput.assign(context, Matrix.from_npa(random_matrix, 'float'))
            dot_block_cpu.bprop()
            if b is not None:
                dL_db_cpu = b_cpu.backward_matrix.to_host()
            dL_dW_cpu = W_cpu.backward_matrix.to_host()
            dL_dx_cpu = x_cpu.backward_matrix.to_host()

            r.append(np.allclose(dL_dx_gpu, dL_dx_cpu, atol=1e-5))
            r.append(np.allclose(dL_dW_gpu, dL_dW_cpu, atol=1e-5))
            if b is not None:
                r.append(np.allclose(dL_db_gpu, dL_db_cpu, atol=1e-5))

        self.assertEqual(sum(r), len(r))
Example #2
    def test_fprop(self):
        """
        compare `fprop` results for cpu and gpu backends
        """
        r = []
        for i in xrange(self.N):
            batch_size, x_dim, output_dim = self.rng.random_integers(2000,
                                                                     size=3)
            x = self.rng.rand(batch_size, x_dim).astype(np.float32)
            W = self.get_orthogonal_matrix(x_dim, output_dim)
            b = self.rng.rand(1, output_dim).astype(
                np.float32) if self.rng.randint(2) else None

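            # GPU backend forward pass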
            quagga.processor_type = 'gpu'
            x_gpu = Connector(Matrix.from_npa(x))
            W_gpu = Connector(Matrix.from_npa(W))
            b_gpu = Connector(Matrix.from_npa(b)) if b is not None else b
            dot_block_gpu = DotBlock(W_gpu, b_gpu, x_gpu)
            x_gpu.fprop()
            W_gpu.fprop()
            if b_gpu:
                b_gpu.fprop()
            dot_block_gpu.fprop()
            output_gpu = dot_block_gpu.output.to_host()

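            # CPU backend forward pass on the same inputs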
            quagga.processor_type = 'cpu'
            x_cpu = Connector(Matrix.from_npa(x))
            W_cpu = Connector(Matrix.from_npa(W))
            b_cpu = Connector(Matrix.from_npa(b)) if b is not None else b
            dot_block_cpu = DotBlock(W_cpu, b_cpu, x_cpu)
            x_cpu.fprop()
            W_cpu.fprop()
            if b_cpu:
                b_cpu.fprop()
            dot_block_cpu.fprop()
            output_cpu = dot_block_cpu.output.to_host()

            r.append(np.allclose(output_gpu, output_cpu, atol=1e-5))

        self.assertEqual(sum(r), self.N)
Example #3
    def test_theano_grad(self):
        class LogisticRegressionLayer(object):
            def __init__(self, W, b):
                self.W = theano.shared(value=W)
                self.b = theano.shared(value=b[0])

            def get_output_expr(self, input_expr):
                return T.nnet.sigmoid(T.dot(input_expr, self.W) + self.b)

        quagga.processor_type = 'gpu'
        r = []
        for i in xrange(self.N):
            batch_size, x_dim = self.rng.random_integers(3000, size=2)
            x = self.rng.rand(batch_size, x_dim).astype(np.float32)
            lr_dot_W = self.rng.rand(x_dim, 1).astype(np.float32)
            lr_dot_b = self.rng.rand(1, 1).astype(np.float32)
            true_labels = self.rng.randint(2, size=(batch_size,
                                                    1)).astype(np.float32)
            dropout_prob = self.rng.uniform()
            seed = self.rng.randint(1000)
            device_id = 0

            # quagga model
            state = self.rng.get_state()
            x_gpu = Connector(Matrix.from_npa(x), device_id)
            true_labels_gpu = Connector(Matrix.from_npa(true_labels))
            lr_dot_W_gpu = Connector(Matrix.from_npa(lr_dot_W), device_id)
            lr_dot_b_gpu = Connector(Matrix.from_npa(lr_dot_b), device_id)

            dropout_block = DropoutBlock(x_gpu, dropout_prob, seed)
            lrdot_block = DotBlock(lr_dot_W_gpu, lr_dot_b_gpu,
                                   dropout_block.output)
            sce_block = SigmoidCeBlock(lrdot_block.output, true_labels_gpu)
            x_gpu.fprop()
            true_labels_gpu.fprop()
            lr_dot_W_gpu.fprop()
            lr_dot_b_gpu.fprop()
            dropout_block.fprop()
            lrdot_block.fprop()
            sce_block.fprop()
            sce_block.bprop()
            lrdot_block.bprop()
            dropout_block.bprop()
            q_grads = [
                lr_dot_W_gpu.backward_matrix.to_host(),
                lr_dot_b_gpu.backward_matrix.to_host(),
                x_gpu.backward_matrix.to_host()
            ]
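            # recover the dropout mask that was actually sampled so the Theano model can apply the same one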
            mask = (dropout_block.output.to_host() != 0).astype(np.float32)

            # Theano model
            self.rng.set_state(state)
            th_x = T.fmatrix()
            th_true_labels = T.fmatrix()
            lr_layer = LogisticRegressionLayer(lr_dot_W, lr_dot_b)
            probs = lr_layer.get_output_expr(th_x * mask)
            loss = T.mean(T.nnet.binary_crossentropy(probs, th_true_labels))
            th_grads = T.grad(loss, wrt=[lr_layer.W, lr_layer.b, th_x])
            get_theano_grads = theano.function([th_x, th_true_labels],
                                               th_grads)
            th_grads = get_theano_grads(x, true_labels)

            for q_grad, th_grad in izip(q_grads, th_grads):
                r.append(np.allclose(q_grad, th_grad))

        self.assertEqual(sum(r), len(r))
Example #4
    def test_theano_grad(self):
        class AttentionLayer(object):
            def __init__(self, u, mask=None):
                self.u = theano.shared(value=u)
                self.mask = mask

            def get_output_expr(self, input_expr):
                input_expr = input_expr.dimshuffle(0, 2, 1)
                pre_a = T.dot(input_expr, self.u)[:, :, 0]
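                # masked timesteps get a huge negative pre-activation so softmax assigns them ~zero weight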
                if self.mask:
                    pre_a = self.mask * pre_a - \
                            (1 - self.mask) * 3.402823466e+38
                a = T.nnet.softmax(pre_a)[:, :, np.newaxis]
                return T.sum(a * input_expr, axis=1)

        class LogisticRegressionLayer(object):
            def __init__(self, W, b):
                self.W = theano.shared(value=W)
                if b is not None:
                    self.b = theano.shared(value=b[0])

            def get_output_expr(self, input_expr):
                if hasattr(self, 'b'):
                    return T.nnet.sigmoid(T.dot(input_expr, self.W) + self.b)
                else:
                    return T.nnet.sigmoid(T.dot(input_expr, self.W))

        r = []
        for i in xrange(self.N):
            batch_size = self.rng.random_integers(500)
            x_dim = self.rng.random_integers(3000)
            n_ts = self.rng.random_integers(100)
            x = [
                self.rng.rand(batch_size, x_dim).astype(np.float32)
                for _ in xrange(n_ts)
            ]
            u = self.get_orthogonal_matrix(x_dim, 1)
            lr_dot_W = self.get_orthogonal_matrix(x_dim, 1)
            lr_dot_b = self.rng.rand(1, 1).astype(
                np.float32) if self.rng.randint(2) else None
            true_labels = self.rng.randint(
                2, size=(batch_size, 1)).astype(np.float32)
            mask = self.rng.randint(
                2, size=(batch_size, n_ts)).astype(
                    np.float32) if self.rng.randint(2) else None
            device_id = 0

            # Theano model
            state = self.rng.get_state()
            th_x = T.ftensor3()
            th_mask = T.fmatrix() if mask is not None else None

            th_true_labels = T.fmatrix()
            attnt_layer = AttentionLayer(u, th_mask)
            lr_layer = LogisticRegressionLayer(lr_dot_W, lr_dot_b)
            probs = th_x
            for layer in [attnt_layer, lr_layer]:
                probs = layer.get_output_expr(probs)
            loss = T.mean(T.nnet.binary_crossentropy(probs, th_true_labels))

            params = [lr_layer.W, attnt_layer.u, th_x]
            if hasattr(lr_layer, 'b'):
                params.append(lr_layer.b)
            th_grads = T.grad(loss, wrt=params)
            get_theano_grads = theano.function(
                [th_x, th_true_labels] + ([th_mask] if mask is not None else []),
                th_grads)
            th_grads = get_theano_grads(
                *([np.dstack(x), true_labels] +
                  ([mask] if mask is not None else [])))

            # quagga model
            self.rng.set_state(state)
            x = List([Connector(Matrix.from_npa(e), device_id) for e in x])
            u = Connector(Matrix.from_npa(u), device_id)
            lr_dot_W = Connector(Matrix.from_npa(lr_dot_W), device_id)
            lr_dot_b = Connector(
                Matrix.from_npa(lr_dot_b),
                device_id) if lr_dot_b is not None else lr_dot_b
            true_labels = Connector(Matrix.from_npa(true_labels))
            if mask is not None:
                mask = Connector(Matrix.from_npa(mask))

            attnt_block = AttentionBlock(x, u, mask)
            lrdot_block = DotBlock(lr_dot_W, lr_dot_b, attnt_block.output)
            sce_block = SigmoidCeBlock(lrdot_block.output, true_labels)

            x.fprop()
            true_labels.fprop()
            u.fprop()
            lr_dot_W.fprop()
            if lr_dot_b:
                lr_dot_b.fprop()
            attnt_block.fprop()
            lrdot_block.fprop()
            sce_block.fprop()
            sce_block.bprop()
            lrdot_block.bprop()
            attnt_block.bprop()
            q_grads = [
                lr_dot_W.backward_matrix.to_host(),
                u.backward_matrix.to_host(),
                np.dstack([e.backward_matrix.to_host() for e in x])
            ]
            if lr_dot_b:
                q_grads.append(lr_dot_b.backward_matrix.to_host())

            for th_grad, q_grad in izip(th_grads, q_grads):
                r.append(np.allclose(th_grad, q_grad, atol=1.e-7))
                print r[-1]

        self.assertEqual(sum(r), len(r))
Example #5
    def test_theano_grad(self):
        class LogisticRegressionLayer(object):
            def __init__(self, W, b):
                self.W = theano.shared(value=W)
                self.b = theano.shared(value=b[0])

            def get_output_expr(self, input_expr):
                return T.nnet.sigmoid(T.dot(input_expr, self.W) + self.b)

        quagga.processor_type = 'gpu'
        r = []
        for i in xrange(self.N):
            batch_size, x_dim = self.rng.random_integers(3000, size=2)
            x = self.rng.rand(batch_size, x_dim).astype(np.float32)
            lrdot_W = self.rng.rand(x_dim, 1).astype(np.float32)
            lrdot_b = self.rng.rand(1, 1).astype(np.float32)
            true_labels = self.rng.randint(2, size=(batch_size,
                                                    1)).astype(np.float32)
            device_id = 0

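            # check gradients for every supported nonlinearity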
            for nonlinearity in ['sigmoid', 'tanh', 'relu']:
                # Theano model
                state = self.rng.get_state()
                th_x = T.fmatrix()
                th_true_labels = T.fmatrix()
                lr_layer = LogisticRegressionLayer(lrdot_W, lrdot_b)
                if nonlinearity == 'sigmoid':
                    f = T.nnet.sigmoid
                elif nonlinearity == 'tanh':
                    f = T.tanh
                elif nonlinearity == 'relu':
                    f = T.nnet.relu
                probs = lr_layer.get_output_expr(f(th_x))
                loss = T.mean(T.nnet.binary_crossentropy(
                    probs, th_true_labels))
                th_grads = T.grad(loss, wrt=[lr_layer.W, lr_layer.b, th_x])
                get_theano_grads = theano.function([th_x, th_true_labels],
                                                   th_grads)
                th_grads = get_theano_grads(x, true_labels)

                # quagga model
                self.rng.set_state(state)
                x_gpu = Connector(Matrix.from_npa(x), device_id)
                true_labels_gpu = Connector(Matrix.from_npa(true_labels))
                lrdot_W_gpu = Connector(Matrix.from_npa(lrdot_W), device_id)
                lrdot_b_gpu = Connector(Matrix.from_npa(lrdot_b), device_id)
                nonlinearity_block = NonlinearityBlock(x_gpu, nonlinearity)
                lrdot_block = DotBlock(lrdot_W_gpu, lrdot_b_gpu,
                                       nonlinearity_block.output)
                sce_block = SigmoidCeBlock(lrdot_block.output, true_labels_gpu)

                x_gpu.fprop()
                true_labels_gpu.fprop()
                lrdot_W_gpu.fprop()
                lrdot_b_gpu.fprop()
                nonlinearity_block.fprop()
                lrdot_block.fprop()
                sce_block.fprop()
                sce_block.bprop()
                lrdot_block.bprop()
                nonlinearity_block.bprop()
                q_grads = [
                    lrdot_W_gpu.backward_matrix.to_host(),
                    lrdot_b_gpu.backward_matrix.to_host(),
                    x_gpu.backward_matrix.to_host()
                ]

                for q_grad, th_grad in izip(q_grads, th_grads):
                    r.append(np.allclose(q_grad, th_grad, atol=1e-5))

        self.assertEqual(sum(r), len(r))
Example #6
    def test_theano_grad(self):
        class DotLayer(object):
            def __init__(self, W, b):
                self.W = theano.shared(value=W)
                if b is not None:
                    self.b = theano.shared(value=b[0])

            def get_output_expr(self, input_expr):
                if hasattr(self, 'b'):
                    return T.dot(input_expr, self.W) + self.b
                else:
                    return T.dot(input_expr, self.W)

        class LogisticRegressionLayer(object):
            def __init__(self, W, b):
                self.W = theano.shared(value=W)
                if b is not None:
                    self.b = theano.shared(value=b[0])

            def get_output_expr(self, input_expr):
                if hasattr(self, 'b'):
                    return T.nnet.sigmoid(T.dot(input_expr, self.W) + self.b)
                else:
                    return T.nnet.sigmoid(T.dot(input_expr, self.W))

        quagga.processor_type = 'gpu'
        r = []
        for i in xrange(self.N):
            batch_size, x_dim, output_dim = self.rng.random_integers(2000, size=3)
            x = self.rng.rand(batch_size, x_dim).astype(np.float32)
            dot_W = self.get_orthogonal_matrix(x_dim, output_dim)
            dot_b = self.rng.rand(1, output_dim).astype(np.float32) if self.rng.randint(2) else None
            lr_dot_W = self.get_orthogonal_matrix(output_dim, 1)
            lr_dot_b = self.rng.rand(1, 1).astype(np.float32) if self.rng.randint(2) else None
            true_labels = self.rng.randint(2, size=(batch_size, 1)).astype(np.float32)
            device_id = 0

            # Theano model
            state = self.rng.get_state()
            th_x = T.fmatrix()
            th_true_labels = T.fmatrix()
            dot_layer = DotLayer(dot_W, dot_b)
            lr_layer = LogisticRegressionLayer(lr_dot_W, lr_dot_b)
            probs = th_x
            for layer in [dot_layer, lr_layer]:
                probs = layer.get_output_expr(probs)
            loss = T.mean(T.nnet.binary_crossentropy(probs, th_true_labels))

            params = [lr_layer.W, dot_layer.W, th_x]
            if hasattr(lr_layer, 'b'):
                params.append(lr_layer.b)
            if hasattr(dot_layer, 'b'):
                params.append(dot_layer.b)
            th_grads = T.grad(loss, wrt=params)
            get_theano_grads = theano.function([th_x, th_true_labels], th_grads)
            th_grads = get_theano_grads(x, true_labels)

            # quagga model
            self.rng.set_state(state)
            x = Connector(Matrix.from_npa(x), device_id)
            true_labels = Connector(Matrix.from_npa(true_labels))
            dot_W = Connector(Matrix.from_npa(dot_W), device_id)
            dot_b = Connector(Matrix.from_npa(dot_b), device_id) if dot_b is not None else dot_b
            lr_dot_W = Connector(Matrix.from_npa(lr_dot_W), device_id)
            lr_dot_b = Connector(Matrix.from_npa(lr_dot_b), device_id) if lr_dot_b is not None else lr_dot_b

            dot_block = DotBlock(dot_W, dot_b, x)
            lrdot_block = DotBlock(lr_dot_W, lr_dot_b, dot_block.output)
            sce_block = SigmoidCeBlock(lrdot_block.output, true_labels)
            x.fprop()
            true_labels.fprop()
            dot_W.fprop()
            if dot_b:
                dot_b.fprop()
            lr_dot_W.fprop()
            if lr_dot_b:
                lr_dot_b.fprop()
            dot_block.fprop()
            lrdot_block.fprop()
            sce_block.fprop()
            sce_block.bprop()
            lrdot_block.bprop()
            dot_block.bprop()
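            # collect quagga gradients in the same order as the Theano params list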
            q_grads = [lr_dot_W.backward_matrix.to_host(),
                       dot_W.backward_matrix.to_host(),
                       x.backward_matrix.to_host()]
            if lr_dot_b:
                q_grads.append(lr_dot_b.backward_matrix.to_host())
            if dot_b:
                q_grads.append(dot_b.backward_matrix.to_host())

            for th_grad, q_grad in izip(th_grads, q_grads):
                r.append(np.allclose(th_grad, q_grad, atol=1e-7))

        self.assertEqual(sum(r), len(r))
Example #7
    def test_theano_grad(self):
        class SequentialMeanPoolingLayer(object):
            def get_output_expr(self, input_sequence):
                return T.mean(input_sequence, axis=2)

        class LogisticRegressionLayer(object):
            def __init__(self, W_init, b_init):
                self.W = theano.shared(value=W_init())
                self.b = theano.shared(value=b_init())

            def get_output_expr(self, input_expr):
                return T.nnet.sigmoid(T.dot(input_expr, self.W) + self.b)

        quagga.processor_type = 'gpu'
        r = []
        for i in xrange(self.N):
            max_input_sequence_len = self.rng.random_integers(500)
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(max_input_sequence_len)
            batch_size = self.rng.random_integers(512)
            dim = self.rng.random_integers(1500)
            x = [self.rng.rand(batch_size, dim).astype(dtype=np.float32) for _ in xrange(max_input_sequence_len)]
            true_labels = self.rng.randint(1, size=(batch_size, 1)).astype(dtype=np.float32)

            W_init = self.get_orthogonal_initializer(dim, 1)
            b_init = lambda: self.rng.rand(1, 1).astype(dtype=np.float32)

            # Theano model
            state = self.rng.get_state()
            th_x = T.ftensor3()
            th_true_labels = T.fmatrix()
            smp_layer = SequentialMeanPoolingLayer()
            lr_layer = LogisticRegressionLayer(W_init, lambda: b_init()[0])
            probs = lr_layer.get_output_expr(smp_layer.get_output_expr(th_x))
            loss = T.mean(T.nnet.binary_crossentropy(probs, th_true_labels))
            grad_x = T.grad(loss, wrt=th_x)
            get_grad_x = theano.function([th_x, th_true_labels], grad_x)

            # quagga model
            self.rng.set_state(state)
            context = Context()
            x = List([Connector(Matrix.from_npa(e), context, context) for e in x])
            true_labels = Connector(Matrix.from_npa(true_labels))
            smp_block = SequentialMeanPoolingBlock(x)
            dot_block = DotBlock(W_init, b_init, smp_block.output)
            sce_block = SigmoidCeBlock(dot_block.output, true_labels)
            x.set_length(sequence_len)
            smp_block.fprop()
            dot_block.fprop()
            sce_block.fprop()
            sce_block.bprop()
            dot_block.bprop()
            smp_block.bprop()

            dL_dx = [e.backward_matrix.to_host() for e in x]
            dL_dx_th = get_grad_x(np.dstack([e.to_host() for e in x]), true_labels.to_host())
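            # compare per-timestep gradients; the for-else appends True only when no timestep differs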
            for i in xrange(dL_dx_th.shape[-1]):
                if not np.allclose(dL_dx[i], dL_dx_th[..., i]):
                    r.append(False)
                    break
            else:
                r.append(True)

        self.assertEqual(sum(r), self.N)
Example #8
    def test_theano_grad(self):
        device_id = 0
        class SequentialHorizontalStackLayer(object):
            def get_output_expr(self, x_sequence, y_sequence):
                return T.concatenate((x_sequence, y_sequence), axis=1)

        class SequentialMeanPoolingLayer(object):
            def get_output_expr(self, input_sequence):
                return T.mean(input_sequence, axis=2)

        class LogisticRegressionLayer(object):
            def __init__(self, W_init, b_init):
                self.W = theano.shared(value=W_init())
                self.b = theano.shared(value=b_init())

            def get_output_expr(self, input_expr):
                return T.nnet.sigmoid(T.dot(input_expr, self.W) + self.b)

        quagga.processor_type = 'gpu'
        r = []
        for i in xrange(self.N):
            max_input_sequence_len = self.rng.random_integers(500)
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(max_input_sequence_len)
            batch_size = self.rng.random_integers(256)
            dim_x, dim_y = self.rng.random_integers(1280, size=2)
            x = [self.rng.rand(batch_size, dim_x).astype(dtype=np.float32) for _ in xrange(max_input_sequence_len)]
            y = [self.rng.rand(batch_size, dim_y).astype(dtype=np.float32) for _ in xrange(max_input_sequence_len)]
            true_labels = self.rng.randint(1, size=(batch_size, 1)).astype(dtype=np.float32)

            W_init = self.get_orthogonal_initializer(dim_x + dim_y, 1)
            b_init = lambda: self.rng.rand(1, 1).astype(dtype=np.float32)

            # Theano model
            state = self.rng.get_state()
            th_x = T.ftensor3()
            th_y = T.ftensor3()
            th_true_labels = T.fmatrix()
            shs_layer = SequentialHorizontalStackLayer()
            smp_layer = SequentialMeanPoolingLayer()
            lr_layer = LogisticRegressionLayer(W_init, lambda: b_init()[0])
            probs = shs_layer.get_output_expr(th_x, th_y)
            probs = lr_layer.get_output_expr(smp_layer.get_output_expr(probs))
            loss = T.mean(T.nnet.binary_crossentropy(probs, th_true_labels))
            grads = T.grad(loss, wrt=[th_x, th_y])
            get_grads = theano.function([th_x, th_y, th_true_labels], grads)
            dL_dx_sequence_th, dL_dy_sequence_th = get_grads(np.dstack(x[:sequence_len]), np.dstack(y[:sequence_len]), true_labels)

            # quagga model
            self.rng.set_state(state)
            W = Connector(Matrix.from_npa(W_init(), device_id=device_id), device_id)
            b = Connector(Matrix.from_npa(b_init(), device_id=device_id), device_id)
            x = List([Connector(Matrix.from_npa(e), device_id) for e in x])
            y = List([Connector(Matrix.from_npa(e), device_id) for e in y])
            true_labels = Connector(Matrix.from_npa(true_labels))
            shs_block = SequentialHorizontalStackBlock(x, y)
            smp_block = SequentialMeanPoolingBlock(shs_block.output)
            dot_block = DotBlock(W, b, smp_block.output)
            sce_block = SigmoidCeBlock(dot_block.output, true_labels)
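            # truncate both input sequences to the sampled length before running the graph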
            x.length = sequence_len
            y.length = sequence_len
            shs_block.fprop()
            smp_block.fprop()
            dot_block.fprop()
            sce_block.fprop()
            sce_block.bprop()
            dot_block.bprop()
            smp_block.bprop()
            shs_block.bprop()
            dL_dx_sequence = [e.backward_matrix.to_host() for e in x]
            dL_dy_sequence = [e.backward_matrix.to_host() for e in y]

            for i in xrange(dL_dx_sequence_th.shape[-1]):
                if not np.allclose(dL_dx_sequence[i], dL_dx_sequence_th[..., i], atol=1.e-6):
                    r.append(False)
                    break
            else:
                r.append(True)
            for i in xrange(dL_dy_sequence_th.shape[-1]):
                if not np.allclose(dL_dy_sequence[i], dL_dy_sequence_th[..., i], atol=1.e-6):
                    r.append(False)
                    break
            else:
                r.append(True)

        self.assertEqual(sum(r), self.N * 2)