Пример #1
0
    def test_bprop_vector(self):
        r = []
        for _ in xrange(self.N):
            embd_dim = self.rng.random_integers(10000)
            batch_size, output_dim = self.rng.random_integers(2000, size=2)
            W = self.get_orthogonal_matrix(embd_dim, output_dim)
            row_idxs = self.rng.randint(embd_dim, size=(batch_size, 1)).astype(np.int32)
            true_labels = self.rng.randint(output_dim, size=(batch_size, 1)).astype(np.int32)
            device_id = 0

            output = {}
            for processor_type in ['gpu', 'cpu']:
                quagga.processor_type = processor_type
                qrow_idxs = Connector(Matrix.from_npa(row_idxs))
                qtrue_labels = Connector(Matrix.from_npa(true_labels))
                qW = Connector(Matrix.from_npa(W), device_id)
                row_slicing_block = RowSlicingBlock(qW, qrow_idxs)
                sce_block = SoftmaxCeBlock(row_slicing_block.output, qtrue_labels)
                qW.fprop()
                qrow_idxs.fprop()
                row_slicing_block.fprop()
                sce_block.fprop()
                sce_block.bprop()
                row_slicing_block.bprop()
                qW.add(Context(), qW.backward_matrix)
                output[processor_type] = qW.to_host()

            r.append(np.allclose(output['gpu'], output['cpu']))

        self.assertEqual(sum(r), len(r))
Пример #2
0
    def test_theano_bprop_matrix(self):
        r = []
        for i in xrange(self.N):
            max_input_sequence_len = self.rng.random_integers(300)
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(2, max_input_sequence_len)
            embd_dim = self.rng.random_integers(10000)
            batch_size = self.rng.random_integers(500)
            output_dim = self.rng.random_integers(2000)
            W = self.get_orthogonal_matrix(embd_dim, output_dim)
            row_idxs = self.rng.randint(embd_dim, size=(batch_size, max_input_sequence_len)).astype(np.int32)
            true_labels = [self.rng.randint(output_dim, size=(batch_size, 1)).astype(np.int32) for _ in xrange(max_input_sequence_len)]
            device_id = 0

            quagga.processor_type = 'gpu'
            qrow_idxs = Connector(Matrix.from_npa(row_idxs))
            qtrue_labels = List([Connector(Matrix.from_npa(e)) for e in true_labels], qrow_idxs.ncols)
            qW = Connector(Matrix.from_npa(W), device_id)
            row_slicing_block = RowSlicingBlock(qW, qrow_idxs)
            seq_sce_block = SequencerBlock(block_class=SoftmaxCeBlock,
                                           params=[],
                                           sequences=[row_slicing_block.output, qtrue_labels])
            qW.fprop()
            qrow_idxs.ncols = sequence_len
            qrow_idxs.fprop()
            row_slicing_block.fprop()
            seq_sce_block.fprop()
            seq_sce_block.bprop()
            row_slicing_block.bprop()
            qW.add(Context(), qW.backward_matrix)

            th_row_idxs = T.imatrix()
            th_true_labels = T.imatrix()
            row_slicing_layer = RowSlicingLayer(W)
            toutput = row_slicing_layer.get_output_expr(th_row_idxs)
            loss = SequentialSoftmaxLayer.get_loss(toutput, th_true_labels)
            dL_dW = T.grad(loss, row_slicing_layer.W)
            fun = theano.function([th_row_idxs, th_true_labels],
                                  updates=[(row_slicing_layer.W, row_slicing_layer.W + dL_dW)])
            fun(row_idxs, np.hstack(true_labels[:sequence_len]))

            r.append(np.allclose(qW.to_host(), row_slicing_layer.W.get_value(), atol=1e-5))

        self.assertEqual(sum(r), len(r))
Пример #3
0
    def test_theano_bprop_vector(self):
        r = []
        for _ in xrange(self.N):
            embd_dim = self.rng.random_integers(10000)
            batch_size, output_dim = self.rng.random_integers(2000, size=2)
            W = self.get_orthogonal_matrix(embd_dim, output_dim)
            row_idxs = self.rng.randint(embd_dim, size=(batch_size, 1)).astype(np.int32)
            true_labels = self.rng.randint(output_dim, size=(batch_size, 1)).astype(np.int32)
            device_id = 0

            quagga.processor_type = 'gpu'
            qrow_idxs = Connector(Matrix.from_npa(row_idxs))
            qW = Connector(Matrix.from_npa(W), device_id)
            qtrue_labels = Connector(Matrix.from_npa(true_labels))
            row_slicing_block = RowSlicingBlock(qW, qrow_idxs)
            sce_block = SoftmaxCeBlock(row_slicing_block.output, qtrue_labels)
            qtrue_labels.fprop()
            qW.fprop()
            qrow_idxs.fprop()
            row_slicing_block.fprop()
            sce_block.fprop()
            sce_block.bprop()
            row_slicing_block.bprop()
            qW.add(Context(), qW.backward_matrix)

            th_row_idxs = T.ivector()
            th_true_labels = T.ivector()
            row_slicing_layer = RowSlicingLayer(W)
            toutput = row_slicing_layer.get_output_expr(th_row_idxs)
            loss = SoftmaxLayer.get_loss(toutput, th_true_labels)
            dL_dW = T.grad(loss, row_slicing_layer.W)
            fun = theano.function([th_row_idxs, th_true_labels],
                                  updates=[(row_slicing_layer.W, row_slicing_layer.W + dL_dW)])
            fun(row_idxs[:, 0], true_labels[:, 0])
            r.append(np.allclose(qW.to_host(), row_slicing_layer.W.get_value()))

        self.assertEqual(sum(r), len(r))
Пример #4
0
    def test_bprop_matrix(self):
        r = []
        for i in xrange(self.N):
            max_input_sequence_len = self.rng.random_integers(500)
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(max_input_sequence_len)
            embd_dim = self.rng.random_integers(10000)
            batch_size = self.rng.random_integers(500)
            output_dim = self.rng.random_integers(2000)
            W = self.get_orthogonal_matrix(embd_dim, output_dim)
            row_idxs = self.rng.randint(embd_dim, size=(batch_size, max_input_sequence_len)).astype(np.int32)
            true_labels = [self.rng.randint(output_dim, size=(batch_size, 1)).astype(np.int32) for _ in xrange(max_input_sequence_len)]
            device_id = 0

            output = {}
            for processor_type in ['gpu', 'cpu']:
                quagga.processor_type = processor_type
                qrow_idxs = Connector(Matrix.from_npa(row_idxs))
                qtrue_labels = List([Connector(Matrix.from_npa(e)) for e in true_labels], qrow_idxs.ncols)
                qW = Connector(Matrix.from_npa(W), device_id)
                row_slicing_block = RowSlicingBlock(qW, qrow_idxs)
                seq_sce_block = SequencerBlock(block_class=SoftmaxCeBlock,
                                               params=[],
                                               sequences=[row_slicing_block.output, qtrue_labels])
                qW.fprop()
                qrow_idxs.ncols = sequence_len
                qrow_idxs.fprop()
                row_slicing_block.fprop()
                seq_sce_block.fprop()
                seq_sce_block.bprop()
                row_slicing_block.bprop()
                qW.add(Context(), qW.backward_matrix)
                output[processor_type] = qW.to_host()

            r.append(np.allclose(output['gpu'], output['cpu']))

        self.assertEqual(sum(r), len(r))
Пример #5
0
class DotBlock(object):
    """
    Computes dot product (scalar product) between matrices ``W`` and ``x``, also
    adds bias ``b``.

    Parameters
    ----------
    W : Matrix (GpuMatrix or CpuMatrix)
        Weigh matrix
    b : Matrix (GpuMatrix or CpuMatrix)
        Bias matrix (one dimesion equals 1, can be view as a vector)
    x : Matrix (GpuMatrix or CpuMatrix)
        Block's input
    device_id : int
        Defines the device's id on which the computation will take place
    """
    def __init__(self, W, b, x, device_id=None):
        self.f_context = Context(device_id)
        device_id = self.f_context.device_id

        if W.bpropagable:
            self.W, self.dL_dW = W.register_usage(device_id, device_id)
        else:
            self.W = W.register_usage(device_id)
        if b:
            if b.bpropagable:
                self.b, self.dL_db = b.register_usage(device_id, device_id)
                self.ones = Matrix.empty(x.nrows, 1, self.b.dtype, device_id)
                self.ones.sync_fill(1.0)
            else:
                self.b = b.register_usage(device_id)
        if x.bpropagable:
            self.x, self.dL_dx = x.register_usage(device_id, device_id)
        else:
            self.x = x.register_usage(device_id)

        output = Matrix.empty(x.nrows, self.W.ncols, device_id=device_id)
        self.learning = hasattr(self, 'dL_dW') or hasattr(self, 'dL_db') or \
                        hasattr(self, 'dL_dx')
        if self.learning:
            self.b_context = Context(device_id)
            self.output = Connector(output, device_id)
        else:
            self.output = Connector(output)

    def fprop(self):
        self.output.assign_dot(self.f_context, self.x, self.W)
        if hasattr(self, 'b'):
            self.output.add(self.f_context, self.b)
        self.output.fprop()

    def bprop(self):
        if not self.learning:
            return
        dL_doutput = self.output.backward_matrix
        # dL/dW = x.T * dL_doutput
        if hasattr(self, 'dL_dW'):
            self.dL_dW.add_dot(self.b_context, self.x, dL_doutput, 'T')
        # TODO(sergii): replace this modification with reduction kernel along axis=0
        # dL/db = 1.T * dL_doutput
        if hasattr(self, 'dL_db'):
            self.dL_db.add_dot(self.b_context, self.ones, dL_doutput, 'T')
        # dL/dx = dL_doutput * W.T
        if hasattr(self, 'dL_dx'):
            self.dL_dx.add_dot(self.b_context, dL_doutput, self.W, 'N', 'T')