Example #1
    def test_theano_grad(self):
        quagga.processor_type = 'gpu'
        r = []
        for i in xrange(self.N):
            batch_size, dim = self.rng.random_integers(2000, size=2)
            y_hat = self.rng.randn(batch_size, dim).astype(dtype=np.float32)
            y = self.rng.randn(batch_size, dim).astype(dtype=np.float32)

            # Theano model
            th_y_hat, th_y = T.fmatrix(), T.fmatrix()
            loss = T.mean(T.sum((th_y_hat - th_y) ** 2, axis=1))
            get_theano_grads = theano.function([th_y_hat, th_y], T.grad(loss, wrt=th_y_hat))
            th_dL_dy_hat = get_theano_grads(y_hat, y)

            # quagga model
            context = Context()
            y_hat_gpu = Connector(Matrix.from_npa(y_hat), context, context)
            y_gpu = Connector(Matrix.from_npa(y))
            sse_block = SseBlock(y_hat_gpu, y_gpu)
            sse_block.fprop()
            sse_block.bprop()
            q_dL_dy_hat = y_hat_gpu.backward_matrix.to_host()

            r.append(np.allclose(th_dL_dy_hat, q_dL_dy_hat))

        self.assertEqual(sum(r), self.N)
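For reference, the gradient this test checks has a closed form: with loss = mean over the batch of the row-wise sum of squared errors, dL/dy_hat = 2 * (y_hat - y) / batch_size. A minimal NumPy sketch (illustrative names, not part of quagga):

import numpy as np

def sse_grad(y_hat, y):
    # loss = np.mean(np.sum((y_hat - y) ** 2, axis=1))
    # => dL/dy_hat = 2 * (y_hat - y) / batch_size
    return 2.0 * (y_hat - y) / y_hat.shape[0]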
Example #2
class SequentialMeanPoolingBlock(object):
    # TODO(sergii): change sequentially_tile to add_sequentially_tile, because can erase gradients
    def __init__(self, matrices, device_id=None):
        self.context = Context(device_id)
        device_id = self.context.device_id
        self.output = Matrix.empty_like(matrices[0], device_id)
        learning = matrices[0].bpropagable
        self.output = Connector(self.output, device_id if learning else None)
        if learning:
            self.matrices, self.dL_dmatrices = izip(
                *matrices.register_usage(device_id, device_id))
        else:
            self.matrices = matrices.register_usage(device_id)
        self.length = matrices.length

    def fprop(self):
        self.output.assign_sequential_mean_pooling(self.context,
                                                   self.matrices[:self.length])
        self.output.fprop()

    def bprop(self):
        dL_doutput = self.output.backward_matrix
        dL_doutput.scale(self.context, ct.c_float(1.0 / self.length))
        Matrix.sequentially_tile(self.context, dL_doutput,
                                 self.dL_dmatrices[:self.length])
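Functionally, the block above computes an elementwise mean over a sequence of equally shaped matrices, and its backward pass hands each input the output gradient scaled by 1/length. A NumPy sketch of both passes (illustrative names):

import numpy as np

def mean_pool_fprop(matrices):
    # output[i, j] = mean over the sequence of matrices[t][i, j]
    return np.mean(matrices, axis=0)

def mean_pool_bprop(dL_doutput, length):
    # every input receives the same gradient, scaled by 1 / length
    return [dL_doutput / length for _ in range(length)]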
Example #3
    def __init__(self, W, b, x, device_id=None):
        self.f_context = Context(device_id)
        device_id = self.f_context.device_id

        if W.bpropagable:
            self.W, self.dL_dW = W.register_usage(device_id, device_id)
        else:
            self.W = W.register_usage(device_id)
        if b:
            if b.bpropagable:
                self.b, self.dL_db = b.register_usage(device_id, device_id)
                self.ones = Matrix.empty(x.nrows, 1, self.b.dtype, device_id)
                self.ones.sync_fill(1.0)
            else:
                self.b = b.register_usage(device_id)
        if x.bpropagable:
            self.x, self.dL_dx = x.register_usage(device_id, device_id)
        else:
            self.x = x.register_usage(device_id)

        output = Matrix.empty(x.nrows, self.W.ncols, device_id=device_id)
        self.learning = hasattr(self, 'dL_dW') or hasattr(self, 'dL_db') or \
                        hasattr(self, 'dL_dx')
        if self.learning:
            self.b_context = Context(device_id)
            self.output = Connector(output, device_id)
        else:
            self.output = Connector(output)
Example #4
    def __init__(self, matrix, axis=1, device_id=None):
        self.context = Context(device_id)
        self._ctype = matrix.c_dtype
        self._zero = self._ctype(0.0)
        if axis == 0:
            self._ones = Matrix.empty(1, matrix.nrows, matrix.dtype, device_id)
            self.output = Matrix.empty(1, matrix.ncols, matrix.dtype,
                                       device_id)
            self.alpha = self._ctype(1.0 / matrix.nrows)
        elif axis == 1:
            self._ones = Matrix.empty(matrix.ncols, 1, matrix.dtype, device_id)
            self.output = Matrix.empty(matrix.nrows, 1, matrix.dtype,
                                       device_id)
            self.alpha = None
        else:
            raise ValueError('Invalid axis!')
        self._ones.sync_fill(1.0)
        self.axis = axis

        if matrix.bpropagable:
            self.matrix, self.dL_dmatrix = matrix.register_usage(
                self.context, self.context)
            self.output = Connector(self.output, self.context, self.context)
        else:
            self.matrix = matrix.register_usage(self.context)
            self.output = Connector(self.output, self.context)
Example #5
class ColSlicingBlock(object):
    """
    Parameters
    ----------
    W : Matrix (GpuMatrix or CpuMatrix)
    col_indexes

    """
    def __init__(self, W, col_indexes):
        device_id = W.device_id
        self.context = Context(device_id)
        learning = W.bpropagable
        if learning:
            self.W, self.dL_dW = W.register_usage_with_sparse_backward_matrix()
        else:
            self.W = W.register_usage(device_id)
        self.col_indexes = col_indexes.register_usage(device_id)
        output = Matrix.empty(W.nrows, col_indexes.ncols, device_id=device_id)
        self.output = Connector(output, device_id if learning else None)

    def fprop(self):
        self.W.slice_columns(self.context, self.col_indexes, self.output)
        self.output.fprop()

    def bprop(self):
        if hasattr(self, 'dL_dW'):
            self.dL_dW.add_columns_slice(self.col_indexes, self.output.bprop())
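In NumPy terms, fprop gathers the requested columns of W and bprop scatter-adds the output gradient back into those columns; quagga's sparse backward matrix avoids materializing a dense dL/dW, but a dense sketch (assuming col_indexes is a 1-D integer array; names are illustrative) looks like:

import numpy as np

def col_slice_fprop(W, col_indexes):
    return W[:, col_indexes]                  # gather columns

def col_slice_bprop(dL_dW, col_indexes, dL_doutput):
    # scatter-add so repeated indexes accumulate correctly
    np.add.at(dL_dW.T, col_indexes, dL_doutput.T)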
Example #6
class ArgmaxBlock(object):
    """
    Determines argmax values along the specified ``axis`` in the input matrix.
    The block returns a vector (a matrix with one of its dimensions equal to 1) of
    argmax values.


    Parameters
    ----------
    x : Matrix (GpuMatrix or CpuMatrix)
        Block's input
    axis : int
        Axis along which argmax is determined
    device_id : int
        Defines the device's id on which the computation will take place

    Returns
    -------
    vector
        A vector containing argmax values (e.g. argmax for each row if axis == 1).
    """
    def __init__(self, x, axis, device_id=None):
        if axis != 1:
            raise NotImplementedError
        self.axis = axis
        self.context = Context(device_id)
        device_id = self.context.device_id

        self.x = x.register_usage(device_id)
        self.output = Connector(Matrix.empty(x.nrows, 1, x.dtype, device_id))

    def fprop(self):
        self.x.argmax(self.context, self.output, self.axis)
        self.output.fprop()
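Functionally, fprop reduces to a row-wise argmax returning a column vector, e.g. in NumPy:

import numpy as np

x = np.random.rand(4, 5).astype(np.float32)
output = np.argmax(x, axis=1).reshape(-1, 1)  # one argmax per row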
Example #7
class LastSelectorBlock(object):
    """
    TODO(igor).

    Parameters
    ----------
    x : Matrix (GpuMatrix or CpuMatrix)

    """
    def __init__(self, x):
        device_id = x[0].device_id
        learning = x[0].bpropagable
        self.context = Context(device_id)
        self.output = Matrix.empty_like(x[0])
        self.output = Connector(self.output, device_id if learning else None)
        if learning:
            self.x, self.dL_dx = izip(*x.register_usage(device_id, device_id))
        else:
            self.x = x.register_usage(device_id)
        self.last_idx = x.length - 1

    def fprop(self):
        self.output.assign(self.context, self.x[self.last_idx])
        self.output.fprop()

    def bprop(self):
        self.dL_dx[self.last_idx].add(self.context, self.output.backward_matrix)
Example #8
    def __init__(self, train_data, valid_data, batch_size, word_dropout_prob, device_id):
        self.train_data = HomogeneousDataIterator(train_data, batch_size, randomize=True, infinite=True)
        self.valid_data = HomogeneousDataIterator(valid_data, batch_size)
        self.train_data_iterator = iter(self.train_data)
        self.valid_data_iterator = iter(self.valid_data)
        self.word_keep_prob = 1.0 - word_dropout_prob
        self.rnd = RandomState(47571)
        self.unk_idx = word_to_idx['<UNK>']

        self.context = Context(device_id)
        c = Counter([len(line) for line in chain(train_data, valid_data)])
        print c.most_common()
        max_len = max([len(line) for line in chain(train_data, valid_data)])

        self.enc_x = Connector(Matrix.empty(batch_size, max_len, 'int', device_id))
        self.enc_lengths = Matrix.empty(self.enc_x.nrows, 1, 'int', device_id)
        self._enc_mask = Matrix.empty(self.enc_x.nrows, self.enc_x.ncols, 'float', device_id)
        self.enc_mask = List([Connector(self._enc_mask[:, i]) for i in xrange(max_len)], self.enc_x.ncols)

        self.dec_x = Connector(Matrix.empty(batch_size, max_len + 1, 'int', device_id))
        self._dec_y = Matrix.empty(batch_size, max_len + 1, 'int', device_id)
        self.dec_y = List([Connector(self._dec_y[:, i]) for i in xrange(max_len + 1)], self._dec_y.ncols)
        self.dec_lengths = Matrix.empty(self.dec_x.nrows, 1, 'int', device_id)
        self._dec_mask = Matrix.empty(self.dec_x.nrows, self.dec_x.ncols, 'float', device_id)
        self.dec_mask = List([Connector(self._dec_mask[:, i]) for i in xrange(max_len + 1)], self.dec_x.ncols)

        self.blocking_contexts = None
        self.training_mode = True
Example #9
    def __init__(self, data, char_to_idx, batch_size, x_device_id,
                 y_device_id):
        self.data = HomogeneousDataIterator(data, char_to_idx, batch_size,
                                            True, True)
        self.data_iterator = iter(self.data)
        self.x_context = Context(x_device_id)
        self.y_context = Context(y_device_id)
        max_len = 0
        for sub_line in data:
            cur_len = len(sub_line)
            if cur_len > max_len:
                max_len = cur_len
        print max_len
        self.x = Connector(
            Matrix.empty(batch_size, max_len - 1, 'int', x_device_id))
        self._y = Matrix.empty(batch_size, max_len - 1, 'int', y_device_id)
        self.y = List([Connector(self._y[:, i]) for i in xrange(max_len - 1)],
                      self.x.ncols)
        self.lengths = Matrix.empty(self.x.nrows, 1, 'int', x_device_id)
        self._mask = Matrix.empty(self.x.nrows, self.x.ncols, 'float',
                                  x_device_id)
        self.mask = List(
            [Connector(self._mask[:, i]) for i in xrange(max_len - 1)],
            self.x.ncols)
        self.blocking_contexts = None
Example #10
class RepeatBlock(object):
    def __init__(self, x, repeats, axis=None, device_id=None):
        self.context = Context(device_id)
        device_id = self.context.device_id
        self.repeats = repeats
        self.axis = axis
        learning = x.bpropagable
        if learning:
            self.x, self.dL_dx = x.register_usage(device_id, device_id)
        else:
            self.x = x.register_usage(device_id)
        if axis == 0:
            self.output = Matrix.empty(x.nrows * repeats, x.ncols, x.dtype, device_id)
        elif axis == 1:
            self.output = Matrix.empty(x.nrows, x.ncols * repeats, x.dtype, device_id)
        else:
            raise ValueError('TODO')
        self.output = Connector(self.output, device_id if learning else None)

    def fprop(self):
        self.output.assign_repeat(self.context, self.x, self.repeats, self.axis)
        self.output.fprop()

    def bprop(self):
        if hasattr(self, 'dL_dx'):
            self.dL_dx.add_repeat_derivative(self.context, self.output.backward_matrix, self.repeats, self.axis)
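Assuming assign_repeat tiles the whole matrix along the given axis (an assumption; the kernel itself is not shown here), the block corresponds to this NumPy sketch, where bprop sums the gradients of all copies back into dL/dx:

import numpy as np

def repeat_fprop(x, repeats, axis):
    # tile the matrix 'repeats' times along rows (axis=0) or columns (axis=1)
    return np.tile(x, (repeats, 1) if axis == 0 else (1, repeats))

def repeat_bprop(dL_doutput, nrows, ncols, repeats, axis):
    # each copy contributes its gradient; sum them back into dL/dx
    if axis == 0:
        return dL_doutput.reshape(repeats, nrows, ncols).sum(axis=0)
    return dL_doutput.reshape(nrows, repeats, ncols).sum(axis=1)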
Example #11
class LastSelectorBlock(object):
    """
    TODO(igor).

    Parameters
    ----------
    x : Matrix (GpuMatrix or CpuMatrix)

    """
    def __init__(self, x):
        device_id = x[0].device_id
        learning = x[0].bpropagable
        self.context = Context(device_id)
        self.output = Matrix.empty_like(x[0])
        self.output = Connector(self.output, device_id if learning else None)
        if learning:
            self.x, self.dL_dx = izip(*x.register_usage(device_id, device_id))
        else:
            self.x = x.register_usage(device_id)
        self.last_idx = x.length - 1

    def fprop(self):
        self.output.assign(self.context, self.x[self.last_idx])
        self.output.fprop()

    def bprop(self):
        self.dL_dx[self.last_idx].add(self.context,
                                      self.output.backward_matrix)
Example #12
    def test_bprop(self):
        """
        compare `bprop` results for cpu and gpu backends
        """
        r = []
        for i in xrange(self.N):
            batch_size, dim = self.rng.random_integers(2000, size=2)
            y_hat = self.rng.randn(batch_size, dim).astype(dtype=np.float32)
            y = self.rng.randn(batch_size, dim).astype(dtype=np.float32)

            quagga.processor_type = 'gpu'
            context = Context()
            y_hat_gpu = Connector(Matrix.from_npa(y_hat), context, context)
            y_gpu = Connector(Matrix.from_npa(y))
            sse_block = SseBlock(y_hat_gpu, y_gpu)
            sse_block.fprop()
            sse_block.bprop()
            dL_dy_hat_gpu = y_hat_gpu.backward_matrix.to_host()

            quagga.processor_type = 'cpu'
            context = Context()
            y_hat_cpu = Connector(Matrix.from_npa(y_hat), context, context)
            y_cpu = Connector(Matrix.from_npa(y))
            sse_block = SseBlock(y_hat_cpu, y_cpu)
            sse_block.fprop()
            sse_block.bprop()
            dL_dy_hat_cpu = y_hat_cpu.backward_matrix.to_host()

            r.append(np.allclose(dL_dy_hat_gpu, dL_dy_hat_cpu))

        self.assertEqual(sum(r), self.N)
Example #13
    def __init__(self, x, nonlinearity, device_id=None):
        """


        """
        self.f_context = Context(device_id)
        device_id = self.f_context.device_id
        self.learning = x.bpropagable
        if self.learning:
            self.b_context = Context(device_id)
            self.x, self.dL_dx = x.register_usage(device_id, device_id)
            self._df_dpref = Matrix.empty_like(self.x, device_id)
        else:
            self.x = x.register_usage(device_id)
        output = Matrix.empty_like(x, device_id)
        self.output = Connector(output, device_id if self.learning else None)
        if nonlinearity == 'sigmoid':
            self.f = self.x.sigmoid
        elif nonlinearity == 'tanh':
            self.f = self.x.tanh
        elif nonlinearity == 'relu':
            self.f = self.x.relu
        elif nonlinearity == 'softmax':
            raise ValueError('For softmax nonlinearity use SoftmaxBlock!')
        else:
            raise ValueError('TODO!')
        self.training_mode = True
Example #14
    def test_fprop(self):
        """
        compare `fprop` results for cpu and gpu backends
        """
        r = []
        for i in xrange(self.N):
            max_input_sequence_len = self.rng.random_integers(500)
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(
                max_input_sequence_len)
            batch_size = self.rng.random_integers(512)
            dim = self.rng.random_integers(1500)
            x = [
                self.rng.rand(batch_size, dim).astype(dtype=np.float32)
                for _ in xrange(max_input_sequence_len)
            ]

            state = self.rng.get_state()
            quagga.processor_type = 'gpu'
            x_gpu = List([Connector(Matrix.from_npa(e)) for e in x])
            smean_pooling_block_gpu = SequentialMeanPoolingBlock(x_gpu)
            x_gpu.set_length(sequence_len)
            smean_pooling_block_gpu.fprop()
            output_gpu = smean_pooling_block_gpu.output.to_host()

            self.rng.set_state(state)
            quagga.processor_type = 'cpu'
            x_cpu = List([Connector(Matrix.from_npa(e)) for e in x])
            smean_pooling_block_cpu = SequentialMeanPoolingBlock(x_cpu)
            x_cpu.set_length(sequence_len)
            smean_pooling_block_cpu.fprop()
            output_cpu = smean_pooling_block_cpu.output.to_host()

            r.append(np.allclose(output_gpu, output_cpu))

        self.assertEqual(sum(r), self.N)
Example #15
class NonlinearityBlock(object):
    """
    Applies a nonlinear function (``sigmoid``, ``tanh``, ``relu``) to the input.

    Parameters
    ----------
    x : Matrix (GpuMatrix or CpuMatrix)
    nonlinearity : string
    device_id : int
    """

    def __init__(self, x, nonlinearity, device_id=None):
        """


        """
        self.f_context = Context(device_id)
        device_id = self.f_context.device_id
        self.learning = x.bpropagable
        if self.learning:
            self.b_context = Context(device_id)
            self.x, self.dL_dx = x.register_usage(device_id, device_id)
            self._df_dpref = Matrix.empty_like(self.x, device_id)
        else:
            self.x = x.register_usage(device_id)
        output = Matrix.empty_like(x, device_id)
        self.output = Connector(output, device_id if self.learning else None)
        if nonlinearity == "sigmoid":
            self.f = self.x.sigmoid
        elif nonlinearity == "tanh":
            self.f = self.x.tanh
        elif nonlinearity == "relu":
            self.f = self.x.relu
        elif nonlinearity == "softmax":
            raise ValueError("For softmax nonlinearity use SoftmaxBlock!")
        else:
            raise ValueError("TODO!")
        self.training_mode = True

    @property
    def df_dpref(self):
        if self.training_mode and self.learning:
            return self._df_dpref

    def fprop(self):
        self.f(self.f_context, self.output, self.df_dpref)
        self.output.fprop()

    def bprop(self):
        if hasattr(self, "dL_dx"):
            # dL/dpref = dL/df .* df/dpref
            dL_df = self.output.backward_matrix
            self.dL_dx.add_hprod(self.b_context, dL_df, self.df_dpref)

    def set_training_mode(self):
        self.training_mode = True

    def set_testing_mode(self):
        self.training_mode = False
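The bprop comment above is just the chain rule dL/dpref = dL/df .* df/dpref, with df/dpref cached during fprop. A NumPy sketch for the sigmoid case (illustrative names):

import numpy as np

def sigmoid_fprop(pref):
    f = 1.0 / (1.0 + np.exp(-pref))
    df_dpref = f * (1.0 - f)      # cached in fprop, reused in bprop
    return f, df_dpref

def sigmoid_bprop(dL_df, df_dpref):
    return dL_df * df_dpref       # elementwise (Hadamard) product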
Example #16
class NonlinearityBlock(object):
    """
    Applies a nonlinear function (``sigmoid``, ``tanh``, ``relu``) to the input.

    Parameters
    ----------
    x : Matrix (GpuMatrix or CpuMatrix)
    nonlinearity : string
    device_id : int
    """
    def __init__(self, x, nonlinearity, device_id=None):
        """


        """
        self.f_context = Context(device_id)
        device_id = self.f_context.device_id
        self.learning = x.bpropagable
        if self.learning:
            self.b_context = Context(device_id)
            self.x, self.dL_dx = x.register_usage(device_id, device_id)
            self._df_dpref = Matrix.empty_like(self.x, device_id)
        else:
            self.x = x.register_usage(device_id)
        output = Matrix.empty_like(x, device_id)
        self.output = Connector(output, device_id if self.learning else None)
        if nonlinearity == 'sigmoid':
            self.f = self.x.sigmoid
        elif nonlinearity == 'tanh':
            self.f = self.x.tanh
        elif nonlinearity == 'relu':
            self.f = self.x.relu
        elif nonlinearity == 'softmax':
            raise ValueError('For softmax nonlinearity use SoftmaxBlock!')
        else:
            raise ValueError('TODO!')
        self.training_mode = True

    @property
    def df_dpref(self):
        if self.training_mode and self.learning:
            return self._df_dpref

    def fprop(self):
        self.f(self.f_context, self.output, self.df_dpref)
        self.output.fprop()

    def bprop(self):
        if hasattr(self, 'dL_dx'):
            # dL/dpref = dL/df .* df/dpref
            dL_df = self.output.backward_matrix
            self.dL_dx.add_hprod(self.b_context, dL_df, self.df_dpref)

    def set_training_mode(self):
        self.training_mode = True

    def set_testing_mode(self):
        self.training_mode = False
Example #17
    def test_bprop(self):
        """
        compare `bprop` results for cpu and gpu backends
        """
        r = []
        for i in xrange(self.N):
            max_input_sequence_len = self.rng.random_integers(500)
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(
                max_input_sequence_len)
            batch_size = self.rng.random_integers(512)
            dim = self.rng.random_integers(1500)
            x = [
                self.rng.rand(batch_size, dim).astype(dtype=np.float32)
                for _ in xrange(max_input_sequence_len)
            ]

            state = self.rng.get_state()
            quagga.processor_type = 'gpu'
            context = Context()
            x_gpu = List(
                [Connector(Matrix.from_npa(e), context, context) for e in x])
            smean_pooling_block_gpu = SequentialMeanPoolingBlock(x_gpu)
            x_gpu.set_length(sequence_len)
            _, dL_doutput = smean_pooling_block_gpu.output.register_usage(
                context, context)
            smean_pooling_block_gpu.fprop()
            random_matrix = self.rng.rand(dL_doutput.nrows, dL_doutput.ncols)
            Matrix.from_npa(random_matrix,
                            'float').copy_to(context, dL_doutput)
            smean_pooling_block_gpu.bprop()
            dL_dmatrices_gpu = [e.backward_matrix.to_host() for e in x_gpu]

            self.rng.set_state(state)
            quagga.processor_type = 'cpu'
            context = Context()
            x_cpu = List(
                [Connector(Matrix.from_npa(e), context, context) for e in x])
            smean_pooling_block_cpu = SequentialMeanPoolingBlock(x_cpu)
            x_cpu.set_length(sequence_len)
            _, dL_doutput = smean_pooling_block_cpu.output.register_usage(
                context, context)
            smean_pooling_block_cpu.fprop()
            random_matrix = self.rng.rand(dL_doutput.nrows, dL_doutput.ncols)
            Matrix.from_npa(random_matrix,
                            'float').copy_to(context, dL_doutput)
            smean_pooling_block_cpu.bprop()
            dL_dmatrices_cpu = [e.backward_matrix.to_host() for e in x_cpu]

            for dL_dmatrix_gpu, dL_dmatrix_cpu in izip(dL_dmatrices_gpu,
                                                       dL_dmatrices_cpu):
                if not np.allclose(dL_dmatrix_gpu, dL_dmatrix_cpu):
                    r.append(False)
                    break
            else:
                r.append(True)

        self.assertEqual(sum(r), self.N)
Example #18
class SoftmaxCeBlock(object):
    """
    Softmax nonlinearity with mean cross entropy loss
    """
    def __init__(self, x, true_labels, mask=None, device_id=None):
        self.context = Context(device_id)
        device_id = self.context.device_id
        if x.bpropagable:
            self.x, self.dL_dx = x.register_usage(device_id, device_id)
        else:
            self.x = x.register_usage(device_id)
        self.true_labels = true_labels.register_usage(device_id)
        if mask:
            self.mask = mask.register_usage(device_id)
        self.probs = Connector(Matrix.empty_like(self.x))
        self.loss = None

    def fprop(self):
        self.x.softmax(self.context, self.probs)
        self.probs.fprop()

    def bprop(self):
        if not hasattr(self, 'dL_dx'):
            return
        # error = (probs - true_labels) / M
        if self.true_labels.dtype == 'int':
            self.dL_dx.add_softmax_ce_derivative(self.context, self.probs,
                                                 self.true_labels)
        else:
            self.dL_dx.add_scaled_subtraction(self.context,
                                              1. / self.probs.nrows,
                                              self.probs, self.true_labels)
        if hasattr(self, 'mask'):
            self.dL_dx.hprod(self.context, self.mask)

    def calculate_loss(self, context):
        true_labels_np = self.true_labels.to_host(context)
        probs_np = self.probs.to_host(context)
        if hasattr(self, 'mask'):
            mask = self.mask.to_host(context)
            context.add_callback(self._calculate_ce_loss, true_labels_np,
                                 probs_np, mask)
        else:
            context.add_callback(self._calculate_ce_loss, true_labels_np,
                                 probs_np)

    def _calculate_ce_loss(self, true_labels_np, probs_np, mask=None):
        if self.true_labels.dtype == 'int':
            idxs = range(probs_np.shape[0]), true_labels_np.flatten()
            logs = np.log(probs_np[idxs] + 1e-20)
        else:
            logs = np.log(np.sum(true_labels_np * probs_np, axis=1) + 1e-20)
        if mask is not None:
            logs *= mask[:, 0]
            self.loss = -np.sum(logs) / np.sum(mask)
        else:
            self.loss = -np.mean(logs)
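Written out in NumPy, the loss for integer labels is -mean(log probs[i, y_i]) and the gradient with respect to the pre-softmax input is (probs - one_hot) / M, which is what add_softmax_ce_derivative accumulates. A sketch (illustrative names):

import numpy as np

def softmax(x):
    e = np.exp(x - x.max(axis=1, keepdims=True))  # stabilized
    return e / e.sum(axis=1, keepdims=True)

def softmax_ce(x, labels):
    probs = softmax(x)
    m = x.shape[0]
    loss = -np.mean(np.log(probs[np.arange(m), labels.ravel()] + 1e-20))
    grad = probs.copy()
    grad[np.arange(m), labels.ravel()] -= 1.0     # probs - one_hot
    return loss, grad / m                         # error = (probs - y) / M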
Example #19
    def __init__(self, x, axis, device_id=None):
        if axis != 1:
            raise NotImplementedError
        self.axis = axis
        self.context = Context(device_id)
        device_id = self.context.device_id

        self.x = x.register_usage(device_id)
        self.output = Connector(Matrix.empty(x.nrows, 1, x.dtype, device_id))
Example #20
class PtbMiniBatchesGenerator(object):
    def __init__(self, ptb_train, ptb_valid, batch_size, sentence_max_len, device_id):
        self.blocking_contexts = None
        self.context = Context(device_id)
        device_id = self.context.device_id
        self.train_offsets = HomogeneousDataGenerator(ptb_train, batch_size, sentence_max_len, randomize=True, infinite=True)
        self.valid_offsets = HomogeneousDataGenerator(ptb_valid, batch_size, sentence_max_len)

        train_sentences = np.array([self.train_offsets.flatten_sentences])
        valid_sentences = np.array([self.valid_offsets.flatten_sentences])
        self.train_sents = Matrix.from_npa(train_sentences, 'int', device_id)
        self.valid_sents = Matrix.from_npa(valid_sentences, 'int', device_id)
        self._sent_lengths = np.empty((batch_size, 1), dtype=np.int32, order='F')[...]
        self.sent_lengths = Matrix.from_npa(self._sent_lengths, device_id=device_id)

        sentence_batch = Matrix.empty(batch_size, sentence_max_len, 'int', device_id)
        self.sentence_batch = Connector(sentence_batch, self.context)
        self.sentence_batch.sync_fill(0)

        self._mask = Matrix.empty(sentence_batch.nrows, self.sentence_batch.ncols, 'float', device_id)
        self.mask = List([Connector(self._mask[:, i]) for i in xrange(sentence_max_len)], self.sentence_batch.ncols)
        self.train_offsets_iterator = iter(self.train_offsets)
        self.valid_offsets_iterator = iter(self.valid_offsets)
        self.training_mode = True

    def set_training_mode(self):
        self.training_mode = True

    def set_testing_mode(self):
        self.training_mode = False

    def fprop(self):
        if self.training_mode:
            offsets = next(self.train_offsets_iterator)
            sents = self.train_sents
        else:
            try:
                offsets = next(self.valid_offsets_iterator)
                sents = self.valid_sents
            except StopIteration as e:
                self.valid_offsets_iterator = iter(self.valid_offsets)
                raise e
        self.context.wait(*self.blocking_contexts)
        self._sent_lengths = self._sent_lengths.base[:len(offsets)]
        self.sentence_batch.nrows = len(offsets)
        for k, offset in enumerate(offsets):
            self.sentence_batch[k].assign(self.context, sents[:, offset[0]:offset[1]])
            self._sent_lengths[k] = offset[1] - offset[0]
        max_sent_len = int(np.max(self._sent_lengths))
        self.sentence_batch.last_modification_context = self.context
        self.sentence_batch.ncols = max_sent_len
        self.sent_lengths.assign_npa(self.context, self._sent_lengths)
        self._mask.mask_column_numbers_row_wise(self.context, self.sent_lengths)
        for e in self.mask:
            e.last_modification_context = self.context
        self.sentence_batch.fprop()
        self.mask.fprop()
Example #21
    def __init__(self, probs, true_labels, schedule, seed, device_id=None):
        self.schedule = schedule
        self.rnd = np.random.RandomState(seed)
        self.context = Context(device_id)
        device_id = self.context.device_id

        self.probs = probs.register_usage(device_id)
        self.true_labels = true_labels.register_usage(device_id)
        self.output = Connector(Matrix.empty_like(self.true_labels))
Example #22
    def test_fprop_matrix(self):
        """
        compare `fprop` results for cpu and gpu backends
        """
        r = []
        for i in xrange(self.N):
            max_input_sequence_len = self.rng.random_integers(300)
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(max_input_sequence_len)
            embd_dim = self.rng.random_integers(10000)
            batch_size, output_dim = self.rng.random_integers(2000, size=2)
            W = self.get_orthogonal_matrix(embd_dim, output_dim)
            row_idxs = self.rng.randint(embd_dim, size=(batch_size, max_input_sequence_len)).astype(np.int32)

            output = {}
            for processor_type in ['gpu', 'cpu']:
                quagga.processor_type = processor_type
                qrow_idxs = Connector(Matrix.from_npa(row_idxs))
                qW = Connector(Matrix.from_npa(W))
                row_slicing_block = RowSlicingBlock(qW, qrow_idxs)
                qW.fprop()
                qrow_idxs.ncols = sequence_len
                qrow_idxs.fprop()
                row_slicing_block.fprop()
                output[processor_type] = row_slicing_block.output.to_host()

            for output_gpu, output_cpu in izip(output['gpu'], output['cpu']):
                r.append(np.allclose(output_gpu, output_cpu))

        self.assertEqual(sum(r), len(r))
Example #23
    def test_theano_fprop_matrix(self):
        r = []
        for i in xrange(self.N):
            max_input_sequence_len = self.rng.random_integers(300)
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(max_input_sequence_len)
            embd_dim = self.rng.random_integers(10000)
            batch_size = self.rng.random_integers(500)
            output_dim = self.rng.random_integers(2000)
            W = self.get_orthogonal_matrix(embd_dim, output_dim)
            row_idxs = self.rng.randint(embd_dim, size=(batch_size, max_input_sequence_len)).astype(np.int32)

            quagga.processor_type = 'gpu'
            qrow_idxs = Connector(Matrix.from_npa(row_idxs))
            qW = Connector(Matrix.from_npa(W))
            row_slicing_block = RowSlicingBlock(qW, qrow_idxs)
            qW.fprop()
            qrow_idxs.ncols = sequence_len
            qrow_idxs.fprop()
            row_slicing_block.fprop()
            q_output = row_slicing_block.output.to_host()

            th_row_idxs = T.imatrix()
            row_slicing_layer = RowSlicingLayer(W)
            toutput = row_slicing_layer.get_output_expr(th_row_idxs)
            th_output = theano.function([th_row_idxs], toutput)(row_idxs)

            for i in xrange(sequence_len):
                r.append(np.allclose(q_output[i], th_output[i]))

        self.assertEqual(sum(r), len(r))
Example #24
    def test_bprop_vector(self):
        r = []
        for _ in xrange(self.N):
            embd_dim = self.rng.random_integers(10000)
            batch_size, output_dim = self.rng.random_integers(2000, size=2)
            W = self.get_orthogonal_matrix(embd_dim, output_dim)
            row_idxs = self.rng.randint(embd_dim, size=(batch_size, 1)).astype(np.int32)
            true_labels = self.rng.randint(output_dim, size=(batch_size, 1)).astype(np.int32)
            device_id = 0

            output = {}
            for processor_type in ['gpu', 'cpu']:
                quagga.processor_type = processor_type
                qrow_idxs = Connector(Matrix.from_npa(row_idxs))
                qtrue_labels = Connector(Matrix.from_npa(true_labels))
                qW = Connector(Matrix.from_npa(W), device_id)
                row_slicing_block = RowSlicingBlock(qW, qrow_idxs)
                sce_block = SoftmaxCeBlock(row_slicing_block.output, qtrue_labels)
                qW.fprop()
                qrow_idxs.fprop()
                row_slicing_block.fprop()
                sce_block.fprop()
                sce_block.bprop()
                row_slicing_block.bprop()
                qW.add(Context(), qW.backward_matrix)
                output[processor_type] = qW.to_host()

            r.append(np.allclose(output['gpu'], output['cpu']))

        self.assertEqual(sum(r), len(r))
Example #25
    def test_bprop(self):
        r = []
        for i in xrange(self.N):
            matrices = []
            ncols = self.rng.random_integers(1, 3000)
            nrows = [0]
            row_slices = []
            device_ids = []
            for _ in xrange(self.rng.random_integers(1, 10)):
                _nrows = self.rng.random_integers(1, 2000)
                nrows.append(nrows[-1] + _nrows)
                if self.rng.choice([True, False]):
                    device_ids.append(0)
                    row_slices.append((nrows[-2], nrows[-1]))
                else:
                    device_ids.append(None)
                matrices.append(
                    self.rng.rand(_nrows, ncols).astype(np.float32))
            true_labels = self.rng.randint(ncols, size=(nrows[-1],
                                                        1)).astype(np.int32)
            if not row_slices:
                r.append(True)
                continue

            output = {}
            for processor_type in ['gpu', 'cpu']:
                quagga.processor_type = processor_type
                qmatrices = [
                    Connector(Matrix.from_npa(m), d_id)
                    for m, d_id in izip(matrices, device_ids)
                ]
                qtrue_labels = Connector(Matrix.from_npa(true_labels))
                vstack_block = VerticalStackBlock(*qmatrices)
                sce_block = SoftmaxCeBlock(vstack_block.output, qtrue_labels)

                for m in qmatrices:
                    m.fprop()
                qtrue_labels.fprop()
                vstack_block.fprop()
                sce_block.fprop()
                sce_block.bprop()
                vstack_block.bprop()

                output[processor_type] = [
                    m.backward_matrix.to_host() for m in qmatrices
                    if m.bpropagable
                ]

            for dL_dm_gpu, dL_dm_cpu in izip(output['gpu'], output['cpu']):
                if not np.allclose(dL_dm_gpu, dL_dm_cpu):
                    r.append(False)
                    break
            else:
                r.append(True)
        self.assertEqual(sum(r), self.N)
Example #26
    def __init__(self, W, col_indexes):
        device_id = W.device_id
        self.context = Context(device_id)
        learning = W.bpropagable
        if learning:
            self.W, self.dL_dW = W.register_usage_with_sparse_backward_matrix()
        else:
            self.W = W.register_usage(device_id)
        self.col_indexes = col_indexes.register_usage(device_id)
        output = Matrix.empty(W.nrows, col_indexes.ncols, device_id=device_id)
        self.output = Connector(output, device_id if learning else None)
Example #27
    def __init__(self, R, b, grad_clipping, mask, prev_c, prev_h, device_id=None):
        self.f_context = Context(device_id)
        device_id = self.f_context.device_id
        if R.bpropagable:
            self.R, self.dL_dR = R.register_usage(device_id, device_id)
            self.R_b_context = Context(device_id)
        else:
            self.R = R.register_usage(device_id)
        if b.bpropagable:
            self.b, self.dL_db = b.register_usage(device_id, device_id)
            self.b_b_context = Context(device_id)
        else:
            self.b = b.register_usage(device_id)
        self.grad_clipping = grad_clipping
        if mask:
            self.mask = mask.register_usage(device_id)
        if prev_c.bpropagable:
            self.prev_c, self.dL_dprev_c = prev_c.register_usage(device_id, device_id)
        else:
            self.prev_c = prev_c.register_usage(device_id)
        if prev_h.bpropagable:
            self.prev_h, self.dL_dprev_h = prev_h.register_usage(device_id, device_id)
        else:
            self.prev_h = prev_h.register_usage(device_id)
        self.learning = R.bpropagable or prev_c.bpropagable or prev_h.bpropagable
        if self.learning:
            self.b_context = Context(device_id)

        dim = self.R.nrows
        batch_size = self.prev_c.nrows

        self.zifo = Matrix.empty(batch_size, 4 * dim, device_id=device_id)
        self.z = self.zifo[:, 0*dim:1*dim]
        self.i = self.zifo[:, 1*dim:2*dim]
        self.f = self.zifo[:, 2*dim:3*dim]
        self.o = self.zifo[:, 3*dim:4*dim]
        self.c = Matrix.empty_like(self.prev_c, device_id)
        self.c = Connector(self.c, device_id if self.learning else None)
        self.tanh_c = Matrix.empty_like(self.c, device_id)
        self.h = Matrix.empty_like(self.c, device_id)
        self.h = Connector(self.h, device_id if self.learning else None)

        if self.learning:
            self._dzifo_dpre_zifo = Matrix.empty_like(self.zifo)
            self.dz_dpre_z = self._dzifo_dpre_zifo[:, 0*dim:1*dim]
            self.di_dpre_i = self._dzifo_dpre_zifo[:, 1*dim:2*dim]
            self.df_dpre_f = self._dzifo_dpre_zifo[:, 2*dim:3*dim]
            self.do_dpre_o = self._dzifo_dpre_zifo[:, 3*dim:4*dim]
            self.dL_dpre_zifo = self._dzifo_dpre_zifo
            self.dL_dpre_z = self.dz_dpre_z
            self.dL_dpre_i = self.di_dpre_i
            self.dL_dpre_f = self.df_dpre_f
            self.dL_dpre_o = self.do_dpre_o
            self._dtanh_c_dc = Matrix.empty_like(self.c)
Example #28
    def __init__(self, x):
        device_id = x[0].device_id
        learning = x[0].bpropagable
        self.context = Context(device_id)
        self.output = Matrix.empty_like(x[0])
        self.output = Connector(self.output, device_id if learning else None)
        if learning:
            self.x, self.dL_dx = izip(*x.register_usage(device_id, device_id))
        else:
            self.x = x.register_usage(device_id)
        self.last_idx = x.length - 1
Example #29
class DropoutBlock(object):
    """
    Sets elements of the input matrix ``x`` to zero with probability
    ``dropout_prob`` in training mode. Scales ``x`` by a factor of
    ``1 - dropout_prob`` in testing mode.

    Parameters
    ----------
    dropout_prob : float
    x : :class:`~quagga.matrix.CpuMatrix` or :class:`~quagga.matrix.GpuMatrix`
    seed : int
    device_id : int
        Defines the device's id on which the computation will take place

    Notes
    -----
    The dropout block is a regularizer that randomly sets input values to zero
    in training mode. This procedure is supposed to improve generalization.
    During testing, the dropout block scales input values.
    """
    def __init__(self, dropout_prob, x, seed=42, device_id=None):
        self.dropout_prob = dropout_prob
        self.f_context = Context(device_id)
        device_id = self.f_context.device_id
        self.generator = Matrix.get_random_generator(seed)
        if x.bpropagable:
            self.b_context = Context(device_id)
            self.x, self.dL_dx = x.register_usage(device_id, device_id)
        else:
            self.x = x.register_usage(device_id)
        self.output = Matrix.empty_like(self.x)
        self.output = Connector(self.output,
                                device_id if x.bpropagable else None)
        self.training_mode = True

    def fprop(self):
        if self.training_mode:
            self.x.dropout(self.f_context, self.generator, self.dropout_prob,
                           self.output)
        else:
            self.x.scale(self.f_context, 1.0 - self.dropout_prob, self.output)
        self.output.fprop()

    def bprop(self):
        if hasattr(self, 'dL_dx') and self.training_mode:
            dL_doutput = self.output.backward_matrix
            self.dL_dx.add_mask_zeros(self.b_context, dL_doutput, self.output)

    def set_training_mode(self):
        self.training_mode = True

    def set_testing_mode(self):
        self.training_mode = False
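This is classic (non-inverted) dropout: units are zeroed with probability dropout_prob during training, and activations are scaled by 1 - dropout_prob at test time so their expected value matches. A NumPy sketch of both modes (illustrative names):

import numpy as np

def dropout(x, dropout_prob, training, rng=np.random):
    if training:
        mask = (rng.rand(*x.shape) >= dropout_prob).astype(x.dtype)
        return x * mask                    # bprop: dL/dx = dL/dout * mask
    return x * (1.0 - dropout_prob)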
Example #30
    def __init__(self, **kwargs):
        self.parameters = {}
        self.trainable_parameters = {}
        for name, definition in kwargs.iteritems():
            device_id = definition['device_id']
            matrix = Matrix.from_npa(definition['init'](), device_id=device_id)
            if 'trainable' not in definition or definition['trainable']:
                param = Connector(matrix, device_id)
                self.trainable_parameters[name] = param
            else:
                param = Connector(matrix)
            self.parameters[name] = param
Example #31
    def __init__(self, x, true_labels, mask=None, device_id=None):
        self.context = Context(device_id)
        device_id = self.context.device_id
        if x.bpropagable:
            self.x, self.dL_dx = x.register_usage(device_id, device_id)
        else:
            self.x = x.register_usage(device_id)
        self.true_labels = true_labels.register_usage(device_id)
        if mask:
            self.mask = mask.register_usage(device_id)
        self.probs = Connector(Matrix.empty_like(self.x))
        self.loss = None
Example #32
    def __init__(self, matrices, device_id=None):
        self.context = Context(device_id)
        device_id = self.context.device_id
        self.output = Matrix.empty_like(matrices[0], device_id)
        learning = matrices[0].bpropagable
        self.output = Connector(self.output, device_id if learning else None)
        if learning:
            self.matrices, self.dL_dmatrices = izip(
                *matrices.register_usage(device_id, device_id))
        else:
            self.matrices = matrices.register_usage(device_id)
        self.length = matrices.length
Example #33
class SoftmaxCeBlock(object):
    """
    Softmax nonlinearity with mean cross entropy loss
    """

    def __init__(self, x, true_labels, mask=None, device_id=None):
        self.context = Context(device_id)
        device_id = self.context.device_id
        if x.bpropagable:
            self.x, self.dL_dx = x.register_usage(device_id, device_id)
        else:
            self.x = x.register_usage(device_id)
        self.true_labels = true_labels.register_usage(device_id)
        if mask:
            self.mask = mask.register_usage(device_id)
        self.probs = Connector(Matrix.empty_like(self.x))
        self.loss = None

    def fprop(self):
        self.x.softmax(self.context, self.probs)
        self.probs.fprop()

    def bprop(self):
        if not hasattr(self, 'dL_dx'):
            return
        # error = (probs - true_labels) / M
        if self.true_labels.dtype == 'int':
            self.dL_dx.add_softmax_ce_derivative(self.context, self.probs, self.true_labels)
        else:
            self.dL_dx.add_scaled_subtraction(self.context, 1. / self.probs.nrows, self.probs, self.true_labels)
        if hasattr(self, 'mask'):
            self.dL_dx.hprod(self.context, self.mask)

    def calculate_loss(self, context):
        true_labels_np = self.true_labels.to_host(context)
        probs_np = self.probs.to_host(context)
        if hasattr(self, 'mask'):
            mask = self.mask.to_host(context)
            context.add_callback(self._calculate_ce_loss, true_labels_np, probs_np, mask)
        else:
            context.add_callback(self._calculate_ce_loss, true_labels_np, probs_np)

    def _calculate_ce_loss(self, true_labels_np, probs_np, mask=None):
        if self.true_labels.dtype == 'int':
            idxs = range(probs_np.shape[0]), true_labels_np.flatten()
            logs = np.log(probs_np[idxs] + 1e-20)
        else:
            logs = np.log(np.sum(true_labels_np * probs_np, axis=1) + 1e-20)
        if mask is not None:
            logs *= mask[:, 0]
            self.loss = - np.sum(logs) / np.sum(mask)
        else:
            self.loss = - np.mean(logs)
Example #34
class DataBlock(object):
    def __init__(self, word_to_idx, device_id):
        self.context = Context(device_id)
        device_id = self.context.device_id
        self.word_idx = Connector(Matrix.empty(1, 1, 'int', device_id))
        self.word_to_idx = word_to_idx
        self.word = None

    def fprop(self):
        word_npa = np.zeros((1, 1), np.int32, 'F')
        word_npa[0][0] = self.word_to_idx[self.word] if self.word in self.word_to_idx else self.word_to_idx['<UNK>']
        self.word_idx.assign_npa(self.context, word_npa)
        self.word_idx.fprop()
Example #35
    def __init__(self, dropout_prob, x, seed=42, device_id=None):
        self.dropout_prob = dropout_prob
        self.f_context = Context(device_id)
        device_id = self.f_context.device_id
        self.generator = Matrix.get_random_generator(seed)
        if x.bpropagable:
            self.b_context = Context(device_id)
            self.x, self.dL_dx = x.register_usage(device_id, device_id)
        else:
            self.x = x.register_usage(device_id)
        self.output = Matrix.empty_like(self.x)
        self.output = Connector(self.output, device_id if x.bpropagable else None)
        self.training_mode = True
Example #36
class DataBlock(object):
    def __init__(self, char_to_idx, device_id):
        self.context = Context(device_id)
        device_id = self.context.device_id
        self.char_idx = Connector(Matrix.empty(1, 1, 'int', device_id))
        self.char_to_idx = char_to_idx
        self.char = None

    def fprop(self):
        char_npa = np.zeros((1, 1), np.int32, 'F')
        char_npa[0][0] = self.char_to_idx[self.char] if self.char in self.char_to_idx else self.char_to_idx['<unk>']
        self.char_idx.assign_npa(self.context, char_npa)
        self.char_idx.fprop()
Example #37
class GaussianNoiseBlock(object):
    """
    Adds Gaussian noise to the block's input. Adding Gaussian noise can be
    viewed as a regularization.


    Parameters
    ----------
    mean : float
            Expected value of Gaussian noise
    std : float
            Standard deviation of added Gaussian noise
    x : matrix
            Block's input
    seed : int
            Seed for :func:`quagga.cuda.curand.create_generator`
    device_id: int
            Defines the device's id on which the computation will take place
    """
    def __init__(self, mean, std, x, seed=42, device_id=None):
        self.mean = mean
        self.std = std
        self.f_context = Context(device_id)
        device_id = self.f_context.device_id
        self.generator = Matrix.get_random_generator(seed)
        if x.bpropagable:
            self.b_context = Context(device_id)
            self.x, self.dL_dx = x.register_usage(device_id, device_id)
        else:
            self.x = x.register_usage(device_id)
        self.output = Matrix.empty_like(self.x)
        self.output = Connector(self.output,
                                device_id if x.bpropagable else None)
        self.training_mode = True

    def fprop(self):
        if self.training_mode:
            self.x.add_gaussian_noise(self.f_context, self.generator,
                                      self.mean, self.std, self.output)
        else:
            self.output.assign(self.f_context, self.x)
        self.output.fprop()

    def bprop(self):
        self.dL_dx.add(self.b_context, self.output.backward_matrix)

    def set_training_mode(self):
        self.training_mode = True

    def set_testing_mode(self):
        self.training_mode = False
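Because the noise is additive, the Jacobian of the output with respect to x is the identity, so bprop above simply adds dL/doutput into dL/dx. A NumPy sketch of fprop's two modes (illustrative names):

import numpy as np

def gaussian_noise_fprop(x, mean, std, training, rng=np.random):
    if training:
        return x + rng.normal(mean, std, size=x.shape).astype(x.dtype)
    return x                               # identity at test time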
Example #38
    def test_fprop(self):
        """
        compare `fprop` results for cpu and gpu backends
        """
        r = []
        for i in xrange(self.N):
            max_input_sequence_len = self.rng.random_integers(500)
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(
                max_input_sequence_len)
            batch_size = self.rng.random_integers(512)
            dim_x, dim_y = self.rng.random_integers(1500, size=2)
            x = [
                self.rng.rand(batch_size, dim_x).astype(dtype=np.float32)
                for _ in xrange(max_input_sequence_len)
            ]
            y = [
                self.rng.rand(batch_size, dim_y).astype(dtype=np.float32)
                for _ in xrange(max_input_sequence_len)
            ]

            state = self.rng.get_state()
            quagga.processor_type = 'gpu'
            x_gpu = List([Connector(Matrix.from_npa(e)) for e in x])
            y_gpu = List([Connector(Matrix.from_npa(e)) for e in y])
            seq_hstack_block_gpu = SequentialHorizontalStackBlock(x_gpu, y_gpu)
            x_gpu.length = sequence_len
            y_gpu.length = sequence_len
            seq_hstack_block_gpu.fprop()
            output_sequence_gpu = seq_hstack_block_gpu.output.to_host()

            self.rng.set_state(state)
            quagga.processor_type = 'cpu'
            x_cpu = List([Connector(Matrix.from_npa(e)) for e in x])
            y_cpu = List([Connector(Matrix.from_npa(e)) for e in y])
            seq_hstack_block_cpu = SequentialHorizontalStackBlock(x_cpu, y_cpu)
            x_cpu.length = sequence_len
            y_cpu.length = sequence_len
            seq_hstack_block_cpu.fprop()
            output_sequence_cpu = seq_hstack_block_cpu.output.to_host()

            for out_gpu, out_cpu in izip(output_sequence_gpu,
                                         output_sequence_cpu):
                if not np.allclose(out_gpu, out_cpu):
                    r.append(False)
                    break
            else:
                r.append(True)

        self.assertEqual(sum(r), self.N)
Example #39
class DataBlock(object):
    def __init__(self, data, char_to_idx, batch_size, x_device_id,
                 y_device_id):
        self.data = HomogeneousDataIterator(data, char_to_idx, batch_size,
                                            True, True)
        self.data_iterator = iter(self.data)
        self.x_context = Context(x_device_id)
        self.y_context = Context(y_device_id)
        max_len = 0
        for sub_line in data:
            cur_len = len(sub_line)
            if cur_len > max_len:
                max_len = cur_len
        print max_len
        self.x = Connector(
            Matrix.empty(batch_size, max_len - 1, 'int', x_device_id))
        self._y = Matrix.empty(batch_size, max_len - 1, 'int', y_device_id)
        self.y = List([Connector(self._y[:, i]) for i in xrange(max_len - 1)],
                      self.x.ncols)
        self.lengths = Matrix.empty(self.x.nrows, 1, 'int', x_device_id)
        self._mask = Matrix.empty(self.x.nrows, self.x.ncols, 'float',
                                  x_device_id)
        self.mask = List(
            [Connector(self._mask[:, i]) for i in xrange(max_len - 1)],
            self.x.ncols)
        self.blocking_contexts = None

    def fprop(self):
        self.x_context.wait(*self.blocking_contexts)
        self.y_context.wait(*self.blocking_contexts)
        data = next(self.data_iterator)
        lengths_npa = np.array([[len(e) - 1] for e in data],
                               np.int32,
                               order='F')
        x_npa = np.zeros((len(data), int(np.max(lengths_npa))), np.int32, 'F')
        for k, e in enumerate(data):
            x_npa[k, :len(e) - 1] = e[:-1]
        self.x.assign_npa(self.x_context, x_npa)
        y_npa = np.zeros((len(data), int(np.max(lengths_npa))), np.int32, 'F')
        for k, e in enumerate(data):
            y_npa[k, :len(e) - 1] = e[1:]
        self._y.assign_npa(self.y_context, y_npa)
        for e in self.y:
            e.last_modification_context = self.y_context
        self.lengths.assign_npa(self.x_context, lengths_npa)
        self._mask.mask_column_numbers_row_wise(self.x_context, self.lengths)
        for e in self.mask:
            e.last_modification_context = self.x_context
        self.x.fprop()
        self.y.fprop()
        self.mask.fprop()
Example #40
class DropoutBlock(object):
    """
    Sets elements of the input matrix ``x`` to zero with probability
    ``dropout_prob`` in training mode. Scales ``x`` by a factor of
    ``1 - dropout_prob`` in testing mode.

    Parameters
    ----------
    dropout_prob : float
    x : Matrix (GpuMatrix or CpuMatrix)
    seed : int
    device_id : int
        Defines the device's id on which the computation will take place

    Notes
    -----
    The dropout block is a regularizer that randomly sets input values to zero
    in training mode. This procedure is supposed to improve generalization.
    During testing, the dropout block scales input values.
    """
    def __init__(self, dropout_prob, x, seed=42, device_id=None):
        self.dropout_prob = dropout_prob
        self.f_context = Context(device_id)
        device_id = self.f_context.device_id
        self.generator = Matrix.get_random_generator(seed)
        if x.bpropagable:
            self.b_context = Context(device_id)
            self.x, self.dL_dx = x.register_usage(device_id, device_id)
        else:
            self.x = x.register_usage(device_id)
        self.output = Matrix.empty_like(self.x)
        self.output = Connector(self.output, device_id if x.bpropagable else None)
        self.training_mode = True

    def fprop(self):
        if self.training_mode:
            self.x.dropout(self.f_context, self.generator, self.dropout_prob, self.output)
        else:
            self.x.scale(self.f_context, 1.0 - self.dropout_prob, self.output)
        self.output.fprop()

    def bprop(self):
        if hasattr(self, 'dL_dx') and self.training_mode:
            dL_doutput = self.output.backward_matrix
            self.dL_dx.add_mask_zeros(self.b_context, dL_doutput, self.output)

    def set_training_mode(self):
        self.training_mode = True

    def set_testing_mode(self):
        self.training_mode = False
Example #41
class AttentionBlock(object):
    """
    Location based attention block
    out = sum_{i=1}^{T}a_i * h_i
    a_i = softmax(h_i * u)
    """
    def __init__(self, matrices, u, mask=None, device_id=None):
        self.context = Context(device_id)
        device_id = self.context.device_id
        self.output = Matrix.empty_like(matrices[0], device_id)
        learning = matrices[0].bpropagable or u.bpropagable
        self.output = Connector(self.output, device_id if learning else None)
        if matrices[0].bpropagable:
            self.matrices, self.dL_dmatrices = \
                izip(*matrices.register_usage(device_id, device_id))
        else:
            self.matrices = matrices.register_usage(device_id)
        self.length = matrices.length
        if u.bpropagable:
            self.u, self.dL_du = u.register_usage(device_id, device_id)
        else:
            self.u = u.register_usage(device_id)
        if mask:
            self.mask = mask.register_usage(device_id)
        self.a = Matrix.empty(matrices[0].nrows, matrices.length,
                              'float', device_id)
        self.dL_dpre_a = Matrix.empty_like(self.a)
        self.a_cols = [self.a[:, i] for i in xrange(len(self.matrices))]

    def fprop(self):
        for i in xrange(self.length):
            self.a_cols[i].assign_dot(self.context, self.matrices[i], self.u)
        if hasattr(self, 'mask'):
            self.a.fill(self.context, -3.402823466e+38, self.mask, 0.0)
        self.a.softmax(self.context, self.a)
        self.output.assign_sequential_weighted_sum(self.context, self.a,
                                                   self.matrices[:self.length])
        self.output.fprop()

    def bprop(self):
        dL_doutput = self.output.backward_matrix
        self.dL_dpre_a.assign_dL_dpre_a(self.context, dL_doutput, self.a,
                                        self.matrices[:self.length])
        if hasattr(self, 'dL_dmatrices'):
            Matrix.add_attention_tile(self.context, dL_doutput, self.a,
                                      self.dL_dpre_a, self.u,
                                      self.dL_dmatrices[:self.length])
        if hasattr(self, 'dL_du'):
            self.dL_du.add_attention_derivative(self.context, self.dL_dpre_a,
                                                self.matrices[:self.length])
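The docstring's formulas translate into the following NumPy sketch, where h stacks the sequence as (T, batch, dim) and u is a (dim,) vector (illustrative names and shapes; the quagga kernels fuse these steps):

import numpy as np

def attention_fprop(h, u):
    scores = np.einsum('tbd,d->bt', h, u)            # a score per timestep
    e = np.exp(scores - scores.max(axis=1, keepdims=True))
    a = e / e.sum(axis=1, keepdims=True)             # softmax over time
    out = np.einsum('bt,tbd->bd', a, h)              # sum_t a_t * h_t
    return out, a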
Example #42
class GaussianNoiseBlock(object):
    """
    Adds Gaussian noise to the block's input. Adding Gaussian noise can be
    viewed as a regularization.


    Parameters
    ----------
    mean : float
            Expected value of Gaussian noise
    std : float
            Standard deviation of added Gaussian noise
    x : matrix
            Block's input
    seed : int
            Seed for :func:`~quagga.cuda.curand.create_generator`
    device_id: int
            Defines the device's id on which the computation will take place
    """
    def __init__(self, mean, std, x, seed=42, device_id=None):
        self.mean = mean
        self.std = std
        self.f_context = Context(device_id)
        device_id = self.f_context.device_id
        self.generator = Matrix.get_random_generator(seed)
        if x.bpropagable:
            self.b_context = Context(device_id)
            self.x, self.dL_dx = x.register_usage(device_id, device_id)
        else:
            self.x = x.register_usage(device_id)
        self.output = Matrix.empty_like(self.x)
        self.output = Connector(self.output, device_id if x.bpropagable else None)
        self.training_mode = True

    def fprop(self):
        if self.training_mode:
            self.x.add_gaussian_noise(self.f_context, self.generator, self.mean, self.std, self.output)
        else:
            self.output.assign(self.f_context, self.x)
        self.output.fprop()

    def bprop(self):
        if hasattr(self, 'dL_dx'):
            self.dL_dx.add(self.b_context, self.output.backward_matrix)

    def set_training_mode(self):
        self.training_mode = True

    def set_testing_mode(self):
        self.training_mode = False
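What this computes, as plain NumPy: training mode adds N(mean, std) noise, testing mode is the identity, and because the noise does not depend on the input, the backward pass simply forwards the output gradient. Names here are illustrative.

import numpy as np

rng = np.random.RandomState(42)
mean, std = 0.0, 0.5
x = rng.randn(4, 3).astype(np.float32)

train_out = x + rng.normal(mean, std, size=x.shape).astype(np.float32)
test_out = x                                  # noise disabled at test time
dL_dout = np.ones_like(x)
dL_dx = dL_dout.copy()   # input-independent noise => identity Jacobian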
Example #43
class SigmoidCeBlock(object):
    """
    Sigmoid nonlinearity with mean cross entropy loss
    """

    def __init__(self, x, true_labels, mask=None, device_id=None):
        self.context = Context(device_id)
        device_id = self.context.device_id
        if x.bpropagable:
            self.x, self.dL_dx = x.register_usage(device_id, device_id)
        else:
            self.x = x.register_usage(device_id)
        self.true_labels = true_labels.register_usage(device_id)
        if mask:
            self.mask = mask.register_usage(device_id)
        self.probs = Connector(Matrix.empty_like(self.x))
        self.loss = None

    def fprop(self):
        self.x.sigmoid(self.context, self.probs)
        self.probs.fprop()

    def bprop(self):
        # error = (probs - true_labels) / M
        self.dL_dx.add_scaled_subtraction(self.context,
                                          1. / float(self.probs.nrows),
                                          self.probs, self.true_labels)
        if hasattr(self, 'mask'):
            self.dL_dx.hprod(self.context, self.mask)

    def calculate_loss(self, context):
        true_labels_np = self.true_labels.to_host(context)
        probs_np = self.probs.to_host(context)
        if hasattr(self, 'mask'):
            mask = self.mask.to_host(context)
            context.add_callback(self._calculate_ce_loss,
                                 true_labels_np, probs_np, mask)
        else:
            context.add_callback(self._calculate_ce_loss,
                                 true_labels_np, probs_np)

    def _calculate_ce_loss(self, true_labels_np, probs_np, mask=None):
        logs = true_labels_np * np.log(probs_np + 1e-20) + \
               (1.0 - true_labels_np) * np.log(1. - probs_np + 1e-20)
        if mask is not None:
            logs *= mask
            self.loss = - np.sum(logs) / (np.sum(mask) * logs.shape[1])
        else:
            self.loss = - np.mean(logs)
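A small NumPy check of the gradient used in bprop above: for the mean sigmoid cross-entropy loss, dL/dx = (sigmoid(x) - true_labels) / nrows, which is exactly the scaled subtraction in the code. Names are illustrative, not quagga API.

import numpy as np

rng = np.random.RandomState(0)
x = rng.randn(8, 5).astype(np.float32)
true_labels = (rng.rand(8, 5) > 0.5).astype(np.float32)

probs = 1.0 / (1.0 + np.exp(-x))
loss = -np.mean(true_labels * np.log(probs + 1e-20) +
                (1.0 - true_labels) * np.log(1.0 - probs + 1e-20))
dL_dx = (probs - true_labels) / x.shape[0]   # matches bprop's scaled subtraction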
Example #44
    def test_bprop(self):
        r = []
        for i in xrange(self.N):
            matrices = []
            nrows = self.rng.random_integers(1, 3000)
            ncols = [0]
            col_slices = []
            device_ids = []
            for _ in xrange(self.rng.random_integers(1, 10)):
                _ncols = self.rng.random_integers(1, 2000)
                ncols.append(ncols[-1] + _ncols)
                if self.rng.choice([True, False]):
                    device_ids.append(0)
                    col_slices.append((ncols[-2], ncols[-1]))
                else:
                    device_ids.append(None)
                matrices.append(self.rng.rand(nrows, _ncols).astype(np.float32))
            true_labels = self.rng.randint(ncols[-1], size=(nrows, 1)).astype(np.int32)
            if not col_slices:
                r.append(True)
                continue

            output = {}
            for processor_type in ['gpu', 'cpu']:
                quagga.processor_type = processor_type
                qmatrices = [Connector(Matrix.from_npa(m), d_id) for m, d_id in izip(matrices, device_ids)]
                qtrue_labels = Connector(Matrix.from_npa(true_labels))
                hstack_block = HorizontalStackBlock(*qmatrices)
                sce_block = SoftmaxCeBlock(hstack_block.output, qtrue_labels)

                for m in qmatrices:
                    m.fprop()
                qtrue_labels.fprop()
                hstack_block.fprop()
                sce_block.fprop()
                sce_block.bprop()
                hstack_block.bprop()

                output[processor_type] = [m.backward_matrix.to_host()
                                          for m in qmatrices if m.bpropagable]

            for dL_dm_gpu, dL_dm_cpu in izip(output['gpu'], output['cpu']):
                if not np.allclose(dL_dm_gpu, dL_dm_cpu):
                    r.append(False)
                    break
            else:
                r.append(True)
        self.assertEqual(sum(r), self.N)
Example #45
    def __init__(self, x, nonlinearity, device_id=None):
        """


        """
        self.f_context = Context(device_id)
        device_id = self.f_context.device_id
        self.learning = x.bpropagable
        if self.learning:
            self.b_context = Context(device_id)
            self.x, self.dL_dx = x.register_usage(device_id, device_id)
            self._df_dpref = Matrix.empty_like(self.x, device_id)
        else:
            self.x = x.register_usage(device_id)
        output = Matrix.empty_like(x, device_id)
        self.output = Connector(output, device_id if self.learning else None)
        if nonlinearity == "sigmoid":
            self.f = self.x.sigmoid
        elif nonlinearity == "tanh":
            self.f = self.x.tanh
        elif nonlinearity == "relu":
            self.f = self.x.relu
        elif nonlinearity == "softmax":
            raise ValueError("For softmax nonlinearity use SoftmaxBlock!")
        else:
            raise ValueError("Unknown nonlinearity: {}".format(nonlinearity))
        self.training_mode = True
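The three nonlinearities this block dispatches on, together with the derivatives its bprop would multiply the output gradient by; a plain NumPy sketch (quagga implements these as Matrix methods):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

z = np.linspace(-2, 2, 5).astype(np.float32)
f_sigmoid, df_sigmoid = sigmoid(z), sigmoid(z) * (1.0 - sigmoid(z))
f_tanh, df_tanh = np.tanh(z), 1.0 - np.tanh(z) ** 2
f_relu, df_relu = np.maximum(z, 0.0), (z > 0).astype(np.float32)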
Example #46
class HorizontalStackBlock(object):
    """
    Concatenates input matrices horizontally.

    Parameters
    ----------
    matrices : Matrix (GpuMatrix or CpuMatrix)
        Input matrices that need to be concatenated.
    device_id : int
        Defines the id of the device on which the computation will take place
    """

    def __init__(self, *matrices, **kwargs):
        # TODO(sergii): change hsplit to additive_hsplit for proper gradient accumulation
        self.context = Context(kwargs.get('device_id'))
        device_id = self.context.device_id
        self.matrices = []
        self.dL_dmatrices = []
        self.bpropagable = []
        for matrix in matrices:
            self.bpropagable.append(matrix.bpropagable)
            if matrix.bpropagable:
                matrix, dL_dmatrix = matrix.register_usage(device_id, device_id)
                self.dL_dmatrices.append(dL_dmatrix)
            else:
                matrix = matrix.register_usage(device_id)
            self.matrices.append(matrix)
        ncols = sum(matrix.ncols for matrix in matrices)
        dtype = matrices[0].dtype
        bu_device_id = device_id if self.dL_dmatrices else None
        output = Matrix.empty(matrices[0].nrows, ncols, dtype, device_id)
        self.output = Connector(output, bu_device_id)

    def fprop(self):
        self.output.assign_hstack(self.context, self.matrices)
        self.output.fprop()

    def bprop(self):
        if self.dL_dmatrices:
            col_slices = []
            ncols = [0]
            for matrix, bpropagable in izip(self.matrices, self.bpropagable):
                ncols.append(ncols[-1] + int(matrix.ncols))
                if bpropagable:
                    col_slices.append((ncols[-2], ncols[-1]))
            self.output.backward_matrix.hsplit(self.context, self.dL_dmatrices, col_slices)
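A NumPy analogue of the block: fprop is np.hstack, and bprop slices the output gradient back into per-input column ranges, the same col_slices bookkeeping as above. Purely illustrative.

import numpy as np

ms = [np.ones((2, 3)), 2 * np.ones((2, 1)), 3 * np.ones((2, 4))]
out = np.hstack(ms)                                    # fprop: (2, 8)

dL_dout = np.arange(out.size, dtype=np.float64).reshape(out.shape)
edges = np.cumsum([0] + [m.shape[1] for m in ms])      # column boundaries
dL_dms = [dL_dout[:, a:b] for a, b in zip(edges[:-1], edges[1:])]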
Example #47
class ScheduledSamplingBlock(object):
    def __init__(self, probs, true_labels, schedule, seed, device_id=None):
        self.schedule = schedule
        self.rnd = np.random.RandomState(seed)
        self.context = Context(device_id)
        device_id = self.context.device_id

        self.probs = probs.register_usage(device_id)
        self.true_labels = true_labels.register_usage(device_id)
        self.output = Connector(Matrix.empty_like(self.true_labels))

    def fprop(self):
        if self.rnd.binomial(1, self.schedule.value):
            self.output.assign(self.context, self.true_labels)
        else:
            self.probs.argmax(self.context, self.output, axis=1)
        self.output.fprop()
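Scheduled sampling in miniature: with probability p (the current schedule value) the block feeds the ground-truth labels onward, otherwise it feeds the model's own argmax predictions. p, probs and true_labels below are illustrative stand-ins.

import numpy as np

rnd = np.random.RandomState(42)
p = 0.75
probs = np.array([[0.1, 0.7, 0.2], [0.6, 0.3, 0.1]], dtype=np.float32)
true_labels = np.array([[2], [0]], dtype=np.int32)

if rnd.binomial(1, p):
    output = true_labels                                   # teacher forcing
else:
    output = probs.argmax(axis=1).reshape(-1, 1).astype(np.int32)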
Example #48
    def __init__(self, x, device_id=None):
        self.context = Context(device_id)
        device_id = self.context.device_id
        self.learning = x.bpropagable
        if self.learning:
            self.x, self.dL_dx = x.register_usage(device_id, device_id)
        else:
            self.x = x.register_usage(device_id)
        self.output = Connector(Matrix.empty_like(self.x), device_id if self.learning else None)
Example #49
    def test_theano_bprop_matrix(self):
        r = []
        for i in xrange(self.N):
            max_input_sequence_len = self.rng.random_integers(300)
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(2, max_input_sequence_len)
            embd_dim = self.rng.random_integers(10000)
            batch_size = self.rng.random_integers(500)
            output_dim = self.rng.random_integers(2000)
            W = self.get_orthogonal_matrix(embd_dim, output_dim)
            row_idxs = self.rng.randint(embd_dim, size=(batch_size, max_input_sequence_len)).astype(np.int32)
            true_labels = [self.rng.randint(output_dim, size=(batch_size, 1)).astype(np.int32) for _ in xrange(max_input_sequence_len)]
            device_id = 0

            quagga.processor_type = 'gpu'
            qrow_idxs = Connector(Matrix.from_npa(row_idxs))
            qtrue_labels = List([Connector(Matrix.from_npa(e)) for e in true_labels], qrow_idxs.ncols)
            qW = Connector(Matrix.from_npa(W), device_id)
            row_slicing_block = RowSlicingBlock(qW, qrow_idxs)
            seq_sce_block = SequencerBlock(block_class=SoftmaxCeBlock,
                                           params=[],
                                           sequences=[row_slicing_block.output, qtrue_labels])
            qW.fprop()
            qrow_idxs.ncols = sequence_len
            qrow_idxs.fprop()
            row_slicing_block.fprop()
            seq_sce_block.fprop()
            seq_sce_block.bprop()
            row_slicing_block.bprop()
            qW.add(Context(), qW.backward_matrix)

            th_row_idxs = T.imatrix()
            th_true_labels = T.imatrix()
            row_slicing_layer = RowSlicingLayer(W)
            toutput = row_slicing_layer.get_output_expr(th_row_idxs)
            loss = SequentialSoftmaxLayer.get_loss(toutput, th_true_labels)
            dL_dW = T.grad(loss, row_slicing_layer.W)
            fun = theano.function([th_row_idxs, th_true_labels],
                                  updates=[(row_slicing_layer.W, row_slicing_layer.W + dL_dW)])
            fun(row_idxs, np.hstack(true_labels[:sequence_len]))

            r.append(np.allclose(qW.to_host(), row_slicing_layer.W.get_value(), atol=1e-5))

        self.assertEqual(sum(r), len(r))
Example #50
    def test_theano_grad(self):
        quagga.processor_type = 'gpu'
        r = []
        for i in xrange(self.N):
            for sparse in [True, False]:
                batch_size, dim = self.rng.random_integers(2000, size=2)
                if sparse:
                    true_labels = np.zeros((batch_size, dim), np.float32)
                    for k, j in enumerate(self.rng.randint(dim, size=batch_size)):
                        true_labels[k, j] = 1.0
                else:
                    true_labels = self.rng.randint(dim, size=(batch_size, 1)).astype(np.int32)
                x = self.rng.randn(batch_size, dim).astype(np.float32)
                mask = (self.rng.rand(batch_size, 1) < 0.8).astype(np.float32)
                device_id = 0
                for with_mask in [False, True]:
                    # Theano model
                    th_x = T.fmatrix()
                    th_mask = T.fcol()
                    th_true_labels = T.fmatrix() if sparse else T.ivector()
                    if with_mask:
                        probs = T.nnet.softmax(th_mask * th_x)
                    else:
                        probs = T.nnet.softmax(th_x)
                    loss = T.mean(T.nnet.categorical_crossentropy(probs, th_true_labels))
                    if with_mask:
                        get_theano_grads = theano.function([th_x, th_true_labels, th_mask], T.grad(loss, wrt=th_x))
                        th_dL_dx = get_theano_grads(x, true_labels if sparse else true_labels[:, 0], mask)
                    else:
                        get_theano_grads = theano.function([th_x, th_true_labels], T.grad(loss, wrt=th_x))
                        th_dL_dx = get_theano_grads(x, true_labels if sparse else true_labels[:, 0])

                    # quagga model
                    x_gpu = Connector(Matrix.from_npa(x), device_id)
                    true_labels_gpu = Connector(Matrix.from_npa(true_labels))
                    mask_gpu = Connector(Matrix.from_npa(mask)) if with_mask else None
                    softmax_ce_block = SoftmaxCeBlock(x_gpu, true_labels_gpu, mask_gpu)
                    x_gpu.fprop()
                    true_labels_gpu.fprop()
                    if with_mask:
                        mask_gpu.fprop()
                    softmax_ce_block.fprop()
                    softmax_ce_block.bprop()
                    q_dL_dx = x_gpu.backward_matrix.to_host()

                    r.append(np.allclose(th_dL_dx, q_dL_dx))

        self.assertEqual(sum(r), len(r))
Example #51
class RowSlicingBlock(object):
    def __init__(self, W, row_indexes, dense=True):
        self.dense = dense
        device_id = W.device_id
        self.context = Context(device_id)
        learning = W.bpropagable
        if learning:
            if dense:
                self.W, self.dL_dW = W.register_usage(device_id, device_id)
            else:
                self.W, self.dL_dW = W.register_usage_with_sparse_backward_matrix()
        else:
            self.W = W.register_usage(device_id)
        self.row_indexes = row_indexes.register_usage(device_id)
        if row_indexes.ncols > 1:
            self.output = []
            for i in xrange(row_indexes.ncols):
                output = Matrix.empty(row_indexes.nrows, W.ncols, device_id=device_id)
                output = Connector(output, device_id if learning else None)
                self.output.append(output)
            self.output = List(self.output, row_indexes.ncols)
        else:
            output = Matrix.empty(row_indexes.nrows, W.ncols, device_id=device_id)
            self.output = Connector(output, device_id if learning else None)

    def fprop(self):
        if isinstance(self.output, List):
            self.W.slice_rows_batch(self.context, self.row_indexes, self.output)
        else:
            self.W.slice_rows(self.context, self.row_indexes, self.output)
        self.output.fprop()

    def bprop(self):
        if hasattr(self, 'dL_dW'):
            if isinstance(self.output, List):
                update_method = self.dL_dW.add_rows_batch_slice
            else:
                update_method = self.dL_dW.add_rows_slice
            if self.dense:
                update_method(self.context, self.row_indexes, self.output.bprop())
            else:
                update_method(self.row_indexes, self.output.bprop())
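A NumPy analogue of the single-column case: fprop gathers rows of W, and bprop scatter-adds the output gradient back into dL_dW (np.add.at accumulates correctly when an index repeats, which plain fancy-index assignment would not).

import numpy as np

W = np.arange(12, dtype=np.float32).reshape(4, 3)
row_indexes = np.array([0, 2, 2, 1], dtype=np.int32)

output = W[row_indexes]                     # gather: (4, 3)
dL_doutput = np.ones_like(output)
dL_dW = np.zeros_like(W)
np.add.at(dL_dW, row_indexes, dL_doutput)   # scatter-add; row 2 gets both contributions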
Example #52
class DataBlock(object):
    def __init__(self, data, char_to_idx, batch_size, x_device_id, y_device_id):
        self.data = HomogeneousDataIterator(data, char_to_idx, batch_size, True, True)
        self.data_iterator = iter(self.data)
        self.x_context = Context(x_device_id)
        self.y_context = Context(y_device_id)
        max_len = max(len(sub_line) for sub_line in data)
        self.x = Connector(Matrix.empty(batch_size, max_len - 1, 'int', x_device_id))
        self._y = Matrix.empty(batch_size, max_len - 1, 'int', y_device_id)
        self.y = List([Connector(self._y[:, i]) for i in xrange(max_len - 1)], self.x.ncols)
        self.lengths = Matrix.empty(self.x.nrows, 1, 'int', x_device_id)
        self._mask = Matrix.empty(self.x.nrows, self.x.ncols, 'float', x_device_id)
        # _mask has max_len - 1 columns, one per input position in x
        self.mask = List([Connector(self._mask[:, i]) for i in xrange(max_len - 1)], self.x.ncols)
        self.blocking_contexts = None

    def fprop(self):
        self.x_context.wait(*self.blocking_contexts)
        self.y_context.wait(*self.blocking_contexts)
        data = next(self.data_iterator)
        lengths_npa = np.array([[len(e) - 1] for e in data], np.int32, order='F')
        x_npa = np.zeros((len(data), int(np.max(lengths_npa))), np.int32, 'F')
        for k, e in enumerate(data):
            x_npa[k, :len(e) - 1] = e[:-1]
        self.x.assign_npa(self.x_context, x_npa)
        y_npa = np.zeros((len(data), int(np.max(lengths_npa))), np.int32, 'F')
        for k, e in enumerate(data):
            y_npa[k, :len(e) - 1] = e[1:]
        self._y.assign_npa(self.y_context, y_npa)
        for e in self.y:
            e.last_modification_context = self.y_context
        self.lengths.assign_npa(self.x_context, lengths_npa)
        self._mask.mask_column_numbers_row_wise(self.x_context, self.lengths)
        for e in self.mask:
            e.last_modification_context = self.x_context
        self.x.fprop()
        self.y.fprop()
        self.mask.fprop()
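How the batch is laid out, in plain NumPy: x holds each sequence minus its last symbol, y holds the same sequence shifted by one (next-symbol targets), and mask marks the valid positions of each row. data below is an illustrative batch of index sequences.

import numpy as np

data = [[5, 2, 7, 1], [3, 4, 9]]
lengths = np.array([[len(e) - 1] for e in data], np.int32)
max_len = int(lengths.max())

x = np.zeros((len(data), max_len), np.int32)
y = np.zeros((len(data), max_len), np.int32)
for k, e in enumerate(data):
    x[k, :len(e) - 1] = e[:-1]   # input symbols
    y[k, :len(e) - 1] = e[1:]    # next-symbol targets
mask = (np.arange(max_len) < lengths).astype(np.float32)  # (batch, max_len)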
Example #53
    def test_fprop(self):
        r = []
        for i in xrange(self.N):
            repeats = self.rng.random_integers(42)
            axis = self.rng.randint(2)
            input_dim, output_dim = self.rng.random_integers(2000, size=2)
            x = self.get_normal_matrix(input_dim, output_dim)

            output = {}
            for processor_type in ['gpu', 'cpu']:
                quagga.processor_type = processor_type
                qx = Connector(Matrix.from_npa(x))
                repeat_block = RepeatBlock(qx, repeats, axis)
                qx.fprop()
                repeat_block.fprop()
                output[processor_type] = repeat_block.output.to_host()

            r.append(np.allclose(output['gpu'], output['cpu']))

        self.assertEqual(sum(r), len(r))