Example #1
    def _forward_grouped_convolution_xp(self, x, W, b, xp):
        # G: group count
        # N: batch size
        # xC: input channels
        # yC: output channels
        G = self.groups
        N, xC = x.shape[:2]
        x_size = x.shape[2:]
        yCg = W.shape[1]
        yC = yCg * G
        xCg = xC // G
        k_size = W.shape[2:]
        dims = len(k_size)
        if xC % G != 0:
            raise TypeError('The number of groups must be '
                            'a divisor of that of input channels')

        x = xp.rollaxis(x, 1)  # (xC, N, x_size...)
        x = x.reshape(G, xCg, N * utils.size_of_shape(x_size))

        W = W.reshape(G, xCg, yCg * utils.size_of_shape(k_size))
        W = W.transpose(0, 2, 1)  # (G, yCg*k_size, xCg)

        # (G, yCg*k_size, N*x_size) = (G, yCg*k_size, xCg) @ (G, xCg, N*x_size)
        col = convolution_2d._matmul(W, x).astype(x.dtype, copy=False)

        col = col.reshape((yC,) + k_size + (N,) + x_size)
        col = xp.rollaxis(col, dims + 1)  # (N, yC, k_size..., x_size...)

        y = conv_nd.col2im_nd(col, self.stride, self.pad, self.outs,
                              dilate=self.dilate)

        if b is not None:
            y += b.reshape(1, yC, *((1,) * dims))
        return y,
Example #2
    def check_log_prob(self, is_gpu):
        smp = self.sample_for_test()
        if is_gpu:
            log_prob1 = self.gpu_dist.log_prob(cuda.to_gpu(smp)).data
        else:
            log_prob1 = self.cpu_dist.log_prob(smp).data

        if self.continuous:
            scipy_prob = self.scipy_dist.logpdf
        else:
            scipy_prob = self.scipy_dist.logpmf

        if self.scipy_onebyone:
            onebyone_smp = smp.reshape(*[
                utils.size_of_shape(sh)
                for sh in [self.sample_shape, self.shape, self.event_shape]
            ])
            onebyone_smp = numpy.swapaxes(onebyone_smp, 0, 1)
            onebyone_smp = onebyone_smp.reshape((-1, ) + self.sample_shape +
                                                self.event_shape)
            log_prob2 = []
            for one_params, one_smp in zip(self.scipy_onebyone_params_iter(),
                                           onebyone_smp):
                log_prob2.append(scipy_prob(one_smp, **one_params))
            log_prob2 = numpy.vstack(log_prob2)
            log_prob2 = log_prob2.reshape(utils.size_of_shape(self.shape),
                                          -1).T
            log_prob2 = log_prob2.reshape(self.sample_shape + self.shape)
        else:
            log_prob2 = scipy_prob(smp, **self.scipy_params)
        array.assert_allclose(log_prob1, log_prob2)
Example #3
    def _forward_grouped_convolution_xp(self, x, gy, xp):
        G = self.groups
        N, iC = x.shape[:2]
        oC = gy.shape[1]
        o_size = gy.shape[2:]
        o_size_prod = utils.size_of_shape(o_size)
        k_size = self.ksize
        dims = len(o_size)
        iCg = iC // G
        oCg = oC // G

        # Do not check iCg and oCg because this class is rarely used alone

        # (N, iC, k_size..., o_size...)
        x = conv_nd.im2col_nd(x, k_size, self.stride, self.pad,
                              cover_all=self.cover_all, dilate=self.dilate)

        x = xp.rollaxis(x, 0, dims + 2)  # (iC, k_size..., N, o_size...)
        mul_len = iCg * utils.size_of_shape(k_size)
        x = x.reshape(G, mul_len, N * o_size_prod)
        x = x.transpose(0, 2, 1)  # (G, N*o_size, iCg*k_size)

        gy = xp.rollaxis(gy, 1)  # (oC, N, o_size...)
        gy = gy.reshape(G, oCg, N * o_size_prod)

        # (G, oCg, iCg*k_size) = (G, oCg, N*o_size) @ (G, N*o_size, iCg*k_size)
        gW = convolution_2d._matmul(gy, x).astype(self.W_dtype, copy=False)
        gW = gW.reshape(oC, iCg, *k_size)

        return gW,
Example #4
    def _forward_grouped_convolution_xp(self, x, gy, xp):
        G = self.groups
        N, iC = x.shape[:2]
        oC = gy.shape[1]
        o_size = gy.shape[2:]
        o_size_prod = utils.size_of_shape(o_size)
        k_size = self.ksize
        dims = len(o_size)
        iCg = iC // G
        oCg = oC // G

        # Do not check iCg and oCg because this class is rarely used alone

        # (N, iC, k_size..., o_size...)
        x = conv_nd.im2col_nd(x,
                              k_size,
                              self.stride,
                              self.pad,
                              cover_all=self.cover_all,
                              dilate=self.dilate)

        x = xp.rollaxis(x, 0, dims + 2)  # (iC, k_size..., N, o_size...)
        mul_len = iCg * utils.size_of_shape(k_size)
        x = x.reshape(G, mul_len, N * o_size_prod)
        x = x.transpose(0, 2, 1)  # (G, N*o_size, iCg*k_size)

        gy = xp.rollaxis(gy, 1)  # (oC, N, o_size...)
        gy = gy.reshape(G, oCg, N * o_size_prod)

        # (G, oCg, iCg*k_size) = (G, oCg, N*o_size) @ (G, N*o_size, iCg*k_size)
        gW = convolution_2d._matmul(gy, x).astype(self.W_dtype, copy=False)
        gW = gW.reshape(oC, iCg, *k_size)

        return gW,
Example #5
    def check_log_prob(self, is_gpu):
        smp = self.sample_for_test()
        if is_gpu:
            log_prob1 = self.gpu_dist.log_prob(cuda.to_gpu(smp)).data
        else:
            log_prob1 = self.cpu_dist.log_prob(smp).data

        if self.continuous:
            scipy_prob = self.scipy_dist.logpdf
        else:
            scipy_prob = self.scipy_dist.logpmf

        if self.scipy_onebyone:
            onebyone_smp = smp.reshape(*[
                utils.size_of_shape(sh)
                for sh in [self.sample_shape, self.shape, self.event_shape]])
            onebyone_smp = numpy.swapaxes(onebyone_smp, 0, 1)
            onebyone_smp = onebyone_smp.reshape((-1,) + self.sample_shape
                                                + self.event_shape)
            log_prob2 = []
            for one_params, one_smp in zip(
                    self.scipy_onebyone_params_iter(), onebyone_smp):
                log_prob2.append(scipy_prob(one_smp, **one_params))
            log_prob2 = numpy.vstack(log_prob2)
            log_prob2 = log_prob2.reshape(
                utils.size_of_shape(self.shape), -1).T
            log_prob2 = log_prob2.reshape(self.sample_shape + self.shape)
        else:
            log_prob2 = scipy_prob(smp, **self.scipy_params)
        array.assert_allclose(log_prob1, log_prob2)
Example #6
    def check_sample(self, is_gpu):
        if is_gpu:
            smp1 = self.gpu_dist.sample(
                sample_shape=(100000,)+self.sample_shape).data
        else:
            smp1 = self.cpu_dist.sample(
                sample_shape=(100000,)+self.sample_shape).data

        if self.scipy_onebyone:
            smp2 = []
            for one_params in self.scipy_onebyone_params_iter():
                smp2.append(self.scipy_dist.rvs(
                    size=(100000,)+self.sample_shape, **one_params))
            smp2 = numpy.vstack(smp2)
            smp2 = smp2.reshape((utils.size_of_shape(self.shape), 100000)
                                + self.sample_shape
                                + self.cpu_dist.event_shape)
            smp2 = numpy.rollaxis(
                smp2, 0, smp2.ndim-len(self.cpu_dist.event_shape))
            smp2 = smp2.reshape((100000,) + self.sample_shape + self.shape
                                + self.cpu_dist.event_shape)
        else:
            smp2 = self.scipy_dist.rvs(
                size=(100000,) + self.sample_shape + self.shape,
                **self.scipy_params)
        array.assert_allclose(smp1.mean(axis=0), smp2.mean(axis=0),
                              atol=3e-2, rtol=3e-2)
        array.assert_allclose(smp1.std(axis=0), smp2.std(axis=0),
                              atol=3e-2, rtol=3e-2)
Example #7
    def check_sample(self, is_gpu):
        if is_gpu:
            smp1 = self.gpu_dist.sample(sample_shape=(100000, ) +
                                        self.sample_shape).data
        else:
            smp1 = self.cpu_dist.sample(sample_shape=(100000, ) +
                                        self.sample_shape).data

        if self.scipy_onebyone:
            smp2 = []
            for one_params in self.scipy_onebyone_params_iter():
                smp2.append(
                    self.scipy_dist.rvs(size=(100000, ) + self.sample_shape,
                                        **one_params))
            smp2 = numpy.vstack(smp2)
            smp2 = smp2.reshape((utils.size_of_shape(self.shape), 100000) +
                                self.sample_shape + self.cpu_dist.event_shape)
            smp2 = numpy.rollaxis(smp2, 0,
                                  smp2.ndim - len(self.cpu_dist.event_shape))
            smp2 = smp2.reshape((100000, ) + self.sample_shape + self.shape +
                                self.cpu_dist.event_shape)
        else:
            smp2 = self.scipy_dist.rvs(size=(100000, ) + self.sample_shape +
                                       self.shape,
                                       **self.scipy_params)
        array.assert_allclose(smp1.mean(axis=0),
                              smp2.mean(axis=0),
                              atol=3e-2,
                              rtol=3e-2)
        array.assert_allclose(smp1.std(axis=0),
                              smp2.std(axis=0),
                              atol=3e-2,
                              rtol=3e-2)
Example #8
def _as4darray(arr, mode):
    assert mode.cudnn_dim_ok
    if mode.is_for_conv2d:
        assert arr.ndim == 4
        return arr
    else:  # is_for_linear
        return arr.reshape(utils.size_of_shape(arr.shape[0:-1]), -1, 1, 1)
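A minimal NumPy-only sketch of the non-``is_for_conv2d`` branch above, with a product over the leading axes standing in for ``utils.size_of_shape``: all leading axes collapse into the first dimension and two singleton axes are appended, giving the 4-D layout the linear cuDNN path works with.

import numpy

arr = numpy.zeros((2, 3, 5))  # e.g. (batch, time, features)
lead = int(numpy.prod(arr.shape[0:-1], dtype=numpy.int64))  # product of leading axes
flat = arr.reshape(lead, -1, 1, 1)
print(flat.shape)  # (6, 5, 1, 1)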
Example #9
    def __call__(self, array):
        if self.dtype is not None:
            assert array.dtype == self.dtype
        device = backend.get_device_from_array(array)
        if not array.shape:  # 0-dim case
            array[...] = self.scale * (2 * numpy.random.randint(2) - 1)
        elif not array.size:
            raise ValueError('Array to be initialized must be non-empty.')
        else:
            # numpy.prod returns float value when the argument is empty.
            out_dim = len(array)
            in_dim = utils.size_of_shape(array.shape[1:])
            if (in_dim > out_dim and self._checks[0]) or (
                    in_dim < out_dim and self._checks[1]):
                raise ValueError(
                    'Cannot make orthogonal {}. '
                    'shape = {}, interpreted as '
                    '{}-dim input and {}-dim output.'.format(
                        self.mode, array.shape, in_dim, out_dim))
            transpose = in_dim > out_dim
            a = numpy.random.normal(size=(out_dim, in_dim))
            if transpose:
                a = a.T
            # cupy.linalg.qr requires cusolver in CUDA 8+
            q, r = numpy.linalg.qr(a)
            q *= numpy.copysign(self.scale, numpy.diag(r))
            if transpose:
                q = q.T
            array[...] = device.xp.asarray(q.reshape(array.shape))
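A quick hedged check of the QR construction used above, in plain NumPy with toy sizes: after the transpose dance for ``in_dim > out_dim``, the rows of the result are orthonormal up to ``scale``.

import numpy

out_dim, in_dim, scale = 3, 5, 1.0
a = numpy.random.normal(size=(out_dim, in_dim)).T  # transposed because in_dim > out_dim
q, r = numpy.linalg.qr(a)                          # q: (5, 3), orthonormal columns
q *= numpy.copysign(scale, numpy.diag(r))          # fix signs, apply scale
q = q.T                                            # back to (out_dim, in_dim)
print(numpy.allclose(q.dot(q.T), numpy.eye(out_dim)))  # True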
Example #10
def linear(x, W, b=None, n_batch_axes=1):
    """Linear function, or affine transformation.

    It accepts two or three arguments: an input minibatch ``x``, a weight
    matrix ``W``, and optionally a bias vector ``b``. It computes

    .. math:: Y = xW^\\top + b.

    Args:
        x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Input variable, which is a :math:`(s_1, s_2, \
            ..., s_n)`-shaped float array. Its first ``n_batch_axes``
            dimensions are handled as *minibatch dimensions*. The
            remaining dimensions are flattened into a single dimension whose
            size must be :math:`(s_{\\rm n\\_batch\\_axes} * ... * s_n = N)`.
        W (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Weight variable of shape :math:`(M, N)`,
            where :math:`(N = s_{\\rm n\\_batch\\_axes} * ... * s_n)`.
        b (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Bias variable (optional) of shape
            :math:`(M,)`.
        n_batch_axes (int): The number of batch axes. The default is 1. The
            input variable is reshaped into
            (:math:`{\\rm n\\_batch\\_axes} + 1`)-dimensional tensor.
            This should be greater than 0.

    Returns:
        ~chainer.Variable: Output variable. A float array with shape
        of :math:`(s_1, ..., s_{\\rm n\\_batch\\_axes}, M)`.

    .. seealso:: :class:`~chainer.links.Linear`

    .. admonition:: Example

        >>> x = np.random.uniform(0, 1, (3, 4)).astype(np.float32)
        >>> W = np.random.uniform(0, 1, (5, 4)).astype(np.float32)
        >>> b = np.random.uniform(0, 1, (5,)).astype(np.float32)
        >>> y = F.linear(x, W, b)
        >>> y.shape
        (3, 5)

    """
    if n_batch_axes <= 0:
        raise ValueError('n_batch_axes should be greater than 0.')
    if n_batch_axes > 1:
        batch_shape = x.shape[:n_batch_axes]
        batch_size = utils.size_of_shape(batch_shape)
        x = x.reshape(batch_size, -1)
    elif x.ndim > 2:
        x = x.reshape(x.shape[0], -1)
    if b is None:
        args = x, W
    else:
        args = x, W, b

    y, = LinearFunction().apply(args)
    if n_batch_axes > 1:
        y = y.reshape(batch_shape + (-1,))
    return y
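A small NumPy-only sketch of the ``n_batch_axes > 1`` path in the function above (mirroring its reshape logic, not the chainer call itself): the leading batch axes are flattened before the matrix product and restored afterwards.

import numpy as np

x = np.random.uniform(0, 1, (3, 4, 6)).astype(np.float32)  # two batch axes: (3, 4)
W = np.random.uniform(0, 1, (5, 6)).astype(np.float32)
n_batch_axes = 2

batch_shape = x.shape[:n_batch_axes]            # (3, 4)
x2d = x.reshape(int(np.prod(batch_shape)), -1)  # (12, 6)
y = x2d.dot(W.T)                                # (12, 5), i.e. x W^T
y = y.reshape(batch_shape + (-1,))              # (3, 4, 5)
print(y.shape)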
Example #11
def linear(x, W, b=None, n_batch_axes=1):
    """Linear function, or affine transformation.

    It accepts two or three arguments: an input minibatch ``x``, a weight
    matrix ``W``, and optionally a bias vector ``b``. It computes

    .. math:: Y = xW^\\top + b.

    Args:
        x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Input variable, which is a :math:`(s_1, s_2, \
            ..., s_n)`-shaped float array. Its first ``n_batch_axes``
            dimensions are handled as *minibatch dimensions*. The
            remaining dimensions are flattened into a single dimension whose
            size must be :math:`(s_{\\rm n\\_batch\\_axes} * ... * s_n = N)`.
        W (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Weight variable of shape :math:`(M, N)`,
            where :math:`(N = s_{\\rm n\\_batch\\_axes} * ... * s_n)`.
        b (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Bias variable (optional) of shape
            :math:`(M,)`.
        n_batch_axes (int): The number of batch axes. The default is 1. The
            input variable is reshaped into
            (:math:`{\\rm n\\_batch\\_axes} + 1`)-dimensional tensor.
            This should be greater than 0.

    Returns:
        ~chainer.Variable: Output variable. A float array with shape
        of :math:`(s_1, ..., s_{\\rm n\\_batch\\_axes}, M)`.

    .. seealso:: :class:`~chainer.links.Linear`

    .. admonition:: Example

        >>> x = np.random.uniform(0, 1, (3, 4)).astype(np.float32)
        >>> W = np.random.uniform(0, 1, (5, 4)).astype(np.float32)
        >>> b = np.random.uniform(0, 1, (5,)).astype(np.float32)
        >>> y = F.linear(x, W, b)
        >>> y.shape
        (3, 5)

    """
    if n_batch_axes <= 0:
        raise ValueError('n_batch_axes should be greater than 0.')
    if n_batch_axes > 1:
        batch_shape = x.shape[:n_batch_axes]
        batch_size = utils.size_of_shape(batch_shape)
        x = x.reshape(batch_size, -1)
    elif x.ndim > 2:
        x = x.reshape(x.shape[0], -1)
    if b is None:
        args = x, W
    else:
        args = x, W, b

    y, = LinearFunction().apply(args)
    if n_batch_axes > 1:
        y = y.reshape(batch_shape + (-1, ))
    return y
Example #12
    def _forward_grouped_convolution_xp(self, x, W, b, xp):
        # G: group count
        # N: batch size
        # iC: input channels
        # oC: output channels
        G = self.groups
        N, iC = x.shape[:2]
        oC = W.shape[0]
        k_size = W.shape[2:]
        iCg = iC // G
        oCg = oC // G
        dims = len(k_size)
        if iC % G != 0:
            raise TypeError('The number of groups must be '
                            'a divisor of that of input channels')
        if oC % G != 0:
            raise TypeError('The number of groups must be '
                            'a divisor of that of output channels')

        xp = backend.get_array_module(x)

        # (N, iC, k_size..., o_size...)
        x = conv_nd.im2col_nd(x,
                              k_size,
                              self.stride,
                              self.pad,
                              cover_all=self.cover_all,
                              dilate=self.dilate)
        o_size = x.shape[-dims:]

        x = xp.rollaxis(x, 0, dims + 2)  # (iC, k_size..., N, o_size...)
        mul_len = iCg * utils.size_of_shape(k_size)
        x = x.reshape(G, mul_len, N * utils.size_of_shape(o_size))

        W = W.reshape(G, oCg, mul_len)

        # (G, oCg, N*o_size) = (G, oCg, iCg*k_size) @ (G, iCg*k_size, N*o_size)
        y = convolution_2d._matmul(W, x).astype(x.dtype, copy=False)
        y = y.reshape(oC, N, *o_size)
        y = xp.rollaxis(y, 1)  # (N, oC, o_size...)
        if b is not None:
            y += b.reshape(1, b.size, *((1, ) * dims))

        return y,
Example #13
    def forward(self, x, n_batch_axes=1):
        if self.W.array is None:
            in_size = utils.size_of_shape(x.shape[1:])
            self._initialize_params(in_size)

        return thresholded_linear(x,
                                  self.W,
                                  self.b,
                                  n_batch_axes=n_batch_axes,
                                  threshold=self.threshold)
Example #14
    def forward(self, x, n_batch_axes=1):
        if self.W.array is None:
            in_size = utils.size_of_shape(x.shape[1:])
            self._initialize_params(in_size)

        return ada_loss_linear(x,
                               self.W,
                               self.b,
                               n_batch_axes=n_batch_axes,
                               ada_loss=self.ada_loss)
Example #15
def get_fans(shape):
    if not isinstance(shape, tuple):
        raise ValueError('shape must be tuple')

    if len(shape) < 2:
        raise ValueError(
            'shape must be of length >= 2: shape={}'.format(shape))

    receptive_field_size = utils.size_of_shape(shape[2:])
    fan_in = shape[1] * receptive_field_size
    fan_out = shape[0] * receptive_field_size
    return fan_in, fan_out
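A hedged stand-alone sketch of what ``get_fans`` computes, with ``numpy.prod`` standing in for ``utils.size_of_shape``: for a convolution weight of shape ``(out_channels, in_channels, kh, kw)`` it yields the fan-in and fan-out used by Glorot/He-style initializers.

import numpy

def get_fans_sketch(shape):
    # mirrors get_fans above; numpy.prod stands in for utils.size_of_shape
    receptive_field_size = int(numpy.prod(shape[2:], dtype=numpy.int64))
    fan_in = shape[1] * receptive_field_size
    fan_out = shape[0] * receptive_field_size
    return fan_in, fan_out

print(get_fans_sketch((32, 16, 3, 3)))  # (144, 288): 16*3*3 and 32*3*3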
Example #16
def get_fans(shape):
    if not isinstance(shape, tuple):
        raise ValueError('shape must be tuple')

    if len(shape) < 2:
        raise ValueError(
            'shape must be of length >= 2: shape={}'.format(shape))

    receptive_field_size = utils.size_of_shape(shape[2:])
    fan_in = shape[1] * receptive_field_size
    fan_out = shape[0] * receptive_field_size
    return fan_in, fan_out
Example #17
    def _forward_grouped_convolution_xp(self, x, W, b, xp):
        # G: group count
        # N: batch size
        # iC: input channels
        # oC: output channels
        G = self.groups
        N, iC = x.shape[:2]
        oC = W.shape[0]
        k_size = W.shape[2:]
        iCg = iC // G
        oCg = oC // G
        dims = len(k_size)
        if iC % G != 0:
            raise TypeError('The number of groups must be '
                            'a divisor of that of input channels')
        if oC % G != 0:
            raise TypeError('The number of groups must be '
                            'a divisor of that of output channels')

        xp = backend.get_array_module(x)

        # (N, iC, k_size..., o_size...)
        x = conv_nd.im2col_nd(x, k_size, self.stride, self.pad,
                              cover_all=self.cover_all, dilate=self.dilate)
        o_size = x.shape[-dims:]

        x = xp.rollaxis(x, 0, dims + 2)  # (iC, k_size..., N, o_size...)
        mul_len = iCg * utils.size_of_shape(k_size)
        x = x.reshape(G, mul_len, N * utils.size_of_shape(o_size))

        W = W.reshape(G, oCg, mul_len)

        # (G, oCg, N*o_size) = (G, oCg, iCg*k_size) @ (G, iCg*k_size, N*o_size)
        y = convolution_2d._matmul(W, x).astype(x.dtype, copy=False)
        y = y.reshape(oC, N, *o_size)
        y = xp.rollaxis(y, 1)  # (N, oC, o_size...)
        if b is not None:
            y += b.reshape(1, b.size, *((1,) * dims))

        return y,
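A minimal NumPy sketch of the core trick in the method above, assuming im2col has already been applied: once the input is laid out as ``(G, iCg*k_size, N*o_size)`` and the weights as ``(G, oCg, iCg*k_size)``, the grouped convolution reduces to one batched matrix product per group.

import numpy

G, N, iCg, oCg, k_size, o_size = 2, 3, 4, 5, 9, 7       # toy sizes
x_col = numpy.random.rand(G, iCg * k_size, N * o_size)  # grouped im2col output
W_g = numpy.random.rand(G, oCg, iCg * k_size)           # grouped weights

y = numpy.matmul(W_g, x_col)  # (G, oCg, N*o_size): one matmul per group
print(y.shape)                # (2, 5, 21)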
Example #18
def get_fans(shape):
    if not isinstance(shape, tuple):
        raise ValueError(
            'shape must be tuple. Actual type: {}'.format(type(shape)))

    if len(shape) < 2:
        raise ValueError(
            'shape must be of length >= 2. Actual shape: {}'.format(shape))

    receptive_field_size = utils.size_of_shape(shape[2:])
    fan_in = shape[1] * receptive_field_size
    fan_out = shape[0] * receptive_field_size
    return fan_in, fan_out
Example #19
def get_fans(shape):
    if not isinstance(shape, tuple):
        raise ValueError('shape must be tuple. Actual type: {}'.format(
            type(shape)))

    if len(shape) < 2:
        raise ValueError(
            'shape must be of length >= 2. Actual shape: {}'.format(shape))

    receptive_field_size = utils.size_of_shape(shape[2:])
    fan_in = shape[1] * receptive_field_size
    fan_out = shape[0] * receptive_field_size
    return fan_in, fan_out
Example #20
    def _forward_grouped_convolution_xp(self, x, W, b, xp):
        # G: group count
        # N: batch size
        # xC: input channels
        # yC: output channels
        G = self.groups
        N, xC = x.shape[:2]
        x_size = x.shape[2:]
        yCg = W.shape[1]
        yC = yCg * G
        xCg = xC // G
        k_size = W.shape[2:]
        dims = len(k_size)
        if xC % G != 0:
            raise TypeError('The number of groups must be '
                            'a divisor of that of input channels')

        x = xp.rollaxis(x, 1)  # (xC, N, x_size...)
        x = x.reshape(G, xCg, N * utils.size_of_shape(x_size))

        W = W.reshape(G, xCg, yCg * utils.size_of_shape(k_size))
        W = W.transpose(0, 2, 1)  # (G, yCg*k_size, xCg)

        # (G, yCg*k_size, N*x_size) = (G, yCg*k_size, xCg) @ (G, xCg, N*x_size)
        col = convolution_2d._matmul(W, x).astype(x.dtype, copy=False)

        col = col.reshape((yC, ) + k_size + (N, ) + x_size)
        col = xp.rollaxis(col, dims + 1)  # (N, yC, k_size..., x_size...)

        y = conv_nd.col2im_nd(col,
                              self.stride,
                              self.pad,
                              self.outs,
                              dilate=self.dilate)

        if b is not None:
            y += b.reshape(1, yC, *((1, ) * dims))
        return y,
Example #21
    def forward(self, x):
        """Updates the internal state and returns the LSTM outputs.

        Args:
            x (~chainer.Variable): A new batch from the input sequence.

        Returns:
            ~chainer.Variable: Outputs of updated LSTM units.

        """
        if self.upward.W.array is None:
            with cuda.get_device_from_id(self._device_id):
                in_size = utils.size_of_shape(x.shape[1:])
                self.upward._initialize_params(in_size)
                self._initialize_params()

        batch = x.shape[0]
        lstm_in = self.upward(x)
        h_rest = None
        if self.h is not None:
            h_size = self.h.shape[0]
            if batch == 0:
                h_rest = self.h
            elif h_size < batch:
                msg = ('The batch size of x must be equal to or less than '
                       'the size of the previous state h.')
                raise TypeError(msg)
            elif h_size > batch:
                h_update, h_rest = split_axis.split_axis(self.h, [batch],
                                                         axis=0)
                lstm_in += self.lateral(h_update)
            else:
                lstm_in += self.lateral(self.h)
        if self.c is None:
            xp = self.xp
            with cuda.get_device_from_id(self._device_id):
                self.c = variable.Variable(
                    xp.zeros((batch, self.state_size), dtype=x.dtype))
        self.c, y = lstm.lstm(self.c, lstm_in)

        if h_rest is None:
            self.h = y
        elif len(y.array) == 0:
            self.h = h_rest
        else:
            self.h = concat.concat([y, h_rest], axis=0)

        return y
Example #22
    def forward(self, x):
        """Updates the internal state and returns the LSTM outputs.

        Args:
            x (~chainer.Variable): A new batch from the input sequence.

        Returns:
            ~chainer.Variable: Outputs of updated LSTM units.

        """
        if self.upward.W.array is None:
            with chainer.using_device(self.device):
                in_size = utils.size_of_shape(x.shape[1:])
                self.upward._initialize_params(in_size)
                self._initialize_params()

        batch = x.shape[0]
        lstm_in = self.upward(x)
        h_rest = None
        if self.h is not None:
            h_size = self.h.shape[0]
            if batch == 0:
                h_rest = self.h
            elif h_size < batch:
                msg = ('The batch size of x must be equal to or less than '
                       'the size of the previous state h.')
                raise TypeError(msg)
            elif h_size > batch:
                h_update, h_rest = split_axis.split_axis(
                    self.h, [batch], axis=0)
                lstm_in += self.lateral(h_update)
            else:
                lstm_in += self.lateral(self.h)
        if self.c is None:
            with chainer.using_device(self.device):
                self.c = variable.Variable(
                    self.xp.zeros((batch, self.state_size), dtype=x.dtype))
        self.c, y = lstm.lstm(self.c, lstm_in)

        if h_rest is None:
            self.h = y
        elif len(y.array) == 0:
            self.h = h_rest
        else:
            self.h = concat.concat([y, h_rest], axis=0)

        return y
Example #23
    def forward(self, x, n_batch_axes=1):
        """Applies the linear layer.

        Args:
            x (~chainer.Variable): Batch of input vectors.
            n_batch_axes (int): The number of batch axes. The default is 1. The
                input variable is reshaped into
                (:math:`{\\rm n\\_batch\\_axes} + 1`)-dimensional tensor.
                This should be greater than 0.

        Returns:
            ~chainer.Variable: Output of the linear layer.

        """
        if self.W.data is None:
            in_size = utils.size_of_shape(x.shape[1:])
            self._initialize_params(in_size)
        return linear.linear(x, self.W, self.b, n_batch_axes=n_batch_axes)
Example #24
    def forward(self, x, n_batch_axes=1):
        """Applies the linear layer.

        Args:
            x (~chainer.Variable): Batch of input vectors.
            n_batch_axes (int): The number of batch axes. The default is 1. The
                input variable is reshaped into
                (:math:`{\\rm n\\_batch\\_axes} + 1`)-dimensional tensor.
                This should be greater than 0.

        Returns:
            ~chainer.Variable: Output of the linear layer.

        """
        if self.W.array is None:
            in_size = utils.size_of_shape(x.shape[1:])
            self._initialize_params(in_size)
        return linear.linear(x, self.W, self.b, n_batch_axes=n_batch_axes)
Example #25
    def forward(self, x):
        """Apply layer normalization to given input.

        Args:
            x (~chainer.Variable): Batch vectors.
                Shape of this value must be `(batch_size, unit_size)`,
                e.g., the output of :func:`~chainer.functions.linear`.

        Returns:
            ~chainer.Variable: Output of the layer normalization.

        """
        if self.gamma.array is None:
            in_size = utils.size_of_shape(x.shape[1:])
            self._initialize_params(in_size)

        return layer_normalization.layer_normalization(
            x, self.gamma, self.beta, self.eps)
Example #26
    def forward(self, x):
        """Apply layer normalization to given input.

        Args:
            x (~chainer.Variable): Batch vectors.
                Shape of this value must be `(batch_size, unit_size)`,
                e.g., the output of :func:`~chainer.functions.linear`.

        Returns:
            ~chainer.Variable: Output of the layer normalization.

        """
        if self.gamma.data is None:
            in_size = utils.size_of_shape(x.shape[1:])
            self._initialize_params(in_size)

        return layer_normalization.layer_normalization(x, self.gamma,
                                                       self.beta, self.eps)
Example #27
    def forward(self, xs):
        xp = backend.get_array_module(*xs)

        if self.length is None:
            length = max(len(x) for x in xs)
        else:
            length = self.length

        shape = (len(xs), length) + xs[0].shape[1:]
        y = xp.empty(shape, xs[0].dtype)
        if length == 0:
            return y,  # y is an empty array

        if xp is numpy or any(not x._c_contiguous for x in xs):
            for i, x in enumerate(xs):
                l = len(x)
                if l == length:
                    y[i] = x
                else:
                    y[i, 0:l] = x
                    y[i, l:] = self.padding
        else:
            # This code assumes that all arrays are c_contiguous
            ptr_shape = (Ellipsis,) + (None,) * xs[0].ndim
            ptrs = cuda.cupy.array(
                [x.data for x in xs], numpy.uintp)[ptr_shape]
            lengths = cuda.cupy.array(
                [len(x) for x in xs], numpy.int32)[ptr_shape]
            base = utils.size_of_shape(xs[0].shape[1:])
            cuda.elementwise(
                'P ptr, int32 length, T pad, int32 base, int32 max_length',
                'T y',
                '''
                int d = i / base % max_length;
                if (d < length) {
                  y = reinterpret_cast<const T*>(ptr)[i % (base * max_length)];
                } else {
                  y = pad;
                }
                ''',
                'pad_sequence_fwd'
            )(ptrs, lengths, self.padding, base, length, y)

        return y,
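A NumPy-only sketch of the CPU branch above: sequences of unequal length are copied row by row into a preallocated buffer, and the tail of each row is filled with the padding value.

import numpy

xs = [numpy.arange(3, dtype=numpy.float32),
      numpy.arange(5, dtype=numpy.float32)]
padding = -1.0
length = max(len(x) for x in xs)

y = numpy.empty((len(xs), length) + xs[0].shape[1:], xs[0].dtype)
for i, x in enumerate(xs):
    l = len(x)
    y[i, 0:l] = x
    y[i, l:] = padding
print(y)  # [[ 0.  1.  2. -1. -1.]
          #  [ 0.  1.  2.  3.  4.]]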
Example #28
def ada_loss_linear(x, W, b=None, n_batch_axes=1, ada_loss=None):
    """ Simply replace the LinearFunction in linear to AdaLossLinear """
    if n_batch_axes <= 0:
        raise ValueError('n_batch_axes should be greater than 0.')
    if n_batch_axes > 1:
        batch_shape = x.shape[:n_batch_axes]
        batch_size = utils.size_of_shape(batch_shape)
        x = x.reshape(batch_size, -1)
    elif x.ndim > 2:
        x = x.reshape(x.shape[0], -1)
    if b is None:
        args = x, W
    else:
        args = x, W, b

    y, = AdaLossLinearFunction(ada_loss=ada_loss).apply(args)
    if n_batch_axes > 1:
        y = y.reshape(batch_shape + (-1, ))
    return y
Example #29
def thresholded_linear(x, W, b=None, n_batch_axes=1, threshold=6e-8):
    """ """
    if n_batch_axes <= 0:
        raise ValueError('n_batch_axes should be greater than 0.')
    if n_batch_axes > 1:
        batch_shape = x.shape[:n_batch_axes]
        batch_size = utils.size_of_shape(batch_shape)
        x = x.reshape(batch_size, -1)
    elif x.ndim > 2:
        x = x.reshape(x.shape[0], -1)
    if b is None:
        args = x, W
    else:
        args = x, W, b

    y, = ThresholdedLinearFunction(threshold=threshold).apply(args)
    if n_batch_axes > 1:
        y = y.reshape(batch_shape + (-1, ))
    return y
Example #30
    def __call__(self, array):
        if self.dtype is not None:
            assert array.dtype == self.dtype
        xp = backend.get_array_module(array)
        if not array.shape:  # 0-dim case
            array[...] = self.scale * (2 * numpy.random.randint(2) - 1)
        elif not array.size:
            raise ValueError('Array to be initialized must be non-empty.')
        else:
            # numpy.prod returns float value when the argument is empty.
            flat_shape = (len(array), utils.size_of_shape(array.shape[1:]))
            if flat_shape[0] > flat_shape[1]:
                raise ValueError('Cannot make orthogonal system because'
                                 ' # of vectors ({}) is larger than'
                                 ' that of dimensions ({})'.format(
                                     flat_shape[0], flat_shape[1]))
            a = numpy.random.normal(size=flat_shape)
            # cupy.linalg.qr requires cusolver in CUDA 8+
            q, r = numpy.linalg.qr(a.T)
            q *= numpy.copysign(self.scale, numpy.diag(r))
            array[...] = xp.asarray(q.T.reshape(array.shape))
Example #31
    def __call__(self, array):
        if self.dtype is not None:
            assert array.dtype == self.dtype,\
                '{} != {}'.format(array.dtype, self.dtype)
        if not array.shape:  # 0-dim case
            if self.rng is None:
                a = numpy.random.randint(2)
            else:
                a = self.rng.randint(2)
            a = int(a)
            array[...] = self.scale * (2 * a - 1)
        elif not array.size:
            raise ValueError('Array to be initialized must be non-empty.')
        else:
            # numpy.prod returns float value when the argument is empty.
            out_dim = len(array)
            in_dim = utils.size_of_shape(array.shape[1:])
            if (in_dim > out_dim and self._checks[0]) or (
                    in_dim < out_dim and self._checks[1]):
                raise ValueError(
                    'Cannot make orthogonal {}. '
                    'shape = {}, interpreted as '
                    '{}-dim input and {}-dim output.'.format(
                        self.mode, array.shape, in_dim, out_dim))
            transpose = in_dim > out_dim
            if self.rng is None:
                a = numpy.random.normal(size=(out_dim, in_dim))
            else:
                a_tmp = self.rng.normal(size=(out_dim, in_dim))
                a = numpy.empty(a_tmp.shape, dtype=a_tmp.dtype)
                backend.copyto(a, a_tmp)
            if transpose:
                a = a.T
            # cupy.linalg.qr requires cusolver in CUDA 8+
            q, r = numpy.linalg.qr(a)
            q *= numpy.copysign(self.scale, numpy.diag(r))
            if transpose:
                q = q.T
            backend.copyto(array, q.reshape(array.shape).astype(
                array.dtype, copy=False))
Example #32
    def forward(self, inputs):
        gy, = inputs
        xp = backend.get_array_module(gy)
        repeats = self.repeats
        axis = self.axis
        shape = list(self.in_shape)
        dtype = self.in_dtype

        if len(gy) == 0:
            gx = xp.zeros(shape, dtype)
            return gx,

        if len(repeats) == 1:
            repeats = int(repeats[0])
            if axis is None:
                gx = gy.reshape(-1, repeats).sum(axis=1).reshape(shape)
            else:
                shape[axis:axis + 1] = [-1, repeats]
                gx = gy.reshape(shape).sum(axis=axis + 1)
            return gx,

        if axis is None:
            pos = 0
            gx = xp.zeros(utils.size_of_shape(shape), dtype)
            for (i, r) in enumerate(repeats):
                gx[i] = xp.sum(gy[pos:pos + r])
                pos += r
            gx = gx.reshape(shape)
        else:
            gx = xp.zeros(shape, dtype)
            pos = 0
            src = [slice(None)] * axis + [None]
            dst = [slice(None)] * axis + [None]
            for (i, r) in enumerate(repeats):
                src[-1] = slice(pos, pos + r)
                dst[-1] = slice(i, i + 1)
                gx[tuple(dst)] = gy[tuple(src)].sum(axis=axis, keepdims=True)
                pos += r
        return gx,
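A small hedged check of the uniform-repeat branch above: when every element is repeated ``r`` times with ``axis=None``, the gradient of ``numpy.repeat`` is the sum of each consecutive group of ``r`` upstream gradients.

import numpy

shape, r = (2, 3), 4
x = numpy.arange(6, dtype=numpy.float64).reshape(shape)
gy = numpy.ones(x.size * r)  # upstream gradient w.r.t. numpy.repeat(x, r)

gx = gy.reshape(-1, r).sum(axis=1).reshape(shape)
print(gx)  # every entry is 4.0, one contribution per repeat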
Example #33
    def __call__(self, array):
        if self.dtype is not None:
            assert array.dtype == self.dtype
        xp = backend.get_array_module(array)
        if not array.shape:  # 0-dim case
            array[...] = self.scale
        elif not array.size:
            raise ValueError('Array to be initialized must be non-empty.')
        else:
            # numpy.prod returns float value when the argument is empty.
            flat_shape = (len(array), utils.size_of_shape(array.shape[1:]))
            if flat_shape[0] > flat_shape[1]:
                raise ValueError('Cannot make orthogonal system because'
                                 ' # of vectors ({}) is larger than'
                                 ' that of dimensions ({})'.format(
                                     flat_shape[0], flat_shape[1]))
            a = numpy.random.normal(size=flat_shape)
            # we do not have cupy.linalg.svd for now
            u, _, v = numpy.linalg.svd(a, full_matrices=False)
            # pick the one with the correct shape
            q = u if u.shape == flat_shape else v
            array[...] = xp.asarray(q.reshape(array.shape))
            array *= self.scale
Example #34
def _get_tensor4d_shape(axis, shape):
    left_shape = utils.size_of_shape(shape[:axis])
    center_shape = shape[axis]
    right_shape = utils.size_of_shape(shape[axis:][1:])
    return left_shape, center_shape, right_shape, 1
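A hedged usage sketch of the helper above, with ``numpy.prod`` standing in for ``utils.size_of_shape``: an arbitrary shape is folded into ``(left, center, right, 1)`` around the chosen axis, a 4-D view suitable for the surrounding cuDNN calls (an assumption about the call site).

import numpy

def get_tensor4d_shape_sketch(axis, shape):
    # mirrors _get_tensor4d_shape above; numpy.prod replaces utils.size_of_shape
    left_shape = int(numpy.prod(shape[:axis], dtype=numpy.int64))
    center_shape = shape[axis]
    right_shape = int(numpy.prod(shape[axis:][1:], dtype=numpy.int64))
    return left_shape, center_shape, right_shape, 1

print(get_tensor4d_shape_sketch(1, (8, 16, 32, 32)))  # (8, 16, 1024, 1)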