Example #1
    def sample_n(self, n):
        # Draw n standard-normal samples of shape (n,) + loc.shape.
        if self._is_gpu:
            eps = cuda.cupy.random.standard_normal(
                (n,)+self.loc.shape, dtype=self.loc.dtype)
        else:
            eps = numpy.random.standard_normal(
                (n,)+self.loc.shape).astype(numpy.float32)
        # Reparameterize: noise = scale * eps + loc, tiling scale and loc
        # along the new leading sample axis with expand_dims + repeat.
        noise = repeat.repeat(
            expand_dims.expand_dims(self.scale, axis=0), n, axis=0) * eps
        noise += repeat.repeat(expand_dims.expand_dims(
            self.loc, axis=0), n, axis=0)

        return noise
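In plain NumPy the same reparameterization collapses to broadcasting; a minimal sketch with made-up loc and scale values (the explicit expand_dims/repeat calls above exist to keep the tiling differentiable through Chainer Variables):

import numpy as np

loc = np.array([0.0, 1.0], dtype=np.float32)
scale = np.array([1.0, 0.5], dtype=np.float32)
n = 4
eps = np.random.standard_normal((n,) + loc.shape).astype(np.float32)
noise = scale[None] * eps + loc[None]  # broadcasting replaces repeat()
print(noise.shape)  # (4, 2)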
Example #2
    def sample_n(self, n):
        # Draw standard-normal noise with a trailing singleton axis so it
        # can be matrix-multiplied by the Cholesky factor below.
        if self._is_gpu:
            eps = cuda.cupy.random.standard_normal(
                (n,)+self.loc.shape+(1,), dtype=self.loc.dtype)
        else:
            eps = numpy.random.standard_normal(
                (n,)+self.loc.shape+(1,)).astype(numpy.float32)

        # Reparameterize: noise = scale_tril @ eps + loc.
        noise = matmul.matmul(repeat.repeat(
            expand_dims.expand_dims(self.scale_tril, axis=0), n, axis=0), eps)
        noise = squeeze.squeeze(noise, axis=-1)
        noise += repeat.repeat(expand_dims.expand_dims(
            self.loc, axis=0), n, axis=0)

        return noise
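The same idea in NumPy for a full-covariance normal: multiply the Cholesky factor into the noise vector, then add the mean. A sketch with illustrative values:

import numpy as np

d, n = 3, 5
loc = np.zeros(d, dtype=np.float32)
scale_tril = np.tril(np.ones((d, d), dtype=np.float32))
eps = np.random.standard_normal((n, d, 1)).astype(np.float32)
noise = (scale_tril[None] @ eps)[..., 0] + loc[None]
print(noise.shape)  # (5, 3)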
Example #3
    def variance(self):
        alpha0 = expand_dims.expand_dims(self.alpha0, axis=-1)
        return (
            self.alpha
            * (alpha0 - self.alpha)
            / alpha0 ** 2
            / (alpha0 + 1))
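This is the standard Dirichlet variance Var(X_i) = alpha_i * (alpha0 - alpha_i) / (alpha0**2 * (alpha0 + 1)) with alpha0 = sum(alpha). A quick numerical cross-check against SciPy, with made-up concentrations:

import numpy as np
from scipy.stats import dirichlet

alpha = np.array([2.0, 3.0, 5.0])
alpha0 = alpha.sum()
var = alpha * (alpha0 - alpha) / alpha0 ** 2 / (alpha0 + 1)
assert np.allclose(var, dirichlet.var(alpha))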
Example #4
def _kl_dirichlet_dirichlet(dist1, dist2):
    return (
        - _lbeta(dist1.alpha)
        + _lbeta(dist2.alpha)
        + sum_mod.sum(
            (dist1.alpha - dist2.alpha)
            * (digamma.digamma(dist1.alpha)
               - expand_dims.expand_dims(
                   digamma.digamma(dist1.alpha0),
                   axis=-1)),
            axis=-1))
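A SciPy cross-check of the same identity, where _lbeta(a) is taken to be sum(log Gamma(a_i)) - log Gamma(sum(a_i)) and the concentration vectors are made up:

import numpy as np
from scipy.special import digamma, gammaln


def kl_dirichlet(a1, a2):
    def lbeta(a):
        return gammaln(a).sum() - gammaln(a.sum())
    return (- lbeta(a1) + lbeta(a2)
            + np.sum((a1 - a2) * (digamma(a1) - digamma(a1.sum()))))


print(kl_dirichlet(np.array([2.0, 3.0, 4.0]), np.array([1.0, 1.0, 1.0])))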
Example #5
def stack(xs, axis=0):
    """Concatenate variables along a new axis.

    Args:
        xs (list of chainer.Variable): Variables to be concatenated.
        axis (int): Axis of result along which variables are stacked.

    Returns:
        ~chainer.Variable: Output variable.

    """
    xs = [expand_dims.expand_dims(x, axis=axis) for x in xs]
    return concat.concat(xs, axis=axis)
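As a usage sketch (assuming a working Chainer installation), the helper matches numpy.stack:

import numpy as np
import chainer.functions as F

xs = [np.zeros((3, 4), np.float32), np.ones((3, 4), np.float32)]
y = F.stack(xs, axis=1)
assert y.shape == np.stack(xs, axis=1).shape == (3, 2, 4)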
Example #6
def black_out(x, t, W, samples):
    """BlackOut loss function.

    BlackOut loss function is defined as

    .. math::

      -\\log(p(t)) - \\sum_{s \\in S} \\log(1 - p(s)),

    where :math:`t` is the correct label, :math:`S` is a set of negative
    examples and :math:`p(\\cdot)` is the likelihood of a given label.
    Here, :math:`p` is defined as

    .. math::

       p(y) = \\frac{\\exp(W_y^\\top x)}{
       \\sum_{s \\in samples} \\exp(W_s^\\top x)}.

    Args:
        x (~chainer.Variable): Batch of input vectors.
        t (~chainer.Variable): Vector of ground truth labels.
        W (~chainer.Variable): Weight matrix.
        samples (~chainer.Variable): Negative samples.

    Returns:
        ~chainer.Variable: Loss value.

    See: `BlackOut: Speeding up Recurrent Neural Network Language Models With \
         Very Large Vocabularies <https://arxiv.org/abs/1511.06909>`_

    .. seealso:: :class:`~chainer.links.BlackOut`.

    """

    batch_size = x.shape[0]

    # Scores of the negative samples: W_s^T x for each s in samples.
    neg_emb = embed_id.embed_id(samples, W)
    neg_y = matmul.batch_matmul(neg_emb, x)
    neg_y = reshape.reshape(neg_y, neg_y.shape[:-1])

    # Score of the ground-truth label: W_t^T x.
    pos_emb = expand_dims.expand_dims(embed_id.embed_id(t, W), 1)
    pos_y = matmul.batch_matmul(pos_emb, x)
    pos_y = reshape.reshape(pos_y, pos_y.shape[:-1])

    # Shared normalizer over {t} + samples, then the BlackOut objective.
    logz = logsumexp.logsumexp(concat.concat([pos_y, neg_y]), axis=1)
    blogz, bneg_y = broadcast.broadcast(
        reshape.reshape(logz, (batch_size, 1)), neg_y)
    ny = exponential.log(1 - exponential.exp(bneg_y - blogz))
    py = reshape.reshape(pos_y, (batch_size,))
    loss = py - logz + _sum.sum(ny, axis=1)
    return -_sum.sum(loss) / batch_size
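To see the arithmetic without the Chainer wrappers, here is a plain-NumPy sketch of the same loss with made-up toy shapes (N=2 examples, D=3 dims, V=5 labels, S=2 negatives; all values are illustrative):

import numpy as np

N, D, V, S = 2, 3, 5, 2
rng = np.random.default_rng(0)
x = rng.standard_normal((N, D))
W = rng.standard_normal((V, D))
t = np.array([1, 3])
samples = np.array([[0, 2], [2, 4]])
pos_y = np.einsum('nd,nd->n', W[t], x)[:, None]   # W_t^T x, shape (N, 1)
neg_y = np.einsum('nsd,nd->ns', W[samples], x)    # W_s^T x, shape (N, S)
logz = np.log(np.exp(np.concatenate([pos_y, neg_y], axis=1)).sum(axis=1))
loss = -(pos_y[:, 0] - logz
         + np.log(1 - np.exp(neg_y - logz[:, None])).sum(axis=1))
print(loss.mean())

Each negative score is pushed down through log(1 - p(s)) while the positive score is pushed up, all against the shared normalizer logz.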
Example #7
    def log_prob(self, x):
        # Invert the (batched) Cholesky factor of the covariance.
        scale_tril_inv = \
            _batch_triangular_inv(self.scale_tril.reshape(-1, self.d, self.d))
        scale_tril_inv = scale_tril_inv.reshape(self.batch_shape +
                                                (self.d, self.d))

        # Squared Mahalanobis distance: m = |L^{-1} (x - loc)|^2.
        bsti = broadcast.broadcast_to(scale_tril_inv, x.shape + (self.d,))
        bl = broadcast.broadcast_to(self.loc, x.shape)
        m = matmul.matmul(bsti, expand_dims.expand_dims(x - bl, axis=-1))
        m = matmul.matmul(swapaxes.swapaxes(m, -1, -2), m)
        m = squeeze.squeeze(m, axis=-1)
        m = squeeze.squeeze(m, axis=-1)
        # Normalizer: LOGPROBC is -0.5 * log(2 * pi) per dimension.
        logz = LOGPROBC * self.d - self._logdet(self.scale_tril)
        return broadcast.broadcast_to(logz, m.shape) - 0.5 * m
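A NumPy check of the density formula used above, assuming LOGPROBC is -0.5 * log(2 * pi) as in Chainer's multivariate normal implementation; the Cholesky factor here is made up:

import numpy as np
from scipy.stats import multivariate_normal

LOGPROBC = -0.5 * np.log(2 * np.pi)
d = 3
L = np.array([[1.0, 0.0, 0.0], [0.5, 2.0, 0.0], [0.2, 0.3, 1.5]])
loc, x = np.zeros(d), np.ones(d)
m = np.linalg.solve(L, x - loc)  # L^{-1} (x - loc)
logp = LOGPROBC * d - np.log(np.diag(L)).sum() - 0.5 * m @ m
assert np.isclose(logp, multivariate_normal(loc, L @ L.T).logpdf(x))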
Example #8
    def __init__(self, p=None, **kwargs):
        logit = None
        if kwargs:
            logit, = argument.parse_kwargs(kwargs, ('logit', logit))
        if not (p is None) ^ (logit is None):
            raise ValueError(
                "Either `p` or `logit` (not both) must have a value.")

        with chainer.using_config('enable_backprop', True):
            if p is None:
                # Normalize logits into log-probabilities (a log-softmax
                # over the last axis): log_p = logit - logsumexp(logit).
                logit = chainer.as_variable(logit)
                self.__log_p = logit - expand_dims.expand_dims(
                    logsumexp.logsumexp(logit, axis=-1), axis=-1)
                self.__p = exponential.exp(self.__log_p)
            else:
                self.__p = chainer.as_variable(p)
                self.__log_p = exponential.log(self.__p)
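The logit branch is just a log-softmax over the category axis; a NumPy sketch with illustrative logits:

import numpy as np

logit = np.array([1.0, 2.0, 3.0])
log_p = logit - np.log(np.exp(logit).sum(axis=-1, keepdims=True))
p = np.exp(log_p)
assert np.isclose(p.sum(), 1.0)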
Example #9
    def mean(self):
        alpha0 = expand_dims.expand_dims(self.alpha0, axis=-1)
        return self.alpha / alpha0
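A worked example with made-up concentrations, since the Dirichlet mean is just alpha normalized by alpha0 = alpha.sum(axis=-1):

import numpy as np

alpha = np.array([2.0, 3.0, 5.0])
print(alpha / alpha.sum())  # [0.2 0.3 0.5]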
Example #10
def stack(xs, axis=0):
    """Concatenate variables along a new axis.

    Args:
        xs (list of :class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`):
            Input variables to be concatenated. The variables must have the
            same shape.
        axis (int): The axis along which the arrays will be stacked. The
            ``axis`` parameter is acceptable when
            :math:`-ndim - 1 \\leq axis \\leq ndim`, where ``ndim`` is the
            number of dimensions of the input variables. When
            :math:`axis < 0`, it is counted from the end, i.e. treated as
            :math:`ndim + 1 + axis`.

    Returns:
        ~chainer.Variable:
            Output variable. Let ``x_1, x_2, ..., x_n`` and ``y`` be the input
            variables and the output variable,
            ``y[:, ..., 0, ..., :]`` is ``x_1``,
            ``y[:, ..., 1, ..., :]`` is ``x_2``
            and ``y[:, ..., n-1, ..., :]`` is ``x_n`` (The indexed axis
            indicates the ``axis``).

    .. admonition:: Example

        >>> x1 = np.arange(0, 12).reshape(3, 4)
        >>> x1.shape
        (3, 4)
        >>> x1
        array([[ 0,  1,  2,  3],
               [ 4,  5,  6,  7],
               [ 8,  9, 10, 11]])
        >>> x2 = np.arange(12, 24).reshape(3, 4)
        >>> x2.shape
        (3, 4)
        >>> x2
        array([[12, 13, 14, 15],
               [16, 17, 18, 19],
               [20, 21, 22, 23]])
        >>> y = F.stack([x1, x2], axis=0)
        >>> y.shape
        (2, 3, 4)
        >>> y.data
        array([[[ 0,  1,  2,  3],
                [ 4,  5,  6,  7],
                [ 8,  9, 10, 11]],
        <BLANKLINE>
               [[12, 13, 14, 15],
                [16, 17, 18, 19],
                [20, 21, 22, 23]]])
        >>> y = F.stack([x1, x2], axis=1)
        >>> y.shape
        (3, 2, 4)
        >>> y.data
        array([[[ 0,  1,  2,  3],
                [12, 13, 14, 15]],
        <BLANKLINE>
               [[ 4,  5,  6,  7],
                [16, 17, 18, 19]],
        <BLANKLINE>
               [[ 8,  9, 10, 11],
                [20, 21, 22, 23]]])
        >>> y = F.stack([x1, x2], axis=2)
        >>> y.shape
        (3, 4, 2)
        >>> y.data
        array([[[ 0, 12],
                [ 1, 13],
                [ 2, 14],
                [ 3, 15]],
        <BLANKLINE>
               [[ 4, 16],
                [ 5, 17],
                [ 6, 18],
                [ 7, 19]],
        <BLANKLINE>
               [[ 8, 20],
                [ 9, 21],
                [10, 22],
                [11, 23]]])
        >>> y = F.stack([x1, x2], axis=-1)
        >>> y.shape
        (3, 4, 2)

    """
    xs = [expand_dims.expand_dims(x, axis=axis) for x in xs]
    return concat.concat(xs, axis=axis)
Example #11
def black_out(x, t, W, samples, reduce='mean'):
    """BlackOut loss function.

    BlackOut loss function is defined as

    .. math::

      -\\log(p(t)) - \\sum_{s \\in S} \\log(1 - p(s)),

    where :math:`t` is the correct label, :math:`S` is a set of negative
    examples and :math:`p(\\cdot)` is the likelihood of a given label.
    Here, :math:`p` is defined as

    .. math::

       p(y) = \\frac{\\exp(W_y^\\top x)}{
       \\sum_{s \\in samples} \\exp(W_s^\\top x)}.

    The output is a variable whose value depends on the value of
    the option ``reduce``. If it is ``'no'``, it holds the
    unreduced, per-example loss values. If it is ``'mean'``, this
    function takes the mean of the loss values.

    Args:
        x (~chainer.Variable): Batch of input vectors.
            Its shape should be :math:`(N, D)`.
        t (~chainer.Variable): Vector of ground truth labels.
            Its shape should be :math:`(N,)`. Each element :math:`v`
            should satisfy :math:`0 \\leq v < V` or be :math:`-1`,
            where :math:`V` is the number of label types.
        W (~chainer.Variable): Weight matrix.
            Its shape should be :math:`(V, D)`.
        samples (~chainer.Variable): Negative samples.
            Its shape should be :math:`(N, S)` where :math:`S` is
            the number of negative samples.
        reduce (str): Reduction option. Its value must be either
            ``'no'`` or ``'mean'``. Otherwise,
            :class:`ValueError` is raised.

    Returns:
        ~chainer.Variable:
            A variable object holding loss value(s).
            If ``reduce`` is ``'no'``, the output variable holds an
            array whose shape is :math:`(N,)` .
            If it is ``'mean'``, it holds a scalar.

    See: `BlackOut: Speeding up Recurrent Neural Network Language Models With \
         Very Large Vocabularies <https://arxiv.org/abs/1511.06909>`_

    .. seealso:: :class:`~chainer.links.BlackOut`.

    """

    batch_size = x.shape[0]

    neg_emb = embed_id.embed_id(samples, W)
    neg_y = matmul.matmul(neg_emb, x[:, :, None])
    neg_y = reshape.reshape(neg_y, neg_y.shape[:-1])

    pos_emb = expand_dims.expand_dims(embed_id.embed_id(t, W), 1)
    pos_y = matmul.matmul(pos_emb, x[:, :, None])
    pos_y = reshape.reshape(pos_y, pos_y.shape[:-1])

    logz = logsumexp.logsumexp(concat.concat([pos_y, neg_y]), axis=1)
    blogz, bneg_y = broadcast.broadcast(
        reshape.reshape(logz, (batch_size, 1)), neg_y)
    ny = exponential.log(1 - exponential.exp(bneg_y - blogz))
    py = reshape.reshape(pos_y, (batch_size,))
    loss = -(py - logz + _sum.sum(ny, axis=1))
    if reduce == 'mean':
        loss = average.average(loss)
    return loss
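A hypothetical usage sketch (assuming a working Chainer installation): reduce='no' keeps one loss value per example, while the default reduce='mean' collapses them to a scalar:

import numpy as np
import chainer.functions as F

x = np.random.randn(2, 3).astype(np.float32)
W = np.random.randn(5, 3).astype(np.float32)
t = np.array([1, 3], dtype=np.int32)
samples = np.array([[0, 2], [2, 4]], dtype=np.int32)
print(F.black_out(x, t, W, samples, reduce='no').shape)  # (2,)
print(F.black_out(x, t, W, samples).shape)               # ()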