Example #1
def create_fns(input, in_signs, Ds):

    cumulative_units = np.concatenate([[0], np.cumsum(Ds[:-1])])
    
    Ws = [sj.initializers.he((j, i)) for j, i in zip(Ds[1:], Ds[:-1])]
    bs = [sj.initializers.he((j,)) for j in Ds[1:]]

    A_w = [T.eye(Ds[0])]
    B_w = [T.zeros(Ds[0])]
    
    A_q = [T.eye(Ds[0])]
    B_q = [T.zeros(Ds[0])]
    
    maps = [input]
    signs = []
    masks = [T.ones(Ds[0])]
    # mask is 1. for active units and 0.1 (the leaky-ReLU slope) otherwise
    in_masks = T.where(T.concatenate([T.ones(Ds[0]), in_signs]) > 0, 1., 0.1)

    for w, b in zip(Ws[:-1], bs[:-1]):
        
        pre_activation = T.matmul(w, maps[-1]) + b
        signs.append(T.sign(pre_activation))
        masks.append(T.where(pre_activation > 0, 1., 0.1))

        maps.append(pre_activation * masks[-1])

    maps.append(T.matmul(Ws[-1], maps[-1]) + bs[-1])

    # compute per region A and B
    for start, end, w, b, m in zip(cumulative_units[:-1],
                                   cumulative_units[1:], Ws, bs, masks):

        A_w.append(T.matmul(w * m, A_w[-1]))
        B_w.append(T.matmul(w * m, B_w[-1]) + b)

        A_q.append(T.matmul(w * in_masks[start:end], A_q[-1]))
        B_q.append(T.matmul(w * in_masks[start:end], B_q[-1]) + b)

    signs = T.concatenate(signs)
    ineq_b = T.concatenate(B_w[1:-1])
    ineq_A = T.vstack(A_w[1:-1])

    inequalities = T.hstack([ineq_b[:, None], ineq_A])
    inequalities = inequalities * signs[:, None] / T.linalg.norm(
        ineq_A, 2, 1, keepdims=True)

    inequalities_code = T.hstack([T.concatenate(B_q[1:-1])[:, None],
                                  T.vstack(A_q[1:-1])])
    inequalities_code = inequalities_code * in_signs[:, None]

    f = sj.function(input, outputs=[maps[-1], A_w[-1], B_w[-1],
                                    inequalities, signs])
    g = sj.function(in_signs, outputs=[A_q[-1], B_q[-1]])
    all_g = sj.function(in_signs, outputs=inequalities_code)
    h = sj.function(input, outputs=maps[-1])

    return f, g, h, all_g
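
A minimal usage sketch (the layer sizes, placeholder shapes and dtypes below are assumptions, not part of the snippet): input is a single point of dimension Ds[0] and in_signs holds one sign per hidden unit, i.e. sum(Ds[1:-1]) entries.

# hypothetical usage sketch; Ds and the placeholder shapes are assumptions
import numpy as np
import symjax as sj
import symjax.tensor as T

Ds = [2, 8, 8, 1]                              # input, two hidden layers, output
input = T.Placeholder((Ds[0],), "float32")
in_signs = T.Placeholder((sum(Ds[1:-1]),), "float32")

f, g, h, all_g = create_fns(input, in_signs, Ds)

y, A, B, ineqs, signs = f(np.random.randn(Ds[0]).astype("float32"))  # forward pass
A_q, B_q = g(signs)                            # affine map of the visited region
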
Example #2
def elu(x, alpha=1.0):
    r"""Exponential linear unit activation function.

  Computes the element-wise function:

  .. math::
    \mathrm{elu}(x) = \begin{cases}
      x, & x > 0\\
      \alpha \left(\exp(x) - 1\right), & x \le 0
    \end{cases}
  """
    safe_x = T.where(x > 0, 0.0, x)
    return T.where(x > 0, x, alpha * T.expm1(safe_x))
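
The safe_x clamp keeps expm1 from being evaluated at large positive inputs, which would otherwise push non-finite values through the gradient of the untaken where branch (a standard trick, also found in jax.nn.elu). A quick NumPy check of the formula, with arbitrary values:

import numpy as np

x = np.array([-2.0, -0.5, 0.0, 1.5])
alpha = 1.0
reference = np.where(x > 0, x, alpha * np.expm1(np.minimum(x, 0.0)))
# -> [alpha*(exp(-2)-1), alpha*(exp(-0.5)-1), 0.0, 1.5]
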
Example #3
def hard_tanh(x):
    r"""Hard :math:`\mathrm{tanh}` activation function.

  Computes the element-wise function:

  .. math::
    \mathrm{hard\_tanh}(x) = \begin{cases}
      -1, & x < -1\\
      x, & -1 \le x \le 1\\
      1, & 1 < x
    \end{cases}
  """
    return T.where(x > 1, 1, T.where(x < -1, -1, x))
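
This is simply clipping to [-1, 1]; a NumPy sanity check of the equivalence:

import numpy as np

x = np.array([-3.0, -0.4, 0.8, 2.5])
assert np.allclose(np.clip(x, -1.0, 1.0),
                   np.where(x > 1, 1, np.where(x < -1, -1, x)))
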
Example #4
    def forward(
        self,
        input,
        axis,
        deterministic,
        const=1e-4,
        beta1=0.9,
        beta2=0.9,
        W=T.ones,
        b=T.zeros,
        trainable_W=True,
        trainable_b=True,
    ):

        self.beta1 = beta1
        self.beta2 = beta2
        self.const = const
        self.axis = axis
        self.deterministic = deterministic

        parameter_shape = [
            input.shape[i] if i in axis else 1 for i in range(input.ndim)
        ]
        reduce_axes = [i for i in range(input.ndim) if i not in axis]

        self.create_variable("W", W, parameter_shape, trainable=trainable_W)
        self.create_variable("b", b, parameter_shape, trainable=trainable_b)

        input_mean = T.mean(input, reduce_axes, keepdims=True)
        input_inv_std = 1 / (T.std(input, reduce_axes, keepdims=True) + const)

        self.avg_mean = schedules.ExponentialMovingAverage(input_mean,
                                                           beta1)[1]
        self.avg_inv_std = schedules.ExponentialMovingAverage(
            input_inv_std, beta2)[1]

        use_mean = T.where(deterministic, self.avg_mean, input_mean)
        use_inv_std = T.where(deterministic, self.avg_inv_std, input_inv_std)
        W = self.W if self.W is not None else 1.0
        b = self.b if self.b is not None else 0.0
        return W * (input - use_mean) * use_inv_std + b
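
The returned expression is standard feature normalization with a learned scale and shift; when deterministic is true the exponential moving averages replace the batch statistics. A plain NumPy sketch of the same computation for an (N, D) batch normalized over the batch axis (names and shapes are illustrative):

import numpy as np

def normalize(x, W, b, avg_mean, avg_inv_std, deterministic, const=1e-4):
    mean = x.mean(0, keepdims=True)                        # batch statistics
    inv_std = 1.0 / (x.std(0, keepdims=True) + const)
    use_mean = avg_mean if deterministic else mean         # EMA at test time
    use_inv_std = avg_inv_std if deterministic else inv_std
    return W * (x - use_mean) * use_inv_std + b
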
Example #5
def leaky_relu(x, negative_slope=1e-2):
    r"""Leaky rectified linear unit activation function.

  Computes the element-wise function:

  .. math::
    \mathrm{leaky\_relu}(x) = \begin{cases}
      x, & x \ge 0\\
      \alpha x, & x < 0
    \end{cases}

  where :math:`\alpha` = :code:`negative_slope`.
  """
    return T.where(x >= 0, x, negative_slope * x)
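
With negative_slope=0 this reduces to the ordinary ReLU; for reference:

import numpy as np

x = np.array([-2.0, -0.1, 0.0, 3.0])
assert np.allclose(np.where(x >= 0, x, 0.0 * x), np.maximum(x, 0.0))
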
Example #6
    def __init__(self, input, p, axis, deterministic, seed=None):

        extra_dims = input.ndim - 1
        flip = T.random.bernoulli(
            shape=(input.shape[0], ) + (1, ) * extra_dims,
            p=p,
            seed=seed,
        )

        dirac = T.cast(deterministic, "float32")

        flipped_input = T.where(flip, T.flip(input, axis), input)

        return input * dirac + flipped_input * (1 - dirac)
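
This layer returns its output directly from __init__ (presumably relying on the symjax Layer machinery), unlike the forward-style layers in Examples 4 and 12. Each sample is flipped along axis with probability p at train time and passed through unchanged when deterministic is true; a NumPy sketch of the same blend (shapes and p are illustrative):

import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=(4, 8))                      # batch of 4 samples
flip = rng.binomial(1, 0.5, size=(4, 1))         # one Bernoulli draw per sample
flipped = np.where(flip, x[:, ::-1], x)          # flip along the last axis
dirac = 0.0                                      # 1.0 when deterministic
out = x * dirac + flipped * (1.0 - dirac)
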
Example #7
def celu(x, alpha=1.0):
    r"""Continuously-differentiable exponential linear unit activation.

  Computes the element-wise function:

  .. math::
    \mathrm{celu}(x) = \begin{cases}
      x, & x > 0\\
      \alpha \left(\exp(\frac{x}{\alpha}) - 1\right), & x \le 0
    \end{cases}

  For more information, see
  `Continuously Differentiable Exponential Linear Units
  <https://arxiv.org/pdf/1704.07483.pdf>`_."""
    return T.where(x > 0, x, alpha * T.expm1(x / alpha))
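
Unlike elu, the argument of the exponential is divided by alpha, so both branches meet at 0 with slope 1 for any alpha, which is what makes the unit continuously differentiable. A finite-difference check:

import numpy as np

alpha, eps = 2.0, 1e-6
celu_ref = lambda v: np.where(v > 0, v, alpha * np.expm1(v / alpha))
left = (celu_ref(0.0) - celu_ref(-eps)) / eps     # slope just below 0
right = (celu_ref(eps) - celu_ref(0.0)) / eps     # slope just above 0
# both are ~1.0, so the derivative is continuous at x = 0
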
Example #8
    def create_updates(
        self,
        grads_or_loss,
        learning_rate,
        amsgrad=False,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-7,
        params=None,
    ):

        if isinstance(grads_or_loss, list):
            assert params
        if params is None:
            params = self._get_variables(grads_or_loss)
        elif type(params) != list:
            raise RuntimeError("given params should be a list")

        if len(params) == 0:
            raise RuntimeError(
                "no parameters are given for the gradients, this can be due to passing explicitly an empty list or to passing a lost connected to no trainable weights"
            )
        grads = self._get_grads(grads_or_loss, params)

        local_step = tensor.Variable(1, dtype="int32", trainable=False)
        updates = {local_step: local_step + 1}

        beta_1_t = tensor.power(beta_1, local_step)
        beta_2_t = tensor.power(beta_2, local_step)
        lr = learning_rate * (tensor.sqrt(1 - beta_2_t) / (1 - beta_1_t))

        for param, grad in zip(params, grads):
            m = ExponentialMovingAverage(grad, beta_1, debias=False)[0]
            v = ExponentialMovingAverage(grad**2, beta_2, debias=False)[0]
            if amsgrad:
                v_hat = tensor.Variable(tensor.zeros_like(param),
                                        name="v_hat",
                                        trainable=False)
                updates[v_hat] = tensor.maximum(v_hat, v)
                update = m / (tensor.sqrt(updates[v_hat]) + epsilon)
            else:
                update = m / (tensor.sqrt(v) + epsilon)
            update = tensor.where(local_step == 1, grad, update)
            updates[param] = param - lr * update

        self.add_updates(updates)
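
The rescaled learning rate lr = learning_rate * sqrt(1 - beta_2^t) / (1 - beta_1^t) is the usual Adam bias correction folded into the step size (equivalent, up to the placement of epsilon, to dividing the first and second moments by 1 - beta_i^t), and the where on the very first step uses the raw gradient instead of the not-yet-warmed-up moment estimates. The correction factor for the default betas:

beta_1, beta_2 = 0.9, 0.999
for t in (1, 10, 100, 1000):
    print(t, (1 - beta_2**t) ** 0.5 / (1 - beta_1**t))
# ~0.316, ~0.153, ~0.309, ~0.795; the factor tends to 1 as t grows
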
Example #9
def ExponentialMovingAverage(value, alpha):

    with Scope("ExponentialMovingAverage"):

        first_step = T.Variable(True,
                                trainable=False,
                                name="first_step",
                                dtype="bool")

        var = T.Variable(T.zeros(value.shape),
                         trainable=False,
                         dtype="float32",
                         name="EMA")

        new_value = T.where(first_step, value,
                            var * alpha + (1 - alpha) * value)

        current_graph().add({var: new_value, first_step: False})

    return new_value, var
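
The first_step switch makes the very first output equal to the observed value itself; afterwards the usual recursion applies. The same update in plain Python:

def ema_step(state, x, alpha=0.9):
    # state is None before the first observation
    return x if state is None else alpha * state + (1 - alpha) * x

state = None
for x in (1.0, 2.0, 3.0):
    state = ema_step(state, x)
# 1.0, then 0.9*1.0 + 0.1*2.0 = 1.1, then 0.9*1.1 + 0.1*3.0 = 1.29
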
Example #10
def create_fns(input,
               in_signs,
               Ds,
               x,
               m0,
               m1,
               m2,
               batch_in_signs,
               alpha=0.1,
               sigma=1,
               sigma_x=1,
               lr=0.0002):

    cumulative_units = np.concatenate([[0], np.cumsum(Ds[:-1])])
    BS = batch_in_signs.shape[0]
    Ws = [
        T.Variable(sj.initializers.glorot((j, i)) * sigma)
        for j, i in zip(Ds[1:], Ds[:-1])
    ]
    bs = [T.Variable(sj.initializers.he((j,)) * sigma) for j in Ds[1:-1]]\
                + [T.Variable(T.zeros((Ds[-1],)))]

    A_w = [T.eye(Ds[0])]
    B_w = [T.zeros(Ds[0])]

    A_q = [T.eye(Ds[0])]
    B_q = [T.zeros(Ds[0])]

    batch_A_q = [T.eye(Ds[0]) * T.ones((BS, 1, 1))]
    batch_B_q = [T.zeros((BS, Ds[0]))]

    maps = [input]
    signs = []
    masks = [T.ones(Ds[0])]

    in_masks = T.where(T.concatenate([T.ones(Ds[0]), in_signs]) > 0, 1., alpha)
    batch_in_masks = T.where(
        T.concatenate([T.ones((BS, Ds[0])), batch_in_signs], 1) > 0, 1., alpha)

    for w, b in zip(Ws[:-1], bs[:-1]):

        pre_activation = T.matmul(w, maps[-1]) + b
        signs.append(T.sign(pre_activation))
        masks.append(T.where(pre_activation > 0, 1., alpha))

        maps.append(pre_activation * masks[-1])

    maps.append(T.matmul(Ws[-1], maps[-1]) + bs[-1])

    # compute per region A and B
    for start, end, w, b, m in zip(cumulative_units[:-1], cumulative_units[1:],
                                   Ws, bs, masks):

        A_w.append(T.matmul(w * m, A_w[-1]))
        B_w.append(T.matmul(w * m, B_w[-1]) + b)

        A_q.append(T.matmul(w * in_masks[start:end], A_q[-1]))
        B_q.append(T.matmul(w * in_masks[start:end], B_q[-1]) + b)

        batch_A_q.append(
            T.matmul(w * batch_in_masks[:, None, start:end], batch_A_q[-1]))
        batch_B_q.append((w * batch_in_masks[:, None, start:end]\
                            * batch_B_q[-1][:, None, :]).sum(2) + b)

    batch_B_q = batch_B_q[-1]
    batch_A_q = batch_A_q[-1]

    signs = T.concatenate(signs)

    inequalities = T.hstack(
        [T.concatenate(B_w[1:-1])[:, None],
         T.vstack(A_w[1:-1])]) * signs[:, None]

    inequalities_code = T.hstack(
        [T.concatenate(B_q[1:-1])[:, None],
         T.vstack(A_q[1:-1])]) * in_signs[:, None]

    #### loss
    log_sigma2 = T.Variable(sigma_x)
    sigma2 = T.exp(log_sigma2)

    Am1 = T.einsum('qds,nqs->nqd', batch_A_q, m1)
    Bm0 = T.einsum('qd,nq->nd', batch_B_q, m0)
    B2m0 = T.einsum('nq,qd->n', m0, batch_B_q**2)
    AAm2 = T.einsum('qds,qdu,nqup->nsp', batch_A_q, batch_A_q, m2)

    inner = -(x * (Am1.sum(1) + Bm0)).sum(1) + (Am1 * batch_B_q).sum((1, 2))

    loss_2 = (x**2).sum(1) + B2m0 + T.trace(AAm2, axis1=1, axis2=2).squeeze()

    loss_z = T.trace(m2.sum(1), axis1=1, axis2=2).squeeze()

    cst = 0.5 * (Ds[0] + Ds[-1]) * T.log(2 * np.pi)

    loss = cst + 0.5 * Ds[-1] * log_sigma2 + inner / sigma2\
            + 0.5 * loss_2 / sigma2 + 0.5 * loss_z

    mean_loss = loss.mean()
    adam = sj.optimizers.NesterovMomentum(mean_loss, Ws + bs, lr, 0.9)

    train_f = sj.function(batch_in_signs,
                          x,
                          m0,
                          m1,
                          m2,
                          outputs=mean_loss,
                          updates=adam.updates)
    f = sj.function(input,
                    outputs=[maps[-1], A_w[-1], B_w[-1], inequalities, signs])
    g = sj.function(in_signs, outputs=[A_q[-1], B_q[-1]])
    all_g = sj.function(in_signs, outputs=inequalities_code)
    h = sj.function(input, outputs=maps[-1])
    return f, g, h, all_g, train_f, sigma2
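
A minimal usage sketch; Ds, BS and N below are arbitrary, and the placeholder shapes for x, m0, m1 and m2 are read off the einsum subscripts in the loss, so they are assumptions about the intended data rather than documented facts:

# hypothetical usage sketch; all sizes are assumptions
import symjax as sj
import symjax.tensor as T

Ds, N = [2, 8, 8, 1], 4
K = sum(Ds[1:-1])                                # one sign per hidden unit
BS = 16                                          # number of regions per batch

input = T.Placeholder((Ds[0],), "float32")
in_signs = T.Placeholder((K,), "float32")
batch_in_signs = T.Placeholder((BS, K), "float32")
x = T.Placeholder((N, Ds[-1]), "float32")
m0 = T.Placeholder((N, BS), "float32")
m1 = T.Placeholder((N, BS, Ds[0]), "float32")
m2 = T.Placeholder((N, BS, Ds[0], Ds[0]), "float32")

f, g, h, all_g, train_f, sigma2 = create_fns(
    input, in_signs, Ds, x, m0, m1, m2, batch_in_signs)
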
Example #11
def relu_mask(x, leakiness):
    if type(x) == list:
        return [relu_mask(xx, leakiness) for xx in x]
    return T.where(x >= 0, 1., leakiness)
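
The list branch applies the helper recursively, so all layer pre-activations can be converted to leaky-ReLU masks in one call, exactly the masks built inline in Examples 1 and 10. For instance:

# assuming T is symjax.tensor, as in the snippets above
pre_acts = [T.ones((3,)) * -1.0, T.ones((3,))]
masks = relu_mask(pre_acts, leakiness=0.1)    # -> [all 0.1, all 1.]
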
Example #12
    def forward(self, input, p, deterministic, seed=None):

        self.p = p
        self.mask = T.random.bernoulli(shape=input.shape, p=p, seed=seed)

        return T.where(deterministic, input, self.mask * input)
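
Note that the Bernoulli mask keeps each unit with probability p and the surviving activations are not rescaled here; the common "inverted dropout" variant (not what this snippet does) would also divide by p so the expectation matches the deterministic branch:

import numpy as np

rng = np.random.default_rng(0)
x, p = rng.normal(size=(4, 8)), 0.8
mask = rng.binomial(1, p, size=x.shape)
dropped = mask * x            # what the layer above computes at train time
inverted = mask * x / p       # rescaled variant, E[inverted] == x
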
Example #13
def ExponentialMovingAverage(
    value,
    alpha,
    init=None,
    decay_min=False,
    debias=True,
    name="ExponentialMovingAverage",
):

    """exponential moving average of a given value

    This method allows to obtain an EMA of a given variable (or any Tensor)
    with internal state automatically upating its values as new samples are
    observed and the internal updates are applied as part of a fuction
    At each iteration the new value is given by

    .. math::

      v(0) = value(0) or init
      v(t) = v(t-1) * alpha + value(t) * (1 - alpha)

    Args
    ----

    value: Tensor-like
        the value to use for the EMA

    alpha: scalar
        the decay of the EMA

    init: Tensor-like (same shape as value), optional
        the initialization of the EMA; if not given, the first observed value
        is used, allowing for an unbiased estimate

    decay_min: bool
        at early stages, clip the decay to avoid erratic behavior

    Returns
    -------

    ema: Tensor-like
        the current (latest) value of the EMA incorporating information
        of the latest observation of value

    fixed_ema: Tensor-like
        the value of the EMA from the previous pass. This is useful if one
        wants to keep the EMA estimate fixed for new observations: simply stop
        applying the updates (by using a new function) and use this fixed
        variable during testing (while ema keeps using the latest observed
        value)



    Example
    -------

    .. doctest::
    >>> import symjax
    >>> import numpy as np
    >>> np.random.seed(0)
    >>> symjax.current_graph().reset()
    >>> # suppose we want to do an EMA of a vector user-input
    >>> input = symjax.tensor.Placeholder((2,), 'float32')
    >>> ema, var = symjax.nn.schedules.ExponentialMovingAverage(input, 0.9)
    >>> # in the background, symjax automatically records the needed updates
    >>> print(symjax.get_updates())
    {Variable(name=EMA, shape=(2,), dtype=float32, trainable=False, scope=/ExponentialMovingAverage/): Op(name=where, fn=where, shape=(2,), dtype=float32, scope=/ExponentialMovingAverage/), Variable(name=first_step, shape=(), dtype=bool, trainable=False, scope=/ExponentialMovingAverage/): False}
    >>> # example of use:
    >>> f = symjax.function(input, outputs=ema, updates=symjax.get_updates())
    >>> for i in range(25):
    ...     print(f(np.ones(2) + np.random.randn(2) * 0.3))
    [1.5292157 1.1200472]
    [1.5056562 1.1752692]
    [1.5111173 1.1284239]
    [1.4885082 1.1110408]
    [1.4365609 1.1122546]
    [1.3972261 1.1446574]
    [1.3803346 1.1338419]
    [1.355617  1.1304679]
    [1.3648777 1.1112664]
    [1.3377819 1.0745169]
    [1.227414  1.0866737]
    [1.2306056 1.0557414]
    [1.2756376 1.0065362]
    [1.2494465 1.000267 ]
    [1.2704852 1.0443211]
    [1.2480851 1.0512339]
    [1.196643  0.9866866]
    [1.1665413 0.9927084]
    [1.186796 1.029509]
    [1.1564965 1.017489 ]
    [1.1093903  0.97313946]
    [1.0472631 1.0343488]
    [1.0272473 1.0177717]
    [0.9869387 1.0393193]
    [0.93982786 1.029005  ]



    """

    with Scope(name):

        init = init if init is not None else T.zeros_like(value, detach=True)

        num_steps = T.Variable(0, trainable=False, name="num_steps", dtype="int32")

        var = T.Variable(init, trainable=False, dtype="float32", name="EMA")

        if decay_min:
            decay = T.minimum(alpha, (1.0 + num_steps) / (10.0 + num_steps))
        else:
            decay = alpha

        ema = decay * var + (1 - decay) * value
        var_update = T.where(T.equal(num_steps, 0), init, ema)

        current_graph().add_updates({var: ema, num_steps: num_steps + 1})

        if debias:
            debiased_ema = ema_debias(ema, init, decay, num_steps + 1)
            debiased_var = T.Variable(
                init, trainable=False, dtype="float32", name="debiased_EMA"
            )
            current_graph().add_updates({debiased_var: debiased_ema})

    if debias:
        return debiased_ema, debiased_var
    else:
        return ema, var
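
ema_debias itself is not shown in this snippet; a common debiasing scheme (as in Adam) divides the zero-initialized EMA by 1 - alpha^t, which is presumably close to what it does (the real helper also receives init, so it may handle non-zero initializations differently):

# hypothetical sketch of a debias step; the actual ema_debias may differ
def ema_debias_sketch(ema, alpha, step):
    return ema / (1.0 - alpha ** step)

# after one observation x with a zero init: ema = (1 - alpha) * x,
# and ema_debias_sketch(ema, alpha, 1) recovers x exactly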