Example #1
def adam(network, loss, rate, decay, epsilon=1e-8, clip=5.0, mrate=0.0005):
    """  ADAMski optimiser

    Similar to ADAM optimizer but with momentum phased in gradually from 0,
    as having lower momentum at the start of training seems to be beneficial.
    See: https://www.cs.toronto.edu/~hinton/absps/guideTR.pdf page 10

    :param network: network to optimise
    :param loss: loss function to optimise over
    :param rate: rate (step size) for optimiser
    :param decay: pair of decay rates for the estimates of the gradient and curvature
    :param epsilon: small parameter to prevent the reciprocal of the variance exploding
    :param clip: gradients are clipped element-wise to the range [-clip, clip]
    :param mrate: Rate at which momentum is increased.  None = ADAM optimiser

    :returns: a dictionary containing update functions for Tensors
    """
    assert all(d > 0.0 for d in decay), "Decay rates must be positive"
    assert all(d < 1.0 for d in decay), "Decay rates must be less than one"
    assert mrate is None or mrate > 0.0, "Rate of momentum increase must be positive"
    if mrate is not None:
        _M_RATE = -np.float_(mrate).astype(sloika_dtype)
        _M_P = np.exp(_M_RATE)
        _M_K = (1.0 - decay[0]) * decay[0] * _M_P / (1.0 - _M_P * decay[0])
        _M_K = np.float_(_M_K).astype(sloika_dtype)
    else:
        _M_RATE = -np.float_(1e30).astype(sloika_dtype)
        _M_P = np.float_(0.0).astype(sloika_dtype)
        _M_K = np.float_(0.0).astype(sloika_dtype)

    params = network.params()
    updates = OrderedDict()
    gradients = th.grad(loss, params)

    ldecay = np.log(decay, dtype=sloika_dtype)

    t = th.shared(np.float32(0.0).astype(sloika_dtype))
    lr_t = th.shared(np.float32(0.0).astype(sloika_dtype))
    momentum_decay = th.shared(np.float32(0.0).astype(sloika_dtype))
    updates[t] = t + 1.0
    momentum_factor = _M_K * T.expm1(t * (ldecay[0] + _M_RATE)) - T.expm1(
        updates[t] * ldecay[0])
    updates[lr_t] = rate * T.sqrt(-T.expm1(updates[t] *
                                           ldecay[1])) / momentum_factor
    updates[momentum_decay] = -decay[0] * T.expm1(updates[t] * _M_RATE)
    for param, grad in zip(params, gradients):
        val = param.get_value(borrow=True)
        momentum = th.shared(np.zeros(val.shape, dtype=val.dtype))
        variance = th.shared(np.zeros(val.shape, dtype=val.dtype))

        grad_clip = T.clip(grad, -clip, clip)

        updates[momentum] = updates[momentum_decay] * momentum + (
            1.0 - decay[0]) * grad_clip
        updates[variance] = decay[1] * variance + (1.0 -
                                                   decay[1]) * T.sqr(grad_clip)
        updates[param] = param - updates[lr_t] * updates[momentum] / (
            T.sqrt(updates[variance]) + epsilon)

    return updates
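
A minimal NumPy sketch of how the momentum phase-in above behaves (illustrative, not part of the original module): momentum_decay is -decay[0] * expm1(-mrate * t), which ramps from roughly 0 on the first step towards the full decay[0] as training proceeds. The decay value below is an assumed example.

import numpy as np

decay0 = 0.9      # assumed first decay coefficient (gradient estimate)
mrate = 0.0005    # default momentum ramp rate from the signature above

for t in (1, 100, 1000, 5000, 20000):
    # mirrors updates[momentum_decay] = -decay[0] * T.expm1(updates[t] * _M_RATE)
    momentum_decay = -decay0 * np.expm1(-mrate * t)
    print(t, momentum_decay)
# rises from ~0.00045 at t=1 towards 0.9 as t grows, i.e. momentum is phased in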
Example #2
def elu(x):
    """Exponential Linear Unit :math:`\\varphi(x) = (x > 0) ? x : e^x - 1`

    The Exponential Linear Unit (ELU) was introduced in [1]_. Compared to the
    linear rectifier :func:`rectify`, it has a mean activation closer to zero
    and nonzero gradient for negative input, which can help convergence.
    Compared to the leaky rectifier :class:`LeakyRectify`, it saturates for
    highly negative inputs.

    Parameters
    ----------
    x : float32
        The activation (the summed, weighted input of a neuron).

    Returns
    -------
    float32
        The output of the exponential linear unit for the activation.

    Notes
    -----
    In [1]_, an additional parameter :math:`\\alpha` controls the (negative)
    saturation value for negative inputs, but is set to 1 for all experiments.
    It is omitted here.

    References
    ----------
    .. [1] Djork-Arné Clevert, Thomas Unterthiner, Sepp Hochreiter (2015):
       Fast and Accurate Deep Network Learning by Exponential Linear Units
       (ELUs), http://arxiv.org/abs/1511.07289
    """
    return tensor.switch(x > 0, x, tensor.expm1(x))
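
A short usage sketch for the activation above (an illustration, not part of the Lasagne docstring), compiling a tiny Theano graph and evaluating it on a few values:

import numpy as np
import theano
import theano.tensor as tensor

x = tensor.vector('x')
elu_fn = theano.function([x], tensor.switch(x > 0, x, tensor.expm1(x)))
print(elu_fn(np.array([-3.0, -0.5, 0.0, 0.5, 3.0], dtype=theano.config.floatX)))
# negative inputs saturate towards -1; non-negative inputs pass through unchanged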
Example #3
    def __init__(self,
                 incoming,
                 sample_rate,
                 frame_len,
                 num_bands,
                 min_freq,
                 max_freq,
                 trainable=True,
                 **kwargs):
        super(MelBankLayer, self).__init__(incoming, **kwargs)
        # mel-spaced peak frequencies
        min_mel = 1127 * np.log1p(min_freq / 700.0)
        max_mel = 1127 * np.log1p(max_freq / 700.0)
        spacing = (max_mel - min_mel) / (num_bands + 1)
        spaces = np.ones(num_bands + 2) * spacing
        spaces[0] = min_mel
        spaces = theano.shared(lasagne.utils.floatX(spaces))  # learned param
        peaks_mel = spaces.cumsum()

        # create parameter as a vector of real-valued peak bins
        peaks_hz = 700 * (T.expm1(peaks_mel / 1127))
        peaks_bin = peaks_hz * frame_len / sample_rate
        self.peaks = self.add_param(peaks_bin,
                                    shape=(num_bands + 2, ),
                                    name='peaks',
                                    trainable=trainable,
                                    regularizable=False)

        # store what else is needed
        self.num_bands = num_bands
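
The constants above come from the HTK-style mel scale, mel = 1127 * ln(1 + f / 700); 700 * T.expm1(peaks_mel / 1127) is its inverse. A small NumPy round-trip check (illustrative only):

import numpy as np

freqs_hz = np.array([0.0, 300.0, 1000.0, 4000.0, 8000.0])
mels = 1127 * np.log1p(freqs_hz / 700.0)   # forward: Hz -> mel, as in min_mel/max_mel
back_hz = 700 * np.expm1(mels / 1127)      # inverse: mel -> Hz, as in peaks_hz
print(np.allclose(freqs_hz, back_hz))      # True: log1p and expm1 are exact inverses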
Example #4
def selu(x):
    """
    Scaled exponential linear units as proposed in [1].

    [1] - https://arxiv.org/pdf/1706.02515.pdf
    """
    alpha = 1.6732632423543772848170429916717
    lam = 1.0507009873554804934193349852946
    return lam * switch(x >= 0.0, x, alpha * expm1(x))
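
A quick NumPy check of the self-normalising property motivating these constants in [1] (illustrative, not part of the original function): applying SELU to standard-normal inputs should leave the mean near 0 and the variance near 1.

import numpy as np

alpha = 1.6732632423543772848170429916717
lam = 1.0507009873554804934193349852946

z = np.random.default_rng(0).standard_normal(1_000_000)
out = lam * np.where(z >= 0.0, z, alpha * np.expm1(z))
print(out.mean(), out.var())   # both close to 0 and 1 respectively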
Example #5
def log1mexp(x):
    """Return log(1 - exp(-x)).

    This function is numerically more stable than the naive approach.

    For details, see
    https://cran.r-project.org/web/packages/Rmpfr/vignettes/log1mexp-note.pdf
    """
    return tt.switch(tt.lt(x, 0.683), tt.log(-tt.expm1(-x)),
                     tt.log1p(-tt.exp(-x)))
Example #6
def log1mexp(x):
    """Return log(1 - exp(-x)).

    This function is numerically more stable than the naive approach.

    For details, see
    https://cran.r-project.org/web/packages/Rmpfr/vignettes/log1mexp-note.pdf
    """
    return tt.switch(
        tt.lt(x, 0.683),
        tt.log(-tt.expm1(-x)),
        tt.log1p(-tt.exp(-x)))
Example #7
def log1mexp(x):
    r"""Return log(1 - exp(-x)).

    This function is numerically more stable than the naive approach.

    For details, see
    https://cran.r-project.org/web/packages/Rmpfr/vignettes/log1mexp-note.pdf

    References
    ----------
    .. [Machler2012] Martin Mächler (2012).
       "Accurately computing `\log(1-\exp(- \mid a \mid))` Assessed by the Rmpfr
       package"

    """
    return tt.switch(tt.lt(x, 0.6931471805599453), tt.log(-tt.expm1(-x)), tt.log1p(-tt.exp(-x)))
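
The cut-off in this variant is ln 2 ≈ 0.693 (the two variants above use the nearby value 0.683). Below it, exp(-x) is close to 1, so forming 1 - exp(-x) cancels badly and log(-expm1(-x)) is the accurate branch; above it, exp(-x) is small and log1p(-exp(-x)) is accurate. A NumPy comparison with the naive formula (illustrative only):

import numpy as np

for x in (1e-20, 1e-10, 0.5, 5.0, 50.0):
    naive = np.log(1.0 - np.exp(-x))                    # cancels badly for small x
    stable = (np.log(-np.expm1(-x)) if x < np.log(2.0)
              else np.log1p(-np.exp(-x)))               # same branching as log1mexp
    print(x, naive, stable)
# at x = 1e-20 the naive form returns -inf (1 - exp(-x) rounds to 0),
# while the stable branch returns approximately log(x) ≈ -46.05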
Example #8
def selu(x):
    alpha = 1.6732632423543772848170429916717
    scale = 1.0507009873554804934193349852946
    return scale * TT.switch(x > 0, x, alpha * TT.expm1(x))
Example #9
def rmspe(y_true, y_pred):
    y_true = T.expm1(y_true)
    y_pred = T.expm1(y_pred)
    return T.sqrt(T.sqr((y_true - y_pred) / y_true).mean(axis=-1))
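
This metric looks like it assumes the model was fit to log1p-transformed targets, so expm1 puts truth and prediction back on the original scale before the percentage error is taken (an interpretation, not stated in the source). A NumPy mirror for reference (function name is hypothetical):

import numpy as np

def rmspe_np(y_true_log1p, y_pred_log1p):
    y_true = np.expm1(y_true_log1p)
    y_pred = np.expm1(y_pred_log1p)
    return np.sqrt(np.mean(np.square((y_true - y_pred) / y_true), axis=-1))

print(rmspe_np(np.log1p(np.array([100.0, 200.0])), np.log1p(np.array([110.0, 190.0]))))
# ~0.079: 10% error on the first value, 5% on the second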
Example #10
def blackbody_lambda(lam, temperature):
    """
    Compute the blackbody flux as a function of wavelength `lam` in mks units
    """
    return (two * hc2 / tt.pow(lam, 5) / tt.expm1(h * c /
                                                  (lam * k_B * temperature)))
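
The expression is Planck's law, B_lambda(T) = 2 h c^2 / lambda^5 / (exp(h c / (lambda k_B T)) - 1); the names two, hc2, h, c and k_B are defined elsewhere in the project. A NumPy sketch with hard-coded SI constants (illustrative only; the function name is an assumption):

import numpy as np

h = 6.62607015e-34     # Planck constant, J s
c = 2.99792458e8       # speed of light, m / s
k_B = 1.380649e-23     # Boltzmann constant, J / K

def blackbody_lambda_np(lam, temperature):
    # spectral radiance in W sr^-1 m^-3 for wavelength lam in metres
    return 2 * h * c**2 / lam**5 / np.expm1(h * c / (lam * k_B * temperature))

print(blackbody_lambda_np(500e-9, 5778.0))   # ~2.6e13 near the solar spectrum peak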
Example #11
def rmspe(y_true, y_pred):
    y_true = T.expm1(y_true)
    y_pred = T.expm1(y_pred)
    return T.sqrt(T.sqr((y_true - y_pred) / y_true).mean(axis=-1))
Example #12
    def inv_temp_cond_prob_func(self, inv_temp, delta):
        return tt.switch(tt.eq(delta, 0.), tt.ones_like(delta),
                         -tt.exp(-inv_temp * delta) * delta / tt.expm1(-delta))
Example #13
    def inv_temp_cond_prob_0_1(self, delta):
        prob_0 = tt.switch(tt.eq(delta, 0.), tt.ones_like(delta),
                           -delta / tt.expm1(-delta))
        prob_1 = tt.switch(tt.eq(delta, 0.), tt.ones_like(delta),
                           delta / tt.expm1(delta))
        return prob_0, prob_1
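
The tt.switch guards in both methods handle the removable singularity at delta = 0: as delta -> 0, -delta / expm1(-delta) and delta / expm1(delta) both tend to 1, which is exactly the tt.ones_like branch. A tiny NumPy check (illustrative):

import numpy as np

for delta in (1e-8, 1e-3, 0.5):
    print(delta, -delta / np.expm1(-delta), delta / np.expm1(delta))
# both ratios approach 1 (from above and below respectively) as delta -> 0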
Example #14
    def __call__(self, x):
        return self.scale * tensor.switch(x > 0.0, x,
                                          self.scale_neg * tensor.expm1(x))
Example #15
def elu(x):
    """  Exponential Linear Unit
         See https://arxiv.org/pdf/1511.07289.pdf
    """
    return T.switch(x > 0, x, T.expm1(x))