Example No. 1
    def backward(self, grad):
        # Keep track of the axis the sum was taken over so the gradient can be
        # reshaped to a broadcast-compatible shape for back-propagation.
        tensor, = self.tensors
        data_keepdims = tensor.sum(axis=self.axis, keepdims=True)
        grad = grad.reshape(data_keepdims.shape) + nets.zeros_like(tensor)
        return grad
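To see why reshaping to the keepdims shape works, here is a minimal standalone sketch of the same trick in plain NumPy (NumPy arrays stand in for nets tensors, for illustration only): the reduced gradient is reshaped so that adding a zero array of the input's shape broadcasts it back over the summed axis.

import numpy as np

x = np.arange(6.0).reshape(2, 3)             # forward input, shape (2, 3)
grad_y = np.array([10.0, 20.0])              # upstream gradient of x.sum(axis=1), shape (2,)

keepdims_shape = x.sum(axis=1, keepdims=True).shape   # (2, 1)
grad_x = grad_y.reshape(keepdims_shape) + np.zeros_like(x)
print(grad_x)
# [[10. 10. 10.]
#  [20. 20. 20.]]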
Example No. 2
    def __init__(self,
                 parameters,
                 lr=1e-2,
                 beta1=0.9,
                 beta2=0.999,
                 epsilon=1e-8):
        super().__init__(parameters)
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self._cache = {
            'velocity': [nets.zeros_like(p) for p in self.parameters],
            'momentum': [nets.zeros_like(p) for p in self.parameters],
            't': 0
        }
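The constructor above only sets up the Adam cache. As a rough idea of how the 'momentum', 'velocity' and 't' entries are typically consumed, here is a standalone NumPy sketch of the standard Adam update rule; the actual step() method of nets is not shown here, so this wiring is an assumption.

import numpy as np

# Standard Adam update on a single parameter array (hypothetical values).
lr, beta1, beta2, epsilon = 1e-2, 0.9, 0.999, 1e-8
param = np.array([1.0, -2.0, 3.0])
grad = np.array([0.1, -0.2, 0.3])

momentum = np.zeros_like(param)   # first moment, as in _cache['momentum']
velocity = np.zeros_like(param)   # second moment, as in _cache['velocity']
t = 0

t += 1
momentum = beta1 * momentum + (1 - beta1) * grad
velocity = beta2 * velocity + (1 - beta2) * grad ** 2
m_hat = momentum / (1 - beta1 ** t)   # bias-corrected first moment
v_hat = velocity / (1 - beta2 ** t)   # bias-corrected second moment
param -= lr * m_hat / (np.sqrt(v_hat) + epsilon)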
Example No. 3
def relu_prime(x):
    # type: (Array) -> Array
    r"""First order derivative of the ``relu`` function.

    .. math::
        \text{relu'(x)} =
                        \begin{cases}
                          1, &\quad x \ge 0 \\
                          0, &\quad x < 0.
                        \end{cases}

    Shape:
        - input: x (numpy.array): input to compute the ``relu`` derivative on.
        - output: y (numpy.array): gradient of the input, with the same shape as ``x``.

    .. image:: images/functional_relu_prime.png

    Examples::

        >>> in_array = np.array([-5, 2, 6, -2, 4])
        >>> out_array = relu_prime(in_array)

    See :class:`~nets.nn.activation.ReLU` for the activation implementation.
    """
    return where(x >= 0, nets.ones_like(x), nets.zeros_like(x))
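For reference, the docstring example above produces a 0/1 mask (1 wherever the input is non-negative); the following standalone NumPy snippet reproduces it with np.where in place of the module-level where helper.

import numpy as np

in_array = np.array([-5, 2, 6, -2, 4])
out_array = np.where(in_array >= 0, np.ones_like(in_array), np.zeros_like(in_array))
print(out_array)   # [0 1 1 0 1]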
Example No. 4
    def backward(self, grad):
        tensor, = self.tensors
        bigger_grad = nets.zeros_like(tensor)
        if grad.shape != bigger_grad.shape:
            # Scatter the upstream gradient back into the sliced positions
            bigger_grad[self.indices] = grad
        else:
            bigger_grad = grad
        return bigger_grad
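The shape check above distinguishes a sliced gradient from a full one. A minimal plain-NumPy sketch of the first branch, using a made-up slice for illustration, shows how the upstream gradient is scattered back into a zero array shaped like the original input.

import numpy as np

x = np.arange(5.0)                 # forward input, shape (5,)
indices = slice(1, 4)              # forward op was x[1:4]
grad = np.array([10.0, 20.0, 30.0])

bigger_grad = np.zeros_like(x)
bigger_grad[indices] = grad
print(bigger_grad)                 # [ 0. 10. 20. 30.  0.]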
Example No. 5
    def __init__(self, parameters, lr=1e-2, decay=0.99, epsilon=1e-8):
        super().__init__(parameters)
        self.lr = lr
        self.decay = decay
        self.epsilon = epsilon
        self._cache = {
            'velocity': [nets.zeros_like(p) for p in self.parameters]
        }
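Only the constructor is shown above; the single 'velocity' list is the running average of squared gradients that an RMSprop-style step would maintain. The following standalone NumPy sketch shows the usual update rule, with the caveat that the exact rule applied by nets' step() is an assumption here.

import numpy as np

lr, decay, epsilon = 1e-2, 0.99, 1e-8
param = np.array([1.0, -2.0, 3.0])
grad = np.array([0.1, -0.2, 0.3])

velocity = np.zeros_like(param)                      # as in _cache['velocity']
velocity = decay * velocity + (1 - decay) * grad ** 2
param -= lr * grad / (np.sqrt(velocity) + epsilon)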
Example No. 6
    def backward(self, grad):
        tensor, = self.tensors
        bigger_grad = nets.zeros_like(tensor)
        nc = numpy_or_cupy(grad)
        if self.axis is None:
            # If there is no axis, the argmax is the location of the single maximum element
            max_indices = nets.unravel_index(
                nets.argmax(tensor), tensor.shape)
            bigger_grad[max_indices] = grad
        else:
            # If there is an axis, we reconstruct the bigger matrix by 'rolling' on this axis
            max_indices = nets.argmax(tensor, axis=self.axis)
            for i, roll in enumerate(nets.rollaxis(bigger_grad, self.axis)):
                roll += (max_indices == i).astype(int) * grad

        return bigger_grad
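To make the axis branch concrete, here is a plain-NumPy walk-through for a 2-D input reduced over axis=1: rolling the reduced axis to the front lets the one-hot argmax mask route each row's gradient to the position of its maximum.

import numpy as np

tensor = np.array([[1.0, 5.0, 2.0],
                   [7.0, 0.0, 3.0]])
grad = np.array([10.0, 20.0])              # upstream gradient of max over axis=1

bigger_grad = np.zeros_like(tensor)
max_indices = np.argmax(tensor, axis=1)    # [1 0]
for i, roll in enumerate(np.rollaxis(bigger_grad, 1)):
    roll += (max_indices == i).astype(int) * grad
print(bigger_grad)
# [[ 0. 10.  0.]
#  [20.  0.  0.]]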
Example No. 7
    def backward(self, dout):
        """
        Computes the backward pass of a vanilla RNN.
        Save gradients parameters in the ``_grads`` parameter.

        Args:
            dout (Tensor): upstream gradient.

        Returns:
            Tensor: downstream gradient
        """
        # Initialize gradients as zero
        dw_ih = nets.zeros_like(self.weight_ih)
        dw_hh = nets.zeros_like(self.weight_hh)
        dw_ho = nets.zeros_like(self.weight_ho)
        db_h = nets.zeros_like(self.bias_h)
        db_o = nets.zeros_like(self.bias_o)
        # Get the cache
        hidden_states = self._cache['hidden_states']
        inputs = self._cache['x']

        # Keep track of hidden state derivative and loss
        dh_t = nets.zeros_like(hidden_states[0])

        # For each element in output sequence
        # NB: We iterate backwards s.t. t = N, N-1, ... 1, 0
        for t in reversed(range(dout.shape[0])):
            # Back-propagate into output sigmoid
            do = nets.sigmoid_prime(dout[t])
            db_o += do
            # Back-propagate into weight_ho
            dw_ho += nets.dot(hidden_states[t].T, do)
            # Back-propagate into h_t
            dh = nets.dot(do, self.weight_ho.T) + dh_t
            # Back-propagate through non-linearity tanh
            df = nets.tanh_prime(hidden_states[t]) * dh
            db_h += df
            # Back-propagate into weight_ih
            dw_ih += nets.dot(inputs[t].T, df)
            # Back-propagate into weight_hh
            dw_hh += nets.dot(hidden_states[t - 1].T, df)
            dh_t = nets.dot(df, self.weight_hh.T)

        # TODO: dx grad
        # dx = nets.dot(dout, self.weight_ih)

        # Save gradients
        self._grads["weight_ih"] = dw_ih
        self._grads["weight_hh"] = dw_hh
        self._grads["weight_ho"] = dw_ho
        self._grads["bias_h"] = db_h
        self._grads["bias_o"] = db_o

        return None
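The loop above accumulates one gradient contribution per time step. The standalone NumPy snippet below checks the shapes of a single iteration with made-up sizes (batch 4, input 3, hidden 5, output 2 are assumptions for illustration, not nets defaults); it mirrors the dot products in the loop.

import numpy as np

B, D, H, O = 4, 3, 5, 2
x_t = np.random.randn(B, D)          # inputs[t]
h_t = np.random.randn(B, H)          # hidden_states[t]
h_prev = np.random.randn(B, H)       # hidden_states[t - 1]
weight_ho = np.random.randn(H, O)
weight_hh = np.random.randn(H, H)
do = np.random.randn(B, O)           # gradient flowing into the output layer

dw_ho = h_t.T @ do                   # (H, O), matches weight_ho
dh = do @ weight_ho.T                # (B, H), gradient reaching h_t
df = (1 - np.tanh(h_t) ** 2) * dh    # tanh_prime(hidden_states[t]) * dh
dw_ih = x_t.T @ df                   # (D, H), matches weight_ih
dw_hh = h_prev.T @ df                # (H, H), matches weight_hh
dh_prev = df @ weight_hh.T           # (B, H), carried to the next iteration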
Example No. 8
def relu(t):
    r"""``relu`` is a standard activation function, defined as:

    .. math::
        \text{relu(t)} = \max{(0, t)}

    Args:
        t (Tensor): input tensor.

    .. image:: /images/functional_relu.png

    Example:
        >>> import nets
        >>> tensor = nets.tensor([-5, 2, 6, -2, 4])
        >>> tensor = relu(tensor)

    See :class:`~nets.nn.activation.ReLU` for the activation implementation.
    """
    t = nets.to_tensor(t)
    return maximum(nets.zeros_like(t), t)
Example No. 9
def relu(x):
    # type: (Array) -> Array
    r"""``relu`` is a standard activation function, defined as:

    .. math::
        \text{relu(x)} = \max{(0, x)}

    Shape:
        - input: x (numpy.array): input to compute the ``relu`` function on.
        - output: y (numpy.array): ``relu`` output, with the same shape as ``x``.

    .. image:: images/functional_relu.png

    Examples::

        >>> in_array = np.array([-5, 2, 6, -2, 4])
        >>> out_array = relu(in_array)

    See :class:`~nets.nn.activation.ReLU` for the activation implementation.
    """
    return maximum(nets.zeros_like(x), x)
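For reference, the docstring example above simply clips the negative entries to zero; the standalone NumPy snippet below reproduces it.

import numpy as np

in_array = np.array([-5, 2, 6, -2, 4])
out_array = np.maximum(np.zeros_like(in_array), in_array)
print(out_array)   # [0 2 6 0 4]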
Example No. 10
def relu_prime(t):
    r"""First order derivative of the ``relu`` function.

    .. math::
        \text{relu'(t)} =
            \begin{cases}
                1, &\quad t \ge 0 \\
                0, &\quad t < 0.
            \end{cases}

    Args:
        t (Tensor): input tensor.

    .. image:: images/functional_relu_prime.png

    Example:
        >>> import nets
        >>> tensor = nets.tensor([-5, 2, 6, -2, 4])
        >>> relu_prime(tensor)

    See :class:`~nets.nn.activation.ReLU` for the activation implementation.
    """
    t = nets.to_tensor(t)
    return where(t >= 0, nets.ones_like(t), nets.zeros_like(t))
Example No. 11
    def __init__(self, parameters, lr=1e-2, momentum=0):
        super().__init__(parameters)
        self.lr = lr
        self.momentum = momentum
        self._cache = {'velocity': [nets.zeros_like(p) for p in self.parameters]}
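As with the other optimizers, only the constructor is shown; the 'velocity' cache is what a classic momentum update would maintain. The sketch below shows that update in standalone NumPy, with the caveat that the exact rule applied by nets' step() is an assumption.

import numpy as np

lr, momentum = 1e-2, 0.9
param = np.array([1.0, -2.0, 3.0])
grad = np.array([0.1, -0.2, 0.3])

velocity = np.zeros_like(param)            # as in _cache['velocity']
velocity = momentum * velocity - lr * grad
param += velocity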