def zero_grad(self):
    r"""Set the gradient to a zero ``Tensor``.

    This is called when initializing a ``Tensor`` that requires gradient
    tracking, or to re-initialize a parameter's gradient after a training
    loop, since gradients accumulate on top of each other otherwise.
    """
    self.detach()
    self._grad = nets.zeros(self.shape, device=self.device, dtype='float64')
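# Minimal sketch (plain NumPy, not the ``nets`` API) of why ``zero_grad`` is
# needed: gradient buffers accumulate across backward passes unless they are
# reset, so a stale gradient from the previous step would leak into the next
# update. The values below are purely illustrative.
import numpy as np

grad = np.zeros(3)
for step_grad in (np.array([1.0, 2.0, 3.0]), np.array([0.5, 0.5, 0.5])):
    grad += step_grad              # each backward pass adds onto the buffer
print(grad)                        # [1.5 2.5 3.5] -- accumulated, not per-step

grad = np.zeros(3)                 # the role of zero_grad: reset between steps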
def __init__(self, input_dim, hidden_dim):
    super().__init__()
    self.input_dim = input_dim
    self.hidden_dim = hidden_dim
    # Initialize all the weights (input - hidden - output)
    self.weight_ih = Parameter.orthogonal(shape=(input_dim, hidden_dim))
    self.weight_hh = Parameter.orthogonal(shape=(hidden_dim, hidden_dim))
    self.weight_ho = Parameter.orthogonal(shape=(hidden_dim, input_dim))
    # Initialize all the biases (hidden - output)
    self.bias_h = Parameter.zeros(shape=(hidden_dim,))
    self.bias_o = Parameter.zeros(shape=(input_dim,))
    # Initialize the first hidden cell
    self.weight_h0 = nets.zeros(shape=(1, hidden_dim))
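# Shape sketch (plain NumPy) of one recurrent step using the parameters above.
# The tanh update rule is an assumption for illustration; the actual forward
# pass is defined elsewhere in the module.
import numpy as np

input_dim, hidden_dim, batch = 4, 8, 2
weight_ih = np.random.randn(input_dim, hidden_dim)      # input  -> hidden
weight_hh = np.random.randn(hidden_dim, hidden_dim)     # hidden -> hidden
weight_ho = np.random.randn(hidden_dim, input_dim)      # hidden -> output
bias_h, bias_o = np.zeros(hidden_dim), np.zeros(input_dim)
h = np.zeros((batch, hidden_dim))                       # initial hidden state (weight_h0)

x_t = np.random.randn(batch, input_dim)                 # one time step of input
h = np.tanh(x_t @ weight_ih + h @ weight_hh + bias_h)   # hidden update
out = h @ weight_ho + bias_o                            # projection back to input_dim
print(h.shape, out.shape)                               # (2, 8) (2, 4)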
def im2col(input_data, filter_h, filter_w, stride=1, pad=0):
    r"""Transform 4-dimensional images into a 2-dimensional array.

    From Stanford University's CS231n assignment 2.
    More [here](http://cs231n.stanford.edu/).

    Args:
        input_data (Tensor): 4-dimensional input images
            (number of images, number of channels, height, width)
        filter_h (int): height of the filter
        filter_w (int): width of the filter
        stride (int): stride of the sliding window
        pad (int): amount of zero-padding

    Returns:
        col (Tensor): 2-dimensional tensor

    .. warning::
        This function is not compatible with the ``autograd`` system.
        The resulting ``Tensor`` has no links to a previous computational
        graph, and in addition its gradient is set to ``None``.
    """
    # Extract the shape of one image
    N, C, H, W = input_data.shape
    # Make sure that the convolution can be executed
    # TODO: replace by a warning
    assert (H + 2 * pad - filter_h) % stride == 0, f'invalid parameters, (H + 2 * pad - filter_h) % stride != 0, got ' \
                                                   f'H={H}, pad={pad}, filter_h={filter_h}, stride={stride}'
    assert (W + 2 * pad - filter_w) % stride == 0, f'invalid parameters, (W + 2 * pad - filter_w) % stride != 0, got ' \
                                                   f'W={W}, pad={pad}, filter_w={filter_w}, stride={stride}'
    # Compute the output dimensions
    out_h = (H + 2 * pad - filter_h) // stride + 1
    out_w = (W + 2 * pad - filter_w) // stride + 1
    # Pad the input data
    padding = ((0, 0), (0, 0), (pad, pad), (pad, pad))
    image = nets.pad(input_data, padding)
    # Initialize the output
    col = nets.zeros((N, C, filter_h, filter_w, out_h, out_w))
    # Copy each receptive-field offset across all output positions at once
    for y in range(filter_h):
        y_max = y + stride * out_h
        for x in range(filter_w):
            x_max = x + stride * out_w
            col[:, :, y, x, :, :] = image[:, :, y:y_max:stride, x:x_max:stride]
    col = col.transpose(0, 4, 5, 1, 2, 3).reshape(N * out_h * out_w, -1)
    return col
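# Tiny NumPy illustration (not the ``nets`` API) of the column layout that
# im2col produces with no padding and stride 1: each row is one flattened
# receptive field, giving a result of shape
# (N * out_h * out_w, C * filter_h * filter_w).
import numpy as np

x = np.arange(16).reshape(1, 1, 4, 4)                  # N=1, C=1, H=W=4
filter_h = filter_w = 2
out_h = out_w = (4 - 2) // 1 + 1                       # 3

rows = []
for i in range(out_h):
    for j in range(out_w):
        rows.append(x[0, 0, i:i + filter_h, j:j + filter_w].ravel())
cols = np.stack(rows)
print(cols.shape)                                      # (9, 4)
print(cols[0])                                         # [0 1 4 5] -- top-left 2x2 patch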
def backward(self, dout):
    """Manual backward pass for a MaxPool2d layer."""
    dout = dout.transpose(0, 2, 3, 1)
    # Initialize the gradient buffer: one row per output value,
    # one column per position inside a pooling window
    pool_size = np.prod(self.pool_size)
    dmax = nets.zeros((dout.size, pool_size))
    # Get the cache saved during the forward pass
    x = self._cache['x']
    argmax = self._cache['argmax']
    # Route each upstream gradient to the position that won the forward max
    dmax[nets.arange(argmax.size), argmax.flatten()] = dout.flatten()
    dmax = dmax.reshape(dout.shape + (pool_size,))
    # Fold the column representation back to the input image shape
    dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1)
    dx = col2im(dcol, x.shape, *self.pool_size, self.stride, self.pad)
    return dx
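# Minimal sketch of the gradient-routing idea used above: in max pooling only
# the position that won the forward max receives the upstream gradient, every
# other entry of the window gets zero. Plain NumPy, values are illustrative.
import numpy as np

window = np.array([1.0, 5.0, 3.0, 2.0])    # one flattened 2x2 pooling window
argmax = window.argmax()                    # cached during the forward pass
dout = 0.7                                  # upstream gradient for this output

dwindow = np.zeros_like(window)
dwindow[argmax] = dout
print(dwindow)                              # [0.  0.7 0.  0. ]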
def one_hot(Y, num_classes):
    r"""Perform one-hot encoding on the input Y.

    .. math::
        \text{Y'}_{i, j} =
            \begin{cases}
                1, &\quad \text{if} \quad Y_i = j \\
                0, &\quad \text{otherwise}
            \end{cases}

    Args:
        Y (Tensor): 1D tensor of class indices of length :math:`N`
        num_classes (int): number of classes :math:`c`

    Returns:
        Tensor: one-hot encoded tensor of shape :math:`(N, c)`
    """
    batch_size = len(Y)
    Y_tilde = nets.zeros((batch_size, num_classes))
    Y_tilde[nets.arange(batch_size), Y] = 1
    return Y_tilde.astype(int)
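# NumPy sketch of the same fancy-indexing trick used by ``one_hot``:
# one row per sample, a single 1 in the column given by the class index.
import numpy as np

Y = np.array([0, 2, 1])
num_classes = 3
Y_tilde = np.zeros((len(Y), num_classes))
Y_tilde[np.arange(len(Y)), Y] = 1
print(Y_tilde.astype(int))
# [[1 0 0]
#  [0 0 1]
#  [0 1 0]]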