def predict(self, z, *args, output_only=True, **kwargs):
    """ Returns the output of this layer

    Parameters
    ----------
    z : (N, ...) np.array
        The inputs to be forward propagated, where the individual inputs are
        accessed along the 0th axis
    output_only : bool, optional
        If set to True, then this function will return only the prediction of
        the neural network. If set to False, then this will return the outputs
        of the individual layers. Unless back propagation is being performed,
        this should be set to True.

    Returns
    -------
    np.array
        The final output of the layer

    OR (if `output_only = False`)

    np.array, np.array
        The first np.array will store the output before it is passed through
        the activation function.
        The second np.array will store the output after it has passed through
        the activation function.
    """
    check_layer(self)

    if output_only:
        return z
    return z, z
def predict(self, z, output_only=True, **kwargs):
    """ Returns the output of this layer

    Parameters
    ----------
    z : (N, ...) np.array
        The inputs to be forward propagated, where the individual inputs are
        accessed along the 0th axis
    output_only : bool, optional
        If set to True, then this function will return only the prediction of
        the neural network. If set to False, then this will return the outputs
        of the individual layers. Unless back propagation is being performed,
        this should be set to True.

    Returns
    -------
    (N, ...) np.array
        The final output of the layer, post activation

    OR (if `output_only = False`)

    (N, ...) np.array, (N, ...) np.array
        The first np.array will store the output before it is passed through
        the activation function.
        The second np.array will store the output after it has passed through
        the activation function.
    """
    check_layer(self)

    a = self.W[None, ...] * z + self.b[None, ...]

    if output_only:
        return self.activation_function_(a)
    return a, self.activation_function_(a)
def get_weight_grad_(self, delta, prev_z):
    """ Returns the associated partial S/partial W^k, that is, the gradient with
        respect to the weight matrix in the kth layer

    Parameters
    ----------
    delta : (N, k) np.array
        In LaTeX, this should be delta_k
    prev_z : (N, j) np.array
        The output, post activation, of the previous layer (z_{k-1})

    Returns
    -------
    (k, ) np.array, (k, j) np.array
        The first array is the gradient for the bias units
        The second array is the gradient for the weight matrix
    """
    check_layer(self)

    # delta.T @ prev_z contracts over the sample axis; it is equivalent to
    # summing the per-example outer products:
    #   np.sum(delta[:, :, None] * prev_z[:, None, :], axis=0)
    weight_grad = delta.T @ prev_z
    bias_grad = np.sum(delta, axis=0)

    return bias_grad, weight_grad
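# Illustrative sanity check, separate from the layer code and using made-up
# shapes: the matrix product delta.T @ prev_z matches the summed per-example
# outer products it replaces.
import numpy as np

N, k, j = 8, 5, 3
delta = np.random.rand(N, k)
prev_z = np.random.rand(N, j)

by_matmul = delta.T @ prev_z
by_outer = np.sum(delta[:, :, None] * prev_z[:, None, :], axis=0)

assert by_matmul.shape == (k, j)
assert np.allclose(by_matmul, by_outer)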
def predict(self, z, output_only=True, pre_activation_of_input=None):
    """ Returns the prediction of this layer

    Parameters
    ----------
    z : (N, *input_shape) np.array
        The input to be flattened
    output_only : bool, optional
        If set to True, then this function will return only the prediction of
        the neural network. If set to False, then this will return the outputs
        of the individual layers. Unless back propagation is being performed,
        this should be set to True.
    pre_activation_of_input : (N, *input_shape) np.array
        The input, z, before it was passed through the activation function

    Returns
    -------
    (N, *output_shape) np.array
        The flattened representation of the input

    OR (if `output_only = False`)

    (N, *input_shape) np.array, (N, *output_shape) np.array
        The first np.array is the pre-activation of the input, unreshaped
        The second np.array is the output after it has been reshaped

    Notes
    -----
    Since this layer has no activation function of its own, the pre-activation
    of the input is simply passed through as the first return value when
    `output_only = False`.
    """
    check_layer(self)

    if output_only:
        return z.reshape(len(z), self.output_shape[0])
    return pre_activation_of_input, z.reshape(len(z), self.output_shape[0])
def predict(self, z, output_only=True, pre_activation_of_input=None):
    """ Returns the output of this layer

    Parameters
    ----------
    z : (N, i, j, k) np.array
        Assumed to be many images, accessed along the 0th index, across which
        the window is to be slid
    output_only : bool, optional
        If set to True, then this function will return only the prediction of
        the neural network. If set to False, then this will return the outputs
        of the individual layers. Unless back propagation is being performed,
        this should be set to True.

    Returns
    -------
    (N, a, b, c) np.array
        The final output of the layer, post activation

    OR (if `output_only = False`)

    (N, a, b, c) np.array, (N, a, b, c) np.array
        The first np.array will store the output before it is passed through
        the activation function.
        The second np.array will store the output after it has passed through
        the activation function.
    """
    check_layer(self)

    conv = self.perform_conv(z, self.filter, self.b, self.stride)

    if output_only:
        return self.activation_function_(conv)
    return conv, self.activation_function_(conv)
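# A rough sketch of what im2window / perform_conv are assumed to do here; the
# real helpers in this codebase may differ in details such as padding and axis
# ordering. Stride-spaced windows of the input are extracted and contracted
# against the filter bank.
import numpy as np

def im2window_sketch(images, window_shape, stride):
    # images: (N, H, W, C) -> windows: (N, H_out, W_out, h, w, C)
    windows = np.lib.stride_tricks.sliding_window_view(images, window_shape, axis=(1, 2))
    windows = windows[:, ::stride, ::stride]
    return np.moveaxis(windows, 3, -1)  # move the channel axis behind the window axes

def perform_conv_sketch(images, filters, bias, stride):
    # filters: (h, w, C, f), bias: (f, ) -> output: (N, H_out, W_out, f)
    windows = im2window_sketch(images, filters.shape[:2], stride)
    return np.einsum('nxyijc,ijcf->nxyf', windows, filters) + bias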
def get_weight_grad_(self, delta, prev_z):
    """ Get the gradients for the filter matrix and bias units

    Parameters
    ----------
    delta : (N, *output_shape) np.array
        In LaTeX, this should be delta_k
    prev_z : (N, *input_shape) np.array
        The output, post activation, of the previous layer (z_{k-1})

    Returns
    -------
    (filter_num, ) np.array, (*filter_shape) np.array
        The first array is the gradient for the bias units
        The second array is the gradient for the filter matrix
    """
    check_layer(self)

    # Extract the stride-spaced windows of the previous layer's output; each
    # window is what a filter saw when producing one output position
    windowed = self.im2window(prev_z, self.filter_spatial_shape, self.stride)

    # Contract the windows against delta over the sample and spatial axes.
    # Equivalent to:
    #   np.sum(delta[:, :, :, None, None, None, :] * windowed[..., None], axis=(0, 1, 2))
    w_grad = np.einsum("abcijk,abcl->ijkl", windowed, delta, optimize="greedy")
    b_grad = np.sum(delta, axis=(0, 1, 2))

    return b_grad, w_grad
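# Illustrative check of the einsum contraction above, separate from the layer
# code and using made-up shapes: it matches the broadcast-and-sum form noted in
# the comment.
import numpy as np

N, H_out, W_out, h, w, c, f = 2, 4, 4, 3, 3, 2, 5
windowed = np.random.rand(N, H_out, W_out, h, w, c)
delta = np.random.rand(N, H_out, W_out, f)

by_einsum = np.einsum("abcijk,abcl->ijkl", windowed, delta)
by_broadcast = np.sum(delta[:, :, :, None, None, None, :] * windowed[..., None], axis=(0, 1, 2))

assert by_einsum.shape == (h, w, c, f)
assert np.allclose(by_einsum, by_broadcast)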
def get_delta_backprop_(self, g_prime, new_delta, prev_z):
    """ Returns the delta for the previous layer, delta^{k-1}_{m,j}.

    Parameters
    ----------
    g_prime : (N, ...) np.array
        Should be the derivative of the output of the previous layer,
        g'_{k-1}(a^{k-1}_{m,j})
    new_delta : (N, ...) np.array
        The delta for this layer, delta^k_{m, j}
    prev_z : (N, ...) np.array
        The input for this layer, z^{k-1}

    Returns
    -------
    (N, ...) np.array
        Returns delta of the previous layer, delta^{k-1}

    Notes
    -----
    We want to return delta^{k-1} because the `sequential` class does not have
    access to the weights, W. But it does know the values of g'_{k-1} and
    delta^k, due to forward propagation and the backwards nature of the back
    propagation algorithm.
    """
    check_layer(self)

    # dS/dz^{k-1}: the gradient of the loss with respect to this layer's input
    dz_ = BatchNormGrads.dz(prev_z, new_delta, self.gamma, self.epsilon)

    # Multiply by g'_{k-1} to turn dS/dz^{k-1} into delta^{k-1}, matching the
    # convention used by the other layers
    return dz_ * g_prime
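# For reference, the standard batch-norm input gradient that BatchNormGrads.dz
# is assumed to compute (per feature, with batch size N and delta = dS/da):
#
#   z_hat_i = (z_i - mu) / sqrt(var + eps)
#   dS/dz_i = gamma / (N * sqrt(var + eps))
#             * (N * delta_i - sum_j delta_j - z_hat_i * sum_j delta_j * z_hat_j)
#
# This is the usual result of differentiating through the batch mean and
# variance; the exact factoring inside BatchNormGrads may differ.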
def get_weight_grad_(self, delta, prev_z):
    """ Returns the associated partial S/partial W^k, that is, the gradient with
        respect to the weight matrix in the kth layer

    Parameters
    ----------
    delta : (N, ...) np.array
        Should be delta_k
    prev_z : (N, ...) np.array
        The input of this layer (z_{k-1})

    Returns
    -------
    (...) np.array, (...) np.array
        The first array is the gradient for the bias units
        The second array is the gradient for the weight matrix
    """
    check_layer(self)

    # Equivalent to np.sum(delta * prev_z, axis=0): the element-wise product is
    # contracted over the sample axis only
    weight_grad = np.einsum('i...,i...', delta, prev_z)
    bias_grad = np.sum(delta, axis=0)

    return bias_grad, weight_grad
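# Illustrative check, separate from the layer code and with made-up shapes:
# the implicit-mode einsum above sums the element-wise product over the sample
# axis only, matching the comment.
import numpy as np

delta = np.random.rand(8, 4, 3)
prev_z = np.random.rand(8, 4, 3)

assert np.allclose(np.einsum('i...,i...', delta, prev_z),
                   np.sum(delta * prev_z, axis=0))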
def update_parameters_(self, *args, **kwargs):
    """ Perform an update to the weights by descending down the gradient

    Notes
    -----
    Since nothing in this layer is trainable, we can simply pass
    """
    check_layer(self)
    pass
def update_parameters_(self, bias_updates, weight_updates):
    """ Perform an update to the weights by descending down the gradient

    Parameters
    ----------
    bias_updates : (k, ) np.array
        The gradients for the bias units
    weight_updates : (k, j) np.array
        The gradients for the weight matrix
    """
    check_layer(self)

    self.W -= weight_updates
    self.b -= bias_updates
def get_weight_grad_(self, *args, **kwargs):
    """ Returns the associated partial S/partial W^k, that is, the gradient with
        respect to the weight matrix in the kth layer

    Returns
    -------
    (None, None)

    Notes
    -----
    Since nothing in this layer is trainable, the gradients are simply None
    """
    check_layer(self)
    return None, None
def update_parameters_(self, beta_updates, gamma_updates):
    """ Perform an update to the weights by descending down the gradient

    Parameters
    ----------
    beta_updates : (k, ) np.array
        Should be dS/d(beta), as scheduled by the optimizer
    gamma_updates : (k, ) np.array
        Should be dS/d(gamma), as scheduled by the optimizer
    """
    check_layer(self)

    self.beta -= beta_updates
    self.gamma -= gamma_updates
def update_parameters_(self, bias_updates, filter_updates):
    """ Update the filter and bias by descending down the gradient

    Parameters
    ----------
    bias_updates : (f, ) np.array
        Gradient of the bias
    filter_updates : (:, :, :, f) np.array
        Gradient of the filter
    """
    check_layer(self)

    self.filter -= filter_updates
    self.b -= bias_updates
def get_weight_grad_(self, delta, prev_z):
    """ Returns the gradients with respect to beta and gamma

    Parameters
    ----------
    delta : (N, ...) np.array
        Should be delta^k
    prev_z : (N, ...) np.array
        The input of this layer: z^{k-1}

    Returns
    -------
    (...) np.array, (...) np.array
        The first np.array is dS/d(beta)
        The second np.array is dS/d(gamma)
    """
    check_layer(self)

    # The normalised input, z_hat, as computed in the forward pass
    z_hat = (prev_z - np.mean(prev_z, axis=0)) / np.sqrt(np.std(prev_z, axis=0) ** 2 + self.epsilon)

    return np.sum(delta, axis=0), np.sum(delta * z_hat, axis=0)
def predict(self, z, output_only=True, **kwargs):
    """ Returns the output of this layer

    Parameters
    ----------
    z : (N, ...) np.array
        The inputs to be forward propagated, where the individual inputs are
        accessed along the 0th axis
    output_only : bool, optional
        If set to True, then this function will return only the prediction of
        the neural network. If set to False, then this will return the outputs
        of the individual layers. Unless back propagation is being performed,
        this should be set to True.

    Returns
    -------
    (N, ...) np.array
        The final output of the layer, post activation

    OR (if `output_only = False`)

    (N, ...) np.array, (N, ...) np.array
        The first np.array will store the output before it is passed through
        the activation function.
        The second np.array will store the output after it has passed through
        the activation function.

    Notes
    -----
    Since the activation function is linear, the two arrays returned when
    `output_only = False` are the same array.
    """
    check_layer(self)

    mean = np.mean(z, axis=0)
    std = np.std(z, axis=0)

    a = self.gamma * ((z - mean) / np.sqrt(std ** 2 + self.epsilon)) + self.beta

    if output_only:
        return a
    return a, a
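# Illustrative example, separate from the layer code and using made-up data:
# after the normalisation step each feature has roughly zero mean and unit
# variance across the batch, before being rescaled by gamma and shifted by beta.
import numpy as np

z = np.random.rand(64, 10) * 5 + 3
epsilon = 1e-7
z_hat = (z - np.mean(z, axis=0)) / np.sqrt(np.std(z, axis=0) ** 2 + epsilon)

assert np.allclose(np.mean(z_hat, axis=0), 0, atol=1e-6)
assert np.allclose(np.std(z_hat, axis=0), 1, atol=1e-3)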
def get_delta_backprop_(self, g_prime, new_delta, *args, **kwargs):
    """ Returns the delta for the previous layer, delta^{k-1}_{m,j}.

    Parameters
    ----------
    g_prime : (N, *input_shape) np.array
        Should be the derivative of the output of the previous layer,
        g'_{k-1}(a^{k-1}_{m,j})
    new_delta : (N, *output_shape) np.array
        The delta for this layer, delta^{k, f}_{m, j}

    Returns
    -------
    (N, *input_shape) np.array
        Returns delta of the previous layer, delta^{k-1}

    Notes
    -----
    We want to return delta^{k-1} because the `sequential` class does not have
    access to the filters, W. But it does know the values of g'_{k-1} and
    delta^k, due to forward propagation and the backwards nature of the back
    propagation algorithm.
    """
    check_layer(self)

    # Convolving the standard basis of the input space gives, for every input
    # position, its contribution to every output position and filter. In other
    # words, eye_conv is the Jacobian of the convolution with respect to its input.
    eye = np.eye(np.prod(self.input_shape)).reshape((np.prod(self.input_shape), *self.input_shape))
    eye_conv = self.perform_conv(eye, self.filter, np.zeros(self.filter_num), self.stride)

    # Index the Jacobian by (input position, output position, filter)
    eye_conv = eye_conv.reshape((*self.input_shape, *self.output_spatial_shape, self.filter_num))

    # Chain rule: contract delta^k against the Jacobian, then multiply by g'_{k-1}.
    # Equivalent to:
    #   temp = new_delta[:, None, None, None, :, :] * eye_conv[None, :]
    #   delta = np.sum(temp * g_prime[..., None, None, None], axis=(-1, -2, -3))
    delta = np.einsum("ijkl,abcjkl,iabc->iabc", new_delta, eye_conv, g_prime, optimize='greedy')

    return delta
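# Illustrative 1-D analogue of the "convolve the identity" trick above, separate
# from the layer code: applying any linear map to the standard basis vectors
# recovers its Jacobian, which can then be contracted against delta to
# backpropagate through the map. The kernel and sizes below are made up.
import numpy as np

def linear_map(x):  # stand-in for perform_conv: any linear function of x
    kernel = np.array([1.0, -2.0, 0.5])
    return np.convolve(x, kernel, mode='valid')

n_in = 6
jacobian = np.stack([linear_map(e) for e in np.eye(n_in)])  # (n_in, n_out)

x = np.random.rand(n_in)
delta_out = np.random.rand(jacobian.shape[1])

# Backpropagated gradient through the linear map: J @ delta_out
grad_in = jacobian @ delta_out

# Finite-difference check of one component
eps = 1e-6
i = 2
bumped = x.copy()
bumped[i] += eps
fd = (linear_map(bumped) - linear_map(x)) / eps
assert np.isclose(grad_in[i], fd @ delta_out, atol=1e-5)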
def get_delta_backprop_(self, g_prime, new_delta, *args, **kwargs):
    """ Returns the delta for the previous layer, delta^{k-1}_{m,j}.

    Parameters
    ----------
    g_prime : (N, *input_shape) np.array
        Should be the derivative of the output of the previous layer,
        g'_{k-1}(a^{k-1}_{m,j})
    new_delta : (N, *output_shape) np.array
        The delta for this layer, delta^k_{m, j}

    Returns
    -------
    (N, *input_shape) np.array
        Returns delta of the previous layer, delta^{k-1}

    Notes
    -----
    Since this is a pass-through layer (i.e. linear activation), g_prime = 1
    and can be ignored. The key to this layer is that the delta of this layer
    needs to be reshaped back into the shape expected by the previous layer.
    """
    check_layer(self)

    return new_delta.reshape(len(new_delta), *self.input_shape)
def get_delta_backprop_(self, g_prime, new_delta, *args):
    """ Returns the delta for the previous layer, delta^{k-1}_{m,j}.

    Parameters
    ----------
    g_prime : (N, j) np.array
        Should be the derivative of the output of the previous layer,
        g'_{k-1}(a^{k-1}_{m,j})
    new_delta : (N, k) np.array
        The delta for this layer, delta^k_{m, j}

    Returns
    -------
    (N, j) np.array
        Returns delta of the previous layer, delta^{k-1}

    Notes
    -----
    We want to return delta^{k-1} because the `sequential` class does not have
    access to the weights, W. But it does know the values of g'_{k-1} and
    delta^k, due to forward propagation and the backwards nature of the back
    propagation algorithm.
    """
    check_layer(self)

    return g_prime * (new_delta @ self.W)
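# Illustrative shape and equivalence check, separate from the layer code and
# with made-up sizes: the vectorised expression g_prime * (new_delta @ W)
# matches delta^{k-1}_{m,j} = g'_{k-1}(a^{k-1}_{m,j}) * sum_l delta^k_{m,l} W_{l,j}.
import numpy as np

N, k, j = 4, 5, 3
W = np.random.rand(k, j)
g_prime = np.random.rand(N, j)
new_delta = np.random.rand(N, k)

vectorised = g_prime * (new_delta @ W)
by_sum = np.array([[g_prime[m, q] * sum(new_delta[m, l] * W[l, q] for l in range(k))
                    for q in range(j)] for m in range(N)])

assert vectorised.shape == (N, j)
assert np.allclose(vectorised, by_sum)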
def summary_(self):
    check_layer(self)
    return 'Flatten', f'Output Shape {(None, *self.output_shape)}'
def get_weights(self):
    check_layer(self)
    return None, None
def update_parameters_(self, *args, **kwargs):
    """ Since this layer has no parameters, there is no implementation needed """
    check_layer(self)
    pass
def summary_(self):
    check_layer(self)
    return f'Dense {(self.hidden_nodes,)}', f'Output Shape {(None, *self.output_shape)}'
def get_delta_backprop_(self, g_prime, new_delta, *args):
    """ Since this layer has no parameters, there is no implementation needed """
    check_layer(self)
    return None
def summary_(self):
    check_layer(self)
    return 'Input', f'Input Shape {(None, *self.input_shape)}'
def summary_(self):
    check_layer(self)
    return 'Batch Norm', f'Output Shape {(None, *self.output_shape)}'
def get_weights(self):
    check_layer(self)
    return self.beta, self.gamma
def get_weights(self):
    check_layer(self)
    return self.W, self.b
def get_weights(self):
    check_layer(self)
    return self.filter, self.b
def summary_(self):
    check_layer(self)
    return f'Conv 2D {self.filter_num} x {self.filter_spatial_shape}', f'Output Shape {(None, *self.output_shape)}'
def get_weight_grad_(self, delta, prev_z):
    """ Since this layer has no parameters, there is no implementation needed """
    check_layer(self)
    return None, None