class ConvNet:
    """
    Small convolutional classifier.

    Layer order:
    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes, conv1_channels,
                 conv2_channels):
        """
        Builds every layer of the network.

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                         Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        n_channels = input_shape[2]
        kernel, pad, pool = 3, 1, 4

        self.conv1 = ConvolutionalLayer(n_channels, conv1_channels,
                                        kernel, pad)
        self.relu1 = ReLULayer()
        self.max_pl1 = MaxPoolingLayer(pool, pool)
        self.conv2 = ConvolutionalLayer(conv1_channels, conv2_channels,
                                        kernel, pad)
        self.relu2 = ReLULayer()
        self.max_pl2 = MaxPoolingLayer(pool, pool)
        self.flat = Flattener()
        # NOTE(review): the FC input size is fixed at 4 * conv2_channels,
        # presumably matching the 32x32 input named above - confirm
        # against the layer implementations before using other sizes.
        self.fc = FullyConnectedLayer(4 * conv2_channels, n_output_classes)

    def compute_loss_and_gradients(self, X, y):
        """
        Runs one forward and one backward pass on a batch and returns
        the loss; parameter gradients are left on the Param objects.

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes

        Returns:
        loss - value produced by softmax_with_cross_entropy
        (no L2 regularization is applied in this model)
        """
        # Start every pass from zeroed gradients.
        for param in self.params().values():
            param.grad = np.zeros_like(param.grad)

        pipeline = (
            self.conv1, self.relu1, self.max_pl1,
            self.conv2, self.relu2, self.max_pl2,
            self.flat, self.fc,
        )

        activations = X
        for layer in pipeline:
            activations = layer.forward(activations)

        loss, upstream = softmax_with_cross_entropy(activations, y)

        # Backward pass visits the same layers in the opposite order.
        for layer in reversed(pipeline):
            upstream = layer.backward(upstream)

        return loss

    def predict(self, X):
        """
        Returns, for every sample in X, the index of the largest output
        of the final fully connected layer.
        """
        activations = X
        for layer in (self.conv1, self.relu1, self.max_pl1,
                      self.conv2, self.relu2, self.max_pl2,
                      self.flat, self.fc):
            activations = layer.forward(activations)
        return activations.argmax(axis=1)

    def params(self):
        """
        Collects the W and B parameters of conv1, conv2 and fc into a
        dict keyed '<layer>.W' / '<layer>.B'.
        """
        collected = {}
        for name, layer in (('conv1', self.conv1),
                            ('conv2', self.conv2),
                            ('fc', self.fc)):
            collected[name + '.W'] = layer.W
            collected[name + '.B'] = layer.B
        return collected
class TwoLayerNet:
    """ Two fully connected layers with a ReLU in between """

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Builds the layers of the network.

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        self.fcl1 = FullyConnectedLayer(n_input, hidden_layer_size)
        self.act = ReLULayer()
        self.fcl2 = FullyConnectedLayer(hidden_layer_size, n_output)

    def compute_loss_and_gradients(self, X, y):
        """
        Runs forward and backward passes on a batch, adds the L2 penalty
        of every parameter and returns the total loss.

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        trainable = list(self.params().values())

        # Gradients from the previous batch must not leak into this one.
        for param in trainable:
            param.grad = np.zeros_like(param.value)

        hidden = self.fcl1.forward(X)
        activated = self.act.forward(hidden)
        scores = self.fcl2.forward(activated)

        loss, d_scores = softmax_with_cross_entropy(scores, y)

        d_activated = self.fcl2.backward(d_scores)
        d_hidden = self.act.backward(d_activated)
        self.fcl1.backward(d_hidden)

        for param in trainable:
            penalty, d_penalty = l2_regularization(param.value, self.reg)
            loss += penalty
            param.grad += d_penalty

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
          X, np array (test_samples, num_features)

        Returns:
          y_pred, np.array of int (test_samples)
        """
        hidden = self.fcl1.forward(X)
        activated = self.act.forward(hidden)
        scores = self.fcl2.forward(activated)
        return scores.argmax(axis=1)

    def params(self):
        """ Returns the W/B parameters of both fully connected layers """
        collected = {}
        collected['W1'] = self.fcl1.W
        collected['B1'] = self.fcl1.B
        collected['W2'] = self.fcl2.W
        collected['B2'] = self.fcl2.B
        return collected
class ConvNet:
    """
    Implements a very simple conv net

    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes, conv1_channels,
                 conv2_channels):
        """
        Initializes the neural network

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                         Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        _, _, input_channels = input_shape
        self.conv1 = ConvolutionalLayer(input_channels, conv1_channels, 3, 1)
        self.relu1 = ReLULayer()
        self.pool1 = MaxPoolingLayer(4, 4)
        self.conv2 = ConvolutionalLayer(conv1_channels, conv2_channels, 3, 1)
        self.relu2 = ReLULayer()
        self.pool2 = MaxPoolingLayer(4, 4)
        self.flattener = Flattener()
        # NOTE(review): FC input size is fixed at 4 * conv2_channels,
        # presumably sized for the 32x32 input named above - confirm
        # against the layer implementations before using other sizes.
        self.fc = FullyConnectedLayer(4 * conv2_channels, n_output_classes)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes

        Returns:
        loss - value produced by softmax_with_cross_entropy
        """
        # Clear gradients of every trainable parameter (conv1, conv2, fc)
        # before the new forward/backward pass.
        for param in self.params().values():
            param.grad = np.zeros_like(param.value)

        out = self.pool1.forward(self.relu1.forward(self.conv1.forward(X)))
        out = self.pool2.forward(self.relu2.forward(self.conv2.forward(out)))
        out = self.fc.forward(self.flattener.forward(out))

        loss, grad = softmax_with_cross_entropy(out, y)

        grad = self.flattener.backward(self.fc.backward(grad))
        grad = self.conv2.backward(
            self.relu2.backward(self.pool2.backward(grad)))
        self.conv1.backward(
            self.relu1.backward(self.pool1.backward(grad)))

        return loss

    def predict(self, X):
        """
        Returns the index of the most probable class for every sample
        in X.
        """
        out = self.pool1.forward(self.relu1.forward(self.conv1.forward(X)))
        out = self.pool2.forward(self.relu2.forward(self.conv2.forward(out)))
        out = self.fc.forward(self.flattener.forward(out))
        predictions = softmax(out)
        return np.argmax(predictions, axis=1)

    def params(self):
        """
        Returns a dict of all trainable parameters keyed
        '<layer>.W' / '<layer>.B'.

        Each entry must point at the parameters of its own layer;
        the 'conv2.*' entries expose conv2 so that its gradients are
        cleared here and the caller can see and update its weights.
        """
        return {
            'conv1.W': self.conv1.W,
            'conv1.B': self.conv1.B,
            'conv2.W': self.conv2.W,
            'conv2.B': self.conv2.B,
            'fc.W': self.fc.W,
            'fc.B': self.fc.B,
        }
class TwoLayerNet:
    """ Neural network with two fully connected layers """

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        self.hidden_layer = FullyConnectedLayer(n_input, hidden_layer_size)
        self.non_linearity = ReLULayer()
        self.output_layer = FullyConnectedLayer(hidden_layer_size, n_output)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes

        Returns:
        loss - cross-entropy loss plus the L2 penalty of every parameter
        """
        params = self.params()

        # Clear parameter gradients aggregated from the previous pass.
        for param in params.values():
            param.clear_grad()

        # Forward pass.
        out = self.hidden_layer.forward(X)
        out = self.non_linearity.forward(out)
        out = self.output_layer.forward(out)

        loss, grad = softmax_with_cross_entropy(out, y)

        # Backward pass, in reverse layer order.
        grad = self.output_layer.backward(grad)
        grad = self.non_linearity.backward(grad)
        self.hidden_layer.backward(grad)

        # L2 regularization on all params.
        for param in params.values():
            loss_l2, grad_l2 = l2_regularization(param.value, self.reg)
            loss += loss_l2
            param.grad += grad_l2

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
          X, np array (test_samples, num_features)

        Returns:
          y_pred, np.array of int (test_samples)
        """
        hidden = self.non_linearity.forward(self.hidden_layer.forward(X))
        scores = self.output_layer.forward(hidden)
        # Softmax is increasing within each row, so the arg-max of the
        # raw scores is already the most probable class.
        return np.argmax(scores, axis=1)

    def params(self):
        """ Returns the W/B parameters of the hidden and output layers """
        return {
            'W_h': self.hidden_layer.W,
            'B_h': self.hidden_layer.B,
            'W_o': self.output_layer.W,
            'B_o': self.output_layer.B,
        }
class ConvNet:
    """
    Small convolutional classifier.

    Layer order:
    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes, conv1_channels,
                 conv2_channels):
        """
        Builds the layers and caches the parameter dicts of the
        trainable ones.

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                         Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        width, height, n_channels = input_shape

        self.conv1 = ConvolutionalLayer(n_channels, conv1_channels, 3, 1)
        self.relu1 = ReLULayer()
        self.maxpool1 = MaxPoolingLayer(4, 4)
        self.conv2 = ConvolutionalLayer(conv1_channels, conv2_channels, 3, 1)
        self.relu2 = ReLULayer()
        self.maxpool2 = MaxPoolingLayer(4, 4)
        self.flatten = Flattener()

        # Each spatial side is integer-divided by 4 twice, once per
        # pooling layer, to size the fully connected input.
        rows_left = height // 4 // 4
        cols_left = width // 4 // 4
        fc_inputs = rows_left * cols_left * conv2_channels
        self.fc = FullyConnectedLayer(fc_inputs, n_output_classes)

        self.conv1_params = self.conv1.params()
        self.conv2_params = self.conv2.params()
        self.fc_params = self.fc.params()

    def compute_loss_and_gradients(self, X, y):
        """
        Runs one forward and one backward pass on a batch and returns
        the loss from softmax_with_cross_entropy.

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Zero the existing gradient arrays in place.
        for param in self.params().values():
            param.grad.fill(0)

        stack = (
            self.conv1, self.relu1, self.maxpool1,
            self.conv2, self.relu2, self.maxpool2,
            self.flatten, self.fc,
        )

        signal = X
        for layer in stack:
            signal = layer.forward(signal)

        loss, signal = softmax_with_cross_entropy(signal, y)

        for layer in reversed(stack):
            signal = layer.backward(signal)

        return loss

    def predict(self, X):
        """
        Returns, for every sample in X, the index of the largest output
        of the final fully connected layer.
        """
        signal = X
        for layer in (self.conv1, self.relu1, self.maxpool1,
                      self.conv2, self.relu2, self.maxpool2,
                      self.flatten, self.fc):
            signal = layer.forward(signal)
        return np.argmax(signal, axis=1)

    def params(self):
        """
        Merges the cached parameter dicts of conv1, conv2 and fc,
        suffixing their keys with '1', '2' and '3' respectively.
        """
        merged = {}
        sources = (
            ('1', self.conv1_params),
            ('2', self.conv2_params),
            ('3', self.fc_params),
        )
        for suffix, layer_params in sources:
            for key, param in layer_params.items():
                merged[key + suffix] = param
        return merged
class ConvNet:
    """
    Small convolutional classifier.

    Layer order:
    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes, conv1_channels,
                 conv2_channels):
        """
        Builds every layer of the network.

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                         Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        kernel = 3
        window = 4
        in_channels = input_shape[2]

        self.conv1 = ConvolutionalLayer(in_channels, conv1_channels,
                                        kernel, padding=1)
        self.relu1 = ReLULayer()
        self.max_pool1 = MaxPoolingLayer(window, stride=window)
        self.conv2 = ConvolutionalLayer(conv1_channels, conv2_channels,
                                        kernel, padding=1)
        self.relu2 = ReLULayer()
        self.max_pool2 = MaxPoolingLayer(window, stride=window)
        self.flatten = Flattener()
        # NOTE(review): FC input size is fixed at 4 * conv2_channels,
        # presumably sized for the 32x32 input named above - confirm.
        self.fc = FullyConnectedLayer(n_input=4 * conv2_channels,
                                      n_output=n_output_classes)

    def _sequence(self):
        """ Layers in the order the forward pass applies them """
        return (
            self.conv1, self.relu1, self.max_pool1,
            self.conv2, self.relu2, self.max_pool2,
            self.flatten, self.fc,
        )

    def compute_loss_and_gradients(self, X, y):
        """
        Runs one forward and one backward pass on a batch and returns
        the loss from softmax_with_cross_entropy (no L2 regularization).

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Drop whatever gradients the previous batch left behind.
        for param in self.params().values():
            param.grad = np.zeros_like(param.value)

        layers = self._sequence()

        out = X
        for layer in layers:
            out = layer.forward(out)

        loss, d_out = softmax_with_cross_entropy(out, y)

        for layer in reversed(layers):
            d_out = layer.backward(d_out)

        return loss

    def predict(self, X):
        """
        Returns the index of the most probable class for every sample
        in X.
        """
        out = X
        for layer in self._sequence():
            out = layer.forward(out)
        probs = softmax(out)
        return np.argmax(probs, axis=1)

    def params(self):
        """
        Collects the parameters of conv1, conv2 and fc; every key of a
        layer's own params() dict is suffixed with the layer's position
        in that list (0, 1, 2).
        """
        collected = {}
        for index, layer in enumerate((self.conv1, self.conv2, self.fc)):
            suffix = str(index)
            for key, param in layer.params().items():
                collected[key + suffix] = param
        return collected
class TwoLayerNet:
    """ Two fully connected layers with a ReLU in between """

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Builds the layers of the network.

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        self.layer_1 = FullyConnectedLayer(n_input, hidden_layer_size)
        self.layer_2 = ReLULayer()
        self.layer_3 = FullyConnectedLayer(hidden_layer_size, n_output)

    def compute_loss_and_gradients(self, X, y):
        """
        Runs forward and backward passes on a batch, adds the L2 penalty
        of every parameter and returns the total loss.

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        trainable = list(self.params().values())

        for param in trainable:
            param.grad = np.zeros_like(param.grad)

        chain = (self.layer_1, self.layer_2, self.layer_3)

        out = X
        for layer in chain:
            out = layer.forward(out)

        loss, d_out = softmax_with_cross_entropy(out, y)

        for layer in reversed(chain):
            d_out = layer.backward(d_out)

        for param in trainable:
            penalty, d_penalty = l2_regularization(param.value, self.reg)
            param.grad += d_penalty
            loss += penalty

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
          X, np array (test_samples, num_features)

        Returns:
          y_pred, np.array of int (test_samples)
        """
        out = X
        for layer in (self.layer_1, self.layer_2, self.layer_3):
            out = layer.forward(out)
        probs = softmax(out)
        # Last index of each row's ascending argsort = its largest entry.
        return np.array([row.argsort()[-1] for row in probs])

    def params(self):
        """ Returns the W/B parameters of both fully connected layers """
        return {
            'W1': self.layer_1.W,
            'W2': self.layer_3.W,
            'B1': self.layer_1.B,
            'B2': self.layer_3.B,
        }
class TwoLayerNet:
    """ Neural network with two fully connected layers """

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        self.lr = 1  # not read anywhere in this class; kept for callers
        self.fc1 = FullyConnectedLayer(n_input, hidden_layer_size)
        self.fc2 = FullyConnectedLayer(hidden_layer_size, n_output)
        # The activation is created here, not lazily during training,
        # so predict() also works on a model that was never trained.
        self.relu1 = ReLULayer()
        # Layers that own trainable parameters, in forward order.
        self.layers = (self.fc1, self.fc2)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes

        Returns:
        loss - cross-entropy loss plus the L2 penalty of every parameter
        """
        trainable = [param
                     for layer in self.layers
                     for param in layer.params().values()]

        # Clear parameter gradients aggregated from the previous pass.
        for param in trainable:
            param.grad.fill(0)

        # Forward pass.
        hidden = self.relu1.forward(self.fc1.forward(X))
        scores = self.fc2.forward(hidden)
        loss, dpred = softmax_with_cross_entropy(scores, y)

        # Backward pass, in reverse layer order.
        dout = self.fc2.backward(dpred)
        dout = self.relu1.backward(dout)
        self.fc1.backward(dout)

        # L2 regularization on all params.
        reg_loss = 0
        for param in trainable:
            param_loss, param_grad = l2_regularization(param.value, self.reg)
            reg_loss += param_loss
            param.grad += param_grad

        return loss + reg_loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
          X, np array (test_samples, num_features)

        Returns:
          y_pred, np.array of int (test_samples)
        """
        hidden = self.relu1.forward(self.fc1.forward(X))
        scores = self.fc2.forward(hidden)
        return np.argmax(scores, axis=1)

    def params(self):
        """
        Returns all trainable parameters of the layers in self.layers.

        Keys have the form '<id(layer)> <param name>'; they are unique
        per layer object but differ between interpreter runs.
        """
        result = {}
        for layer in self.layers:
            prefix = str(id(layer))
            for name, param in layer.params().items():
                result[' '.join([prefix, name])] = param
        return result
class ConvNet:
    """
    Small convolutional classifier.

    Layer order:
    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes, conv1_channels,
                 conv2_channels):
        """
        Builds every layer of the network.

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                         Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        width, height, channels = input_shape
        kernel, pad = 3, 1
        window, step = 4, 4

        self.Conv1 = ConvolutionalLayer(channels, conv1_channels,
                                        kernel, pad)
        self.ReLU1 = ReLULayer()
        self.MaxPool1 = MaxPoolingLayer(window, step)
        self.Conv2 = ConvolutionalLayer(conv1_channels, conv2_channels,
                                        kernel, pad)
        self.ReLU2 = ReLULayer()
        self.MaxPool2 = MaxPoolingLayer(window, step)

        # Each spatial side is integer-divided by the pooling stride
        # twice, once per pooling layer, to size the FC input.
        fc_inputs = ((width // step // step)
                     * (height // step // step)
                     * conv2_channels)

        self.Flat = Flattener()
        self.FullyConnected = FullyConnectedLayer(fc_inputs,
                                                  n_output_classes)

    def compute_loss_and_gradients(self, X, y):
        """
        Runs one forward and one backward pass on a batch and returns
        the loss from softmax_with_cross_entropy (no L2 regularization).

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Replace every gradient with a fresh zero array of equal shape.
        for param in self.params().values():
            param.grad = np.zeros(param.grad.shape)

        stack = (
            self.Conv1, self.ReLU1, self.MaxPool1,
            self.Conv2, self.ReLU2, self.MaxPool2,
            self.Flat, self.FullyConnected,
        )

        signal = X
        for layer in stack:
            signal = layer.forward(signal)

        loss, signal = softmax_with_cross_entropy(signal, y)

        for layer in reversed(stack):
            signal = layer.backward(signal)

        return loss

    def predict(self, X):
        """
        Returns, for every sample in X, the index of the largest output
        of the final fully connected layer.
        """
        signal = X
        for layer in (self.Conv1, self.ReLU1, self.MaxPool1,
                      self.Conv2, self.ReLU2, self.MaxPool2,
                      self.Flat, self.FullyConnected):
            signal = layer.forward(signal)
        return np.argmax(signal, axis=1)

    def params(self):
        """
        Collects the parameters of Conv1, Conv2 and FullyConnected,
        keyed '<layer tag>_<param name>'.
        """
        tagged_layers = (
            ("Conv1", self.Conv1),
            ("Conv2", self.Conv2),
            ("Fully", self.FullyConnected),
        )
        collected = {}
        for tag, layer in tagged_layers:
            for key, param in layer.params().items():
                collected['{}_{}'.format(tag, key)] = param
        return collected
class TwoLayerNet:
    """ Two fully connected layers with a ReLU in between """

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Builds the layers of the network.

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        # The hidden "layer" is a (fully connected, ReLU) pair.
        self.hidden_layer = (
            FullyConnectedLayer(n_input, hidden_layer_size),
            ReLULayer(),
        )
        self.output_layer = FullyConnectedLayer(hidden_layer_size, n_output)

    def compute_loss_and_gradients(self, X, y):
        """
        Runs forward and backward passes on a batch and returns the
        cross-entropy loss plus the L2 penalty of every parameter.

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        for param in self.params().values():
            param.grad = np.zeros_like(param.value)

        dense, activation = self.hidden_layer[0], self.hidden_layer[1]

        scores = self.output_layer.forward(
            activation.forward(dense.forward(X)))

        data_loss, d_scores = softmax_with_cross_entropy(scores, y)

        dense.backward(
            activation.backward(self.output_layer.backward(d_scores)))

        # Penalties are summed on their own and added to the data loss
        # once, at the end.
        penalty_total = 0
        for param in self.params().values():
            penalty, d_penalty = l2_regularization(param.value, self.reg)
            param.grad += d_penalty
            penalty_total += penalty

        return data_loss + penalty_total

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
          X, np array (test_samples, num_features)

        Returns:
          y_pred, np.array of int (test_samples)
        """
        dense, activation = self.hidden_layer[0], self.hidden_layer[1]
        scores = self.output_layer.forward(
            activation.forward(dense.forward(X)))
        return np.argmax(scores, axis=1)

    def params(self):
        """
        Returns the W/B parameters of the hidden ('h*') and output
        ('o*') fully connected layers.
        """
        collected = {}
        for prefix, layer in (('h', self.hidden_layer[0]),
                              ('o', self.output_layer)):
            layer_params = layer.params()
            collected[prefix + 'W'] = layer_params['W']
            collected[prefix + 'B'] = layer_params['B']
        return collected
class TwoLayerNet:
    """ Neural network with two fully connected layers """

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.fcl1 = FullyConnectedLayer(n_input, hidden_layer_size)
        self.fcl2 = FullyConnectedLayer(hidden_layer_size, n_output)
        self.relu = ReLULayer()
        self.reg = reg

        # Shortcuts to the layer parameters, kept for code that reads
        # these attributes. Each one must alias the parameter of its
        # own layer: b2 is the bias of the output layer, fcl2.
        fcl1_params = self.fcl1.params()
        fcl2_params = self.fcl2.params()
        self.w1 = fcl1_params['W']
        self.w2 = fcl2_params['W']
        self.b1 = fcl1_params['B']
        self.b2 = fcl2_params['B']

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes

        Returns:
        loss - cross-entropy loss plus the L2 penalty of every parameter
        """
        params = self.params()

        # Clear parameter gradients aggregated from the previous pass.
        for param in params.values():
            param.grad.fill(0)

        # Forward pass.
        hidden = self.relu.forward(self.fcl1.forward(X))
        output = self.fcl2.forward(hidden)

        loss, dprediction = softmax_with_cross_entropy(output, y)

        # Backward pass, in reverse layer order.
        d_hidden = self.relu.backward(self.fcl2.backward(dprediction))
        self.fcl1.backward(d_hidden)

        # L2 regularization on all params.
        for param in params.values():
            reg_loss, reg_grad = l2_regularization(param.value, self.reg)
            loss += reg_loss
            param.grad += reg_grad

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
          X, np array (test_samples, num_features)

        Returns:
          y_pred, np.array of int (test_samples)
        """
        hidden = self.relu.forward(self.fcl1.forward(X))
        scores = self.fcl2.forward(hidden)
        # Softmax is increasing within each row, so the arg-max of the
        # raw scores is already the most probable class.
        return np.argmax(scores, axis=1)

    def params(self):
        """
        Returns the W/B parameters of both fully connected layers,
        read from the layers themselves so the dict always reflects
        the parameters the forward and backward passes use.
        """
        fcl1_params = self.fcl1.params()
        fcl2_params = self.fcl2.params()
        return {
            "W1": fcl1_params['W'],
            "W2": fcl2_params['W'],
            "B1": fcl1_params['B'],
            "B2": fcl2_params['B'],
        }
class TwoLayerNet:
    """ Neural network with two fully connected layers """

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        self.layer1 = FullyConnectedLayer(n_input, hidden_layer_size)
        self.relu_layer = ReLULayer()
        self.layer2 = FullyConnectedLayer(hidden_layer_size, n_output)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes

        Returns:
        loss - cross-entropy loss plus the L2 penalty of every parameter
        """
        params = self.params()

        # Clear parameter gradients aggregated from the previous pass.
        for param in params.values():
            param.grad = np.zeros_like(param.value)

        # Forward pass.
        out1 = self.layer1.forward(X)
        out_relu = self.relu_layer.forward(out1)
        out2 = self.layer2.forward(out_relu)

        loss, d_preds = softmax_with_cross_entropy(out2, y)

        # Backward pass, in reverse layer order.
        d_out2 = self.layer2.backward(d_preds)
        d_out_relu = self.relu_layer.backward(d_out2)
        self.layer1.backward(d_out_relu)

        # L2 regularization on all params: the penalties are summed
        # first and then added to the data loss once.
        l2_reg = 0
        for param in params.values():
            l2_loss, l2_grad = l2_regularization(param.value, self.reg)
            l2_reg += l2_loss
            param.grad += l2_grad
        loss += l2_reg

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
          X, np array (test_samples, num_features)

        Returns:
          y_pred, np.array of int (test_samples)
        """
        out1 = self.layer1.forward(X)
        out_relu = self.relu_layer.forward(out1)
        scores = self.layer2.forward(out_relu)
        # Softmax is increasing within each row, so the arg-max of the
        # raw scores is already the most probable class.
        return np.argmax(scores, axis=1)

    def params(self):
        """ Returns the W/B parameters of both fully connected layers """
        layer1_params = self.layer1.params()
        layer2_params = self.layer2.params()
        return {
            "W1": layer1_params["W"],
            "B1": layer1_params["B"],
            "W2": layer2_params["W"],
            "B2": layer2_params["B"],
        }
class TwoLayerNet:
    """Neural network with two fully connected layers."""

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        self.layer1 = FullyConnectedLayer(n_input, hidden_layer_size)
        self.relu = ReLULayer()
        self.layer2 = FullyConnectedLayer(hidden_layer_size, n_output)

    def _scores(self, X):
        """Forward pass layer1 -> ReLU -> layer2; returns the class scores."""
        return self.layer2.forward(self.relu.forward(self.layer1.forward(X)))

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes

        Returns:
        loss - cross-entropy loss plus the L2 penalty of every parameter
        """
        params = self.params()

        # Start every batch from zero gradients.
        for param in params.values():
            param.grad = np.zeros_like(param.value)

        loss, d_preds = softmax_with_cross_entropy(self._scores(X), y)

        # Propagate the loss gradient back through the layers in reverse.
        d_relu_out = self.layer2.backward(d_preds)
        self.layer1.backward(self.relu.backward(d_relu_out))

        # L2 regularization on all params, added after the backward pass.
        for param in params.values():
            loss_l2, grad_l2 = l2_regularization(param.value, self.reg)
            loss += loss_l2
            param.grad += grad_l2

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
        X, np array (test_samples, num_features)

        Returns:
        y_pred, np.array of int (test_samples)
        """
        # argmax over raw scores equals argmax over softmax probabilities
        # (softmax preserves the ordering inside each row).
        return np.argmax(self._scores(X), axis=1)

    def params(self):
        """Returns all trainable parameters keyed 'W1', 'B1', 'W2', 'B2'."""
        first = self.layer1.params()
        second = self.layer2.params()
        return {
            'W1': first['W'],
            'B1': first['B'],
            'W2': second['W'],
            'B2': second['B'],
        }
class ConvNet:
    """
    Implements a very simple conv net

    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes, conv1_channels,
                 conv2_channels):
        """
        Initializes the neural network

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                         Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        self.height = input_shape[0]
        self.width = input_shape[1]
        self.input_channels = input_shape[2]
        self.n_output_classes = n_output_classes
        self.conv1_channels = conv1_channels
        self.conv2_channels = conv2_channels

        self.conv1_layer = ConvolutionalLayer(self.input_channels,
                                              self.conv1_channels, 3, 1)
        self.relu1 = ReLULayer()
        self.maxpool1 = MaxPoolingLayer(4, 4)
        self.conv2_layer = ConvolutionalLayer(self.conv1_channels,
                                              self.conv2_channels, 3, 1)
        self.relu2 = ReLULayer()
        self.maxpool2 = MaxPoolingLayer(4, 4)
        self.flattener = Flattener()

        # The 3x3 convolutions with padding 1 keep the spatial size and each
        # 4x4/stride-4 pool divides it by 4, so the flattened feature count
        # is (H // 16) * (W // 16) * conv2_channels (2*2*C for 32x32 input).
        fc_inputs = (self.height // 16) * (self.width // 16) * self.conv2_channels
        self.fc_layer = FullyConnectedLayer(fc_inputs, self.n_output_classes)

    def _forward(self, X):
        """Runs X through every layer in order and returns the class scores."""
        out = X
        for layer in (self.conv1_layer, self.relu1, self.maxpool1,
                      self.conv2_layer, self.relu2, self.maxpool2,
                      self.flattener, self.fc_layer):
            out = layer.forward(out)
        return out

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes

        Returns:
        loss - softmax cross-entropy loss of the batch (no regularization)
        """
        # Reset each gradient to zeros of the parameter's own shape instead of
        # repeating the layer shapes by hand.
        for param in self.params().values():
            param.grad = np.zeros_like(param.value)

        loss, grad = softmax_with_cross_entropy(self._forward(X), y)

        # Backward pass: same layers as the forward pass, in reverse order.
        for layer in (self.fc_layer, self.flattener, self.maxpool2,
                      self.relu2, self.conv2_layer, self.maxpool1,
                      self.relu1, self.conv1_layer):
            grad = layer.backward(grad)

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
        X, np array (test_samples, height, width, input_features)

        Returns:
        np array of int (test_samples) - index of the highest score per sample
        """
        return self._forward(X).argmax(axis=1)

    def params(self):
        """Returns all trainable parameters keyed 'W1'..'W3' and 'B1'..'B3'."""
        conv1 = self.conv1_layer.params()
        conv2 = self.conv2_layer.params()
        fc = self.fc_layer.params()
        return {
            'W1': conv1['W'], 'B1': conv1['B'],
            'W2': conv2['W'], 'B2': conv2['B'],
            'W3': fc['W'], 'B3': fc['B'],
        }
class TwoLayerNet:
    """Neural network with two fully connected layers."""

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg

        self.input_layer = FullyConnectedLayer(n_input, hidden_layer_size)
        self.relu = ReLULayer()
        self.output_layer = FullyConnectedLayer(hidden_layer_size, n_output)

        # Keep direct references to the layer parameters; params() serves
        # these cached objects.
        self.W_in = self.input_layer.params()['W']
        self.W_out = self.output_layer.params()['W']
        self.B_in = self.input_layer.params()['B']
        self.B_out = self.output_layer.params()['B']

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes

        Returns:
        loss - cross-entropy loss plus the L2 penalty of every parameter
        """
        # Wipe the gradients in place before accumulating new ones.
        for tensor in self.params().values():
            tensor.grad.fill(0)

        # Forward pass.
        hidden = self.input_layer.forward(X)
        activated = self.relu.forward(hidden)
        scores = self.output_layer.forward(activated)
        loss, d_scores = softmax_with_cross_entropy(scores, y)

        # Backward pass, last layer first.
        d_activated = self.output_layer.backward(d_scores)
        d_hidden = self.relu.backward(d_activated)
        self.input_layer.backward(d_hidden)

        # L2 penalty on every parameter.
        for tensor in self.params().values():
            penalty, penalty_grad = l2_regularization(tensor.value, self.reg)
            loss += penalty
            tensor.grad += penalty_grad

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
        X, np array (test_samples, num_features)

        Returns:
        y_pred, np.array of int (test_samples)
        """
        hidden = self.input_layer.forward(X)
        activated = self.relu.forward(hidden)
        scores = self.output_layer.forward(activated)
        return np.argmax(scores, axis=-1)

    def params(self):
        """Returns the cached parameters keyed 'W_out', 'W_in', 'B_out', 'B_in'."""
        return dict(
            W_out=self.W_out,
            W_in=self.W_in,
            B_out=self.B_out,
            B_in=self.B_in,
        )
class ConvNet:
    """
    Implements a very simple conv net

    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes, conv1_channels,
                 conv2_channels):
        """
        Initializes the neural network

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                         Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        input_width, input_height, input_channels = input_shape

        self.conv1 = ConvolutionalLayer(input_channels, conv1_channels,
                                        filter_size=3, padding=1)
        self.relu1 = ReLULayer()
        self.maxpool1 = MaxPoolingLayer(pool_size=4, stride=4)
        self.conv2 = ConvolutionalLayer(conv1_channels, conv2_channels,
                                        filter_size=3, padding=1)
        self.relu2 = ReLULayer()
        self.maxpool2 = MaxPoolingLayer(pool_size=4, stride=4)
        self.flattener = Flattener()

        # Each 4x4/stride-4 pool divides a side by 4 (the padded 3x3 convs keep
        # it), so each side shrinks by 16. Dividing per side, rather than the
        # product by 4**4, also gives the right count when a side is not a
        # multiple of 16; for 32x32 both forms equal 4 * conv2_channels.
        fc_inputs = (input_width // 16) * (input_height // 16) * conv2_channels
        self.fc = FullyConnectedLayer(fc_inputs, n_output_classes)

    def _forward(self, X):
        """Runs X through every layer in order and returns the class scores."""
        out = X
        for layer in (self.conv1, self.relu1, self.maxpool1,
                      self.conv2, self.relu2, self.maxpool2,
                      self.flattener, self.fc):
            out = layer.forward(out)
        return out

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes

        Returns:
        loss - softmax cross-entropy loss of the batch (no regularization)
        """
        # Clear parameter gradients aggregated from the previous pass.
        for layer in (self.conv1, self.conv2, self.fc):
            for param in layer.params().values():
                param.zero_grad()

        loss, grad = softmax_with_cross_entropy(self._forward(X), y)

        # Backward pass: the forward layers in reverse order.
        for layer in (self.fc, self.flattener, self.maxpool2, self.relu2,
                      self.conv2, self.maxpool1, self.relu1, self.conv1):
            grad = layer.backward(grad)

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
        X, np array (test_samples, height, width, input_features)

        Returns:
        np array of int (test_samples) - index of the highest score per sample
        """
        return np.argmax(self._forward(X), axis=1)

    def params(self):
        """Returns all trainable parameters keyed '<layer>.<param>'.

        Layer names are 'conv1', 'conv2' and 'fc'; parameter names are
        whatever each layer's own params() reports.
        """
        named_layers = (('conv1', self.conv1),
                        ('conv2', self.conv2),
                        ('fc', self.fc))
        result = {}
        for layer_name, layer in named_layers:
            for param_name, param in layer.params().items():
                result[f'{layer_name}.{param_name}'] = param
        return result
class TwoLayerNet:
    """Neural network with two fully connected layers."""

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        self.input_layer = FullyConnectedLayer(n_input, hidden_layer_size)
        self.relu = ReLULayer()
        self.output_layer = FullyConnectedLayer(hidden_layer_size, n_output)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes

        Returns:
        loss - cross-entropy loss plus the L2 penalty of every parameter
        """
        tensors = self.params()

        # Drop the gradients accumulated by the previous batch.
        for tensor in tensors.values():
            tensor.clear_grad()

        # Forward pass through the three layers.
        hidden = self.input_layer.forward(X)
        activated = self.relu.forward(hidden)
        scores = self.output_layer.forward(activated)
        loss, d_scores = softmax_with_cross_entropy(scores, y)

        # Backward pass in the opposite order.
        d_activated = self.output_layer.backward(d_scores)
        d_hidden = self.relu.backward(d_activated)
        self.input_layer.backward(d_hidden)

        # L2 penalty on every parameter.
        for tensor in tensors.values():
            penalty, penalty_grad = l2_regularization(tensor.value, self.reg)
            loss += penalty
            tensor.grad += penalty_grad

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
        X, np array (test_samples, num_features)

        Returns:
        y_pred, np.array of int (test_samples)
        """
        hidden = self.input_layer.forward(X)
        activated = self.relu.forward(hidden)
        scores = self.output_layer.forward(activated)
        probabilities = softmax(scores)
        return np.argmax(probabilities, axis=1)

    def params(self):
        """Returns all trainable parameters keyed 'W_h', 'B_h', 'W_o', 'B_o'."""
        hidden, output = self.input_layer, self.output_layer
        return dict(W_h=hidden.W, B_h=hidden.B, W_o=output.W, B_o=output.B)
class TwoLayerNet:
    """Neural network with two fully connected layers."""

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        self.layer1 = FullyConnectedLayer(n_input, hidden_layer_size)
        self.relu = ReLULayer()
        self.layer2 = FullyConnectedLayer(hidden_layer_size, n_output)

    def zero_grads(self):
        """Resets the gradient of every parameter to zero, in place."""
        for param in self.params().values():
            param.grad[:] = 0.0

    def forward(self, X):
        """Runs X through layer1 -> ReLU -> layer2 and returns the class scores."""
        return self.layer2.forward(self.relu.forward(self.layer1.forward(X)))

    def backward(self, d_preds):
        """Propagates d_preds back through the layers in reverse order.

        Returns the gradient with respect to the network input.
        """
        d_relu_out = self.layer2.backward(d_preds)
        d_layer1_out = self.relu.backward(d_relu_out)
        return self.layer1.backward(d_layer1_out)

    def l2_regularization(self):
        """Adds the L2 gradient to every parameter and returns the total penalty.

        The returned value is the SUM of the penalties of all parameters
        (0.0 when there are none), so that the loss reflects every term whose
        gradient has been added.
        """
        total_loss = 0.0
        for param in self.params().values():
            l2_loss, l2_grad = l2_regularization(param.value, self.reg)
            total_loss += l2_loss
            param.grad += l2_grad
        return total_loss

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes

        Returns:
        loss - cross-entropy loss plus the L2 penalty of every parameter
        """
        self.zero_grads()
        loss, d_preds = softmax_with_cross_entropy(self.forward(X), y)
        self.backward(d_preds)
        loss += self.l2_regularization()
        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
        X, np array (test_samples, num_features)

        Returns:
        y_pred, np.array of int (test_samples)
        """
        return self.forward(X).argmax(axis=1)

    def params(self):
        """Returns all trainable parameters keyed 'W1', 'B1', 'W2', 'B2'."""
        return {
            'W1': self.layer1.W,
            'B1': self.layer1.B,
            'W2': self.layer2.W,
            'B2': self.layer2.B,
        }
class TwoLayerNet:
    """Neural network with two fully connected layers."""

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        self.layer_1 = FullyConnectedLayer(n_input, hidden_layer_size)
        self.relu = ReLULayer()
        self.layer_2 = FullyConnectedLayer(hidden_layer_size, n_output)
        self.hidden_layer_size = hidden_layer_size
        self.n_input = n_input
        self.n_output = n_output

    def _forward(self, X):
        """Runs X through layer_1 -> ReLU -> layer_2; returns the class scores."""
        return self.layer_2.forward(self.relu.forward(self.layer_1.forward(X)))

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes

        Returns:
        loss - cross-entropy loss plus the L2 penalty of every parameter
        """
        params = self.params()

        # Clear gradients aggregated from the previous pass.
        for param in params.values():
            param.grad.fill(0)

        loss, grad_pred = softmax_with_cross_entropy(self._forward(X), y)

        # Backward pass first ...
        grad_relu_out = self.layer_2.backward(grad_pred)
        self.layer_1.backward(self.relu.backward(grad_relu_out))

        # ... and the L2 terms afterwards, so the penalty gradient is kept
        # even if a layer's backward assigns its parameter gradient instead
        # of accumulating into it.
        for param in params.values():
            l2_loss, l2_grad = l2_regularization(param.value, self.reg)
            loss += l2_loss
            param.grad += l2_grad

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
        X, np array (test_samples, num_features)

        Returns:
        y_pred, np.array of int (test_samples)
        """
        return np.argmax(self._forward(X), axis=1)

    def params(self):
        """Returns all trainable parameters keyed 'W1', 'B1', 'W2', 'B2'."""
        p1 = self.layer_1.params()
        p2 = self.layer_2.params()
        return {"W1": p1["W"], "B1": p1["B"], "W2": p2["W"], "B2": p2["B"]}
class TwoLayerNet:
    """Neural network with two fully connected layers."""

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg

        self.hidden_layer = FullyConnectedLayer(n_input, hidden_layer_size)
        self.relu_layer = ReLULayer()
        self.output_layer = FullyConnectedLayer(hidden_layer_size, n_output)

        self.n_input = n_input
        self.n_output = n_output
        self.hidden_layer_size = hidden_layer_size

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Only the two weight matrices are L2-regularized; the biases are not.

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes

        Returns:
        loss - cross-entropy loss plus the L2 penalty of both weight matrices
        """
        hidden_params = self.hidden_layer.params()
        output_params = self.output_layer.params()

        # Replace every gradient with a fresh zero array of the same shape.
        for layer_params in (hidden_params, output_params):
            for name in ('W', 'B'):
                entry = layer_params[name]
                entry.grad = np.zeros_like(entry.grad)

        # Forward pass.
        hidden_out = self.hidden_layer.forward(X)
        activated = self.relu_layer.forward(hidden_out)
        scores = self.output_layer.forward(activated)
        ce_loss, d_scores = softmax_with_cross_entropy(scores, y)

        # L2 penalty of the weights only.
        hidden_penalty, hidden_penalty_grad = l2_regularization(
            hidden_params['W'].value, self.reg)
        output_penalty, output_penalty_grad = l2_regularization(
            output_params['W'].value, self.reg)

        # Backward pass; each weight receives its penalty gradient right
        # after its layer's backward step.
        d_activated = self.output_layer.backward(d_scores)
        output_params['W'].grad += output_penalty_grad
        d_hidden_out = self.relu_layer.backward(d_activated)
        self.hidden_layer.backward(d_hidden_out)
        hidden_params['W'].grad += hidden_penalty_grad

        return ce_loss + hidden_penalty + output_penalty

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
        X, np array (test_samples, num_features)

        Returns:
        y_pred, np.array of int (test_samples)
        """
        scores = self.output_layer.forward(
            self.relu_layer.forward(self.hidden_layer.forward(X)))
        return np.argmax(scores, axis=1)

    def params(self):
        """Returns all trainable parameters keyed 'W1', 'B1', 'W2', 'B2'."""
        hidden_params = self.hidden_layer.params()
        output_params = self.output_layer.params()
        return {
            'W1': hidden_params['W'],
            'B1': hidden_params['B'],
            'W2': output_params['W'],
            'B2': output_params['B'],
        }
class ConvNet:
    """
    Implements a very simple conv net

    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes, conv1_channels,
                 conv2_channels):
        """
        Initializes the neural network

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                         Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        self.input_shape = input_shape
        self.n_output_classes = n_output_classes
        input_width, input_height, input_channels = input_shape

        # Channel count comes from input_shape instead of a hard-coded 3.
        self.layer1 = ConvolutionalLayer(input_channels, conv1_channels, 3, 1)
        self.layer2 = ReLULayer()
        self.layer3 = MaxPoolingLayer(4, 4)
        self.layer4 = ConvolutionalLayer(conv1_channels, conv2_channels, 3, 1)
        self.layer5 = ReLULayer()
        self.layer6 = MaxPoolingLayer(4, 4)
        self.layer7 = Flattener()

        # The padded 3x3 convs keep the spatial size and each 4x4/stride-4
        # pool divides it by 4, so the flattened feature count is
        # (W // 16) * (H // 16) * conv2_channels -> 2 * 2 * conv2_channels
        # for 32x32 images. It does not depend on conv1_channels.
        fc_inputs = (input_width // 16) * (input_height // 16) * conv2_channels
        self.layer8 = FullyConnectedLayer(fc_inputs, n_output_classes)

    def _layers(self):
        """Returns all eight layers in forward order."""
        return (self.layer1, self.layer2, self.layer3, self.layer4,
                self.layer5, self.layer6, self.layer7, self.layer8)

    def _forward(self, X):
        """Runs X through every layer in order and returns the class scores."""
        out = X
        for layer in self._layers():
            out = layer.forward(out)
        return out

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes

        Returns:
        loss - softmax cross-entropy loss of the batch (no regularization)
        """
        # Clear parameter gradients aggregated from the previous pass.
        for param in self.params().values():
            param.grad = np.zeros_like(param.grad)

        loss, grad = softmax_with_cross_entropy(self._forward(X), y)

        # Backward pass: the forward layers in reverse order.
        for layer in reversed(self._layers()):
            grad = layer.backward(grad)

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
        X, np array (test_samples, height, width, input_features)

        Returns:
        np array of int (test_samples) - index of the highest score per sample
        """
        return np.argmax(self._forward(X), axis=1)

    def params(self):
        """Returns all trainable parameters keyed '<layer>.<W|B>'.

        Every parameter has its own key; in particular the second conv
        layer's bias is exposed as 'layer4.B'.
        """
        return {
            'layer1.W': self.layer1.W,
            'layer1.B': self.layer1.B,
            'layer4.W': self.layer4.W,
            'layer4.B': self.layer4.B,
            'layer8.W': self.layer8.W,
            'layer8.B': self.layer8.B,
        }
class TwoLayerNet:
    """Neural network with two fully connected layers."""

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        self.fc1 = FullyConnectedLayer(n_input, hidden_layer_size)
        self.act1 = ReLULayer()
        self.fc2 = FullyConnectedLayer(hidden_layer_size, n_output)
        # Kept so the attribute still exists; it is not part of the forward
        # pass (the output scores go straight into the softmax loss).
        self.act2 = ReLULayer()

    def _forward(self, X):
        """Runs X through fc1 -> ReLU -> fc2 and returns the class scores."""
        return self.fc2.forward(self.act1.forward(self.fc1.forward(X)))

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes

        Returns:
        loss - cross-entropy loss plus the L2 penalty of every parameter
        """
        params = self.params()

        # Reset to zero ARRAYS of the parameter's shape (not the scalar 0) so
        # the gradient keeps a stable shape and dtype between batches.
        for param in params.values():
            param.grad = np.zeros_like(param.value)

        loss, d_pred = softmax_with_cross_entropy(self._forward(X), y)

        # Backward pass in reverse layer order.
        d_act1 = self.fc2.backward(d_pred)
        self.fc1.backward(self.act1.backward(d_act1))

        # L2 regularization on all params.
        for param in params.values():
            regular_loss, regular_grad = l2_regularization(param.value,
                                                           self.reg)
            loss += regular_loss
            param.grad += regular_grad

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Uses exactly the forward pass used for training. No ReLU is applied
        to the output scores: clamping them at zero would turn every
        all-negative row into a tie that argmax resolves to class 0.

        Arguments:
        X, np array (test_samples, num_features)

        Returns:
        y_pred, np.array of int (test_samples)
        """
        return np.argmax(self._forward(X), axis=1)

    def params(self):
        """Returns all trainable parameters keyed '<name>_fc1' / '<name>_fc2'.

        <name> is whatever key the layer's own params() uses.
        """
        result = {}
        for suffix, layer in (('_fc1', self.fc1), ('_fc2', self.fc2)):
            for name, param in layer.params().items():
                result[name + suffix] = param
        return result
class TwoLayerNet:
    """Neural network with two fully connected layers."""

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        self.FCL1 = FullyConnectedLayer(n_input, hidden_layer_size)
        self.ReLu = ReLULayer()
        self.FCL2 = FullyConnectedLayer(hidden_layer_size, n_output)

    def _forward(self, X):
        """Runs X through FCL1 -> ReLU -> FCL2 and returns the class scores."""
        return self.FCL2.forward(self.ReLu.forward(self.FCL1.forward(X)))

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes

        Returns:
        loss - cross-entropy loss plus the L2 penalty of every parameter
        """
        prms = self.params()

        # Clear gradients aggregated from the previous pass.
        for param in prms.values():
            param.grad = np.zeros_like(param.value)

        loss, dprediction = softmax_with_cross_entropy(self._forward(X), y)

        # Backward pass in reverse layer order.
        d_relu_out = self.FCL2.backward(dprediction)
        self.FCL1.backward(self.ReLu.backward(d_relu_out))

        # L2 regularization on all params.
        for param in prms.values():
            loss_l2, grad_l2 = l2_regularization(param.value, self.reg)
            loss += loss_l2
            param.grad += grad_l2

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
        X, np array (test_samples, num_features)

        Returns:
        y_pred, np.array of int (test_samples)
        """
        # argmax of the raw scores is the argmax of their softmax, so the
        # probabilities are not computed. The builtin int replaces the
        # np.int alias, which no longer exists in current NumPy.
        return np.argmax(self._forward(X), axis=1).astype(int)

    def params(self):
        """Returns all trainable parameters keyed 'W1', 'B1', 'W2', 'B2'."""
        return {
            'W1': self.FCL1.W,
            'B1': self.FCL1.B,
            'W2': self.FCL2.W,
            'B2': self.FCL2.B,
        }
class TwoLayerNet:
    """Neural network with two fully connected layers."""

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        self.fc1 = FullyConnectedLayer(n_input, hidden_layer_size)
        self.ReLU = ReLULayer()
        self.fc2 = FullyConnectedLayer(hidden_layer_size, n_output)

    def _forward(self, X):
        """Runs X through fc1 -> ReLU -> fc2 and returns the class scores."""
        return self.fc2.forward(self.ReLU.forward(self.fc1.forward(X)))

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Only the two weight matrices are L2-regularized; the biases are not.

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes

        Returns:
        loss - cross-entropy loss plus the L2 penalty of both weight matrices
        """
        params = self.params()

        # Clear gradients aggregated from the previous pass.
        for param in params.values():
            param.grad = np.zeros_like(param.value)

        # The labels go to the loss unchanged: predict() returns the argmax
        # index of these same scores, so training on shifted labels (y + 1)
        # would teach the network a class numbering that predict() does not
        # use, and would index past the last class for the highest label.
        loss, grad = softmax_with_cross_entropy(self._forward(X), y)

        # Backward pass in reverse layer order.
        relu_back = self.ReLU.backward(self.fc2.backward(grad))
        self.fc1.backward(relu_back)

        # L2 regularization of the weights, added after the backward pass.
        for key in ('fc1_W', 'fc2_W'):
            loss_l2, grad_l2 = l2_regularization(params[key].value, self.reg)
            params[key].grad += grad_l2
            loss += loss_l2

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
        X, np array (test_samples, num_features)

        Returns:
        y_pred, np.array of int (test_samples)
        """
        return np.argmax(self._forward(X), axis=1)

    def params(self):
        """Returns all trainable parameters.

        Keys, in order: 'fc1_W', 'fc2_W', 'fc1_B', 'fc2_B'.
        """
        first = self.fc1.params()
        second = self.fc2.params()
        return {
            'fc1_W': first['W'],
            'fc2_W': second['W'],
            'fc1_B': first['B'],
            'fc2_B': second['B'],
        }
class TwoLayerNet:
    """Neural network with two fully connected layers and a ReLU in between."""

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        self.fc1 = FullyConnectedLayer(n_input, hidden_layer_size)
        self.ReLU1 = ReLULayer()
        self.fc2 = FullyConnectedLayer(hidden_layer_size, n_output)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes

        Returns:
        loss - softmax cross-entropy loss plus the L2 penalty of every
            parameter returned by params()
        """
        # Clear gradients left over from the previous pass.
        params = self.params()
        for param in params.values():
            param.grad = np.zeros_like(param.value)

        # Forward pass: fc1 -> ReLU -> fc2.
        hidden = self.ReLU1.forward(self.fc1.forward(X))
        scores = self.fc2.forward(hidden)
        loss, d_scores = softmax_with_cross_entropy(scores, y)

        # Backward pass in reverse layer order.
        d_hidden = self.fc2.backward(d_scores)
        self.fc1.backward(self.ReLU1.backward(d_hidden))

        # Add the L2 penalty only after the backward pass. Doing it first
        # is correct only if every layer's backward() accumulates into
        # .grad; if a layer assigns .grad instead, the penalty gradient
        # would be silently overwritten. This order is right in both cases.
        # NOTE(review): the layers' backward() is not visible here.
        for param in params.values():
            reg_loss, reg_grad = l2_regularization(param.value, self.reg)
            loss += reg_loss
            param.grad += reg_grad

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
        X, np array (test_samples, num_features)

        Returns:
        y_pred, np.array of int (test_samples)
        """
        hidden = self.ReLU1.forward(self.fc1.forward(X))
        scores = self.fc2.forward(hidden)
        # The highest raw score per row is the predicted class.
        return scores.argmax(axis=1)

    def params(self):
        """Returns a dict with the weights and biases of both dense layers."""
        return {
            'W1': self.fc1.W,
            'B1': self.fc1.B,
            'W2': self.fc2.W,
            'B2': self.fc2.B,
        }
class TwoLayerNet:
    """Neural network with two fully connected layers and a ReLU in between."""

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        self.layer_1 = FullyConnectedLayer(n_input, hidden_layer_size)
        # Attribute name (including its spelling) is kept as-is because
        # code outside this class may refer to it.
        self.non_linier = ReLULayer()
        self.layer_2 = FullyConnectedLayer(hidden_layer_size, n_output)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes

        Returns:
        loss - softmax cross-entropy loss plus the L2 penalty of every
            parameter returned by params()
        """
        # Clear gradients left over from the previous pass.
        params = self.params()
        for param in params.values():
            param.grad = np.zeros_like(param.value)

        # Forward pass: layer_1 -> ReLU -> layer_2.
        hidden = self.non_linier.forward(self.layer_1.forward(X))
        scores = self.layer_2.forward(hidden)
        loss, d_scores = softmax_with_cross_entropy(scores, y)

        # Backward pass in reverse layer order.
        d_hidden = self.layer_2.backward(d_scores)
        self.layer_1.backward(self.non_linier.backward(d_hidden))

        # L2 penalty on every parameter (weights and biases). The penalty
        # is summed separately and added to the loss once, in the same
        # order as before: W1, B1, W2, B2.
        reg_total = 0.0
        for param in params.values():
            reg_loss, reg_grad = l2_regularization(param.value, self.reg)
            reg_total = reg_total + reg_loss
            param.grad += reg_grad
        loss += reg_total

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
        X, np array (test_samples, num_features)

        Returns:
        y_pred, np.array of int (test_samples)
        """
        # The old placeholder `np.zeros(X.shape[0], np.int)` was dead code
        # and raises AttributeError on NumPy >= 1.24, where the `np.int`
        # alias no longer exists.
        hidden = self.non_linier.forward(self.layer_1.forward(X))
        scores = self.layer_2.forward(hidden)
        # Softmax preserves the ordering of the scores, so the argmax of
        # the raw scores is the predicted class; no normalization needed.
        return np.argmax(scores, axis=1)

    def params(self):
        """Returns a dict with the weights and biases of both dense layers."""
        return {
            'W1': self.layer_1.W,
            'B1': self.layer_1.B,
            'W2': self.layer_2.W,
            'B2': self.layer_2.B,
        }
class ConvNet:
    """
    Implements a very simple conv net

    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes, conv1_channels,
                 conv2_channels):
        """
        Initializes the neural network

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                       Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        self.out_classes = n_output_classes
        # Only the channel count is used; unpacking still requires
        # input_shape to have exactly three entries.
        _width, _height, in_channels = input_shape

        self.Conv1 = ConvolutionalLayer(in_channels, conv1_channels, 3, 1)
        self.ReLU1 = ReLULayer()
        self.MaxPool1 = MaxPoolingLayer(4, 4)

        self.Conv2 = ConvolutionalLayer(conv1_channels, conv2_channels, 3, 1)
        self.ReLU2 = ReLULayer()
        self.MaxPool2 = MaxPoolingLayer(4, 4)

        self.Flatten = Flattener()
        # NOTE(review): the factor 4 presumably matches a 2x2 feature map
        # left after both pooling stages on a 32x32 input - confirm
        # against the layer implementations.
        self.FC = FullyConnectedLayer(4 * conv2_channels, n_output_classes)

    def _pipeline(self):
        """Returns all layers in forward order."""
        return (
            self.Conv1, self.ReLU1, self.MaxPool1,
            self.Conv2, self.ReLU2, self.MaxPool2,
            self.Flatten, self.FC,
        )

    def _forward(self, X):
        """Feeds X through every layer in order and returns the FC output."""
        out = X
        for layer in self._pipeline():
            out = layer.forward(out)
        return out

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes

        Returns:
        loss - softmax cross-entropy loss (no L2 regularization here)
        """
        # Clear gradients left over from the previous pass.
        for parameter in self.params().values():
            parameter.grad = np.zeros_like(parameter.value)

        scores = self._forward(X)
        loss, upstream = softmax_with_cross_entropy(scores, y)

        # Walk the layers backwards, handing each one the gradient of its
        # output.
        for layer in reversed(self._pipeline()):
            upstream = layer.backward(upstream)

        return loss

    def predict(self, X):
        """Returns the most probable class index for every sample in X."""
        probabilities = softmax(self._forward(X))
        return np.argmax(probabilities, axis=1)

    def params(self):
        """Returns a dict with the weights and biases of Conv1, Conv2, FC."""
        collected = {}
        collected['Conv1.W'] = self.Conv1.W
        collected['Conv1.B'] = self.Conv1.B
        collected['Conv2.W'] = self.Conv2.W
        collected['Conv2.B'] = self.Conv2.B
        collected['FC.W'] = self.FC.W
        collected['FC.B'] = self.FC.B
        return collected
class TwoLayerNet:
    """Neural network with two fully connected layers and a ReLU in between."""

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        self.fc_1 = FullyConnectedLayer(n_input, hidden_layer_size)
        self.relu_1 = ReLULayer()
        self.fc_2 = FullyConnectedLayer(hidden_layer_size, n_output)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes

        Returns:
        loss - softmax cross-entropy loss plus the L2 penalty of every
            parameter returned by params()
        """
        # Clear gradients left over from the previous pass.
        params = self.params()
        for param in params.values():
            param.grad = np.zeros_like(param.grad)

        # Forward pass: fc_1 -> ReLU -> fc_2. Intermediate results get
        # their own names instead of rebinding the X argument.
        hidden = self.relu_1.forward(self.fc_1.forward(X))
        scores = self.fc_2.forward(hidden)
        loss, d_scores = softmax_with_cross_entropy(scores, y)

        # Backward pass in reverse layer order.
        d_hidden = self.fc_2.backward(d_scores)
        self.fc_1.backward(self.relu_1.backward(d_hidden))

        # L2 penalty on every parameter (weights and biases).
        for param in params.values():
            reg_loss, reg_grad = l2_regularization(param.value, self.reg)
            loss += reg_loss
            param.grad += reg_grad

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
        X, np array (test_samples, num_features)

        Returns:
        y_pred, np.array of int (test_samples)
        """
        # The old placeholder `np.zeros(X.shape[0], np.int)` was dead code
        # and raises AttributeError on NumPy >= 1.24, where the `np.int`
        # alias no longer exists.
        hidden = self.relu_1.forward(self.fc_1.forward(X))
        scores = self.fc_2.forward(hidden)
        # Softmax preserves the ordering of the scores, so the argmax of
        # the raw scores is the predicted class; no normalization needed.
        return np.argmax(scores, axis=1)

    def params(self):
        """Returns a dict with the weights and biases of both dense layers."""
        return {
            'fc1_W': self.fc_1.W,
            'fc1_B': self.fc_1.B,
            'fc2_W': self.fc_2.W,
            'fc2_B': self.fc_2.B,
        }
class TwoLayerNet:
    """Neural network with two fully connected layers and a ReLU in between."""

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        # The hidden layer is sized by hidden_layer_size. The previous
        # code built n_input -> n_output -> n_output and ignored that
        # argument altogether.
        self.layer1 = FullyConnectedLayer(n_input, hidden_layer_size)
        self.layer2 = ReLULayer()
        self.layer3 = FullyConnectedLayer(hidden_layer_size, n_output)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes

        Returns:
        loss - softmax cross-entropy loss plus the L2 penalty of both
            weight matrices
        """
        # Clear gradients left over from the previous pass.
        for param in self.params().values():
            param.grad = np.zeros_like(param.grad)

        # Forward pass: layer1 (dense) -> layer2 (ReLU) -> layer3 (dense).
        hidden = self.layer2.forward(self.layer1.forward(X))
        scores = self.layer3.forward(hidden)
        loss, d_scores = softmax_with_cross_entropy(scores, y)

        # Backpropagation in reverse layer order.
        d_hidden = self.layer3.backward(d_scores)
        self.layer1.backward(self.layer2.backward(d_hidden))

        # Regularization terms: L2 penalty on the weight matrices only;
        # biases are left unregularized, as in the original code.
        reg_loss1, reg_grad1 = l2_regularization(self.layer1.W.value,
                                                 self.reg)
        reg_loss3, reg_grad3 = l2_regularization(self.layer3.W.value,
                                                 self.reg)
        self.layer1.W.grad += reg_grad1
        self.layer3.W.grad += reg_grad3

        return loss + reg_loss1 + reg_loss3

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
        X, np array (test_samples, num_features)

        Returns:
        y_pred, np.array of int (test_samples)
        """
        # The old placeholder `np.zeros(X.shape[0], np.int)` was dead code
        # and raises AttributeError on NumPy >= 1.24, where the `np.int`
        # alias no longer exists.
        hidden = self.layer2.forward(self.layer1.forward(X))
        scores = self.layer3.forward(hidden)
        # The highest raw score per row is the predicted class.
        return np.argmax(scores, axis=1)

    def params(self):
        """Returns a dict with the weights and biases of both dense layers."""
        # Each parameter needs its own key. The previous dict used
        # 'layer1.B' twice, which dropped layer1's bias and exposed
        # layer3's bias under the wrong name.
        return {
            'layer1.W': self.layer1.W,
            'layer1.B': self.layer1.B,
            'layer3.W': self.layer3.W,
            'layer3.B': self.layer3.B,
        }
class TwoLayerNet:
    """Two dense layers with a ReLU non-linearity between them."""

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Builds the layers of the network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        self.fc1 = FullyConnectedLayer(n_input, hidden_layer_size)
        self.relu = ReLULayer()
        self.fc2 = FullyConnectedLayer(hidden_layer_size, n_output)

    def compute_loss_and_gradients(self, X, y):
        """
        Runs one forward/backward pass over a batch and returns the loss

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes

        Returns:
        loss - softmax cross-entropy loss plus the L2 penalty of every
            parameter returned by params()
        """
        # Start from clean gradients.
        for parameter in self.params().values():
            parameter.grad = np.zeros_like(parameter.grad)

        # Forward: fc1 -> relu -> fc2.
        hidden_pre = self.fc1.forward(X)
        hidden_act = self.relu.forward(hidden_pre)
        scores = self.fc2.forward(hidden_act)
        loss, d_scores = softmax_with_cross_entropy(scores, y)

        # Backward, in reverse layer order.
        d_hidden_act = self.fc2.backward(d_scores)
        d_hidden_pre = self.relu.backward(d_hidden_act)
        self.fc1.backward(d_hidden_pre)

        # L2 penalty on every parameter, weights and biases alike.
        for parameter in self.params().values():
            penalty, penalty_grad = l2_regularization(parameter.value,
                                                      self.reg)
            parameter.grad += penalty_grad
            loss += penalty

        return loss

    def predict(self, X):
        """
        Returns the index of the highest-scoring class for every sample

        Arguments:
        X, np array (test_samples, num_features)

        Returns:
        y_pred, np.array of int (test_samples)
        """
        hidden = self.relu.forward(self.fc1.forward(X))
        scores = self.fc2.forward(hidden)
        return scores.argmax(axis=1)

    def params(self):
        """Collects the weights and biases of both dense layers in a dict."""
        collected = {}
        collected['W1'] = self.fc1.W
        collected['B1'] = self.fc1.B
        collected['W2'] = self.fc2.W
        collected['B2'] = self.fc2.B
        return collected