def __init__(self, n_inputs, n_hidden, n_classes):
    """
    Initializes MLP object.

    Args:
      n_inputs: number of inputs.
      n_hidden: list of ints, specifies the number of units in each hidden
                linear layer. If the list is empty, the MLP will not have any
                hidden layers, and the model will simply perform a multinomial
                logistic regression.
      n_classes: number of classes of the classification problem. This number
                 is required in order to specify the output dimensions of the
                 MLP.
    """
    self.layers = []
    in_features = n_inputs

    # Hidden stack: a linear layer followed by a ReLU for each entry in n_hidden.
    for hidden_units in n_hidden:
        self.layers.append(LinearModule(in_features, hidden_units))
        # self.layers.append(ELUModule())  # alternative activation
        self.layers.append(ReluModule())
        in_features = hidden_units

    # Output layer: class scores followed by a softmax over the classes.
    self.layers.append(LinearModule(in_features, n_classes))
    self.layers.append(SoftMaxModule())
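# A minimal sketch of how the layer list assembled above would typically be
# consumed in the forward pass. It assumes every module in self.layers exposes
# forward(input) -> output; this method is an illustration, not necessarily the
# repository's own implementation.
def forward(self, x):
    """Propagate a mini-batch through the layers assembled in __init__."""
    out = x
    for layer in self.layers:
        out = layer.forward(out)
    return out  # class probabilities produced by the final SoftMaxModule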
def test_init_b(self):
    """Assert the shape of the bias of the linear module."""
    # input and output dimensions
    in_features = 10
    out_features = 3

    # hand-made numpy linear module
    a = LinearModule(in_features, out_features)

    # shape of the bias vector and of its gradient buffer
    self.assertEqual((out_features, 1), a.params["bias"].shape)
    self.assertEqual((1, out_features), a.grads["bias"].shape)
def test_init_w(self):
    """Assert the shape of the weight matrix of the linear module."""
    # input and output dimensions
    in_features = 10
    out_features = 3

    # hand-made numpy linear module
    a = LinearModule(in_features, out_features)
    # gold-standard linear module
    b = nn.Linear(in_features, out_features)

    # shape of the weight matrix; the gradient buffer is stored transposed
    self.assertEqual(b.weight.shape, a.params["weight"].shape)
    self.assertEqual(b.weight.shape, a.grads["weight"].T.shape)
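# A sketch of a LinearModule.__init__ consistent with the shapes the two tests
# above check: the weight matrix follows the nn.Linear convention
# (out_features, in_features), the bias is a column vector, and the gradient
# buffers are stored transposed relative to the parameters. The initialization
# scheme itself (small-variance normal weights, zero bias) is an assumption for
# illustration, not taken from the repository.
def __init__(self, in_features, out_features):
    self.params = {
        "weight": np.random.normal(0, 0.0001, (out_features, in_features)),  # assumed init scheme
        "bias": np.zeros((out_features, 1)),
    }
    self.grads = {
        "weight": np.zeros((in_features, out_features)),  # transposed w.r.t. params, as the tests expect
        "bias": np.zeros((1, out_features)),
    }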
def test_linear_backward(self):
    np.random.seed(42)
    rel_error_max = 1e-5

    for test_num in range(10):
        N = np.random.choice(range(1, 20))   # batch size
        D = np.random.choice(range(1, 100))  # number of input features
        C = np.random.choice(range(1, 10))   # number of output features (classes)

        x = np.random.randn(N, D)            # mini-batch of inputs
        dout = np.random.randn(N, C)         # upstream gradient flowing into the layer

        layer = LinearModule(D, C)
        out = layer.forward(x)
        dx = layer.backward(dout)
        dw = layer.grads['weight']

        # Numerical gradients via finite differences. The second lambda can
        # ignore its argument because the checker perturbs
        # layer.params['weight'] in place before calling forward.
        dx_num = eval_numerical_gradient_array(lambda xx: layer.forward(xx), x, dout)
        dw_num = eval_numerical_gradient_array(lambda w: layer.forward(x), layer.params['weight'], dout)

        self.assertLess(rel_error(dx, dx_num), rel_error_max)
        self.assertLess(rel_error(dw, dw_num), rel_error_max)
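# The backward test relies on two helpers that are not defined here. The
# sketches below are typical implementations in the style of the CS231n
# gradient-checking utilities (an assumption about the helpers the repository
# actually ships), included to clarify why the dw lambda may ignore its
# argument: the checker perturbs the parameter array in place before every
# call to f. (Assumes numpy is already imported as np, as in the tests above.)
def rel_error(x, y):
    """Maximum relative error between two arrays."""
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))

def eval_numerical_gradient_array(f, x, df, h=1e-5):
    """Numerical gradient of f at x via central differences, chained with df."""
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=["multi_index"], op_flags=["readwrite"])
    while not it.finished:
        ix = it.multi_index
        oldval = x[ix]
        x[ix] = oldval + h
        pos = f(x).copy()   # f evaluates the module with the perturbed entry
        x[ix] = oldval - h
        neg = f(x).copy()
        x[ix] = oldval      # restore the original value
        grad[ix] = np.sum((pos - neg) * df) / (2 * h)
        it.iternext()
    return grad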