def train_loss(*args):
    # NOTE: `self` and `mode` are captured from the enclosing scope
    # (train_loss is defined as a closure inside loss()).
    X = args[0]
    y = args[1]

    res = X
    for l in range(self.num_layers):
        prev_res = res
        res = affine_forward(prev_res, args[self.w_idx(l)], args[self.b_idx(l)])
        if l < (self.num_layers - 1):
            if self.use_batchnorm:
                res = batchnorm_forward(res, args[self.bn_ga_idx(l)],
                                        args[self.bn_bt_idx(l)],
                                        self.bn_params[l])
            res = relu_forward(res)
            if self.use_dropout:
                res = dropout_forward(res, self.dropout_param)

    scores = res
    if mode == 'test':
        return scores

    # loss, _ = softmax_loss(scores, y)
    loss = svm_loss(scores, y)
    return loss
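# The closure above delegates the data term to svm_loss. Below is a minimal
# sketch of a vectorized multiclass hinge loss, assuming scores of shape
# (N, C) and integer labels y of shape (N,). It is an illustrative stand-in,
# not necessarily the implementation this codebase provides.
import numpy as np

def svm_loss_sketch(scores, y):
    N = scores.shape[0]
    correct = scores[np.arange(N), y].reshape(-1, 1)  # score of the true class
    margins = np.maximum(0, scores - correct + 1.0)   # hinge with delta = 1
    margins[np.arange(N), y] = 0                      # true class contributes 0
    return np.sum(margins) / N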
def loss(self, X, y=None):
    """
    Compute loss and gradient for a minibatch of data.

    Args:
    - X: Input data, numpy array of shape (N, d_1, ..., d_k)
    - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

    Returns:
    If y is None, then run a test-time forward pass of the model and return:
    - scores: Array of shape (N, C) giving classification scores, where
      scores[i, c] is the classification score for X[i] and class c.

    If y is not None, then run a training-time forward and backward pass and
    return a tuple of:
    - loss: Scalar value giving the loss
    - grads: Dictionary with the same keys as self.params, mapping parameter
      names to gradients of the loss with respect to those parameters.
    """
    scores = None
    X = X.astype(self.dtype)
    linear_cache = dict()
    relu_cache = dict()
    dropout_cache = dict()

    """
    TODO: Implement the forward pass for the fully-connected neural network,
    compute the scores and store them in the scores variable.
    """
    #######################################################################
    #                        BEGIN OF YOUR CODE                           #
    #######################################################################
    VAL = X.copy()
    # Hidden layers: linear -> ReLU (-> dropout), caching each stage so the
    # backward pass can reuse the intermediate values.
    for i in range(1, self.num_layers):
        linear_cache['L{}'.format(i)] = linear_forward(
            VAL, self.params['W{}'.format(i)], self.params['b{}'.format(i)])
        relu_cache['R{}'.format(i)] = relu_forward(
            linear_cache['L{}'.format(i)])
        if self.use_dropout:
            dropout_cache['D{}'.format(i)], dropout_cache['MASK{}'.format(i)] = \
                dropout_forward(relu_cache['R{}'.format(i)],
                                self.dropout_params['p'],
                                self.dropout_params['train'],
                                self.dropout_params['seed'])
            VAL = dropout_cache['D{}'.format(i)]
        else:
            VAL = relu_cache['R{}'.format(i)]
    # Final layer: linear only; the scores get no nonlinearity.
    linear_cache['L{}'.format(self.num_layers)] = linear_forward(
        VAL, self.params['W{}'.format(self.num_layers)],
        self.params['b{}'.format(self.num_layers)])
    scores = linear_cache['L{}'.format(self.num_layers)]
    #######################################################################
    #                         END OF YOUR CODE                            #
    #######################################################################

    # If y is None then we are in test mode so just return scores
    if y is None:
        return scores

    loss, grads = 0, dict()
    """
    TODO: Implement the backward pass for the fully-connected net. Store the
    loss in the loss variable and all gradients in the grads dictionary.
    Compute the loss with softmax. grads[k] has the gradients for
    self.params[k]. Add L2 regularization to the loss function.
    NOTE: To ensure that your implementation matches ours and you pass the
    automated tests, make sure that your L2 regularization includes a factor
    of 0.5 to simplify the expression for the gradient.
    """
    #######################################################################
    #                        BEGIN OF YOUR CODE                           #
    #######################################################################
    loss, grad = softmax(scores, y)

    # Backprop through the final linear layer; its input is the output of
    # the last hidden layer (after dropout, if enabled).
    if self.use_dropout:
        VAR = dropout_cache['D{}'.format(self.num_layers - 1)]
    else:
        VAR = relu_cache['R{}'.format(self.num_layers - 1)]
    dX, grads['W{}'.format(self.num_layers)], grads['b{}'.format(self.num_layers)] = \
        linear_backward(grad, VAR,
                        self.params['W{}'.format(self.num_layers)],
                        self.params['b{}'.format(self.num_layers)])
    grads['W{}'.format(self.num_layers)] += \
        self.reg * self.params['W{}'.format(self.num_layers)]
    loss += 0.5 * self.reg * np.sum(
        self.params['W{}'.format(self.num_layers)] ** 2)

    # Backprop through the hidden layers in reverse order.
    for inx in range(self.num_layers - 1, 0, -1):
        if self.use_dropout:
            dX = dropout_backward(dX, dropout_cache['MASK{}'.format(inx)],
                                  self.dropout_params['p'])
        dX = relu_backward(dX, linear_cache['L{}'.format(inx)])
        if inx - 1 != 0:
            if self.use_dropout:
                pre_layer = dropout_cache['D{}'.format(inx - 1)]
            else:
                pre_layer = relu_cache['R{}'.format(inx - 1)]
        else:
            pre_layer = X  # the first layer's input is the data itself
        dX, grads['W{}'.format(inx)], grads['b{}'.format(inx)] = \
            linear_backward(dX, pre_layer,
                            self.params['W{}'.format(inx)],
                            self.params['b{}'.format(inx)])
        grads['W{}'.format(inx)] += self.reg * self.params['W{}'.format(inx)]
        loss += 0.5 * self.reg * np.sum(self.params['W{}'.format(inx)] ** 2)
    #######################################################################
    #                         END OF YOUR CODE                            #
    #######################################################################
    return loss, grads
def apply_forward_dropout(self, x):
    x_, mask = dropout_forward(x,
                               p=self.dropout_params["p"],
                               train=self.dropout_params["train"],
                               seed=self.dropout_params["seed"])
    return x_, mask
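# apply_forward_dropout assumes a functional dropout_forward(x, p, train, seed)
# that returns (out, mask), paired with a dropout_backward(dout, mask, p).
# Below is a minimal inverted-dropout sketch consistent with those signatures,
# assuming p is the keep probability; the real helpers may differ in detail.
import numpy as np

def dropout_forward_sketch(x, p, train, seed=None):
    if seed is not None:
        np.random.seed(seed)
    if train:
        mask = np.random.rand(*x.shape) < p  # keep each unit with prob p
        return x * mask / p, mask            # rescale so E[out] == x
    return x, None                           # test time: identity

def dropout_backward_sketch(dout, mask, p):
    return dout * mask / p                   # gradient flows only through kept units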
def loss(self, X, y=None):
    """
    Compute loss and gradient for the fully-connected net.

    Input / output: Same as TwoLayerNet above.
    """
    X = X.astype(self.dtype)
    mode = 'test' if y is None else 'train'

    # Set train/test mode for batchnorm params and dropout param since they
    # behave differently during training and testing.
    if self.use_dropout:
        self.dropout_param['mode'] = mode
    if self.use_batchnorm:
        for bn_param in self.bn_params:
            bn_param['mode'] = mode

    scores = None
    cache = self.num_layers * [None]
    dropout_cache = (self.num_layers - 1) * [None]

    ############################################################################
    # When using batch normalization, you'll need to pass self.bn_params[0] to #
    # the forward pass for the first batch normalization layer, pass           #
    # self.bn_params[1] to the forward pass for the second batch normalization #
    # layer, etc.                                                              #
    ############################################################################
    for i in range(self.num_layers - 1):
        if not self.use_batchnorm:
            scores, cache[i] = affine_relu_forward(
                X if i == 0 else scores,
                self.params['W%d' % (i + 1)], self.params['b%d' % (i + 1)])
        else:
            scores, cache[i] = affine_bn_relu_forward(
                X if i == 0 else scores,
                self.params['W%d' % (i + 1)], self.params['b%d' % (i + 1)],
                self.params['gamma%d' % (i + 1)], self.params['beta%d' % (i + 1)],
                self.bn_params[i])
        if self.use_dropout:
            scores, dropout_cache[i] = dropout_forward(scores, self.dropout_param)
    scores, cache[self.num_layers - 1] = affine_forward(
        scores, self.params['W%d' % self.num_layers],
        self.params['b%d' % self.num_layers])

    # If test mode return early
    if mode == 'test':
        return scores

    loss, grads = 0.0, {}
    loss, dscore = softmax_loss(scores, y)
    dx, grads['W%d' % self.num_layers], grads['b%d' % self.num_layers] = \
        affine_backward(dscore, cache[self.num_layers - 1])
    for i in reversed(range(self.num_layers - 1)):
        if self.use_dropout:
            dx = dropout_backward(dx, dropout_cache[i])
        if not self.use_batchnorm:
            dx, grads['W%d' % (i + 1)], grads['b%d' % (i + 1)] = \
                affine_relu_backward(dx, cache[i])
        else:
            dx, grads['W%d' % (i + 1)], grads['b%d' % (i + 1)], \
                grads['gamma%d' % (i + 1)], grads['beta%d' % (i + 1)] = \
                affine_bn_relu_backward(dx, cache[i])

    ############################################################################
    # When using batch normalization, you don't need to regularize the scale   #
    # and shift parameters.                                                    #
    ############################################################################
    for i in range(self.num_layers):
        loss += .5 * self.reg * np.sum(np.square(self.params['W%d' % (i + 1)]))
        grads['W%d' % (i + 1)] += self.reg * self.params['W%d' % (i + 1)]

    return loss, grads
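# The loop above relies on affine -> batchnorm -> ReLU "sandwich" helpers. A
# minimal sketch of how such helpers are commonly composed from the layer
# primitives, assuming affine_forward/backward, batchnorm_forward/backward,
# and relu_forward/backward with the usual cs231n signatures are in scope
# (the _sketch names are hypothetical):
def affine_bn_relu_forward_sketch(x, w, b, gamma, beta, bn_param):
    a, fc_cache = affine_forward(x, w, b)
    bn, bn_cache = batchnorm_forward(a, gamma, beta, bn_param)
    out, relu_cache = relu_forward(bn)
    return out, (fc_cache, bn_cache, relu_cache)

def affine_bn_relu_backward_sketch(dout, cache):
    fc_cache, bn_cache, relu_cache = cache
    dbn = relu_backward(dout, relu_cache)
    da, dgamma, dbeta = batchnorm_backward(dbn, bn_cache)
    dx, dw, db = affine_backward(da, fc_cache)
    return dx, dw, db, dgamma, dbeta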
import numpy as np

def rel_error(x, y):
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))

# Load CIFAR-10 data
data = get_Cifar10_data()
for k, v in data.items():
    print('%s: \t' % k, v.shape)

############################# Dropout forward pass ############################
np.random.seed(231)
x = np.random.randn(500, 500) + 10

for p in [0.3, 0.6, 0.75]:
    out, _ = dropout_forward(x, {'mode': 'train', 'p': p})
    out_test, _ = dropout_forward(x, {'mode': 'test', 'p': p})

    print('Running tests with p = ', p)
    print('Mean of input: ', x.mean())
    print('Mean of train-time output: ', out.mean())
    print('Mean of test-time output: ', out_test.mean())
    print('Fraction of train-time output set to zero: ', (out == 0).mean())
    print('Fraction of test-time output set to zero: ', (out_test == 0).mean())
    print()

############################# Dropout backward pass ###########################
np.random.seed(231)
x = np.random.randn(10, 10) + 10
dout = np.random.randn(*x.shape)
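# The script above sets up x and dout but stops before the actual check. One
# way to finish it is to compare the analytic gradient from dropout_backward
# against a numeric one. eval_numerical_gradient_array_sketch is a
# hypothetical stand-in for the course's gradient-check utility, and the
# 'seed' key is assumed to make dropout_forward's mask reproducible.
def eval_numerical_gradient_array_sketch(f, x, df, h=1e-5):
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        old = x[ix]
        x[ix] = old + h
        pos = f(x).copy()       # f evaluated at x + h
        x[ix] = old - h
        neg = f(x).copy()       # f evaluated at x - h
        x[ix] = old
        grad[ix] = np.sum((pos - neg) * df) / (2 * h)  # centered difference
        it.iternext()
    return grad

dropout_param = {'mode': 'train', 'p': 0.2, 'seed': 123}
out, cache = dropout_forward(x, dropout_param)
dx = dropout_backward(dout, cache)
dx_num = eval_numerical_gradient_array_sketch(
    lambda xx: dropout_forward(xx, dropout_param)[0], x, dout)
print('dx relative error: ', rel_error(dx, dx_num))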