def loss(self, X, y=None):
    """
    Evaluate loss and gradient for the six-layer convolutional network.

    Architecture: [conv - batchnorm - relu - pool] x 2 - affine - relu - affine,
    followed by a softmax loss when labels are given.

    Input / output: Same API as TwoLayerNet in fc_net.py.

    Inputs:
    - X: Input data, shape (N, C, H, W).
    - y: Labels of shape (N,), or None to run in inference mode.

    Returns:
    - If y is None: scores of shape (N, num_classes).
    - Otherwise: (loss, grads) where grads matches self.params' keys.
    """
    W1, b1 = self.params['W1'], self.params['b1']
    W2, b2 = self.params['W2'], self.params['b2']
    W3, b3 = self.params['W3'], self.params['b3']
    W4, b4 = self.params['W4'], self.params['b4']
    gamma1, beta1 = self.params['gamma1'], self.params['beta1']
    gamma2, beta2 = self.params['gamma2'], self.params['beta2']

    # 'same' padding for the conv layers; 2x2 stride-2 max pooling halves H, W.
    filter_size = W1.shape[2]
    conv_param = {'stride': 1, 'pad': (filter_size - 1) // 2}
    pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}

    # FIX: the original rebuilt fresh {'mode': 'train'} dicts on every call,
    # which (a) discarded the batch-norm running mean/variance between calls
    # and (b) used batch statistics even at test time. Persist the bn_param
    # dicts on self so the running averages accumulate, and select the mode
    # from whether labels were supplied.
    # NOTE(review): assumes the batchnorm layer stores running stats in the
    # bn_param dict it is handed (standard cs231n contract) — confirm.
    if not hasattr(self, 'bn_params'):
        self.bn_params = [{'mode': 'train'}, {'mode': 'train'}]
    mode = 'test' if y is None else 'train'
    for bp in self.bn_params:
        bp['mode'] = mode
    bn_param = self.bn_params

    # Forward pass: conv-bn-relu -> pool -> conv-bn-relu -> pool
    # -> affine-relu -> affine (class scores).
    a1, cache1 = conv_bn_relu_forward(X, W1, b1, gamma1, beta1, conv_param,
                                      bn_param[0])
    a2, cache2 = max_pool_forward_fast(a1, pool_param)
    a3, cache3 = conv_bn_relu_forward(a2, W2, b2, gamma2, beta2, conv_param,
                                      bn_param[1])
    a4, cache4 = max_pool_forward_fast(a3, pool_param)
    a5, cache5 = affine_relu_forward(a4, W3, b3)
    scores, cache6 = affine_forward(a5, W4, b4)

    if y is None:
        return scores

    loss, grads = 0, {}
    loss, dscore = softmax_loss(scores, y)

    # Backward pass in reverse layer order.
    da5, grads['W4'], grads['b4'] = affine_backward(dscore, cache6)
    da4, grads['W3'], grads['b3'] = affine_relu_backward(da5, cache5)
    da3 = max_pool_backward_fast(da4, cache4)
    da2, grads['W2'], grads['b2'], grads['gamma2'], grads[
        'beta2'] = conv_bn_relu_backward(da3, cache3)
    da1 = max_pool_backward_fast(da2, cache2)
    _, grads['W1'], grads['b1'], grads['gamma1'], grads[
        'beta1'] = conv_bn_relu_backward(da1, cache1)

    # L2 regularization on the weights only (not biases or bn parameters).
    grads['W1'] += self.reg * W1
    grads['W2'] += self.reg * W2
    grads['W3'] += self.reg * W3
    grads['W4'] += self.reg * W4
    loss += 0.5 * self.reg * sum(np.sum(W ** 2) for W in [W1, W2, W3, W4])

    return loss, grads
def conv_relu_pool_backward(dout, cache):
    """
    Backward pass for the conv-relu-pool convenience layer.

    Unpacks the per-stage caches saved by the forward pass and pushes the
    upstream gradient through pool, relu, and conv in reverse order.

    Inputs:
    - dout: Upstream gradient of the pooled output.
    - cache: Tuple (conv_cache, relu_cache, pool_cache) from the forward pass.

    Returns a tuple of:
    - dx: Gradient with respect to the layer input.
    - dw: Gradient with respect to the convolution weights.
    - db: Gradient with respect to the convolution biases.
    """
    conv_cache, relu_cache, pool_cache = cache
    grad = max_pool_backward_fast(dout, pool_cache)
    grad = relu_backward(grad, relu_cache)
    return conv_backward_fast(grad, conv_cache)
# Benchmark the fast max-pooling layers against the naive reference
# implementations: time both directions and check numerical agreement.
t0 = time()
out_naive, cache_naive = max_pool_forward_naive(x, pool_param)
t1 = time()
out_fast, cache_fast = max_pool_forward_fast(x, pool_param)
t2 = time()

naive_fwd = t1 - t0
fast_fwd = t2 - t1
print('Testing pool_forward_fast:')
print('Naive: %fs' % naive_fwd)
print('fast: %fs' % fast_fwd)
print('speedup: %fx' % (naive_fwd / fast_fwd))
print('difference: ', rel_error(out_naive, out_fast))

t0 = time()
dx_naive = max_pool_backward_naive(dout, cache_naive)
t1 = time()
dx_fast = max_pool_backward_fast(dout, cache_fast)
t2 = time()

naive_bwd = t1 - t0
fast_bwd = t2 - t1
print('\nTesting pool_backward_fast:')
print('Naive: %fs' % naive_bwd)
print('fast: %fs' % fast_bwd)
print('speedup: %fx' % (naive_bwd / fast_bwd))
print('dx difference: ', rel_error(dx_naive, dx_fast))

# Convolutional "sandwich" layers
# Previously we introduced the concept of "sandwich" layers that combine multiple
# operations into commonly used patterns. In the file cs231n/layer_utils.py you
# will find sandwich layers that implement a few commonly used patterns for
# convolutional networks.
from cs231n.layer_utils import conv_relu_pool_forward, conv_relu_pool_backward
def _backward(self, x, cache):
    """Delegate the backward pass to the fast max-pooling implementation."""
    dx = max_pool_backward_fast(x, cache)
    return dx
# Benchmark fast vs. naive max pooling (forward and backward).
# FIX: the original used Python 2 print statements, which are a SyntaxError
# under Python 3; converted to print() calls to match the rest of the file.
# Also restored the missing "fast: %fs" line in the backward section so it
# mirrors the forward benchmark's output.
t0 = time()
out_naive, cache_naive = max_pool_forward_naive(x, pool_param)
t1 = time()
out_fast, cache_fast = max_pool_forward_fast(x, pool_param)
t2 = time()
print('Testing pool_forward_fast:')
print('Naive: %fs' % (t1 - t0))
print('fast: %fs' % (t2 - t1))
print('speedup: %fx' % ((t1 - t0) / (t2 - t1)))
print('difference: ', rel_error(out_naive, out_fast))

t0 = time()
dx_naive = max_pool_backward_naive(dout, cache_naive)
t1 = time()
dx_fast = max_pool_backward_fast(dout, cache_fast)
t2 = time()
print('\nTesting pool_backward_fast:')
print('Naive: %fs' % (t1 - t0))
print('fast: %fs' % (t2 - t1))
print('speedup: %fx' % ((t1 - t0) / (t2 - t1)))
print('dx difference: ', rel_error(dx_naive, dx_fast))

# # Convolutional "sandwich" layers
# Previously we introduced the concept of "sandwich" layers that combine multiple operations into commonly used patterns. In the file `cs231n/layer_utils.py` you will find sandwich layers that implement a few commonly used patterns for convolutional networks.

# In[11]:

from cs231n.layer_utils import conv_relu_pool_forward, conv_relu_pool_backward