Пример #1
0
def conv_relu_backward(dout, cache):
    """
    Backward pass for the conv-relu convenience layer.
    """
    conv_cache, relu_cache = cache
    da = relu_backward(dout, relu_cache)
    dx, dw, db = conv_backward_fast(da, conv_cache)
    return dx, dw, db
Пример #2
0
def affine_relu_backward(dout, cache):
    """
    Backward pass for the affine-relu convenience layer
    """
    fc_cache, relu_cache = cache
    da = relu_backward(dout, relu_cache)
    dx, dw, db = affine_backward(da, fc_cache)
    return dx, dw, db
Пример #3
0
def affine_relu_backward( dout, cache ):
	"""
	Backward pass for the affine-relu convenience layer
	"""
	fc_cache, relu_cache = cache
	da = layers.relu_backward( dout, relu_cache )
	dx, dw, db = layers.affine_backward( da, fc_cache )
	return dx, dw, db
def conv_relu_backward(dout, cache):
    """
    Backward pass for the conv-relu convenience layer.
    """
    conv_cache, relu_cache = cache
    da = relu_backward(dout, relu_cache)
    dx, dw, db = conv_backward_fast(da, conv_cache)
    return dx, dw, db
Пример #5
0
def affine_batchnorm_relu_backward(dout, cache):
    """
    Backward pass for the Affine->BatchNorm->ReLU convenience layer
    """
    fc_cache, bn_cache, relu_cache = cache
    da = relu_backward(dout, relu_cache)
    dan, dgamma, dbeta = batchnorm_backward(da, bn_cache)
    dx, dw, db = affine_backward(dan, fc_cache)
    return dx, dw, db, dgamma, dbeta
Пример #6
0
def conv_relu_pool_backward(dout, cache):
    """
    Backward pass for the conv-relu-pool convenience layer
    """
    conv_cache, relu_cache, pool_cache = cache
    ds = max_pool_backward_fast(dout, pool_cache)
    da = relu_backward(ds, relu_cache)
    dx, dw, db = conv_backward_fast(da, conv_cache)
    return dx, dw, db
def conv_relu_pool_backward(dout, cache):
    """
    Backward pass for the conv-relu-pool convenience layer
    """
    conv_cache, relu_cache, pool_cache = cache
    ds = max_pool_backward_fast(dout, pool_cache)
    da = relu_backward(ds, relu_cache)
    dx, dw, db = conv_backward_fast(da, conv_cache)
    return dx, dw, db
Пример #8
0
def affine_bn_relu_backward(dout, cache):
    """
    Backward pass for the affine-bn-relu convenience layer
    """
    fc_cache, bn_cache, relu_cache = cache
    dx = layers.relu_backward(dout, relu_cache)
    dx, dgamma, dbeta = layers.batchnorm_backward_alt(dx, bn_cache)
    dx, dw, db = layers.affine_backward(dx, fc_cache)
    return dx, dw, db, dgamma, dbeta
Пример #9
0
def combo_backward(dout, cache):
    """
    Backward pass for the affine-relu convenience layer
    """
    dgamma, dbeta = 0, 0
    fc_cache, bn_cache, relu_cache = cache

    da = relu_backward(dout, relu_cache)
    if bn_cache is not None:
        da, dgamma, dbeta = batchnorm_backward(da, bn_cache)
    dx, dw, db = affine_backward(da, fc_cache)
    return dx, dw, db, dgamma, dbeta
    0.40909091,
    0.5,
]])

# Compare your output with ours. The error should be around 5e-8
print('Testing relu_forward function:')
print('difference: ', rel_error(out, correct_out))

np.random.seed(231)
x = np.random.randn(10, 10)
dout = np.random.randn(*x.shape)

dx_num = eval_numerical_gradient_array(lambda x: relu_forward(x)[0], x, dout)

_, cache = relu_forward(x)
dx = relu_backward(dout, cache)

# The error should be around 3e-12
print('Testing relu_backward function:')
print('dx error: ', rel_error(dx_num, dx))

np.random.seed(231)
x = np.random.randn(2, 3, 4)
w = np.random.randn(12, 10)
b = np.random.randn(10)
dout = np.random.randn(2, 10)

out, cache = affine_relu_forward(x, w, b)
dx, dw, db = affine_relu_backward(dout, cache)

dx_num = eval_numerical_gradient_array(
Пример #11
0
    def loss(self, X, y=None):
        """
        Compute loss and gradient for the fully-connected net.

        Input / output: Same as TwoLayerNet above.
        """
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'

        # Set train/test mode for batchnorm params and dropout param since they
        # behave differently during training and testing.
        if self.use_dropout:
            self.dropout_param['mode'] = mode
        if self.use_batchnorm:
            for bn_param in self.bn_params:
                bn_param['mode'] = mode

        scores = None
        ############################################################################
        # TODO: Implement the forward pass for the fully-connected net, computing  #
        # the class scores for X and storing them in the scores variable.          #
        #                                                                          #
        # When using dropout, you'll need to pass self.dropout_param to each       #
        # dropout forward pass.                                                    #
        #                                                                          #
        # When using batch normalization, you'll need to pass self.bn_params[0] to #
        # the forward pass for the first batch normalization layer, pass           #
        # self.bn_params[1] to the forward pass for the second batch normalization #
        # layer, etc.                                                              #
        ############################################################################
        caches = collections.defaultdict(list)
        out_layer = X

        for i in range(self.num_layers - 1):
            n = str(i + 1)

            # (zy) The learned parameters are for BN affine transformation used
            # in training, while the running average is used for prediction.
            if self.use_batchnorm:
                out_layer, cache = affine_bn_relu_forward(
                    out_layer, self.params["W" + n], self.params["b" + n],
                    self.params["gamma" + n], self.params["beta" + n],
                    self.bn_params[i])
                caches["affine_bn_relu"].append(cache)
            else:
                out_layer, cache = layers.affine_forward(
                    out_layer, self.params["W" + n], self.params["b" + n])
                caches["affine"].append(cache)

                out_layer, cache = layers.relu_forward(out_layer)
                caches["relu"].append(cache)

            if self.use_dropout:
                out_layer, cache = layers.dropout_forward(
                    out_layer, self.dropout_param)
                caches["drop"].append(cache)

        nn = str(self.num_layers)
        scores, cache = layers.affine_forward(out_layer, self.params["W" + nn],
                                              self.params["b" + nn])
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # If test mode return early
        if mode == 'test':
            return scores

        loss, grads = 0.0, {}
        ############################################################################
        # TODO: Implement the backward pass for the fully-connected net. Store the #
        # loss in the loss variable and gradients in the grads dictionary. Compute #
        # data loss using softmax, and make sure that grads[k] holds the gradients #
        # for self.params[k]. Don't forget to add L2 regularization!               #
        #                                                                          #
        # When using batch normalization, you don't need to regularize the scale   #
        # and shift parameters.                                                    #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################
        loss, dloss = layers.softmax_loss(scores, y)
        # for regularization
        if self.reg != 0:
            for k, v in self.params.items():
                # only include the w parameters, excluding gamma, beta and b
                if k.startswith("W"):
                    loss += 0.5 * self.reg * np.sum(v**2)

        # get the gradient
        out = layers.affine_backward(dloss, cache)
        dout, grads["W" + nn], grads["b" + nn] = out
        grads["W" + nn] += self.reg * cache[1]

        for i in range(self.num_layers - 2, -1, -1):
            n = str(i + 1)

            if self.use_dropout:
                dout = layers.dropout_backward(dout, caches["drop"][i])

            if self.use_batchnorm:
                out = affine_bn_relu_backward(dout,
                                              caches["affine_bn_relu"][i])
                dout, grads["W"+n], grads["b"+n], \
                    grads["gamma"+n], grads["beta"+n] = out
                grads["W" +
                      n] += self.reg * self.params["W" + n] if self.reg else 0

            else:
                dout = layers.relu_backward(dout, caches["relu"][i])

                out = layers.affine_backward(dout, caches["affine"][i])
                dout, grads["W" + n], grads["b" + n] = out
                # need to include regularization
                grads["W" + n] += self.reg * caches["affine"][i][1]
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        return loss, grads
Пример #12
0
    def loss(self, X, y=None):
        """
        Compute loss and gradient for the fully-connected net.

        Input / output: Same as TwoLayerNet above.
        """
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'

        # Set train/test mode for batchnorm params and dropout param since they
        # behave differently during training and testing.
        """
        if self.use_dropout:
            self.dropout_param['mode']=mode
       """
        if self.use_batchnorm:
            for bn_param in self.bn_params:
                bn_param['mode'] = mode

        scores = None
        ############################################################################
        # TODO: Implement the forward pass for the fully-connected net, computing  #
        # the class scores for X and storing them in the scores variable.          #
        #                                                                          #
        # When using dropout, you'll need to pass self.dropout_param to each       #
        # dropout forward pass.                                                    #
        #                                                                          #
        # When using batch normalization, you'll need to pass self.bn_params[0] to #
        # the forward pass for the first batch normalization layer, pass           #
        # self.bn_params[1] to the forward pass for the second batch normalization #
        # layer, etc.                                                              #
        ############################################################################
        X_temp = X
        affine_Input = list()
        relu_input = list()
        batchnorm_input = list()
        dropout_input = list()
        score_tmp = None
        for i in range(self.num_layers - 1):
            tmp, affine_input_tmp = affine_forward(
                X_temp, self.params['W' + str(i + 1)],
                self.params['b' + str(i + 1)])
            if self.use_batchnorm:
                tmp, batchnorm_cache = batchnorm_forward(
                    tmp, self.params['gamma' + str(i + 1)],
                    self.params['beta' + str(i + 1)], self.bn_params[i])
                batchnorm_input.append(batchnorm_cache)
            score_tmp, relu_input_tmp = relu_forward(tmp)
            if self.use_dropout:
                score_tmp, dropout_cache = dropout_forward(
                    score_tmp, self.dropout_param)
                dropout_input.append(dropout_cache)
            affine_Input.append(affine_input_tmp)
            relu_input.append(relu_input_tmp)
            X_temp = score_tmp
        scores, last_input_tmp = affine_forward(
            score_tmp, self.params['W' + str(self.num_layers)],
            self.params['b' + str(self.num_layers)])
        affine_Input.append(last_input_tmp)
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################
        if mode == 'test':
            return scores
        loss, grads = 0.0, {}
        ############################################################################
        # TODO: Implement the backward pass for the fully-connected net. Store the #
        # loss in the loss variable and gradients in the grads dictionary. Compute #
        # data loss using softmax, and make sure that grads[k] holds the gradients #
        # for self.params[k]. Don't forget to add L2 regularization!               #
        #                                                                          #
        # When using batch normalization, you don't need to regularize the scale   #
        # and shift parameters.                                                    #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################
        num_trains = X.shape[0]
        loss, dscores = softmax_loss(scores, y)
        weight_decay_sum = 0
        for i in range(self.num_layers):
            tmp = np.sum(self.params['W' + str(i + 1)] *
                         self.params['W' + str(i + 1)])
            weight_decay_sum = weight_decay_sum + tmp

        loss = loss + 0.5 * self.reg * weight_decay_sum
        #softmax_output=np.exp(scores)/np.sum(np.exp(scores),axis=1).reshape(-1,1)
        #softmax_output[range(num_trains),list(y)]=softmax_output[range(num_trains),list(y)]-1
        dout = dscores
        for i in range(self.num_layers):
            dx, dw, db = affine_backward(dout, affine_Input[-(i + 1)])
            grads['W' +
                  str(self.num_layers - i)] = dw + self.reg * self.params[
                      'W' + str(self.num_layers - i)]
            grads['b' + str(self.num_layers - i)] = db
            if self.use_dropout and i != self.num_layers - 1:
                dx = dropout_backward(dx, dropout_input[-(i + 1)])
            if i != self.num_layers - 1:
                dout = relu_backward(dx, relu_input[-(i + 1)])
            if i != self.num_layers - 1 and self.use_batchnorm:
                dout, dgamma, dbeta = batchnorm_backward(
                    dout, batchnorm_input[-(i + 1)])
                grads['gamma' + str(self.num_layers - i - 1)] = dgamma
                grads['beta' + str(self.num_layers - i - 1)] = dbeta

        return loss, grads
Пример #13
0
def two_layer_net(X, model, y=None, reg=0.0):
  """
  Compute the loss and gradients for a two layer fully connected neural network.
  The net has an input dimension of D, a hidden layer dimension of H, and
  performs classification over C classes. We use a softmax loss function and L2
  regularization the the weight matrices. The two layer net should use a ReLU
  nonlinearity after the first affine layer.

  The two layer net has the following architecture:

  input - fully connected layer - ReLU - fully connected layer - softmax

  The outputs of the second fully-connected layer are the scores for each
  class.

  Inputs:
  - X: Input data of shape (N, D). Each X[i] is a training sample.
  - model: Dictionary mapping parameter names to arrays of parameter values.
    It should contain the following:
    - W1: First layer weights; has shape (D, H)
    - b1: First layer biases; has shape (H,)
    - W2: Second layer weights; has shape (H, C)
    - b2: Second layer biases; has shape (C,)
  - y: Vector of training labels. y[i] is the label for X[i], and each y[i] is
    an integer in the range 0 <= y[i] < C. This parameter is optional; if it
    is not passed then we only return scores, and if it is passed then we
    instead return the loss and gradients.
  - reg: Regularization strength.

  Returns:
  If y not is passed, return a matrix scores of shape (N, C) where scores[i, c]
  is the score for class c on input X[i].

  If y is not passed, instead return a tuple of:
  - loss: Loss (data loss and regularization loss) for this batch of training
    samples.
  - grads: Dictionary mapping parameter names to gradients of those parameters
    with respect to the loss function. This should have the same keys as model.
  """

  # unpack variables from the model dictionary
  W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
  N, D = X.shape

  # compute the forward pass
  scores = None
  #############################################################################
  # TODO: Perform the forward pass, computing the class scores for the input. #
  # Store the result in the scores variable, which should be an array of      #
  # shape (N, C).                                                             #
  #############################################################################
  # relu = lambda x: np.maximum(x,0)
  # H, C = W2.shape
  # scores = np.zeros((N,C))
  # layer1 = np.maximum(np.dot(X,W1) + b1,0)
  # scores = np.dot(layer1,W2) + b2
  ## above is the test implementation
  ## NOW, using cs231n/layers.py
  ## NOTICE define layer0 = X
  # then behaviour is 'functional' layer(n+1) = f(layer(n) | parameters)
  from cs231n.layers import affine_forward, relu_forward, softmax_loss
  from cs231n.layers import affine_backward, relu_backward

  layer1, cache1 = affine_forward(X, W1, b1)
  layer2, cache2 = relu_forward(layer1)
  layer3, cache3 = affine_forward(layer2, W2, b2)

  scores = layer3
  #############################################################################
  #                              END OF YOUR CODE                             #
  #############################################################################
  
  # If the targets are not given then jump out, we're done
  if y is None:
    return scores

  # compute the loss
  loss = None
  #############################################################################
  # TODO: Finish the forward pass, and compute the loss. This should include  #
  # both the data loss and L2 regularization for W1 and W2. Store the result  #
  # in the variable loss, which should be a scalar. Use the Softmax           #
  # classifier loss. So that your results match ours, multiply the            #
  # regularization loss by 0.5                                                #
  #############################################################################
  # rows   = np.sum(np.exp(scores), axis=1)
  # layer4 = np.mean(-layer3[range(N), y] + np.log(rows))
  # loss   = layer4 + 0.5 * reg * (np.sum(W1 * W1) + np.sum(W2 * W2))
  # 
  loss, dx = softmax_loss(scores, y)
  loss += 0.5 * reg * np.sum(W1*W1) + 0.5 * reg * np.sum(W2 * W2)
  #############################################################################
  #                              END OF YOUR CODE                             #
  #############################################################################

  # compute the gradients
  grads = {}
  #############################################################################
  # TODO: Compute the backward pass, computing the derivatives of the weights #
  # and biases. Store the results in the grads dictionary. For example,       #
  # grads['W1'] should store the gradient on W1, and be a matrix of same size #
  #############################################################################
  dlayer2, grads['W2'], grads['b2'] = affine_backward(dx, cache3)
  dlayer1                           = relu_backward(dlayer2, cache2)
  dLayer0, grads['W1'], grads['b1'] = affine_backward(dlayer1, cache1)

  #gradients need to have regularization term
  grads['W2'] += reg * W2
  grads['W1'] += reg * W1
  #############################################################################
  #                              END OF YOUR CODE                             #
  #############################################################################

  return loss, grads
Пример #14
0
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        If y is None, then run a test-time forward pass of the model and return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.

        If y is not None, then run a training-time forward and backward pass and
        return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping parameter
          names to gradients of the loss with respect to those parameters.
        """
        scores = None
        ############################################################################
        # TODO: Implement the forward pass for the two-layer net, computing the    #
        # class scores for X and storing them in the scores variable.              #
        ############################################################################
        out_affine1, cache_affine1 = layers.affine_forward(
            X, self.params["W1"], self.params["b1"])
        out_relu1, cache_relu1 = layers.relu_forward(out_affine1)
        out_affine2, cache_affine2 = layers.affine_forward(
            out_relu1, self.params["W2"], self.params["b2"])
        # no need to compute SVM/softmax loss, just give the argmax result When
        # we are in prediction.
        scores = out_affine2
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # If y is None then we are in test mode so just return scores
        if y is None:
            return scores

        loss, grads = 0, {}
        ############################################################################
        # TODO: Implement the backward pass for the two-layer net. Store the loss  #
        # in the loss variable and gradients in the grads dictionary. Compute data #
        # loss using softmax, and make sure that grads[k] holds the gradients for  #
        # self.params[k]. Don't forget to add L2 regularization!                   #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################
        # in training, compute the loss and do backprop.
        loss, dloss = layers.softmax_loss(scores, y)
        # need to add regularization here...
        loss += 0.5 * self.reg * (np.sum(self.params["W1"]**2) +
                                  np.sum(self.params["W2"]**2))
        dout_affine2 = layers.affine_backward(dloss, cache_affine2)
        grads["W2"] = dout_affine2[1] + self.reg * self.params["W2"]
        grads["b2"] = dout_affine2[2]
        dout_relu1 = layers.relu_backward(dout_affine2[0], cache_relu1)
        dout_affine1 = layers.affine_backward(dout_relu1, cache_affine1)
        grads["W1"] = dout_affine1[1] + self.reg * self.params["W1"]
        grads["b1"] = dout_affine1[2]
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        return loss, grads
Пример #15
0
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        If y is None, then run a test-time forward pass of the model and return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.

        If y is not None, then run a training-time forward and backward pass and
        return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping parameter
          names to gradients of the loss with respect to those parameters.
        """
        scores = None
        ############################################################################
        # TODO: Implement the forward pass for the two-layer net, computing the    #
        # class scores for X and storing them in the scores variable.              #
        ############################################################################
        hidden1_out, h1_cache = affine_forward(X, self.params['W1'],
                                               self.params['b1'])
        relu_out, relu_cache = relu_forward(hidden1_out)
        scores, h2_cache = affine_forward(relu_out, self.params['W2'],
                                          self.params['b2'])
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # If y is None then we are in test mode so just return scores
        if y is None:
            return scores

        loss, grads = 0, {}
        ############################################################################
        # TODO: Implement the backward pass for the two-layer net. Store the loss  #
        # in the loss variable and gradients in the grads dictionary. Compute data #
        # loss using softmax, and make sure that grads[k] holds the gradients for  #
        # self.params[k]. Don't forget to add L2 regularization!                   #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################
        """
        X_reshape=np.reshape(X,(X.shape[0],-1))
        num_trains=X.shape[0]
        loss,_=softmax_loss(scores,y)
        loss=loss+self.reg*0.5*(np.sum(self.params['W2']*self.params['W2'])+np.sum(self.params['W1']*self.params['W1']))
        softmax_output=np.exp(scores)/np.sum(np.exp(scores),axis=1).reshape(-1,1)
        softmax_output[range(num_trains),list(y)]=softmax_output[range(num_trains),list(y)]-1
        grads['b2']=np.zeros_like(self.params['b2'])
        grads['W2']=np.zeros_like(self.params['W2'])
        grads['b1']=np.zeros_like(self.params['b1'])
        grads['W1']=np.zeros_like(self.params['W1'])
        grads['b2']=np.sum(softmax_output,axis=0)
        grads['W2']=np.dot(relu_out.T,softmax_output)
        grads_b1_tmp=np.dot(softmax_output,self.params['W2'].T)
        tmp=(relu_out>0)*grads_b1_tmp
        grads['b1']=np.sum(tmp,axis=0)
        grads['W1']=np.dot(X_reshape.T,grads_b1_tmp)
        grads['W1']=grads['W1']/num_trains+self.reg*self.params['W1']
        grads['b1']=grads['b1']/num_trains
        grads['W2']=grads['W2']/num_trains+self.reg*self.params['W2']
        grads['b2']=grads['b2']/num_trains
        """
        num_trains = X.shape[0]
        loss, dscore = softmax_loss(scores, y)
        loss = loss + self.reg * 0.5 * (
            np.sum(self.params['W2'] * self.params['W2']) +
            np.sum(self.params['W1'] * self.params['W1']))
        grads_h2, grads_w2, grads_b2 = affine_backward(dout=dscore,
                                                       cache=h2_cache)
        grads_relu = relu_backward(grads_h2, relu_cache)
        grads_h1, grads_w1, grads_b1 = affine_backward(grads_relu, h1_cache)
        grads['W1'] = grads_w1 + self.reg * self.params['W1']
        grads['W2'] = grads_w2 + self.reg * self.params['W2']
        grads['b1'] = grads_b1
        grads['b2'] = grads_b2
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################
        return loss, grads
Пример #16
0
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        If y is None, then run a test-time forward pass of the model and return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.

        If y is not None, then run a training-time forward and backward pass and
        return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping parameter
          names to gradients of the loss with respect to those parameters.
        """
        scores = None
        ############################################################################
        # TODO: Implement the forward pass for the two-layer net, computing the    #
        # class scores for X and storing them in the scores variable.              #
        ############################################################################

        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        N = X.shape[0]
        D = np.prod(X.shape[1:])

        X_ = X.reshape(N, D)
        A, fc1_cache = affine_forward(X_, W1, b1)
        R, relu_cache = relu_forward(A)
        scores, fc2_cache = affine_forward(R, W2, b2)

        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # If y is None then we are in test mode so just return scores
        if y is None:
            return scores

        loss, grads = 0, {}
        ############################################################################
        # TODO: Implement the backward pass for the two-layer net. Store the loss  #
        # in the loss variable and gradients in the grads dictionary. Compute data #
        # loss using softmax, and make sure that grads[k] holds the gradients for  #
        # self.params[k]. Don't forget to add L2 regularization!                   #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################

        loss, dscores = softmax_loss(scores, y)
        dR, dW2, db2 = affine_backward(dscores, fc2_cache)
        dA = relu_backward(dR, relu_cache)
        dX, dW1, db1 = affine_backward(dA, fc1_cache)

        loss += 0.5 * self.reg * (np.sum(W1 * W1) + np.sum(W2 * W2))
        dW2 += self.reg * W2
        dW1 += self.reg * W1

        grads = {'W1': dW1, 'b1': db1, 'W2': dW2, 'b2': db2}

        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        return loss, grads
Пример #17
0
    def loss(self, X, y=None):
        """
        Compute loss and gradient for the fully-connected net.

        Input / output: Same as TwoLayerNet above.
        """
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'

        # Set train/test mode for batchnorm params and dropout param since they
        # behave differently during training and testing.
        if self.use_dropout:
            self.dropout_param['mode'] = mode
        if self.normalization == 'batchnorm':
            for bn_param in self.bn_params:
                bn_param['mode'] = mode
        scores = None
        ############################################################################
        # TODO: Implement the forward pass for the fully-connected net, computing  #
        # the class scores for X and storing them in the scores variable.          #
        #                                                                          #
        # When using dropout, you'll need to pass self.dropout_param to each       #
        # dropout forward pass.                                                    #
        #                                                                          #
        # When using batch normalization, you'll need to pass self.bn_params[0] to #
        # the forward pass for the first batch normalization layer, pass           #
        # self.bn_params[1] to the forward pass for the second batch normalization #
        # layer, etc.                                                              #
        ############################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        arg, caches = X, []

        for i in range(1, self.num_layers + 1):
            cache = {}

            W = self.params[f"W{i}"]
            b = self.params[f"b{i}"]
            arg, cache['fc_cache'] = affine_forward(arg, W, b)

            if i != self.num_layers and self.normalization:
                gamma = self.params[f"gamma{i}"]
                beta = self.params[f"beta{i}"]

                normalize_forward = batchnorm_forward if self.normalization is 'batchnorm' else layernorm_forward
                arg, cache['bn_cache'] = normalize_forward(arg, gamma, beta, self.bn_params[i-1])

            arg, cache['relu_cache'] = relu_forward(arg)

            if self.use_dropout:
                arg, cache['dropout_cache'] = dropout_forward(arg, self.dropout_param)

            caches.append(cache)

        scores = arg

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # If test mode return early
        if mode == 'test':
            return scores

        loss, grads = 0.0, {}
        ############################################################################
        # TODO: Implement the backward pass for the fully-connected net. Store the #
        # loss in the loss variable and gradients in the grads dictionary. Compute #
        # data loss using softmax, and make sure that grads[k] holds the gradients #
        # for self.params[k]. Don't forget to add L2 regularization!               #
        #                                                                          #
        # When using batch/layer normalization, you don't need to regularize the scale   #
        # and shift parameters.                                                    #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        loss, dout = softmax_loss(scores, y)

        for i in range(self.num_layers, 0, -1):
            W = self.params[f"W{i}"]
            cache = caches[i-1]

            if self.use_dropout:
                dout = dropout_backward(dout, cache['dropout_cache'])

            da = relu_backward(dout, cache['relu_cache'])

            if i != self.num_layers and self.normalization:
                normalize_backward = batchnorm_backward if self.normalization is 'batchnorm' else layernorm_backward
                da, dgamma, dbeta = batchnorm_backward(da, cache['bn_cache'])
                grads[f"gamma{i}"] = dgamma
                grads[f"beta{i}"] = dbeta

            dout, dw, db = affine_backward(da, cache['fc_cache'])

            grads[f"W{i}"] = dw + self.reg * W
            grads[f"b{i}"] = db

            loss += 0.5 * self.reg * np.sum(W * W)

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        return loss, grads