Example #1
def test_sandwich_layers(samples=random.randrange(1, 10)):
    for _ in range(samples):
        x = np.random.randn(2, 3, 4)
        w = np.random.randn(12, 10)
        b = np.random.randn(10)

        dout = np.random.randn(2, 10)
        # upstream gradient that will be backpropagated into this layer

        out, cache = affine_relu_forward(x, w, b)
        dx, dw, db = affine_relu_backward(dout, cache)

        dx_num = eval_numerical_gradient_array(
            lambda x: affine_relu_forward(x, w, b)[0], x, dout)
        dw_num = eval_numerical_gradient_array(
            lambda w: affine_relu_forward(x, w, b)[0], w, dout)
        db_num = eval_numerical_gradient_array(
            lambda b: affine_relu_forward(x, w, b)[0], b, dout)

        assert rel_error(dx_num, dx) < 5e-7
        assert rel_error(dw_num, dw) < 5e-7
        assert rel_error(db_num, db) < 5e-7

        assert dx.shape == dx_num.shape
        assert dw.shape == dw_num.shape
        assert db.shape == db_num.shape
        assert out.shape == dout.shape
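The test above (and most of the examples below) leans on two helpers from the CS231n starter code, rel_error and eval_numerical_gradient_array, which the original notebooks import from cs231n.gradient_check. For context, here is a sketch of how those helpers are typically defined; the bodies are paraphrased from the assignment starter code and should be treated as an assumption of these examples:

import numpy as np

def rel_error(x, y):
    """Max relative error between two arrays, guarded against tiny denominators."""
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))

def eval_numerical_gradient_array(f, x, df, h=1e-5):
    """Centered-difference gradient of f at x, chained with the upstream gradient df."""
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        oldval = x[ix]
        x[ix] = oldval + h
        pos = f(x).copy()          # f evaluated with x[ix] nudged up
        x[ix] = oldval - h
        neg = f(x).copy()          # f evaluated with x[ix] nudged down
        x[ix] = oldval             # restore the entry
        grad[ix] = np.sum((pos - neg) * df) / (2 * h)
        it.iternext()
    return grad

The numerical gradient perturbs one entry of x at a time and contracts the centered difference against the upstream gradient df, which is why the tests pass dout as the third argument.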
Example #2
def test_connect_layer():
    x = np.random.randn(2, 12)
    w = np.random.randn(12, 10)
    b = np.random.randn(10)
    dout = np.random.randn(2, 10)

    out, cache = affine_relu_forward(x, w, b)
    dx, dw, db = affine_relu_backward(dout, cache)

    dx_num = eval_numerical_gradient_array(
        lambda x: affine_relu_forward(x, w, b)[0].asnumpy(), x, dout)
    dw_num = eval_numerical_gradient_array(
        lambda w: affine_relu_forward(x, w, b)[0].asnumpy(), w, dout)
    db_num = eval_numerical_gradient_array(
        lambda b: affine_relu_forward(x, w, b)[0].asnumpy(), b, dout)

    print('Testing affine_relu_forward:')
    print('dx error: ', rel_error(dx_num, dx.asnumpy()))
    print('dw error: ', rel_error(dw_num, dw.asnumpy()))
    print('db error: ', rel_error(db_num, db.asnumpy()))
Example #3
def train_loss(X, y, W1, W2, b1, b2):
    l1 = affine_relu_forward(X, W1, b1)
    l2 = affine_forward(l1, W2, b2)
    scores = l2

    if y is None:
        return scores

    # [TODO]: softmax is not supported yet
    # loss, d_scores = softmax_loss(scores, y)
    loss = svm_loss(scores, y)
    loss_with_reg = loss + np.sum(W1**2) * 0.5 * self.reg + np.sum(
        W2**2) * 0.5 * self.reg
    return loss_with_reg
Example #4
        def train_loss(X, y, W1, W2, b1, b2):
            l1 = affine_relu_forward(X, W1, b1)
            l2 = affine_forward(l1, W2, b2)
            scores = l2

            if y is None:
                return scores

            #[TODO]: softmax is not supported yet
            # loss, d_scores = softmax_loss(scores, y)
            loss = svm_loss(scores, y)
            loss_with_reg = loss + np.sum(W1**2) * 0.5 * self.reg + np.sum(
                W2**2) * 0.5 * self.reg

            return loss_with_reg
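Both versions of train_loss above call svm_loss on the raw scores. In the standard CS231n layers module this is the vectorized multiclass hinge loss; a sketch of the usual implementation follows. Note that the canonical version returns (loss, dx), while the snippets above bind only a single value, which suggests an autograd-style port where the gradient is not returned explicitly.

import numpy as np

def svm_loss(x, y):
    """Multiclass SVM (hinge) loss and its gradient with respect to the scores.

    x: scores, shape (N, C); y: integer labels, shape (N,).
    """
    N = x.shape[0]
    correct_class_scores = x[np.arange(N), y]
    margins = np.maximum(0, x - correct_class_scores[:, np.newaxis] + 1.0)
    margins[np.arange(N), y] = 0          # no margin term for the correct class
    loss = np.sum(margins) / N

    num_pos = np.sum(margins > 0, axis=1)  # how many classes violated the margin
    dx = np.zeros_like(x)
    dx[margins > 0] = 1
    dx[np.arange(N), y] -= num_pos
    dx /= N
    return loss, dx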
Example #5
def test_sandwich_layers(samples=random.randrange(1, 10)):
    for _ in range(samples):
        x = np.random.randn(2, 3, 4)
        w = np.random.randn(12, 10)
        b = np.random.randn(10)

        dout = np.random.randn(2, 10)
        # upstream gradient that will be backpropagated into this layer

        out, cache = affine_relu_forward(x, w, b)
        dx, dw, db = affine_relu_backward(dout, cache)

        dx_num = eval_numerical_gradient_array(lambda x: affine_relu_forward(x, w, b)[0], x, dout)
        dw_num = eval_numerical_gradient_array(lambda w: affine_relu_forward(x, w, b)[0], w, dout)
        db_num = eval_numerical_gradient_array(lambda b: affine_relu_forward(x, w, b)[0], b, dout)

        assert rel_error(dx_num, dx) < 5e-7
        assert rel_error(dw_num, dw) < 5e-7
        assert rel_error(db_num, db) < 5e-7

        assert dx.shape == dx_num.shape
        assert dw.shape == dw_num.shape
        assert db.shape == db_num.shape
        assert out.shape == dout.shape
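One subtlety shared by both copies of this test: the samples=random.randrange(1, 10) default is evaluated once, when the function is defined, not on every call, so each test run reuses the same sample count. If a fresh count per call is wanted, a common pattern is the sketch below (a suggestion, not part of the original code):

import random

def test_sandwich_layers(samples=None):
    # Draw the sample count at call time instead of at definition time.
    if samples is None:
        samples = random.randrange(1, 10)
    for _ in range(samples):
        ...  # body unchanged from the examples above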
Example #6
_, cache = relu_forward(x)
dx = relu_backward(dout, cache)

# The error should be around 1e-12
print('Testing relu_backward function:')
print('dx error: ', rel_error(dx_num, dx))

from cs231n.layer_utils import affine_relu_forward, affine_relu_backward

x = np.random.randn(2, 3, 4)
w = np.random.randn(12, 10)
b = np.random.randn(10)
dout = np.random.randn(2, 10)

out, cache = affine_relu_forward(x, w, b)
dx, dw, db = affine_relu_backward(dout, cache)

dx_num = eval_numerical_gradient_array(
    lambda x: affine_relu_forward(x, w, b)[0], x, dout)
dw_num = eval_numerical_gradient_array(
    lambda w: affine_relu_forward(x, w, b)[0], w, dout)
db_num = eval_numerical_gradient_array(
    lambda b: affine_relu_forward(x, w, b)[0], b, dout)

print('Testing affine_relu_forward:')
print('dx error: ', rel_error(dx_num, dx))
print('dw error: ', rel_error(dw_num, dw))
print('db error: ', rel_error(db_num, db))

num_classes, num_inputs = 10, 50
Example #7
_, cache = relu_forward(x)
dx = relu_backward(dout, cache)

# The error should be around 1e-12
print('Testing relu_backward function:')
print('dx error: ', rel_error(dx_num, dx))

#%% Test convenience layers
from cs231n.layer_utils import affine_relu_forward, affine_relu_backward

x = np.random.randn(2, 3, 4)
w = np.random.randn(12, 10)
b = np.random.randn(10)
dout = np.random.randn(2, 10)

out, cache = affine_relu_forward(x, w, b)
dx, dw, db = affine_relu_backward(dout, cache)

dx_num = eval_numerical_gradient_array(lambda x: affine_relu_forward(x, w, b)[0], x, dout)
dw_num = eval_numerical_gradient_array(lambda w: affine_relu_forward(x, w, b)[0], w, dout)
db_num = eval_numerical_gradient_array(lambda b: affine_relu_forward(x, w, b)[0], b, dout)

print('Testing affine_relu_forward:')
print('dx error: ', rel_error(dx_num, dx))
print('dw error: ', rel_error(dw_num, dw))
print('db error: ', rel_error(db_num, db))

#%% Loss layers
num_classes, num_inputs = 10, 50
x = 0.001 * np.random.randn(num_inputs, num_classes)
y = np.random.randint(num_classes, size=num_inputs)
# # "Sandwich" layers
# There are some common patterns of layers that are frequently used in neural nets. For example, affine layers are frequently followed by a ReLU nonlinearity. To make these common patterns easy, we define several convenience layers in the file `cs231n/layer_utils.py`.
# 
# For now take a look at the `affine_relu_forward` and `affine_relu_backward` functions, and run the following to numerically gradient check the backward pass:

# In[ ]:

from cs231n.layer_utils import affine_relu_forward, affine_relu_backward

x = np.random.randn(2, 3, 4)
w = np.random.randn(12, 10)
b = np.random.randn(10)
dout = np.random.randn(2, 10)

out, cache = affine_relu_forward(x, w, b)
dx, dw, db = affine_relu_backward(dout, cache)

dx_num = eval_numerical_gradient_array(lambda x: affine_relu_forward(x, w, b)[0], x, dout)
dw_num = eval_numerical_gradient_array(lambda w: affine_relu_forward(x, w, b)[0], w, dout)
db_num = eval_numerical_gradient_array(lambda b: affine_relu_forward(x, w, b)[0], b, dout)

print('Testing affine_relu_forward:')
print('dx error: ', rel_error(dx_num, dx))
print('dw error: ', rel_error(dw_num, dw))
print('db error: ', rel_error(db_num, db))


# # Loss layers: Softmax and SVM
# You implemented these loss functions in the last assignment, so we'll give them to you for free here. You should still make sure you understand how they work by looking at the implementations in `cs231n/layers.py`.
# 
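The "sandwich" layers described in the comments above are thin wrappers that chain the primitive layers and stitch their caches together. The convenience functions in cs231n/layer_utils.py are essentially the following (paraphrased from the assignment starter code):

from cs231n.layers import affine_forward, affine_backward, relu_forward, relu_backward

def affine_relu_forward(x, w, b):
    """Affine transform followed by a ReLU; returns the output and a combined cache."""
    a, fc_cache = affine_forward(x, w, b)
    out, relu_cache = relu_forward(a)
    cache = (fc_cache, relu_cache)
    return out, cache

def affine_relu_backward(dout, cache):
    """Backward pass for the affine-ReLU sandwich."""
    fc_cache, relu_cache = cache
    da = relu_backward(dout, relu_cache)
    dx, dw, db = affine_backward(da, fc_cache)
    return dx, dw, db

Keeping the two sub-caches in one tuple is what lets affine_relu_backward unpack them and run the two backward passes in reverse order.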
Example #9
# # "Sandwich" layers
# There are some common patterns of layers that are frequently used in neural nets. For example, affine layers are frequently followed by a ReLU nonlinearity. To make these common patterns easy, we define several convenience layers in the file `cs231n/layer_utils.py`.
#
# For now take a look at the `affine_relu_forward` and `affine_relu_backward` functions, and run the following to numerically gradient check the backward pass:

# In[ ]:

from cs231n.layer_utils import affine_relu_forward, affine_relu_backward
np.random.seed(231)
x = np.random.randn(2, 3, 4)
w = np.random.randn(12, 10)
b = np.random.randn(10)
dout = np.random.randn(2, 10)

out, cache = affine_relu_forward(x, w, b)
dx, dw, db = affine_relu_backward(dout, cache)

# dx_num = eval_numerical_gradient_array(lambda x: affine_relu_forward(x, w, b)[0], x, dout)
# dw_num = eval_numerical_gradient_array(lambda w: affine_relu_forward(x, w, b)[0], w, dout)
# db_num = eval_numerical_gradient_array(lambda b: affine_relu_forward(x, w, b)[0], b, dout)
#
# # print('Testing affine_relu_forward:')
# print('Testing affine_relu_backward:')
# print('dx error: ', rel_error(dx_num, dx))
# print('dw error: ', rel_error(dw_num, dw))
# print('db error: ', rel_error(db_num, db))

# # Loss layers: Softmax and SVM
# You implemented these loss functions in the last assignment, so we'll give them to you for free here. You should still make sure you understand how they work by looking at the implementations in `cs231n/layers.py`.
#
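The "Loss layers" note above refers to softmax_loss from cs231n/layers.py, which the network examples below all call. A sketch of the numerically stable implementation it typically uses, returning both the data loss and the gradient with respect to the scores:

import numpy as np

def softmax_loss(x, y):
    """Softmax (cross-entropy) loss and gradient with respect to the scores x."""
    shifted = x - np.max(x, axis=1, keepdims=True)   # shift for numerical stability
    log_sum_exp = np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))
    log_probs = shifted - log_sum_exp
    probs = np.exp(log_probs)

    N = x.shape[0]
    loss = -np.sum(log_probs[np.arange(N), y]) / N
    dx = probs.copy()
    dx[np.arange(N), y] -= 1                         # subtract 1 at the true class
    dx /= N
    return loss, dx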
Example #10
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        If y is None, then run a test-time forward pass of the model and return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.

        If y is not None, then run a training-time forward and backward pass and
        return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping parameter
          names to gradients of the loss with respect to those parameters.
        """
        scores = None
        #######################################################################
        # TODO: Implement the forward pass for the two-layer net, computing the
        # class scores for X and storing them in the scores variable.
        #######################################################################
        W1 = self.params["W1"]
        b1 = self.params["b1"]
        W2 = self.params["W2"]
        b2 = self.params["b2"]

        N = X.shape[0]
        C = W2.shape[1]

        scores = np.zeros((N, C))

        X_hidden, cache1 = affine_relu_forward(X, W1, b1)
        scores, cache2 = affine_forward(X_hidden, W2, b2)

        #######################################################################
        #                             END OF YOUR CODE                        #
        #######################################################################

        # If y is None then we are in test mode so just return scores
        if y is None:
            return scores

        loss, grads = 0, {}
        #######################################################################
        # TODO: Implement the backward pass for the two-layer net. Store the
        # loss in the loss variable and gradients in the grads dictionary.
        # Compute data loss using softmax, and make sure that grads[k]
        # holds the gradients for self.params[k]. Don't forget to add L2
        # regularization!
        #
        # NOTE: To ensure that your implementation matches ours and you pass
        # the automated tests, make sure that your L2 regularization includes a
        # factor of 0.5 to simplify the expression for the gradient.
        #######################################################################

        loss, dscores = softmax_loss(scores, y)
        loss += 0.5 * self.reg * (np.sum(W1 * W1) + np.sum(W2 * W2))

        dx_hidden, dw2, db2 = affine_backward(dscores, cache2)
        grads["W2"] = dw2 + self.reg * W2
        grads["b2"] = db2

        dx, dw1, db1 = affine_relu_backward(dx_hidden, cache1)
        grads["W1"] = dw1 + self.reg * W1
        grads["b1"] = db1
        #######################################################################
        #                             END OF YOUR CODE                        #
        #######################################################################

        return loss, grads
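The NOTE about the 0.5 factor is worth spelling out: with loss = data_loss + 0.5 * self.reg * (sum(W1*W1) + sum(W2*W2)), the gradient of the regularization term is simply self.reg * W, which is exactly what the code above adds to dw1 and dw2. A quick numerical sanity check of that identity (illustration only, independent of the class):

import numpy as np

reg = 0.1
W = np.random.randn(4, 3)
reg_loss = lambda M: 0.5 * reg * np.sum(M * M)
analytic = reg * W                      # d/dW [0.5 * reg * ||W||^2]

h = 1e-6
numeric = np.zeros_like(W)
for i in range(W.shape[0]):
    for j in range(W.shape[1]):
        Wp, Wm = W.copy(), W.copy()
        Wp[i, j] += h
        Wm[i, j] -= h
        numeric[i, j] = (reg_loss(Wp) - reg_loss(Wm)) / (2 * h)

print(np.max(np.abs(analytic - numeric)))   # tiny; dominated by floating-point roundoff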
Example #11
    def loss(self, X, y=None):
        """
        Compute loss and gradient for the fully-connected net.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        If y is None, then run a test-time forward pass of the model and return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.

        If y is not None, then run a training-time forward and backward pass and
        return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping parameter
          names to gradients of the loss with respect to those parameters.
        """
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'

        # Set train/test mode for batchnorm params and dropout param since they
        # behave differently during training and testing.
        if self.dropout_param is not None:
            self.dropout_param['mode'] = mode
        if self.use_batchnorm:
            for bn_param in self.bn_params:
                bn_param['mode'] = mode

        scores = None
        #######################################################################
        # TODO: Implement the forward pass for the fully-connected net,
        # computing the class scores for X and storing them in the scores
        # variable.
        #
        # When using dropout, you'll need to pass self.dropout_param to each
        # dropout forward pass.
        #
        # When using batch normalization, you'll need to pass self.bn_params[0]
        # to the forward pass for the first batch normalization layer,
        # pass self.bn_params[1] to the forward pass for the second batch
        # normalization layer, etc.
        #######################################################################
        IN = X

        caches = {}
        if self.use_dropout:
            dropout_caches = {}

        for l in range(self.num_layers - 1):
            W = self.params["W{}".format(l + 1)]
            b = self.params["b{}".format(l + 1)]

            if self.use_batchnorm:
                gamma = self.params["gamma{}".format(l + 1)]
                beta = self.params["beta{}".format(l + 1)]
                IN, cache = affine_batchnorm_relu_forward(
                    IN, W, b, gamma, beta, self.bn_params[l])
            else:
                IN, cache = affine_relu_forward(IN, W, b)

            caches[l] = cache

            if self.use_dropout:
                IN, d_cache = dropout_forward(IN, self.dropout_param)
                dropout_caches[l] = d_cache

        # forward pass: last affine layer
        num_last = self.num_layers
        name_W_last = "W{}".format(num_last)
        name_b_last = "b{}".format(num_last)
        W_last = self.params[name_W_last]
        b_last = self.params[name_b_last]

        scores, cache_last = affine_forward(IN, W_last, b_last)

        #######################################################################
        #                             END OF YOUR CODE                        #
        #######################################################################

        # If test mode return early
        if mode == 'test':
            return scores

        loss, grads = 0.0, {}
        #######################################################################
        # TODO: Implement the backward pass for the fully-connected net.
        # Store the loss in the loss variable and gradients in the grads
        # dictionary. Compute data loss using softmax, and make sure that
        # grads[k] holds the gradients for self.params[k]. Don't forget to add
        # L2 regularization!
        #
        # When using batch normalization, you don't need to regularize the
        # scale and shift parameters.
        #
        # NOTE: To ensure that your implementation matches ours and you pass
        # the automated tests, make sure that your L2 regularization includes a
        # factor of 0.5 to simplify the expression for the gradient.
        #######################################################################

        # loss
        loss, dscores = softmax_loss(scores, y)

        # regularization loss
        for l in range(self.num_layers):
            W = self.params["W{}".format(l + 1)]
            loss += 0.5 * self.reg * np.sum(W * W)

        # backprop through last affine layer
        dx, dw, db = affine_backward(dscores, cache_last)
        grads[name_W_last] = dw + self.reg * W_last
        grads[name_b_last] = db

        # backprop through affine-batchnorm-relu layers
        for l in reversed(range(self.num_layers - 1)):
            name_W = "W{}".format(l + 1)
            name_b = "b{}".format(l + 1)

            if self.use_dropout:
                dx = dropout_backward(dx, dropout_caches[l])

            if self.use_batchnorm:
                dx, dw, db, dgamma, dbeta = affine_batchnorm_relu_backward(
                    dx, caches[l])
                grads["gamma{}".format(l + 1)] = dgamma
                grads["beta{}".format(l + 1)] = dbeta
            else:
                dx, dw, db = affine_relu_backward(dx, caches[l])
            grads[name_W] = dw + self.reg * self.params[name_W]
            grads[name_b] = db

        #######################################################################
        #                             END OF YOUR CODE                        #
        #######################################################################

        return loss, grads
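affine_batchnorm_relu_forward and affine_batchnorm_relu_backward are not part of the stock layer_utils.py; they are helper sandwiches defined by the author of this example. A plausible sketch of how such helpers are usually composed from the standard layers, following the same pattern as affine_relu_forward (an assumption about this particular repo):

from cs231n.layers import (affine_forward, affine_backward,
                           batchnorm_forward, batchnorm_backward,
                           relu_forward, relu_backward)

def affine_batchnorm_relu_forward(x, w, b, gamma, beta, bn_param):
    """affine -> batchnorm -> ReLU, returning the output and a combined cache."""
    a, fc_cache = affine_forward(x, w, b)
    bn, bn_cache = batchnorm_forward(a, gamma, beta, bn_param)
    out, relu_cache = relu_forward(bn)
    return out, (fc_cache, bn_cache, relu_cache)

def affine_batchnorm_relu_backward(dout, cache):
    """Backward pass matching the forward composition above."""
    fc_cache, bn_cache, relu_cache = cache
    dbn = relu_backward(dout, relu_cache)
    da, dgamma, dbeta = batchnorm_backward(dbn, bn_cache)
    dx, dw, db = affine_backward(da, fc_cache)
    return dx, dw, db, dgamma, dbeta

The return order (dx, dw, db, dgamma, dbeta) matches how the example above unpacks it.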
Example #12
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        If y is None, then run a test-time forward pass of the model and return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.

        If y is not None, then run a training-time forward and backward pass and
        return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping parameter
          names to gradients of the loss with respect to those parameters.
        """
        scores = None
        ############################################################################
        # TODO: Implement the forward pass for the two-layer net, computing the    #
        # class scores for X and storing them in the scores variable.              #
        ############################################################################
        out_1, cache_1 = layer_utils.affine_relu_forward(
            X, self.params['W1'], self.params['b1'])
        out_2, cache_2 = layers.affine_forward(
            out_1, self.params['W2'], self.params['b2'])
        scores = out_2
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # If y is None then we are in test mode so just return scores
        if y is None:
            return scores

        loss, grads = 0, {}
        ############################################################################
        # TODO: Implement the backward pass for the two-layer net. Store the loss  #
        # in the loss variable and gradients in the grads dictionary. Compute data #
        # loss using softmax, and make sure that grads[k] holds the gradients for  #
        # self.params[k]. Don't forget to add L2 regularization!                   #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################
        loss, dsoftmax = layers.softmax_loss(scores, y)
        dl2, dw2, db2 = layers.affine_backward(dsoftmax, cache_2)
        _, dw1, db1 = layer_utils.affine_relu_backward(dl2, cache_1)

        # add regularization loss
        for w in [self.params['W1'], self.params['W2']]:
            loss += self.reg * np.sum(w * w) * 0.5

        grads['W1'] = dw1 + self.reg * self.params['W1']
        grads['W2'] = dw2 + self.reg * self.params['W2']
        grads['b1'] = db1
        grads['b2'] = db2
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        return loss, grads
Example #13
    def loss(self, X, y=None):
        """
        Compute loss and gradient for the fully-connected net.

        Input / output: Same as TwoLayerNet above.
        """
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'

        # Set train/test mode for batchnorm params and dropout param since they
        # behave differently during training and testing.
        if self.use_dropout:
            self.dropout_param['mode'] = mode
        if self.use_batchnorm:
            for bn_param in self.bn_params:
                bn_param['mode'] = mode

        scores = None
        ############################################################################
        # TODO: Implement the forward pass for the fully-connected net, computing  #
        # the class scores for X and storing them in the scores variable.          #
        #                                                                          #
        # When using dropout, you'll need to pass self.dropout_param to each       #
        # dropout forward pass.                                                    #
        #                                                                          #
        # When using batch normalization, you'll need to pass self.bn_params[0] to #
        # the forward pass for the first batch normalization layer, pass           #
        # self.bn_params[1] to the forward pass for the second batch normalization #
        # layer, etc.                                                              #
        ############################################################################

        out = X
        caches = []
        for i in range(self.num_layers):
            w_name = 'W{}'.format(i)
            b_name = 'b{}'.format(i)

            w = self.params[w_name]
            b = self.params[b_name]

            if i == self.num_layers - 1:
                out, cache = layer_utils.affine_forward(out, w, b)
            else:
                out, cache = layer_utils.affine_relu_forward(out, w, b)

            caches.append(cache)

        scores = out

        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # If test mode return early
        if mode == 'test':
            return scores

        loss, grads = 0.0, {}
        ############################################################################
        # TODO: Implement the backward pass for the fully-connected net. Store the #
        # loss in the loss variable and gradients in the grads dictionary. Compute #
        # data loss using softmax, and make sure that grads[k] holds the gradients #
        # for self.params[k]. Don't forget to add L2 regularization!               #
        #                                                                          #
        # When using batch normalization, you don't need to regularize the scale   #
        # and shift parameters.                                                    #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################

        data_loss, dsoftmax = layers.softmax_loss(scores, y)

        reg_loss = 0
        for key in self.params.keys():
            if key.startswith('W'):
                w = self.params[key]
                reg_loss += self.reg * np.sum(w * w) * 0.5

        loss = data_loss + reg_loss

        dx = dsoftmax
        for i in reversed(range(self.num_layers)):
            w_name = 'W{}'.format(i)
            b_name = 'b{}'.format(i)

            if i == self.num_layers - 1:
                dx, dw, db = layer_utils.affine_backward(dx, caches[i])
            else:
                dx, dw, db = layer_utils.affine_relu_backward(dx, caches[i])

            grads[w_name] = dw
            grads[b_name] = db

        for key in self.params.keys():
            if key.startswith('W'):
                w = self.params[key]
                grads[key] += self.reg * w

        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        return loss, grads
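A typical way these loss implementations are exercised in the assignment notebook is a whole-model gradient check with eval_numerical_gradient from cs231n.gradient_check. A usage sketch, assuming a FullyConnectedNet constructor like the one in the assignment and the rel_error helper shown earlier:

import numpy as np
from cs231n.gradient_check import eval_numerical_gradient

np.random.seed(231)
N, D, H1, H2, C = 2, 15, 20, 30, 10
X = np.random.randn(N, D)
y = np.random.randint(C, size=(N,))

for reg in [0, 3.14]:
    model = FullyConnectedNet([H1, H2], input_dim=D, num_classes=C,
                              reg=reg, weight_scale=5e-2, dtype=np.float64)
    loss, grads = model.loss(X, y)
    print('Initial loss with reg = {}: {}'.format(reg, loss))
    for name in sorted(grads):
        f = lambda _: model.loss(X, y)[0]
        grad_num = eval_numerical_gradient(f, model.params[name],
                                           verbose=False, h=1e-5)
        print('{} relative error: {:.2e}'.format(name, rel_error(grad_num, grads[name])))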
Example #14
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        If y is None then run a test-time forward pass of the model and return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.

        If y is not None then run a training-time forward and backward pass and
        return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping
          parameter names to gradients of the loss with respect to those
          parameters.
        """
        scores = None
        #####################################################################
        # TODO: Implement the forward pass for the two-layer net, computing #
        # the class scores for X and storing them in the scores variable.   #
        #####################################################################
        a, cache_relu = affine_relu_forward(X, self.params['W1'],
                                            self.params['b1'])
        scores, cache_scores = affine_forward(a, self.params['W2'],
                                              self.params['b2'])
        #####################################################################
        #                             END OF YOUR CODE                      #
        #####################################################################

        # If y is None then we are in test mode so just return scores
        if y is None:
            return scores

        loss, grads = 0, {}
        ######################################################################
        # TODO: Implement the backward pass for the two-layer net. Store the #
        # loss in the loss variable and gradients in the grads dictionary.   #
        # Compute data loss using softmax, and make sure that grads[k] hold  #
        # the gradients for self.params[k]. Don't forget to add L2           #
        # regularization!                                                    #
        # NOTE: To ensure that your                                          #
        # implementation matches ours and you pass the automated tests,      #
        # make sure that your L2 regularization includes a factor of 0.5 to  #
        # simplify the expression for the gradient.                          #
        ######################################################################
        loss, dscores = softmax_loss(scores, y)
        dx, grads['W2'], grads['b2'] = affine_backward(dscores, cache_scores)
        _, grads['W1'], grads['b1'] = affine_relu_backward(dx, cache_relu)

        # add regularization
        ss = np.sum(self.params['W1'] ** 2) + np.sum(self.params['W2'] ** 2)
        loss += 0.5 * self.reg * ss
        grads['W1'] += self.reg * self.params['W1']
        grads['W2'] += self.reg * self.params['W2']
        ######################################################################
        #                             END OF YOUR CODE                       #
        ######################################################################

        return loss, grads
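Once loss returns correct gradients, these models are normally trained with the assignment's Solver class rather than a hand-written loop. A minimal usage sketch; the hyperparameter values are illustrative and data is the usual dict with 'X_train', 'y_train', 'X_val', 'y_val':

from cs231n.solver import Solver

model = TwoLayerNet(input_dim=3 * 32 * 32, hidden_dim=100, num_classes=10, reg=1e-2)
solver = Solver(model, data,
                update_rule='sgd',
                optim_config={'learning_rate': 1e-3},
                lr_decay=0.95,
                num_epochs=10,
                batch_size=100,
                print_every=100)
solver.train()
print('Best validation accuracy:', solver.best_val_acc)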
Example #15
File: fc_net.py  Project: sid-basu/cs231n
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        If y is None, then run a test-time forward pass of the model and return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.

        If y is not None, then run a training-time forward and backward pass and
        return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping parameter
          names to gradients of the loss with respect to those parameters.
        """
        scores = None
        ############################################################################
        # TODO: Implement the forward pass for the two-layer net, computing the    #
        # class scores for X and storing them in the scores variable.              #
        ############################################################################
        from cs231n.layer_utils import affine_relu_forward, affine_forward, softmax_loss
        a1 = affine_relu_forward(X, self.params['W1'], self.params['b1'])[0]

        scores = affine_forward(a1, self.params['W2'], self.params['b2'])[0]
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # If y is None then we are in test mode so just return scores
        if y is None:
            return scores

        loss, grads = 0, {}
        ############################################################################
        # TODO: Implement the backward pass for the two-layer net. Store the loss  #
        # in the loss variable and gradients in the grads dictionary. Compute data #
        # loss using softmax, and make sure that grads[k] holds the gradients for  #
        # self.params[k]. Don't forget to add L2 regularization!                   #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################
        loss, dx = softmax_loss(scores, y)

        loss += 0.5 * self.reg * np.sum(self.params['W1'] * self.params['W1'])
        loss += 0.5 * self.reg * np.sum(self.params['W2'] * self.params['W2'])

        grads['W2'] = a1.T.dot(dx) + self.reg * self.params['W2']
        grads['b2'] = np.sum(dx, axis=0)

        da1 = dx.dot(self.params['W2'].T)
        da1[a1 <= 0] = 0

        grads['W1'] = X.reshape(X.shape[0],
                                -1).T.dot(da1) + self.reg * self.params['W1']
        grads['b1'] = np.sum(da1, axis=0)

        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        return loss, grads
Example #16
File: fc_net.py  Project: sid-basu/cs231n
    def loss(self, X, y=None):
        """
        Compute loss and gradient for the fully-connected net.

        Input / output: Same as TwoLayerNet above.
        """
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'

        # Set train/test mode for batchnorm params and dropout param since they
        # behave differently during training and testing.
        if self.use_dropout:
            self.dropout_param['mode'] = mode
        if self.use_batchnorm:
            for bn_param in self.bn_params:
                bn_param['mode'] = mode

        scores = None
        ############################################################################
        # TODO: Implement the forward pass for the fully-connected net, computing  #
        # the class scores for X and storing them in the scores variable.          #
        #                                                                          #
        # When using dropout, you'll need to pass self.dropout_param to each       #
        # dropout forward pass.                                                    #
        #                                                                          #
        # When using batch normalization, you'll need to pass self.bn_params[0] to #
        # the forward pass for the first batch normalization layer, pass           #
        # self.bn_params[1] to the forward pass for the second batch normalization #
        # layer, etc.                                                              #
        ############################################################################
        from cs231n.layer_utils import affine_relu_forward, softmax_loss
        activations = {}
        cache = {}
        dropout_cache = {}
        activations[0] = X

        if self.use_batchnorm:
            for i in range(1, self.num_layers):
                if self.use_dropout:
                    tmp_activations, cache[i] = affine_batchnorm_relu_forward(
                        activations[i - 1], self.params['W{0}'.format(i)],
                        self.params['b{0}'.format(i)],
                        self.params['gamma{0}'.format(i)],
                        self.params['beta{0}'.format(i)],
                        self.bn_params[i - 1])

                    activations[i], dropout_cache[i] = dropout_forward(
                        tmp_activations, self.dropout_param)

                else:

                    activations[i], cache[i] = affine_batchnorm_relu_forward(
                        activations[i - 1], self.params['W{0}'.format(i)],
                        self.params['b{0}'.format(i)],
                        self.params['gamma{0}'.format(i)],
                        self.params['beta{0}'.format(i)],
                        self.bn_params[i - 1])

            scores, cache[self.num_layers] = affine_forward(
                activations[self.num_layers - 1],
                self.params['W{0}'.format(self.num_layers)],
                self.params['b{0}'.format(self.num_layers)])

        else:
            for i in range(1, self.num_layers):
                if self.use_dropout:
                    tmp_activations, cache[i] = affine_relu_forward(
                        activations[i - 1], self.params['W{0}'.format(i)],
                        self.params['b{0}'.format(i)])

                    activations[i], dropout_cache[i] = dropout_forward(
                        tmp_activations, self.dropout_param)

                else:
                    activations[i], cache[i] = affine_relu_forward(
                        activations[i - 1], self.params['W{0}'.format(i)],
                        self.params['b{0}'.format(i)])

            scores, cache[self.num_layers] = affine_forward(
                activations[self.num_layers - 1],
                self.params['W{0}'.format(self.num_layers)],
                self.params['b{0}'.format(self.num_layers)])

        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # If test mode return early
        if mode == 'test':
            return scores

        loss, grads = 0.0, {}
        ############################################################################
        # TODO: Implement the backward pass for the fully-connected net. Store the #
        # loss in the loss variable and gradients in the grads dictionary. Compute #
        # data loss using softmax, and make sure that grads[k] holds the gradients #
        # for self.params[k]. Don't forget to add L2 regularization!               #
        #                                                                          #
        # When using batch normalization, you don't need to regularize the scale   #
        # and shift parameters.                                                    #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################
        from cs231n.layer_utils import affine_relu_backward
        loss, dx = softmax_loss(scores, y)

        for i in range(self.num_layers):
            loss += 0.5 * self.reg * np.sum(self.params['W{0}'.format(i + 1)] *
                                            self.params['W{0}'.format(i + 1)])

        if self.use_batchnorm:
            #get grads for top layer, add in regularization term
            activations['da{0}'.format(self.num_layers - 1)], grads[
                'W{0}'.format(self.num_layers)], grads['b{0}'.format(
                    self.num_layers)] = affine_backward(
                        dx, cache[self.num_layers])
            grads['W{0}'.format(
                self.num_layers)] += self.reg * self.params['W{0}'.format(
                    self.num_layers)]

            if self.use_dropout:
                #get grads for other layers (dropout)
                for i in range(self.num_layers - 1, 0, -1):
                    activations['da{0}post_dropout'.format(
                        i - 1)] = dropout_backward(
                            activations['da{0}'.format(i)], dropout_cache[i])
                    activations['da{0}'.format(i - 1)], grads['W{0}'.format(
                        i)], grads['b{0}'.format(i)], grads['gamma{0}'.format(
                            i)], grads['beta{0}'.format(
                                i)] = affine_batchnorm_relu_backward(
                                    activations['da{0}post_dropout'.format(i -
                                                                           1)],
                                    cache[i])
                    grads['W{0}'.format(
                        i)] += self.reg * self.params['W{0}'.format(i)]

            else:
                #get grads for other layers (no dropout)
                for i in range(self.num_layers - 1, 0, -1):
                    activations['da{0}'.format(i - 1)], grads['W{0}'.format(
                        i)], grads['b{0}'.format(i)], grads['gamma{0}'.format(
                            i)], grads['beta{0}'.format(
                                i)] = affine_batchnorm_relu_backward(
                                    activations['da{0}'.format(i)], cache[i])
                    grads['W{0}'.format(
                        i)] += self.reg * self.params['W{0}'.format(i)]

        else:
            #get grads for top layer, add in regularization term
            activations['da{0}'.format(self.num_layers - 1)], grads[
                'W{0}'.format(self.num_layers)], grads['b{0}'.format(
                    self.num_layers)] = affine_backward(
                        dx, cache[self.num_layers])
            grads['W{0}'.format(
                self.num_layers)] += self.reg * self.params['W{0}'.format(
                    self.num_layers)]

            if self.use_dropout:
                #get grads for other layers (dropout)
                for i in range(self.num_layers - 1, 0, -1):
                    activations['da{0}post_dropout'.format(
                        i - 1)] = dropout_backward(
                            activations['da{0}'.format(i)], dropout_cache[i])
                    activations['da{0}'.format(i - 1)], grads['W{0}'.format(
                        i)], grads['b{0}'.format(i)] = affine_relu_backward(
                            activations['da{0}post_dropout'.format(i - 1)],
                            cache[i])
                    grads['W{0}'.format(
                        i)] += self.reg * self.params['W{0}'.format(i)]

            else:
                #get grads for other layers (no dropout)
                for i in range(self.num_layers - 1, 0, -1):
                    activations['da{0}'.format(i - 1)], grads['W{0}'.format(
                        i)], grads['b{0}'.format(i)] = affine_relu_backward(
                            activations['da{0}'.format(i)], cache[i])
                    grads['W{0}'.format(
                        i)] += self.reg * self.params['W{0}'.format(i)]

        # #get grads for top layer
        # grads['W{0}'.format(self.num_layers)] = activations['a{0}'.format(self.num_layers - 1)].T.dot(dx) + self.reg * self.params['W{0}'.format(self.num_layers)]
        # grads['b{0}'.format(self.num_layers)] = np.sum(dx, axis = 0)

        # activations['da{0}'.format(self.num_layers - 1)] = dx.dot(self.params['W{0}'.format(self.num_layers)].T)
        # activations['da{0}'.format(self.num_layers - 1)][activations['a{0}'.format(self.num_layers - 1)] <= 0] = 0

        # #get grads for intermediate layers
        # for i in reversed(range(1, self.num_layers - 1)):
        #     activations['da{0}'.format(i)] = activations['da{0}'.format(i + 1)].dot(self.params['W{0}'.format(i + 1)].T)
        #     activations['da{0}'.format(i)][activations['a{0}'.format(i)] <= 0] = 0

        # for i in reversed(range(2, self.num_layers)):
        #     grads['W{0}'.format(i)] = activations['a{0}'.format(i - 1)].T.dot(activations['da{0}'.format(i)]) + self.reg * self.params['W{0}'.format(i)]
        #     grads['b{0}'.format(i)] = np.sum(activations['da{0}'.format(i)], axis = 0)

        # #get grads for the initial layer
        # grads['W1'] = X.reshape(X.shape[0], -1).T.dot(activations['da1']) + self.reg * self.params['W1']
        # grads['b1'] = np.sum(activations['da1'], axis = 0)

        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        return loss, grads
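dropout_forward and dropout_backward used in the last two examples come from cs231n/layers.py. A sketch of the usual inverted-dropout implementation; here p is taken to be the keep probability, which is how later versions of the assignment define it (older handouts treat p as the drop probability, so check the repo's own docstring):

import numpy as np

def dropout_forward(x, dropout_param):
    """Inverted dropout: rescale at train time so no scaling is needed at test time."""
    p, mode = dropout_param['p'], dropout_param['mode']
    if 'seed' in dropout_param:
        np.random.seed(dropout_param['seed'])

    if mode == 'train':
        mask = (np.random.rand(*x.shape) < p) / p   # keep with probability p, rescale by 1/p
        out = x * mask
    else:  # test
        mask = None
        out = x
    return out, (dropout_param, mask)

def dropout_backward(dout, cache):
    """Route the upstream gradient through the same mask used in the forward pass."""
    dropout_param, mask = cache
    if dropout_param['mode'] == 'train':
        return dout * mask
    return dout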