def train_loss(*args):
    # `self` and `mode` are captured from the enclosing method's scope; args
    # holds the input batch, the labels, and the flattened parameter list.
    X = args[0]
    y = args[1]

    res = X
    for l in range(self.num_layers):
        prev_res = res
        res = affine_forward(prev_res, args[self.w_idx(l)], args[self.b_idx(l)])

        if l < (self.num_layers - 1):
            if self.use_batchnorm:
                res = batchnorm_forward(res, args[self.bn_ga_idx(l)],
                                        args[self.bn_bt_idx(l)], self.bn_params[l])
            res = relu_forward(res)
            if self.use_dropout:
                res = dropout_forward(res, self.dropout_param)

    scores = res

    if mode == 'test':
        return scores

    # loss, _ = softmax_loss(scores, y)
    loss = svm_loss(scores, y)
    return loss
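svm_loss is not defined in this snippet. As a rough guide, a minimal NumPy sketch of a multiclass hinge loss that matches the single-return call above (the margin of 1 and batch averaging are assumptions) could look like this:

import numpy as np

def svm_loss(scores, y):
    # Multiclass SVM (hinge) loss, averaged over the batch.
    N = scores.shape[0]
    correct = scores[np.arange(N), y][:, None]          # score of the true class
    margins = np.maximum(0.0, scores - correct + 1.0)   # margin of 1 (assumed)
    margins[np.arange(N), y] = 0.0                      # true class contributes nothing
    return np.sum(margins) / N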
Example #3
    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.
        Args:
        - X: Input data, numpy array of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].
        Returns:
        If y is None, then run a test-time forward pass of the model and
        return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.
        If y is not None, then run a training-time forward and backward pass
        and return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping
          parameter names to gradients of the loss with respect to those
          parameters.
        """
        scores = None
        X = X.astype(self.dtype)
        linear_cache = dict()
        relu_cache = dict()
        dropout_cache = dict()
        """
        TODO: Implement the forward pass for the fully-connected neural
        network, compute the scores and store them in the scores variable.
        """
        #######################################################################
        #                           BEGIN OF YOUR CODE                        #
        #######################################################################

        VAL = X.copy()

        for i in range(1, self.num_layers):
            linear_cache['L{}'.format(i)] = linear_forward(
                VAL, self.params['W{}'.format(i)],
                self.params['b{}'.format(i)])
            relu_cache['R{}'.format(i)] = relu_forward(
                linear_cache['L{}'.format(i)])
            if self.use_dropout:
                dropout_cache['D{}'.format(i)], dropout_cache['MASK{}'.format(i)] = \
                    dropout_forward(relu_cache['R{}'.format(i)],
                                    self.dropout_params['p'],
                                    self.dropout_params['train'],
                                    self.dropout_params['seed'])
                VAL = dropout_cache['D{}'.format(i)]
            else:
                VAL = relu_cache['R{}'.format(i)]


        linear_cache['L{}'.format(self.num_layers)] = linear_forward(
            VAL, self.params['W{}'.format(self.num_layers)],
            self.params['b{}'.format(self.num_layers)])

        scores = linear_cache['L{}'.format(self.num_layers)]

        #######################################################################
        #                            END OF YOUR CODE                         #
        #######################################################################
        # If y is None then we are in test mode so just return scores
        if y is None:
            return scores
        loss, grads = 0, dict()
        """
        TODO: Implement the backward pass for the fully-connected net. Store
        the loss in the loss variable and all gradients in the grads
        dictionary. Compute the loss with softmax. grads[k] has the gradients
        for self.params[k]. Add L2 regularisation to the loss function.
        NOTE: To ensure that your implementation matches ours and you pass the
        automated tests, make sure that your L2 regularization includes a
        factor of 0.5 to simplify the expression for the gradient.
        """
        #######################################################################
        #                           BEGIN OF YOUR CODE                        #
        #######################################################################

        loss, grad = softmax(scores, y)

        if self.use_dropout:
            VAR = dropout_cache['D{}'.format(self.num_layers - 1)]
        else:
            VAR = relu_cache['R{}'.format(self.num_layers - 1)]

        # Backward pass through the final linear layer.
        dX, grads['W{}'.format(self.num_layers)], grads['b{}'.format(self.num_layers)] = \
            linear_backward(grad, VAR,
                            self.params['W{}'.format(self.num_layers)],
                            self.params['b{}'.format(self.num_layers)])

        # L2 regularisation: the 0.5 factor in the loss cancels the 2 from the
        # derivative, so the gradient contribution is simply reg * W.
        grads['W{}'.format(self.num_layers)] += \
            self.reg * self.params['W{}'.format(self.num_layers)]
        loss += 0.5 * self.reg * np.sum(
            self.params['W{}'.format(self.num_layers)]**2)

        for inx in range(self.num_layers - 1, 0, -1):
            if self.use_dropout:
                dX = dropout_backward(dX, dropout_cache['MASK{}'.format(inx)],
                                      self.dropout_params['p'])

            dX = relu_backward(dX, linear_cache['L' + str(inx)])

            # The input to layer `inx` is the previous layer's (post-dropout)
            # activation, or X itself for the first layer.
            if inx - 1 != 0:
                if self.use_dropout:
                    pre_layer = dropout_cache['D{}'.format(inx - 1)]
                else:
                    pre_layer = relu_cache['R{}'.format(inx - 1)]
            else:
                pre_layer = X

            dX, grads['W{}'.format(inx)], grads['b{}'.format(inx)] = \
                linear_backward(dX, pre_layer,
                                self.params['W{}'.format(inx)],
                                self.params['b{}'.format(inx)])
            grads['W{}'.format(inx)] += self.reg * self.params['W{}'.format(inx)]
            loss += 0.5 * self.reg * np.sum(self.params['W{}'.format(inx)]**2)

        #######################################################################
        #                            END OF YOUR CODE                         #
        #######################################################################
        return loss, grads
    def apply_forward_dropout(self, x):
        x_, mask = dropout_forward(x,
                                   p=self.dropout_params["p"],
                                   train=self.dropout_params["train"],
                                   seed=self.dropout_params["seed"])
        return x_, mask
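linear_forward, linear_backward, relu_forward and relu_backward are imported from elsewhere in the example above. A minimal NumPy sketch that is consistent with how they are called there (the exact flattening behaviour is an assumption) could be:

import numpy as np

def linear_forward(x, W, b):
    # Affine transform; per-example inputs are flattened into rows.
    return x.reshape(x.shape[0], -1).dot(W) + b

def linear_backward(dout, x, W, b):
    # Gradients of the affine transform w.r.t. its input, weights and bias.
    dx = dout.dot(W.T).reshape(x.shape)
    dW = x.reshape(x.shape[0], -1).T.dot(dout)
    db = dout.sum(axis=0)
    return dx, dW, db

def relu_forward(z):
    return np.maximum(0.0, z)

def relu_backward(dout, z):
    # z is the pre-activation that was passed to relu_forward.
    return dout * (z > 0)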
Example #5
    def loss(self, X, y=None):
        """
        Compute loss and gradient for the fully-connected net.

        Input / output: Same as TwoLayerNet above.
        """
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'

        # Set train/test mode for batchnorm params and dropout param since they
        # behave differently during training and testing.
        if self.use_dropout:
            self.dropout_param['mode'] = mode
        if self.use_batchnorm:
            for bn_param in self.bn_params:
                bn_param['mode'] = mode

        scores = None
        cache = self.num_layers * [None]
        dropout_cache = (self.num_layers - 1) * [None]
        for i in np.arange(self.num_layers - 1):
            if not self.use_batchnorm:
                scores, cache[i] = affine_relu_forward(
                    X if i == 0 else scores, self.params['W%d' % (i + 1)],
                    self.params['b%d' % (i + 1)])
            else:
                scores, cache[i] = affine_bn_relu_forward(
                    X if i == 0 else scores, self.params['W%d' % (i + 1)],
                    self.params['b%d' % (i + 1)],
                    self.params['gamma%d' % (i + 1)],
                    self.params['beta%d' % (i + 1)], self.bn_params[i])
            if self.use_dropout:
                scores, dropout_cache[i] = dropout_forward(
                    scores, self.dropout_param)

        scores, cache[self.num_layers - 1] = affine_forward(
            scores, self.params['W%d' % self.num_layers],
            self.params['b%d' % self.num_layers])
        ############################################################################
        # When using batch normalization, you'll need to pass self.bn_params[0] to #
        # the forward pass for the first batch normalization layer, pass           #
        # self.bn_params[1] to the forward pass for the second batch normalization #
        # layer, etc.                                                              #
        ############################################################################

        # If test mode return early
        if mode == 'test':
            return scores

        loss, grads = 0.0, {}
        loss, dscore = softmax_loss(scores, y)
        dx, grads['W%d' % self.num_layers], grads['b%d' % self.num_layers] = \
            affine_backward(dscore, cache[self.num_layers - 1])

        for i in reversed(np.arange(self.num_layers - 1)):
            if self.use_dropout:
                dx = dropout_backward(dx, dropout_cache[i])
            if not self.use_batchnorm:
                dx, grads['W%d' % (i + 1)], grads['b%d' % (i + 1)] = \
                    affine_relu_backward(dx, cache[i])
            else:
                dx, grads['W%d' % (i + 1)], grads['b%d' % (i + 1)], \
                    grads['gamma%d' % (i + 1)], grads['beta%d' % (i + 1)] = \
                    affine_bn_relu_backward(dx, cache[i])

        for i in np.arange(self.num_layers):
            loss += .5 * self.reg * np.sum(
                np.square(self.params['W%d' % (i + 1)]))
            grads['W%d' % (i + 1)] += self.reg * self.params['W%d' % (i + 1)]
        ############################################################################
        # When using batch normalization, you don't need to regularize the scale   #
        # and shift parameters.                                                    #
        #                                                                          #
        ############################################################################

        return loss, grads
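affine_bn_relu_forward and affine_bn_relu_backward are not part of this listing. Assuming the batchnorm and ReLU layers follow the same (out, cache) / backward conventions as affine_forward and affine_backward above, a minimal composition sketch would be:

def affine_bn_relu_forward(x, w, b, gamma, beta, bn_param):
    # affine -> batch normalization -> ReLU, caching each step for the backward pass.
    a, fc_cache = affine_forward(x, w, b)
    a_bn, bn_cache = batchnorm_forward(a, gamma, beta, bn_param)
    out, relu_cache = relu_forward(a_bn)
    return out, (fc_cache, bn_cache, relu_cache)


def affine_bn_relu_backward(dout, cache):
    # Unwind the composition in reverse order.
    fc_cache, bn_cache, relu_cache = cache
    da_bn = relu_backward(dout, relu_cache)
    da, dgamma, dbeta = batchnorm_backward(da_bn, bn_cache)
    dx, dw, db = affine_backward(da, fc_cache)
    return dx, dw, db, dgamma, dbeta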
Example #6
def rel_error(x, y):
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))


# Load CIFAR-10 data
data = get_Cifar10_data()
for k, v in data.items():
    print('%s: \t' % k, v.shape)

############################# Dropout forward pass ############################

np.random.seed(231)
x = np.random.randn(500, 500) + 10

for p in [0.3, 0.6, 0.75]:
    out, _ = dropout_forward(x, {'mode': 'train', 'p': p})
    out_test, _ = dropout_forward(x, {'mode': 'test', 'p': p})

    print('Running tests with p = ', p)
    print('Mean of input: ', x.mean())
    print('Mean of train-time output: ', out.mean())
    print('Mean of test-time output: ', out_test.mean())
    print('Fraction of train-time output set to zero: ', (out == 0).mean())
    print('Fraction of test-time output set to zero: ', (out_test == 0).mean())
    print()

############################# Dropout backward pass ###########################

np.random.seed(231)
x = np.random.randn(10, 10) + 10
dout = np.random.randn(*x.shape)
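The dropout_forward / dropout_backward pair exercised by these checks is defined elsewhere. A minimal inverted-dropout sketch that matches the dropout_param dict used here, under the assumption that p is the probability of keeping a unit and that an optional 'seed' key makes the mask reproducible, could be:

import numpy as np

def dropout_forward(x, dropout_param):
    p, mode = dropout_param['p'], dropout_param['mode']
    if 'seed' in dropout_param:
        np.random.seed(dropout_param['seed'])

    if mode == 'train':
        # Inverted dropout: scale at training time so test time is the identity.
        mask = (np.random.rand(*x.shape) < p) / p
        out = x * mask
    else:
        mask = None
        out = x

    return out, (dropout_param, mask)


def dropout_backward(dout, cache):
    # Route the upstream gradient through the same mask used in the forward pass.
    dropout_param, mask = cache
    if dropout_param['mode'] == 'train':
        return dout * mask
    return dout

With this convention, the train-time output mean stays close to the input mean and roughly a fraction 1 - p of the activations are zeroed, which is what the printed statistics above probe.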