def test_softmax_loss_vectorized_gradient(self):
    loss_naive, grad_naive = softmax_loss_naive(self.weights, self.x, self.y, self.reg)
    loss_vect, grad_vect = softmax_loss_vectorized(self.weights, self.x, self.y, self.reg)
    np.testing.assert_allclose(loss_naive, loss_vect, rtol=1e-04)
    np.testing.assert_allclose(grad_naive, grad_vect, rtol=1e-04)
def loss(self, X, y=None, reg=0.0):
    """
    Compute the loss and gradients for a two layer fully connected neural
    network.

    Inputs:
    - X: Input data of shape (N, D). Each X[i] is a training sample.
    - y: Vector of training labels. y[i] is the label for X[i], and each y[i]
      is an integer in the range 0 <= y[i] < C. This parameter is optional; if
      it is not passed then we only return scores, and if it is passed then we
      instead return the loss and gradients.
    - reg: Regularization strength.

    Returns:
    If y is None, return a matrix scores of shape (N, C) where scores[i, c] is
    the score for class c on input X[i].

    If y is not None, instead return a tuple of:
    - loss: Loss (data loss and regularization loss) for this batch of
      training samples.
    - grads: Dictionary mapping parameter names to gradients of those
      parameters with respect to the loss function; has the same keys as
      self.params.
    """
    # Unpack variables from the params dictionary
    W1, b1 = self.params['W1'], self.params['b1']
    W2, b2 = self.params['W2'], self.params['b2']
    N, D = X.shape

    #############################################################################
    # Forward pass: compute the class scores for the input and store them in    #
    # scores, an array of shape (N, C).                                          #
    #############################################################################
    z1 = X.dot(W1) + b1            # first affine layer, shape (N, H)
    layer1 = np.maximum(0, z1)     # ReLU non-linearity, shape (N, H)
    scores = layer1.dot(W2) + b2   # second affine layer, shape (N, C)

    # If the targets are not given then jump out, we're done
    if y is None:
        return scores

    #############################################################################
    # Finish the forward pass and compute the loss. This includes both the      #
    # softmax data loss and the L2 regularization for W1 and W2. So that the    #
    # results match the reference solution, the regularization loss is          #
    # multiplied by 0.5.                                                         #
    #############################################################################
    shifted = scores - np.max(scores, axis=1, keepdims=True)  # numerical stability
    exp_scores = np.exp(shifted)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)  # shape (N, C)
    loss = -np.sum(np.log(probs[np.arange(N), y])) / N
    loss += 0.5 * reg * (np.sum(W1 * W1) + np.sum(W2 * W2))

    #############################################################################
    # Backward pass: compute the derivatives of the weights and biases and      #
    # store them in the grads dictionary. For example, grads['W1'] holds the    #
    # gradient on W1 and has the same shape as W1.                              #
    #############################################################################
    grads = {}

    # Gradient of the loss with respect to the scores
    dscores = probs.copy()
    dscores[np.arange(N), y] -= 1
    dscores /= N

    # Second layer
    grads['W2'] = layer1.T.dot(dscores) + reg * W2
    grads['b2'] = np.sum(dscores, axis=0)

    # Backpropagate into the hidden layer and through the ReLU
    dhidden = dscores.dot(W2.T)
    dhidden[z1 <= 0] = 0

    # First layer
    grads['W1'] = X.T.dot(dhidden) + reg * W1
    grads['b1'] = np.sum(dhidden, axis=0)

    return loss, grads
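# Once the backward pass above is in place, it is worth sanity-checking the
# analytic gradients against centered finite differences on a tiny random
# problem. This is a minimal sketch: the TwoLayerNet(input_size, hidden_size,
# num_classes) constructor used in the commented usage is an assumed signature
# for the surrounding class, not something defined in this file.
import numpy as np

def check_two_layer_net_gradients(net, X, y, reg=0.05, h=1e-5):
    """Compare net.loss analytic gradients with centered finite differences."""
    _, grads = net.loss(X, y, reg=reg)
    for name, param in net.params.items():
        num_grad = np.zeros_like(param)
        it = np.nditer(param, flags=['multi_index'])
        while not it.finished:
            ix = it.multi_index
            old = param[ix]
            param[ix] = old + h
            loss_plus, _ = net.loss(X, y, reg=reg)
            param[ix] = old - h
            loss_minus, _ = net.loss(X, y, reg=reg)
            param[ix] = old                        # restore the parameter
            num_grad[ix] = (loss_plus - loss_minus) / (2 * h)
            it.iternext()
        rel_err = np.max(np.abs(num_grad - grads[name]) /
                         np.maximum(1e-8, np.abs(num_grad) + np.abs(grads[name])))
        print('%s max relative error: %e' % (name, rel_err))

# Example usage on a tiny problem (constructor signature is an assumption):
# net = TwoLayerNet(4, 10, 3)
# X_small = np.random.randn(5, 4)
# y_small = np.random.randint(3, size=5)
# check_two_layer_net_gradients(net, X_small, y_small)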
def test_softmax_loss_naive_loss(self):
    loss, _ = softmax_loss_naive(self.weights, self.x, self.y, self.reg)
    np.testing.assert_allclose(loss, self.expected, rtol=1e-04)
def test_softmax_loss_naive_gradient(self):
    loss, grad = softmax_loss_naive(self.weights, self.x, self.y, self.reg)
    # The lambda must evaluate the loss at the perturbed weights w, not at
    # self.weights, for the numeric gradient check to be meaningful.
    f = lambda w: softmax_loss_naive(w, self.x, self.y, self.reg)[0]
    grad_numerical = test_utils.grad_check_sparse(f, self.weights, grad)
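# The two tests above exercise softmax_loss_naive. For orientation, a minimal
# nested-loop sketch with that interface is shown below, assuming the
# convention used by the scripts later in this file (W of shape (D, C), X of
# shape (N, D), y of shape (N,)). It is illustrative only, not the
# repository's actual implementation.
import numpy as np

def softmax_loss_naive_sketch(W, X, y, reg):
    """Softmax loss and gradient computed with explicit Python loops."""
    loss = 0.0
    dW = np.zeros_like(W)
    num_train, num_classes = X.shape[0], W.shape[1]

    for i in range(num_train):
        scores = X[i].dot(W)
        scores -= np.max(scores)                 # shift for numerical stability
        probs = np.exp(scores) / np.sum(np.exp(scores))
        loss += -np.log(probs[y[i]])
        for c in range(num_classes):
            # dL/dscore_c = p_c - 1{c == y_i}; chain rule through scores = X[i].dot(W)
            dW[:, c] += (probs[c] - (c == y[i])) * X[i]

    loss = loss / num_train + 0.5 * reg * np.sum(W * W)   # 0.5 factor matches the notes above
    dW = dW / num_train + reg * W
    return loss, dW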
X_train, y_train, X_val, y_val, X_test, y_test = utils.get_CIFAR10_data()

# First implement the naive softmax loss function with nested loops.
# Open the file softmax.py and implement the softmax_loss_naive function.

# Generate a random softmax theta matrix and use it to compute the loss.
theta = np.random.randn(3073, 10) * 0.0001
loss, grad = softmax_loss_naive(theta, X_train, y_train, 0.0)

# Loss should be something close to -log(0.1)
print 'loss:', loss, ' should be close to ', -np.log(0.1)

# Use numeric gradient checking as a debugging tool.
# The numeric gradient should be close to the analytic gradient (within 1e-7).
from gradient_check import grad_check_sparse
f = lambda th: softmax_loss_naive(th, X_train, y_train, 0.0)[0]
grad_numerical = grad_check_sparse(f, theta, grad, 10)

# Now that we have a naive implementation of the softmax loss function and its
# gradient, implement a vectorized version in softmax_loss_vectorized.
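# The script above ends by asking for a vectorized version. A possible
# vectorized counterpart of the naive sketch shown earlier (same assumed
# (D, C) / (N, D) convention, no explicit Python loops) could look like this;
# again it is a sketch, not the repository's softmax_loss_vectorized.
import numpy as np

def softmax_loss_vectorized_sketch(W, X, y, reg):
    """Same loss and gradient as the naive sketch, computed without loops."""
    num_train = X.shape[0]

    scores = X.dot(W)                                    # (N, C)
    scores -= np.max(scores, axis=1, keepdims=True)      # numerical stability
    exp_scores = np.exp(scores)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

    # Data loss: average negative log-probability of the correct class.
    loss = -np.sum(np.log(probs[np.arange(num_train), y])) / num_train
    loss += 0.5 * reg * np.sum(W * W)

    # Gradient: dL/dscores is probs with 1 subtracted at the correct class.
    dscores = probs
    dscores[np.arange(num_train), y] -= 1
    dW = X.T.dot(dscores) / num_train + reg * W
    return loss, dW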
print('X_train shape: ' + str(X_train.shape))
print('Y_train shape: ' + str(Y_train.shape))
print('X_test shape: ' + str(X_test.shape))
print('Y_test shape: ' + str(Y_test.shape))
print('X_val shape: ' + str(X_val.shape))
print('Y_val shape: ' + str(Y_val.shape))
print('X_dev shape: ' + str(X_dev.shape))
print('Y_dev shape: ' + str(Y_dev.shape))

#############################################################################
################################# softmax classifier

# First implement the naive softmax loss function with nested loops.
W = np.random.randn(3073, 10) * 0.0001
loss, grad = softmax.softmax_loss_naive(W, X_dev, Y_dev, 0.0)

# As a rough sanity check, our loss should be something close to -log(0.1):
# with 10 classes and near-zero random weights, every class gets a probability
# of roughly 0.1, so the average loss is approximately -np.log(0.1).
print('loss: ' + str(loss))
print('sanity check: ' + str(-np.log(0.1)))

# As we did for the SVM, use numeric gradient checking as a debugging tool.
# The numeric gradient should be close to the analytic gradient.
loss, grad = softmax.softmax_loss_naive(W, X_dev, Y_dev, 0.0)
f = lambda w: softmax.softmax_loss_naive(w, X_dev, Y_dev, 0.0)[0]
Tools.grad_check_sparse(f, W, grad, 10)

# Similar to the SVM case, do another gradient check with regularization.
loss, grad = softmax.softmax_loss_naive(W, X_dev, Y_dev, 5e1)
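# Both scripts rely on grad_check_sparse (from gradient_check / Tools) as the
# debugging tool. Its real implementation is not shown in this file; the sketch
# below only illustrates the usual idea behind such a helper - sample a few
# random coordinates and compare a centered finite difference against the
# analytic gradient - under a signature assumed to match the calls above.
import random
import numpy as np

def grad_check_sparse_sketch(f, x, analytic_grad, num_checks=10, h=1e-5):
    """Spot-check analytic_grad at a few random coordinates of x."""
    for _ in range(num_checks):
        ix = tuple(random.randrange(m) for m in x.shape)

        old = x[ix]
        x[ix] = old + h
        fxph = f(x)                       # f(x + h)
        x[ix] = old - h
        fxmh = f(x)                       # f(x - h)
        x[ix] = old                       # restore

        grad_numerical = (fxph - fxmh) / (2 * h)
        grad_analytic = analytic_grad[ix]
        rel_error = (abs(grad_numerical - grad_analytic) /
                     (abs(grad_numerical) + abs(grad_analytic) + 1e-12))
        print('numerical: %f analytic: %f, relative error: %e'
              % (grad_numerical, grad_analytic, rel_error))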
file = "../train/" + str(i) + ".png" x = si.imread(file) x = x.reshape(3072) X_val.append(x) if i % 5000 == 0: print("Reading data for index = ", i) X_val = np.asarray(X_val) X_train, _, _ = utils.std_features(X_train) X_train = np.vstack((np.ones(X_train.shape[0]), X_train.T)).T X_val, _, _ = utils.std_features(X_val) X_val = np.vstack((np.ones(X_val.shape[0]), X_val.T)).T theta = np.random.randn(3073, 10) * 0.0001 loss, grad = softmax_loss_naive(theta, X_train, y_train, 0.0) # Loss should be something close to - log(0.1) print 'loss:', loss, ' should be close to ', -np.log(0.1) tic = time.time() loss_naive, grad_naive = softmax_loss_naive(theta, X_train, y_train, 0.00001) toc = time.time() print 'naive loss: %e computed in %fs' % (loss_naive, toc - tic) tic = time.time() loss_vectorized, grad_vectorized = softmax_loss_vectorized( theta, X_train, y_train, 0.00001) toc = time.time() print 'vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic)
##print 'Train data shape: ', X_train.shape
##print 'Train labels shape: ', y_train.shape
##print 'Validation data shape: ', X_val.shape
##print 'Validation labels shape: ', y_val.shape
##print 'Test data shape: ', X_test.shape
##print 'Test labels shape: ', y_test.shape

W = np.random.randn(10, 3073) * 0.0001
##loss, grad = softmax_loss_naive(W, X_train, y_train, 0.0)
##print 'loss: %f' % loss
loss, grad = softmax_loss_vectorized(W, X_train, y_train, 0.0)
print 'loss: %f' % loss

from gradient_check import grad_check_sparse
f = lambda w: softmax_loss_naive(w, X_train, y_train, 0.0)[0]
grad_numerical = grad_check_sparse(f, W, grad, 10)

##import numpy as np
##a = np.arange(15).reshape(3, 5) * 0.1
##probs = a / np.sum(a, axis=0)
##y = np.random.choice(3, 5)

# Use the validation set to tune hyperparameters (regularization strength and
# learning rate). You should experiment with different ranges for the learning
# rates and regularization strengths; if you are careful you should be able to
# get a classification accuracy of over 0.35 on the validation set.
results = {}
best_val = -1
best_softmax = None
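# The results / best_val / best_softmax variables above set up a grid search
# over learning rate and regularization strength on the validation set. One
# common way to fill that loop in is sketched below; it assumes a Softmax
# classifier class with train(X, y, learning_rate, reg, num_iters) and
# predict(X) methods, mirroring the SVM exercise. Both the class and the
# hyperparameter ranges are assumptions, not shown in this file.
learning_rates = [1e-7, 5e-7]
regularization_strengths = [2.5e4, 5e4]

for lr in learning_rates:
    for rs in regularization_strengths:
        softmax = Softmax()               # assumed classifier class
        softmax.train(X_train, y_train, learning_rate=lr, reg=rs, num_iters=1500)

        train_acc = np.mean(softmax.predict(X_train) == y_train)
        val_acc = np.mean(softmax.predict(X_val) == y_val)
        results[(lr, rs)] = (train_acc, val_acc)

        # Keep the classifier with the best validation accuracy seen so far.
        if val_acc > best_val:
            best_val = val_acc
            best_softmax = softmax

for (lr, rs), (train_acc, val_acc) in sorted(results.items()):
    print('lr %e reg %e train accuracy: %f val accuracy: %f' % (lr, rs, train_acc, val_acc))

print('best validation accuracy achieved during cross-validation: %f' % best_val)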