def compute_loss_grad_naive(self):
    # Evaluate the naive implementation of the loss we provided for you.
    # Generate a random SVM weight matrix of small numbers.
    W = np.random.randn(3073, 10) * 0.0001
    loss, grad = svm_loss_naive(W, self.X_dev, self.y_dev, 0.00001)
    print('loss: %f' % (loss, ))

    # Once you've implemented the gradient, recompute it with the code below
    # and gradient check it with the function we provided for you.

    # Compute the loss and its gradient at W.
    loss, grad = svm_loss_naive(W, self.X_dev, self.y_dev, 0.0)

    # Numerically compute the gradient along several randomly chosen dimensions, and
    # compare them with your analytically computed gradient. The numbers should match
    # almost exactly along all dimensions.
    f = lambda w: svm_loss_naive(w, self.X_dev, self.y_dev, 0.0)[0]
    grad_numerical = grad_check_sparse(f, W, grad)

    # Do the gradient check once again with regularization turned on.
    # You didn't forget the regularization gradient, did you?
    loss, grad = svm_loss_naive(W, self.X_dev, self.y_dev, 1e2)
    f = lambda w: svm_loss_naive(w, self.X_dev, self.y_dev, 1e2)[0]
    grad_numerical = grad_check_sparse(f, W, grad)
    return
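# For reference, the sketch below shows what a sparse numerical gradient check in
# the style of grad_check_sparse might do internally: sample a few random
# coordinates of W and compare a centered finite difference against the analytic
# gradient. This is an illustrative assumption about its behavior, not the
# provided implementation.
import numpy as np

def sparse_grad_check_sketch(f, W, analytic_grad, num_checks=10, h=1e-5):
    # Hypothetical helper for illustration; f maps a weight matrix to a scalar loss.
    for _ in range(num_checks):
        # Pick a random coordinate of W.
        ix = tuple(np.random.randint(dim) for dim in W.shape)
        old = W[ix]
        W[ix] = old + h
        fxph = f(W)                    # loss at W + h along this coordinate
        W[ix] = old - h
        fxmh = f(W)                    # loss at W - h along this coordinate
        W[ix] = old                    # restore the original value
        grad_numerical = (fxph - fxmh) / (2 * h)   # centered difference
        grad_analytic = analytic_grad[ix]
        rel_error = abs(grad_numerical - grad_analytic) / (
            abs(grad_numerical) + abs(grad_analytic) + 1e-12)
        print('numerical: %f analytic: %f, relative error: %e'
              % (grad_numerical, grad_analytic, rel_error))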
def loss_checker(W, x_dev, y_dev):
    # Next implement the function svm_loss_vectorized; for now only compute the loss;
    # we will implement the gradient in a moment.
    tic = time.time()
    loss_naive, grad_naive = svm_loss_naive(W, x_dev, y_dev, 0.000005)
    toc = time.time()
    print('Naive loss: %e computed in %fs' % (loss_naive, toc - tic))

    tic = time.time()
    loss_vectorized, _ = svm_loss_vectorized(W, x_dev, y_dev, 0.000005)
    toc = time.time()
    print('Vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic))

    # The losses should match but your vectorized implementation should be much faster.
    # There is a small difference between loss_naive and loss_vectorized because the
    # vectorized version includes the margin of 1 at the correct class (j == y[i]),
    # whereas the naive version skips that term entirely.
    print('difference: %f' % (loss_naive - loss_vectorized))
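# The comment above about the correct-class margin is easier to see in code. Below
# is one common way to write the loss-only part of svm_loss_vectorized (a sketch
# under the usual CS231n conventions, not necessarily the implementation in
# linear_svm.py): the margin matrix initially contains max(0, 0 + 1) = 1 at each
# correct-class entry, and must be zeroed out explicitly; skipping that step adds
# exactly 1 per example relative to the naive loop.
import numpy as np

def svm_loss_vectorized_loss_only_sketch(W, X, y, reg):
    # Illustrative sketch, assuming L2 regularization of the form reg * sum(W**2).
    num_train = X.shape[0]
    scores = X.dot(W)                                    # (N, C) class scores
    correct = scores[np.arange(num_train), y][:, None]   # (N, 1) correct-class scores
    margins = np.maximum(0, scores - correct + 1)        # hinge margins, delta = 1
    # Zero the correct-class entries so they do not contribute to the loss
    # (this is exactly the discrepancy discussed in the comment above).
    margins[np.arange(num_train), y] = 0
    loss = margins.sum() / num_train + reg * np.sum(W * W)
    return loss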
def speed_up(X_dev, y_dev):
    print()
    print("*****************start speed up*****************")
    W = np.random.randn(3073, 10) * 0.0001

    tic = time.time()
    loss_naive, grad_naive = svm_loss_naive(W, X_dev, y_dev, 0.000005)
    toc = time.time()
    print("Naive loss: %e computed in %fs" % (loss_naive, toc - tic))

    tic = time.time()
    loss_vectorized, grad_vectorized = svm_loss_vectorized(
        W, X_dev, y_dev, 0.000005)
    toc = time.time()
    print('Vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic))

    print("loss difference: %f" % (loss_naive - loss_vectorized))
    difference = np.linalg.norm(grad_naive - grad_vectorized, ord="fro")
    print("gradient difference: %f" % difference)
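# For reference, a common way to vectorize both the loss and the gradient is the
# indicator-matrix trick sketched below (a sketch of the standard derivation, not
# necessarily what linear_svm.py does; it assumes L2 regularization of the form
# reg * sum(W**2)): each violated margin contributes X[i] to the wrong class's
# column and -X[i] to the correct class's column, which a single X.T.dot collects.
import numpy as np

def svm_loss_vectorized_sketch(W, X, y, reg):
    # Illustrative sketch of a fully vectorized SVM loss and gradient.
    num_train = X.shape[0]
    scores = X.dot(W)
    correct = scores[np.arange(num_train), y][:, None]
    margins = np.maximum(0, scores - correct + 1)
    margins[np.arange(num_train), y] = 0
    loss = margins.sum() / num_train + reg * np.sum(W * W)

    # Indicator: 1 where a margin is violated; each row's correct-class entry
    # gets minus the number of violated margins in that row.
    binary = (margins > 0).astype(float)
    binary[np.arange(num_train), y] = -binary.sum(axis=1)
    dW = X.T.dot(binary) / num_train + 2 * reg * W
    return loss, dW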
def vectorize_loss_computation(self):
    # Next implement the function svm_loss_vectorized; for now only compute the loss;
    # we will implement the gradient in a moment.
    W = np.random.randn(3073, 10) * 0.0001

    tic = time.time()
    loss_naive, grad_naive = svm_loss_naive(W, self.X_dev, self.y_dev, 0.00001)
    toc = time.time()
    print('Naive loss: %e computed in %fs' % (loss_naive, toc - tic))

    tic = time.time()
    loss_vectorized, _ = svm_loss_vectorized(W, self.X_dev, self.y_dev, 0.00001)
    toc = time.time()
    print('Vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic))

    # The losses should match but your vectorized implementation should be much faster.
    print('difference: %f' % (loss_naive - loss_vectorized))
    return
def vectorize_grad_computation(self):
    # Complete the implementation of svm_loss_vectorized, and compute the gradient
    # of the loss function in a vectorized way.
    # The naive implementation and the vectorized implementation should match, but
    # the vectorized version should still be much faster.
    W = np.random.randn(3073, 10) * 0.0001

    tic = time.time()
    _, grad_naive = svm_loss_naive(W, self.X_dev, self.y_dev, 0.00001)
    toc = time.time()
    print('Naive loss and gradient: computed in %fs' % (toc - tic))

    tic = time.time()
    _, grad_vectorized = svm_loss_vectorized(W, self.X_dev, self.y_dev, 0.00001)
    toc = time.time()
    print('Vectorized loss and gradient: computed in %fs' % (toc - tic))

    # The loss is a single number, so it is easy to compare the values computed
    # by the two implementations. The gradient on the other hand is a matrix, so
    # we use the Frobenius norm to compare them.
    difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
    print('difference: %f' % difference)
    return
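# The Frobenius norm used above is simply the square root of the sum of squared
# entries, so the comparison could equivalently be written without np.linalg.norm.
# A quick illustrative check with toy matrices:
import numpy as np

A = np.random.randn(4, 3)
B = np.random.randn(4, 3)
# ord='fro' is the elementwise 2-norm of the matrix difference:
fro = np.linalg.norm(A - B, ord='fro')
manual = np.sqrt(np.sum((A - B) ** 2))
assert np.isclose(fro, manual)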
# Append the bias dimension of ones to each data matrix so that our SVM
# only has to worry about optimizing a single weight matrix W.
X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])
print(X_train.shape, X_val.shape, X_test.shape, X_dev.shape)

# Evaluate the naive implementation of the loss we provided for you:
from assignment1.cs231n.classifiers.linear_svm import svm_loss_naive
import time

# Generate a random SVM weight matrix of small numbers.
W = np.random.randn(3073, 10) * 0.0001

loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.000005)
print('loss: %f' % (loss, ))

# Once you've implemented the gradient, recompute it with the code below
# and gradient check it with the function we provided for you.

# Compute the loss and its gradient at W.
loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.0)

# Numerically compute the gradient along several randomly chosen dimensions, and
# compare them with your analytically computed gradient. The numbers should match
# almost exactly along all dimensions.
from assignment1.cs231n.gradient_check import grad_check_sparse
f = lambda w: svm_loss_naive(w, X_dev, y_dev, 0.0)[0]
grad_numerical = grad_check_sparse(f, W, grad)
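# A quick illustration of why appending a column of ones folds the bias into W
# (a toy example, not part of the assignment code): the last row of the extended
# weight matrix plays the role of the bias vector b.
import numpy as np

X = np.random.randn(5, 3072)          # 5 toy examples
W = np.random.randn(3072, 10)         # weights without bias
b = np.random.randn(10)               # bias vector

X_ext = np.hstack([X, np.ones((X.shape[0], 1))])  # append bias dimension
W_ext = np.vstack([W, b[None, :]])                # stack b as the last row

# The extended product reproduces X.dot(W) + b exactly:
assert np.allclose(X_ext.dot(W_ext), X.dot(W) + b)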
# ## SVM Classifier
#
# Your code for this section will all be written inside **cs231n/classifiers/linear_svm.py**.
#
# As you can see, we have prefilled the function `svm_loss_naive` which uses
# for loops to evaluate the multiclass SVM loss function.

# In[ ]:

# Evaluate the naive implementation of the loss we provided for you:
from assignment1.cs231n.classifiers.linear_svm import svm_loss_naive, svm_loss_vectorized
import time

# Generate a random SVM weight matrix of small numbers.
W = np.random.randn(3073, 10) * 0.0001

loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.00001)
print('loss: %f' % (loss, ))

loss, grad = svm_loss_vectorized(W, X_dev, y_dev, 0.00001)

# The `grad` returned from the function above is right now all zero.
# Derive the gradient for the SVM cost function and implement it inline
# inside the function `svm_loss_naive`.
# You will find it helpful to interleave your new code inside the existing function.
#
# To check that you have implemented the gradient correctly, you can numerically
# estimate the gradient of the loss function and compare the numeric estimate
# to the gradient that you computed. We have provided code that does this for you:

# In[ ]:
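# For orientation, here is a minimal sketch of the naive loop implementation with
# the gradient computed inline, in the style the comment above asks for (a sketch
# of the standard derivation, not the prefilled code; it assumes L2 regularization
# of the form reg * sum(W**2)): each violated margin contributes X[i] to the
# wrong class's column of dW and -X[i] to the correct class's column.
import numpy as np

def svm_loss_naive_sketch(W, X, y, reg):
    # Illustrative sketch of the naive SVM loss with an inline analytic gradient.
    dW = np.zeros(W.shape)
    num_classes = W.shape[1]
    num_train = X.shape[0]
    loss = 0.0
    for i in range(num_train):
        scores = X[i].dot(W)
        correct_class_score = scores[y[i]]
        for j in range(num_classes):
            if j == y[i]:
                continue
            margin = scores[j] - correct_class_score + 1  # delta = 1
            if margin > 0:
                loss += margin
                dW[:, j] += X[i]        # wrong class pushed up
                dW[:, y[i]] -= X[i]     # correct class pushed down
    loss = loss / num_train + reg * np.sum(W * W)
    dW = dW / num_train + 2 * reg * W
    return loss, dW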