def train_check(self, X, y, lamda=1e-3):
    # Gradient check: numerically compute the gradient along several
    # randomly chosen dimensions and compare them with the analytically
    # computed gradient. The numbers should match almost exactly
    # along all dimensions.
    J, dW, db = self._costFcn(self.W, self.b, X, y, lamda)

    print('\nGradient check on W')
    f = lambda W: self._costFcn(W, self.b, X, y, lamda)[0]
    grad_numerical = grad_check_sparse(f, self.W, dW)

    print('\nGradient check on b')
    f = lambda b: self._costFcn(self.W, b, X, y, lamda)[0]
    grad_numerical = grad_check_sparse(f, self.b, db)
def train_check(self, X, y, lamda=1e-3):
    # Gradient check for the two-layer variant: numerically compute the
    # gradient along several randomly chosen dimensions and compare them
    # with the analytically computed gradient. The numbers should match
    # almost exactly along all dimensions.
    J, dW, db = self._costFcn(X, y, lamda)
    # With 10 classes and near-zero weights, the initial loss should be
    # close to -log(1/10) = log(10).
    print('J =', J, 'sanity check =', np.log(10))

    for l in range(1, 3):
        print('\n grad. check on W', l)
        # The lambda ignores its argument: _costFcn reads the weights from
        # self, and grad_check_sparse perturbs self.W[l] in place.
        f = lambda W: self._costFcn(X, y, lamda)[0]
        grad_numerical = grad_check_sparse(f, self.W[l], dW[l])

        print('\n grad. check on b', l)
        f = lambda b: self._costFcn(X, y, lamda)[0]
        grad_numerical = grad_check_sparse(f, self.b[l], db[l])
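# For reference, a minimal sketch of what a grad_check_sparse-style helper
# does, assuming centered differences at randomly chosen coordinates; the
# function name, step size h, and print format here are illustrative, not
# necessarily the exact helper used above.
import numpy as np

def grad_check_sparse_sketch(f, x, analytic_grad, num_checks=10, h=1e-5):
    for _ in range(num_checks):
        ix = tuple(np.random.randint(m) for m in x.shape)  # random coordinate
        old = x[ix]
        x[ix] = old + h
        fxph = f(x)          # f(x + h)
        x[ix] = old - h
        fxmh = f(x)          # f(x - h)
        x[ix] = old          # restore the entry
        grad_numerical = (fxph - fxmh) / (2 * h)
        grad_analytic = analytic_grad[ix]
        rel_error = (abs(grad_numerical - grad_analytic) /
                     (abs(grad_numerical) + abs(grad_analytic) + 1e-12))
        print('numerical: %f analytic: %f, relative error: %e'
              % (grad_numerical, grad_analytic, rel_error))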
W = np.random.randn(3073, 10) * 0.0001
loss, grad = linear_svm.svm_loss_naive(W, X_dev, y_dev, 0.000005)
print('loss: %f' % (loss, ))

# Once you've implemented the gradient, recompute it with the code below
# and gradient check it with the function we provided for you.

# Compute the loss and its gradient at W.
loss, grad = linear_svm.svm_loss_naive(W, X_dev, y_dev, 0.0)

# Numerically compute the gradient along several randomly chosen dimensions,
# and compare them with your analytically computed gradient. The numbers
# should match almost exactly along all dimensions.
f = lambda w: linear_svm.svm_loss_naive(w, X_dev, y_dev, 0.0)[0]
grad_numerical = gradient_check.grad_check_sparse(f, W, grad)

# Do the gradient check once again with regularization turned on.
# You didn't forget the regularization gradient, did you?
loss, grad = linear_svm.svm_loss_naive(W, X_dev, y_dev, 5e1)
f = lambda w: linear_svm.svm_loss_naive(w, X_dev, y_dev, 5e1)[0]
grad_numerical = gradient_check.grad_check_sparse(f, W, grad)

# Next implement the function svm_loss_vectorized; for now only compute the
# loss; we will implement the gradient in a moment.
tic = time.time()
loss_naive, grad_naive = linear_svm.svm_loss_naive(W, X_dev, y_dev, 0.000005)
toc = time.time()
print('Naive loss: %e computed in %fs' % (loss_naive, toc - tic))

tic = time.time()
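# A sketch of what svm_loss_vectorized might look like, assuming W is (D, C),
# X is (N, D), margin delta = 1, and a reg * ||W||^2 regularization term; the
# exact conventions inside linear_svm may differ.
import numpy as np

def svm_loss_vectorized_sketch(W, X, y, reg):
    num_train = X.shape[0]
    scores = X.dot(W)                                   # (N, C)
    correct = scores[np.arange(num_train), y][:, None]  # (N, 1)
    margins = np.maximum(0, scores - correct + 1.0)
    margins[np.arange(num_train), y] = 0                # ignore the true class
    loss = margins.sum() / num_train + reg * np.sum(W * W)

    # Each positive margin adds +x_i to its class column and -x_i to the
    # correct-class column.
    binary = (margins > 0).astype(X.dtype)
    binary[np.arange(num_train), y] = -binary.sum(axis=1)
    dW = X.T.dot(binary) / num_train + 2 * reg * W
    return loss, dW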
print('Naive loss: %e computed in %fs' % (loss_naive, toc - tic))

from Softmax import softmax_loss_vec

tic = time.time()
loss_vec, grad_vec = softmax_loss_vec(W, X_dev, Y_dev, 0.00001)
toc = time.time()
print('Vectorized loss: %e computed in %fs' % (loss_vec, toc - tic))
print('loss difference: %f' % (loss_naive - loss_vec))
print('gradient difference: %f' % (np.linalg.norm(grad_naive - grad_vec, ord='fro')))

# Check the gradient.
from gradient_check import grad_check_sparse
f = lambda w: softmax_loss_vec(w, X_dev, Y_dev, 0.0)[0]
grad_numerical = grad_check_sparse(f, W, grad_vec)

loss_vec, grad_vec = softmax_loss_vec(W, X_dev, Y_dev, 1e2)
f = lambda w: softmax_loss_vec(w, X_dev, Y_dev, 1e2)[0]
grad_numerical = grad_check_sparse(f, W, grad_vec)

# Implement SGD and find the best hyperparameters.
from linear_classifier import Softmax

results = {}
best_val = -1
best_softmax = None
learning_rates = [10**(x * 2) for x in range(-3, 3)]
reg_strength = [10**(x * 2) for x in range(-3, 3)]

for lr in learning_rates:
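# The snippet is truncated at the loop header above. A sketch of how the
# grid search might continue, assuming the Softmax classifier exposes
# train(...) and predict(...) as in the course skeleton, and that
# X_train/Y_train and X_val/Y_val splits exist; all of these names are
# assumptions.
import numpy as np

for lr in learning_rates:
    for reg in reg_strength:
        softmax = Softmax()
        softmax.train(X_train, Y_train, learning_rate=lr, reg=reg,
                      num_iters=1500)
        train_acc = np.mean(softmax.predict(X_train) == Y_train)
        val_acc = np.mean(softmax.predict(X_val) == Y_val)
        results[(lr, reg)] = (train_acc, val_acc)
        # Keep the classifier with the best validation accuracy.
        if val_acc > best_val:
            best_val = val_acc
            best_softmax = softmax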
loss, grad = softmax_loss_naive(theta, X_train, y_train, 0.0)

# Loss should be something close to -log(0.1).
print('loss:', loss, 'should be close to', -np.log(0.1))

# Use numeric gradient checking as a debugging tool.
# The numeric gradient should be close to the analytic gradient (within 1e-7).
from gradient_check import grad_check_sparse
f = lambda th: softmax_loss_naive(th, X_train, y_train, 0.0)[0]
grad_numerical = grad_check_sparse(f, theta, grad, 10)

# Now that we have a naive implementation of the softmax loss function and
# its gradient, implement a vectorized version in softmax_loss_vectorized.
# The two versions should compute the same results, but the vectorized
# version should be much faster.
tic = time.time()
loss_naive, grad_naive = softmax_loss_naive(theta, X_train, y_train, 0.00001)
toc = time.time()
print('naive loss: %e computed in %fs' % (loss_naive, toc - tic))
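# A minimal sketch of a numerically stable naive softmax loss, assuming
# theta is (D, C) and X is (N, D); the function name and the
# 0.5 * reg * ||theta||^2 regularization convention are assumptions.
import numpy as np

def softmax_loss_naive_sketch(theta, X, y, reg):
    loss = 0.0
    grad = np.zeros_like(theta)
    num_train = X.shape[0]
    for i in range(num_train):
        scores = X[i].dot(theta)
        scores -= scores.max()          # shift scores for numerical stability
        probs = np.exp(scores) / np.sum(np.exp(scores))
        loss += -np.log(probs[y[i]])
        for c in range(theta.shape[1]):
            # d loss_i / d theta[:, c] = (p_c - 1{c == y_i}) * x_i
            grad[:, c] += (probs[c] - (c == y[i])) * X[i]
    loss = loss / num_train + 0.5 * reg * np.sum(theta * theta)
    grad = grad / num_train + reg * theta
    return loss, grad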
x_val = np.hstack([x_val, np.ones((x_val.shape[0], 1))])
x_dev = np.hstack([x_dev, np.ones((x_dev.shape[0], 1))])
x_test = np.hstack([x_test, np.ones((x_test.shape[0], 1))])
print(x_train.shape, y_train.shape, x_val.shape, y_val.shape,
      x_dev.shape, y_dev.shape, x_test.shape, y_test.shape)

# main code
'''
# test svm naive
w = np.random.randn(3073, 10) * 0.0001
loss, grad = svm_loss_naive(w, x_dev, y_dev, 1e2)
f = lambda w: svm_loss_naive(w, x_dev, y_dev, 1e2)[0]
grad_numerical = grad_check_sparse(f, w, grad)
print('loss is : %f' % loss)
'''

'''
# test svm linear classifier
svm = LinearSVM()
tic = time.time()
loss_hist = svm.train(x_train, y_train, learning_rate=1e-7, reg=5e4,
                      num_iters=2500, verbose=True)
toc = time.time()
print('that took %f s' % (toc - tic))
y_train_pred = svm.predict(x_train)
y_val_pred = svm.predict(x_val)
print('training accuracy: %f' % (np.mean(y_train == y_train_pred)))
print('validation accuracy: %f' % (np.mean(y_val == y_val_pred)))
'''

# test softmax naive
w = np.random.randn(3073, 10) * 0.0001
loss, grad = softmax_loss_naive(w, x_dev, y_dev, 1e3)
f = lambda w: softmax_loss_naive(w, x_dev, y_dev, 1e3)[0]
grad_numerical = grad_check_sparse(f, w, grad)
print('loss is : %f' % loss)
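# Quick sanity check of the bias trick used above: appending a ones column
# folds the bias into the weight matrix, which is why W has 3073 rows
# (32 * 32 * 3 = 3072 pixel features, plus one bias dimension). A toy
# example with made-up shapes:
import numpy as np

X_toy = np.random.randn(5, 3072)
X_toy = np.hstack([X_toy, np.ones((X_toy.shape[0], 1))])
assert X_toy.shape == (5, 3073)  # X_toy.dot(W) now includes the bias term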
print("use a random weight to compute loss.....") # Generate a random softmax weight matrix and use it to compute the loss. W = np.random.randn(3073, 10) * 0.0001 loss, grad = softmax_loss_naive(W, X_dev, y_dev, 0.0) # As a rough sanity check, our loss should be something close to -log(0.1). print('loss: %f' % loss) print('sanity check: %f' % (-np.log(0.1))) print("check the naive gradient............") # Complete the implementation of softmax_loss_naive and implement a (naive) # version of the gradient that uses nested loops. loss, grad = softmax_loss_naive(W, X_dev, y_dev, 0.0) # As we did for the SVM, use numeric gradient checking as a debugging tool. # The numeric gradient should be close to the analytic gradient. f = lambda w: softmax_loss_naive(w, X_dev, y_dev, 0.0)[0] grad_numerical = grad_check_sparse(f, W, grad, 10) print("check the another naive gradient............") # similar to SVM case, do another gradient check with regularization loss, grad = softmax_loss_naive(W, X_dev, y_dev, 5e1) f = lambda w: softmax_loss_naive(w, X_dev, y_dev, 5e1)[0] grad_numerical = grad_check_sparse(f, W, grad, 10) print("the time between naive and vectorized..........") # Now that we have a naive implementation of the softmax loss function and its gradient, # implement a vectorized version in softmax_loss_vectorized. # The two versions should compute the same results, but the vectorized version should be # much faster. tic = time.time() loss_naive, grad_naive = softmax_loss_naive(W, X_dev, y_dev, 0.000005) toc = time.time() print('naive loss: %e computed in %fs' % (loss_naive, toc - tic))