import time

import numpy as np
import tensorflow as tf

from linear_classifier import LinearSVM


class DigitClassifier:
    def __init__(self):
        self.svm = LinearSVM()
        self.fit()

    def fit(self):
        mnist = tf.keras.datasets.mnist
        (X_train, y_train), (X_test, y_test) = mnist.load_data()

        num_train = 15000
        num_val = 1000
        num_dev = 500
        num_test = 10000

        # Validation set
        mask = range(num_train, num_train + num_val)
        X_val = X_train[mask]
        y_val = y_train[mask]

        # Train set
        mask = range(num_train)
        X_train = X_train[mask]
        y_train = y_train[mask]

        # Small training set (development set)
        mask = np.random.choice(num_train, num_dev, replace=False)
        X_dev = X_train[mask]
        y_dev = y_train[mask]

        # Preprocessing: reshape the image data into rows
        X_train = np.reshape(X_train, (X_train.shape[0], -1))
        X_val = np.reshape(X_val, (X_val.shape[0], -1))
        X_test = np.reshape(X_test, (X_test.shape[0], -1))
        X_dev = np.reshape(X_dev, (X_dev.shape[0], -1))

        # Append the bias dimension of ones
        X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
        X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
        X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
        X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])

        tic = time.time()
        loss_hist = self.svm.train(X_train, y_train, learning_rate=1e-7,
                                   reg=2.5e4, num_iters=1500, verbose=True)
        toc = time.time()
        print('That took %fs' % (toc - tic))

        # plt.plot(loss_hist)
        # plt.xlabel('Iteration number')
        # plt.ylabel('Loss value')
        # plt.show()

    def predict(self, x):
        return self.svm.predict(x)
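# A hypothetical usage sketch for the class above, assuming the assignment's
# LinearSVM is importable and a test image is preprocessed the same way fit()
# preprocesses training data (flatten to a row, append the bias feature).
clf = DigitClassifier()  # trains the underlying LinearSVM at construction time

(_, _), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
x = np.hstack([X_test[:1].reshape(1, -1), np.ones((1, 1))])
print('predicted:', clf.predict(x), 'actual:', y_test[0])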
# _, grad_vectorized = svm_loss_vectorized(W, X_dev, y_dev, 0.000005)
toc = time.time()
print('Vectorized loss and gradient: computed in %fs' % (toc - tic))

# The loss is a single number, so it is easy to compare the values computed
# by the two implementations. The gradient on the other hand is a matrix, so
# we use the Frobenius norm (square each element, sum them, then take the
# square root) to compare them.
# difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
# print('difference: %f' % difference)

'''
# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
from linear_classifier import LinearSVM
svm = LinearSVM()
tic = time.time()
loss_hist = svm.train(X_train, y_train, learning_rate=1e-7, reg=2.5e4,
                      num_iters=1500, verbose=True)
toc = time.time()
print('That took %fs' % (toc - tic))
'''

# A useful debugging strategy is to plot the loss as a function of
# iteration number:
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
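# A quick sanity check of the Frobenius norm described in the comment above
# (toy matrix, illustrative only): the manual definition matches numpy's.
import numpy as np

A = np.array([[1.0, 2.0], [3.0, 4.0]])
manual = np.sqrt(np.sum(A ** 2))          # square, sum, square root
builtin = np.linalg.norm(A, ord='fro')
print(manual, builtin)                    # both ~5.477
assert np.isclose(manual, builtin)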
# Numerically compute the gradient along several randomly chosen dimensions, and
# compare them with your analytically computed gradient. The numbers should match
# almost exactly along all dimensions.
f = lambda w: svm_loss_vectorized(w, X_dev, y_dev, 0.0)[0]
grad_numerical = grad_check_sparse(f, W, grad)

# Do the gradient check once again with regularization turned on.
# You didn't forget the regularization gradient, did you?
loss, grad = svm_loss_vectorized(W, X_dev, y_dev, 5e1)
f = lambda w: svm_loss_vectorized(w, X_dev, y_dev, 5e1)[0]
grad_numerical = grad_check_sparse(f, W, grad)

# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
svm = LinearSVM()
tic = time.time()
loss_hist = svm.train(X_train, y_train, learning_rate=1e-7, reg=2.5e4,
                      num_iters=1500, verbose=True)
toc = time.time()
print('That took %fs' % (toc - tic))

"""
# A useful debugging strategy is to plot the loss as a function of
# iteration number:
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.savefig('/home/hongyin/file/cs231n-assignment1/picFaster.jpg')
"""
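# The check above rests on the centered-difference approximation
# (f(w+h) - f(w-h)) / (2h). A minimal, self-contained sketch of the idea
# on a toy function (f, w, h, and the index i are all made up here):
import numpy as np

f = lambda w: np.sum(w ** 2)   # exact gradient is 2*w
w = np.random.randn(5)
h, i = 1e-5, 2
wp, wm = w.copy(), w.copy()
wp[i] += h
wm[i] -= h
grad_numerical = (f(wp) - f(wm)) / (2 * h)
print(grad_numerical, 2 * w[i])  # the two numbers should match almost exactly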
def cross_validation(X_train, y_train, X_val, y_val):
    ############################################################################
    # Use the validation set to tune hyperparameters (regularization strength
    # and learning rate). You should experiment with different ranges for the
    # learning rates and regularization strengths; if you are careful you
    # should be able to get a classification accuracy of about 0.4 on the
    # validation set.
    ############################################################################
    learning_rates = [1e-7, 5e-5]
    regularization_strengths = [5e4, 1e5]

    # results is a dictionary mapping tuples of the form
    # (learning_rate, regularization_strength) to tuples of the form
    # (training_accuracy, validation_accuracy). The accuracy is simply the
    # fraction of data points that are correctly classified.
    results = {}
    best_val = -1    # The highest validation accuracy that we have seen so far.
    best_svm = None  # The LinearSVM object that achieved the highest validation rate.

    ############################################################################
    # TODO:                                                                    #
    # Write code that chooses the best hyperparameters by tuning on the        #
    # validation set. For each combination of hyperparameters, train a linear  #
    # SVM on the training set, compute its accuracy on the training and        #
    # validation sets, and store these numbers in the results dictionary. In   #
    # addition, store the best validation accuracy in best_val and the         #
    # LinearSVM object that achieves this accuracy in best_svm.                #
    #                                                                          #
    # Hint: You should use a small value for num_iters as you develop your     #
    # validation code so that the SVMs don't take much time to train; once you #
    # are confident that your validation code works, you should rerun the      #
    # validation code with a larger value for num_iters.                       #
    ############################################################################
    iters = 2000  # 100
    for lr in learning_rates:
        for rs in regularization_strengths:
            svm = LinearSVM()
            svm.train(X_train, y_train, learning_rate=lr, reg=rs,
                      num_iters=iters)
            y_train_pred = svm.predict(X_train)
            acc_train = np.mean(y_train == y_train_pred)
            y_val_pred = svm.predict(X_val)
            acc_val = np.mean(y_val == y_val_pred)
            results[(lr, rs)] = (acc_train, acc_val)
            if best_val < acc_val:
                best_val = acc_val
                best_svm = svm
    ############################################################################
    #                            END OF YOUR CODE                             #
    ############################################################################

    # Print out results.
    for lr, reg in sorted(results):
        train_accuracy, val_accuracy = results[(lr, reg)]
        print('lr %e reg %e train accuracy: %f val accuracy: %f' %
              (lr, reg, train_accuracy, val_accuracy))
    print('best validation accuracy achieved during cross-validation: %f'
          % best_val)

    return results, best_svm
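# A hypothetical usage sketch, assuming the preprocessed splits from the
# earlier snippets (X_train, y_train, X_val, y_val, X_test, y_test) are in
# scope. The held-out test set is used only once, after tuning.
results, best_svm = cross_validation(X_train, y_train, X_val, y_val)
y_test_pred = best_svm.predict(X_test)
print('test accuracy: %f' % np.mean(y_test == y_test_pred))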
loss, grad = svmDevObj1.svm_loss_naive()
f = lambda w: svmDevObj1.svm_loss_naive()[0]
grad_numerical = grad_check_sparse(f, W, grad)
print('grad_numerical: ', grad_numerical)

# do the gradient check once again with regularization turned on
# you didn't forget the regularization gradient did you?
svmDevObj2 = SVM(W, X_dev, y_dev, 5e1)
loss, grad = svmDevObj2.svm_loss_naive()
f = lambda w: svmDevObj2.svm_loss_naive()[0]
grad_numerical = grad_check_sparse(f, W, grad)
print('grad_numerical: ', grad_numerical)

#####################
linearSVM = LinearSVM()
tic = time.time()
loss_hist = linearSVM.train(X_train, y_train, learning_rate=1e-7, reg=2.5e4,
                            num_iters=1500, verbose=True)
toc = time.time()
print('That took %fs' % (toc - tic))

# A useful debugging strategy is to plot the loss as a function of
# iteration number:
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
b1 = np.zeros((train_num, 256))
for i in range(train_num):
    a = np.bincount(train_images[i].astype(int), minlength=256).reshape(1, -1)
    b1[i] = a

b2 = np.zeros((test_num, 256))
for i in range(test_num):
    a = np.bincount(test_images[i].astype(int), minlength=256).reshape(1, -1)
    b2[i] = a

b3 = np.zeros((val_num, 256))
for i in range(val_num):
    # Must index val_images here, not test_images.
    a = np.bincount(val_images[i].astype(int), minlength=256).reshape(1, -1)
    b3[i] = a

# =============================================================================
# loss, grad = svm.svm_loss_naive(w, train_images, train_labels, reg)
# print(loss, grad)

svm = LinearSVM()  # create the classifier object; W is empty at this point
loss_hist = svm.train(train_images, train_labels, learning_rate=1e-7,
                      reg=2.5e4, num_iters=1500, verbose=True)  # the svm object now holds W
y_train_pred = svm.predict(train_images)
print('training accuracy: %f' % (np.mean(train_labels == y_train_pred)))
# y_val_pred = svm.predict(val_images)
# print('validation accuracy: %f' % (np.mean(val_labels == y_val_pred)))

# Hyperparameter tuning (cross-validation)
learning_rates = [1.4e-7, 1.5e-7, 1.6e-7]
# the list comprehensions (a concise for-loop) produce 12 values
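# For context: np.bincount with minlength=256 turns each grayscale image into
# a fixed-length histogram of pixel intensities, one bin per possible 8-bit
# value. A minimal illustration on a made-up toy array:
import numpy as np

img = np.array([0, 0, 3, 3, 3, 255])
hist = np.bincount(img, minlength=256)
print(hist.shape)                   # (256,) even though most bins are zero
print(hist[0], hist[3], hist[255])  # 2 3 1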
x[ix] = oldval + h
fxph = f(x)      # f(x + h)
x[ix] = oldval - h
fxmh = f(x)      # f(x - h)
x[ix] = oldval   # restore the original value
grad_numerical = (fxph - fxmh) / (2 * h)
grad_analytic = analytic_grad[ix]
rel_error = abs(grad_numerical - grad_analytic) / (abs(grad_numerical) + abs(grad_analytic))
print('numerical: %f analytic: %f, relative error: %e' %
      (grad_numerical, grad_analytic, rel_error))

# Now check the gradient again with the regularization term included
loss, grad = svm.svm_loss_naive(w, x_dev, y_dev, 5e1)
f = lambda w: svm.svm_loss_naive(w, x_dev, y_dev, 5e1)[0]
grad_numerical = grad_check_sparse(f, w, grad)

# Test the model
svm = LinearSVM()  # create the object; W is empty at this point
tic = time.time()
loss_hist = svm.train(x_train, y_train, learning_rate=1e-7, reg=2.5e4,
                      num_iters=1500, verbose=True)  # the svm object now holds W
toc = time.time()
print('that took %fs' % (toc - tic))

plt.plot(loss_hist)
plt.xlabel('iteration number')
plt.ylabel('loss value')
plt.show()

# After training, keep the learned parameters and use them for prediction;
# then compute the accuracy.
y_train_pred = svm.predict(x_train)
print('training accuracy: %f' % (np.mean(y_train == y_train_pred)))
y_val_pred = svm.predict(x_val)
print('validation accuracy: %f' % (np.mean(y_val == y_val_pred)))
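# The loop body at the top of this snippet is the inside of a sparse gradient
# checker. A self-contained sketch of the routine it plausibly belongs to;
# the signature and defaults are assumptions modeled on the CS231n utility.
import numpy as np

def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5):
    # Sample a few random coordinates of x and compare the numeric gradient
    # of f against the analytic gradient at each one.
    for _ in range(num_checks):
        ix = tuple(np.random.randint(m) for m in x.shape)
        oldval = x[ix]
        x[ix] = oldval + h
        fxph = f(x)                      # f(x + h)
        x[ix] = oldval - h
        fxmh = f(x)                      # f(x - h)
        x[ix] = oldval                   # restore
        grad_numerical = (fxph - fxmh) / (2 * h)
        grad_analytic = analytic_grad[ix]
        rel_error = (abs(grad_numerical - grad_analytic) /
                     (abs(grad_numerical) + abs(grad_analytic)))
        print('numerical: %f analytic: %f, relative error: %e' %
              (grad_numerical, grad_analytic, rel_error))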
# plt.ylabel('Loss value')
# plt.show()

# tune hyperparameters
learningRates = [1e-7]
regularization = [5e3]
# iteration = [3000, 4000, 5000, 6000, 7000, 8000]
iteration = [6000]

bestParams = []
bestValAcc = 0
bestSvm = None
for eta in learningRates:
    for r in regularization:
        for t in iteration:
            svm = LinearSVM()
            svm.train(X_train, y_train, learning_rate=eta, reg=r,
                      num_iters=t, verbose=True)
            y_train_pred = svm.predict(X_train)
            y_val_pred = svm.predict(X_val)
            trainAcc = np.mean(y_train == y_train_pred)
            valAcc = np.mean(y_val == y_val_pred)
            print('iteration: %d train accuracy: %.4f val accuracy: %.4f' %
                  (t, trainAcc, valAcc))
            if valAcc > bestValAcc:
                bestParams = [eta, r, t]
                bestValAcc = valAcc
                bestSvm = svm  # remember the model, not just its hyperparameters
# results is a dictionary mapping tuples of the form
# (learning_rate, regularization_strength) to tuples of the form
# (training_accuracy, validation_accuracy). The accuracy is simply the fraction
# of data points that are correctly classified.
results = {}
# The highest validation accuracy that we have seen so far.
best_val = -1
# The LinearSVM object that achieved the highest validation rate.
best_svm = None

# lr = learning rate, reg = regularization strength
for lr in learning_rates:
    for reg in regularization_strengths:
        # create a fresh SVM
        svm = LinearSVM()
        # train on the training set
        svm.train(X_train, y_train, learning_rate=lr, reg=reg, num_iters=500)
        # compute training set accuracy
        y_train_pred = svm.predict(X_train)
        training_accuracy = np.mean(y_train_pred == y_train)
        # print('Training set accuracy is %f' % (training_accuracy,))
        # compute validation set accuracy
        y_val_pred = svm.predict(X_val)
        validation_accuracy = np.mean(y_val_pred == y_val)
        # print('Validation set accuracy is %f' % (validation_accuracy,))
        # store the results
        results[(lr, reg)] = (training_accuracy, validation_accuracy)
        if validation_accuracy > best_val:
            best_val = validation_accuracy
            best_svm = svm
# grad_check_sparse(f, W, grad)

# time_start = time.time()
# loss_naive, gradient_naive = svm_loss_naive(W, X_dev, y_dev, 5e-6)
# time_end = time.time()
# print('Naive loss: ', loss_naive, ' use time: ', time_end - time_start)
#
# time_start = time.time()
# loss_vector, gradient_vector = svm_loss_vectorized(W, X_dev, y_dev, 5e-6)
# time_end = time.time()
# print('Vector loss: ', loss_vector, ' use time: ', time_end - time_start)
# print('loss difference: ', loss_vector - loss_naive)

from linear_classifier import LinearSVM

svm = LinearSVM()
time_start = time.time()
loss_history = svm.train(X_train, y_train, learning_rate=1.5e-7, reg=3.25e4,
                         num_iters=1500, batch_size=5000, verbose=True)
time_end = time.time()
print('train take time: ', time_end - time_start)

# Plotting the loss against the iteration count is helpful for debugging.
plt.plot(loss_history)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()

y_val_pred = svm.predict(X_val)
print('accuracy: %f' % (np.mean(y_val_pred == y_val)))
print('loss is: %f' % loss)

f = lambda w: svm.svm_loss_naive(w, x_dev, y_dev, 1e2)[0]
grad_numerical = grad_check_sparse(f, w, grad)

tic = time.time()
loss_naive, grad_naive = svm.svm_loss_naive(w, x_dev, y_dev, 0.00001)
toc = time.time()
print('naive loss: %e computed in %fs' % (loss_naive, toc - tic))

tic = time.time()
loss_vectorized, grad_vectorized = svm.svm_loss_vectorized(w, x_dev, y_dev, 0.00001)
toc = time.time()
print('vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic))
print('difference: %f' % (loss_naive - loss_vectorized))

svm = LinearSVM()
tic = time.time()
loss_hist = svm.train(x_train, y_train, learning_rate=1e-7,
                      reg=5e4, num_iters=1500, verbose=True)
toc = time.time()
print('that took %fs' % (toc - tic))

y_train_pred = svm.predict(x_train)
print('training accuracy: %f' % (np.mean(y_train == y_train_pred)))
y_val_pred = svm.predict(x_val)
print('validation accuracy: %f' % (np.mean(y_val == y_val_pred)))

learning_rates = [1.4e-7, 1.5e-7, 1.6e-7]
regularization_strengths = ([(1 + i * 0.1) * 1e4 for i in range(-3, 3)] +
                            [(2 + 0.1 * i) * 1e4 for i in range(-3, 3)])
results = {}
best_val = -1
best_svm = None
# reshape the image data into rows
x_train = np.reshape(x_train, (x_train.shape[0], -1))
x_val = np.reshape(x_val, (x_val.shape[0], -1))
x_test = np.reshape(x_test, (x_test.shape[0], -1))

# subtract the mean image
mean_image = np.mean(x_train, axis=0)
x_train -= mean_image
x_val -= mean_image
x_test -= mean_image

# append the bias dimension of ones, then transpose so columns are samples
x_train = np.hstack([x_train, np.ones((x_train.shape[0], 1))]).T
x_val = np.hstack([x_val, np.ones((x_val.shape[0], 1))]).T
x_test = np.hstack([x_test, np.ones((x_test.shape[0], 1))]).T

svm = LinearSVM()

# find the best learning_rate and regularization_strength
learning_rates = [5e-8, 1e-7, 5e-7, 1e-6]
regularization_strengths = [1e3, 5e3, 1e4, 5e4, 1e5, 5e5]
results = {}
# The highest validation accuracy that we have seen so far
best_val = -1
# The LinearSVM object that achieved the highest validation rate
best_svm = None

for strength in regularization_strengths:
    for rate in learning_rates:
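# A tiny illustration of the bias trick used above, on made-up toy data:
# appending a constant 1 to every row folds the bias b into the weight
# matrix, so scores = W.dot(x) covers W.dot(x) + b; the final .T puts
# samples in columns to match this snippet's convention.
import numpy as np

X = np.arange(12, dtype=float).reshape(3, 4)    # 3 samples, 4 features
Xb = np.hstack([X, np.ones((X.shape[0], 1))])   # (3, 5): constant-1 bias column
print(Xb.shape, Xb.T.shape)                     # (3, 5) (5, 3)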
# val_pred = svm.predict(val_data)
# print('Accuracy: %f' % np.mean(val_labels == val_pred))

learning_rates = [1.4e-7, 1.5e-7, 1.6e-7]
# Use a distinct name here; calling this variable `range` would shadow the builtin.
span = range(-3, 3)
regularization_strengths = ([(1 + i * 0.1) * 1e4 for i in span] +
                            [(2 + 0.1 * i) * 1e4 for i in span])
results = {}
best_val = -1
best_svm = None

for rs in regularization_strengths:
    for lr in learning_rates:
        svm = LinearSVM()
        loss_hist = svm.train(train_data, train_labels, lr, rs, num_iters=3000)
        train_labels_pred = svm.predict(train_data)
        train_accuracy = np.mean(train_labels == train_labels_pred)
        val_labels_pred = svm.predict(val_data)
        val_accuracy = np.mean(val_labels == val_labels_pred)
        if val_accuracy > best_val:
            best_val = val_accuracy
            best_svm = svm
        results[(lr, rs)] = train_accuracy, val_accuracy
        print('lr %e reg %e train accuracy: %f val accuracy: %f' %
              (lr, rs, train_accuracy, val_accuracy))
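# For clarity, the two list comprehensions above sweep regularization
# strengths around 1e4 and 2e4; a quick check of what they generate:
span = range(-3, 3)  # -3, -2, -1, 0, 1, 2
regs = [(1 + i * 0.1) * 1e4 for i in span] + [(2 + 0.1 * i) * 1e4 for i in span]
print(len(regs))                     # 12 candidate strengths
print([round(r) for r in regs[:3]])  # [7000, 8000, 9000]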