plt.show() # c) Adaboost using the simple classifiers kMax = 50 # Number of weak classifiers nSamples = 20 # Number of random samples to train each classifier # Compute parameters of K classifiers and the voting weight for each classifier alphaK, para = adaboostSimple(X, Y, kMax, nSamples) # Sample test data from a regular grid to illustrate the decision regions X_grid, Y_grid, x, y = generate_grid(X) X_ = np.vstack((X_grid.reshape((-1, )), Y_grid.reshape((-1, )))).T # Compute discrete class predictions and continuous class probabilities classLabels, logits = eval_adaBoost_simpleClassifier(X_, alphaK, para) # Show decision surface plot_decision_surface(X_, classLabels, X, Y, 'Decision Surface') # Visualize logits logits_r = np.reshape(logits, (len(x), len(y))) # reshape into original shape plot_logits(logits_r, 'Weighted sum of weak classifier results') """ # d) Adaboost with cross-validation kMax = 50 # Number of weak classifiers nSamples = 20 # Number of random samples to train each classifier percent = 0.2 # Percentage of test data alphaK, para, testX, testY, error = adaboostCross(X, Y, kMax, nSamples, percent)
def adaboostCross(X, Y, K, nSamples, percent):
    """Train AdaBoost on a random train/test split and track the test error.

    A random share of the samples is held out as a test set.  Up to ``K``
    weak classifiers are trained on weighted resamples of the remaining
    data, and after every round the boosted classifier built so far is
    scored on the held-out samples.

    INPUT:
    X        : examples (numSamples x numDims)
    Y        : labels (numSamples x 1); a flat (numSamples,) array is
               accepted as well.  Labels are multiplied with the -1 / +1
               predictions, so they are expected to be -1 / +1 themselves.
    K        : maximal number of weak classifiers (training may stop earlier)
    nSamples : number of examples drawn (with replacement, weighted) per round
    percent  : fraction of the data set that is held out as test data

    OUTPUT:
    alphaK   : voting weights (K,); entries of rounds that never ran stay 0
    para     : weak classifier parameters (K x 2), columns [j, theta]
    testX    : held-out examples (numTestSamples x numDims)
    testY    : held-out labels (numTestSamples x 1)
    error    : held-out error rate after each of the K rounds (K,)
    """
    X = np.asarray(X)
    # Work with a column vector: flat labels would broadcast against the
    # (n, 1) predictions into an (n, n) matrix and corrupt weights and errors.
    Y = np.asarray(Y).reshape(-1, 1)

    # Randomly hold out a share of the data as the test set.
    N = len(X)
    numb = round(N * percent)
    pos = choice(N, numb, False)
    restpos = np.setdiff1d(np.arange(N), pos)

    testX = X[pos]
    testY = Y[pos]
    X = X[restpos]
    Y = Y[restpos]
    n = len(restpos)

    j = np.zeros(K)
    theta = np.zeros(K)
    alphaK = np.zeros(K)
    error = np.zeros(K)
    w = np.full((n, 1), 1.0 / n)  # uniform sample weights to start with

    for k in range(K):
        # Draw the training sample of this round according to the weights.
        index = choice(n, nSamples, True, w.ravel())
        j[k], theta[k] = simpleClassifier(X[index, :], Y[index])

        # Predict on the full training split.  The feature index is shifted
        # by one before use (presumably simpleClassifier returns a 1-based
        # index -- confirm against its implementation).
        cY = -np.ones((n, 1))
        cY[X[:, int(j[k] - 1)] > theta[k]] = 1

        # Weighted training error of this weak classifier.
        ek = np.sum(w[Y != cY])

        # A weak classifier with weighted error below 0.1 ends the training;
        # it receives a fixed voting weight of 1.
        stop = ek < 1.0e-01
        if stop:
            alphaK[k] = 1
        else:
            alphaK[k] = 0.5 * np.log((1 - ek) / ek)
            # Raise the weight of misclassified samples, lower the others.
            w = w * np.exp(-alphaK[k] * (Y * cY))
            w = w / np.sum(w)

        # Held-out error of the boosted classifier built so far.
        para = np.stack((j[:k + 1], theta[:k + 1]), axis=1)
        classlabels, _ = eval_adaBoost_simpleClassifier(
            testX, alphaK[:k + 1], para[:k + 1])
        classlabels = np.asarray(classlabels).reshape(-1, 1)
        error[k] = np.count_nonzero(classlabels != testY) / len(testY)

        if stop:
            # The remaining rounds never run and keep alpha = 0, so the
            # ensemble no longer changes; carry its error forward instead of
            # reporting a misleading 0 (assumes the evaluation is an
            # alpha-weighted vote -- confirm).
            error[k + 1:] = error[k]
            break

    para = np.stack((j, theta), axis=1)
    return alphaK, para, testX, testY, error
def adaboostCross(X, Y, K, nSamples, percent):
    """AdaBoost with a hold-out split that reports the test error per round.

    The data is split at random into a test part (``percent`` of the
    samples) and a training part.  Each round trains one weak classifier on
    a weighted resample of the training part, re-weights the training
    samples, and scores the boosted classifier built so far on the test part.

    INPUT:
    X        : examples (numSamples x numDims)
    Y        : labels (numSamples x 1); a flat (numSamples,) array is
               accepted as well.  Labels are multiplied with the -1 / +1
               predictions, so they are expected to be -1 / +1 themselves.
    K        : maximal number of weak classifiers (training may stop earlier)
    nSamples : number of examples drawn (with replacement, weighted) per round
    percent  : fraction of the data set that is used as test data

    OUTPUT:
    alphaK   : voting weights (K,); entries of rounds that never ran stay 0
    para     : weak classifier parameters (K x 2), columns [j, theta]
    testX    : test examples (numTestSamples x numDims)
    testY    : test labels (numTestSamples x 1)
    error    : test error rate after each of the K rounds (K,)
    """
    X = np.asarray(X)
    # Column-vector labels keep every comparison and product at shape (n, 1).
    Y = np.asarray(Y).reshape(-1, 1)

    # Randomly pick the test rows; everything else is training data.
    N = X.shape[0]
    pos = choice(N, round(N * percent), False)
    restpos = np.setdiff1d(np.arange(N), pos)
    train_X = X[restpos, :]
    train_Y = Y[restpos]
    testX = X[pos]
    testY = Y[pos]

    # Take the training size from the split itself.  Rounding N * percent and
    # N * (1 - percent) independently can disagree with it (N = 5,
    # percent = 0.5 rounds to 2 + 2).
    train_N = len(restpos)

    error = np.zeros(K)
    j = np.zeros(K)
    theta = np.zeros(K)
    alpha = np.zeros(K)
    # Sampling probability of every training point, uniform at the start.
    w = np.full((train_N, 1), 1.0 / train_N)

    for k in range(K):
        index = choice(train_N, nSamples, True, w.ravel())
        j[k], theta[k] = simpleClassifier(train_X[index, :], train_Y[index])

        # Predictions of the new weak classifier on the whole training part.
        # The feature index is shifted by one before use (presumably
        # simpleClassifier returns a 1-based index -- confirm).
        cY = -np.ones((train_N, 1))
        cY[train_X[:, int(j[k] - 1)] > theta[k]] = 1

        # Weighted error against the TRAINING labels (not the full label set,
        # whose rows do not line up with the training rows).
        ek = np.sum(w[train_Y != cY])

        # A weak classifier with weighted error below 0.1 ends the training;
        # it receives a fixed voting weight of 1.
        stop = ek < 1.0e-01
        if stop:
            alpha[k] = 1
        else:
            alpha[k] = 0.5 * np.log((1 - ek) / ek)
            w = w * np.exp(-alpha[k] * cY * train_Y)
            # Normalise, otherwise the weights grow exponentially.
            w = w / np.sum(w)

        # Test error of the boosted classifier built so far.
        para = np.stack((j[:k + 1], theta[:k + 1]), axis=1)
        classlabels, _ = eval_adaBoost_simpleClassifier(
            testX, alpha[:k + 1], para[:k + 1])
        classlabels = np.asarray(classlabels).reshape(-1, 1)
        error[k] = np.count_nonzero(classlabels != testY) / len(testY)

        if stop:
            # Later rounds never run and keep alpha = 0, so the ensemble no
            # longer changes; carry its error forward instead of leaving 0
            # (assumes the evaluation is an alpha-weighted vote -- confirm).
            error[k + 1:] = error[k]
            break

    alphaK = alpha
    para = np.stack((j, theta), axis=1)
    return alphaK, para, testX, testY, error
# c) AdaBoost built from the simple classifiers
kMax = 50  # number of weak classifiers
nSamples = 20  # number of random samples used to train each classifier

# Fit the weak classifiers and get one voting weight per classifier
alphaK, para = adaboostSimple(X, Y, kMax, nSamples)

# Sample test points from a regular grid to illustrate the decision regions
X_grid, Y_grid, x, y = generate_grid(X)
X_ = np.vstack((X_grid.reshape((-1,)), Y_grid.reshape((-1,)))).T

# Discrete class predictions plus the continuous scores behind them
classLabels, logits = eval_adaBoost_simpleClassifier(X_, alphaK, para)

# Show the decision surface
plot_decision_surface(X_, classLabels, X, Y, 'Decision Surface')

# Visualize the logits, reshaped back onto the grid
logits_r = np.reshape(logits, (len(x), len(y)))
plot_logits(logits_r, 'Weighted sum of weak classifier results')

# d) AdaBoost with cross-validation
kMax = 50  # number of weak classifiers
nSamples = 20  # number of random samples used to train each classifier
percent = 0.2  # fraction of the data used as test data
def adaboostCross(X, Y, K, nSamples, percent):
    """AdaBoost with a hold-out split that reports the test error per round.

    ``int(numSamples * percent)`` randomly chosen samples form the test set;
    the rest is used for training.  Each of the ``K`` rounds trains one weak
    classifier on ``nSamples`` training points, derives its voting weight
    from the weighted error on those points, up-weights the points it got
    wrong, and scores the boosted classifier built so far on the test set.

    INPUT:
    X        : examples (numSamples x numDims)
    Y        : labels (numSamples x 1); reshaped to a column internally
    K        : number of weak classifiers / boosting rounds
    nSamples : number of training examples drawn in each round
    percent  : fraction of the data set that is used as test data

    OUTPUT:
    alphaK   : voting weights (K x 1)
    para     : weak classifier parameters (K x 2), columns [j, theta]
    testX    : test examples (numTestSamples x numDims)
    testY    : test labels (numTestSamples x 1)
    error    : test error rate after each of the K rounds (K,)
    """
    numSamples, numDim = np.shape(X)
    test_num = int(numSamples * percent)
    training_num = numSamples - test_num
    if training_num < nSamples:
        print("Error: 测试集错误")

    X = np.reshape(X, (numSamples, numDim))
    Y = np.reshape(Y, (numSamples, 1))

    # Initialization: uniform sample weights, empty result tables.
    W = np.ones([training_num, 1]) / training_num
    alphaK = np.zeros([K, 1])
    para = np.zeros([K, 2])
    error = np.zeros([K, 1])

    # Random test rows; every other row (in original order) is training data.
    choice_test = choice(numSamples, test_num, replace=False)
    testX = X[choice_test]
    testY = Y[choice_test]
    is_training = np.ones(numSamples, dtype=bool)
    is_training[choice_test] = False
    trainingX = X[is_training]
    trainingY = Y[is_training]

    # Keeps the error away from 0 and 1, where alpha would become +/-inf and
    # the weight update would produce NaN.
    tiny = np.finfo(float).eps

    for i in range(K):
        # NOTE(review): the round's points are drawn uniformly without
        # replacement; the weights only enter through the weighted error
        # below.  The exercise header asks for weighted sampling -- confirm
        # whether this is intended.
        my_randomorder = choice(training_num, nSamples, replace=False)
        training_set = trainingX[my_randomorder]
        training_lables = trainingY[my_randomorder]
        W_nsample = W[my_randomorder]

        j, theta = simpleClassifier(training_set, training_lables)
        para[i, 0] = j
        para[i, 1] = theta

        # Predict +1 unless the selected feature lies below the threshold.
        result_lables = np.ones([nSamples, 1])
        result_lables[training_set[:, j] < theta] = -1
        wrong = result_lables != training_lables

        # Weighted error on the drawn points, relative to their total weight.
        training_error = np.sum(W_nsample[wrong]) / np.sum(W_nsample)
        training_error = np.clip(training_error, tiny, 1 - tiny)
        alphaK[i, 0] = 0.5 * np.log((1 - training_error) / training_error)

        # Scale up the weight of the misclassified points only.
        W[my_randomorder] = np.where(
            wrong, W_nsample * np.exp(alphaK[i, 0]), W_nsample)
        # Only weight ratios matter above, so rescaling changes nothing but
        # keeps the values from growing without bound over many rounds.
        W = W / np.sum(W)

        # Test error of the ensemble INCLUDING the classifier just trained.
        classLabels, _ = eval_adaBoost_simpleClassifier(
            testX, alphaK[:i + 1, :], para[:i + 1, :])
        predicted = np.reshape(classLabels, (test_num, 1))
        error[i, 0] = np.count_nonzero(predicted != testY) / test_num

    error = error[:, 0]
    return alphaK, para, testX, testY, error