示例#1
0
文件: apply.py 项目: xxxibntkjgn/ML
plt.show()

# c) Adaboost using the simple classifiers

kMax = 50  # Number of weak classifiers
nSamples = 20  # Number of random samples to train each classifier

# Compute parameters of K classifiers and the voting weight for each classifier
alphaK, para = adaboostSimple(X, Y, kMax, nSamples)

# Sample test data from a regular grid to illustrate the decision regions
X_grid, Y_grid, x, y = generate_grid(X)
X_ = np.vstack((X_grid.reshape((-1, )), Y_grid.reshape((-1, )))).T

# Compute discrete class predictions and continuous class probabilities
classLabels, logits = eval_adaBoost_simpleClassifier(X_, alphaK, para)

# Show decision surface
plot_decision_surface(X_, classLabels, X, Y, 'Decision Surface')

# Visualize logits
logits_r = np.reshape(logits, (len(x), len(y)))  # reshape into original shape
plot_logits(logits_r, 'Weighted sum of weak classifier results')
"""
# d) Adaboost with cross-validation
#
# Same boosting settings as a plain AdaBoost run, plus the share of the data
# that adaboostCross sets aside as a test set.

kMax = 50          # Number of weak classifiers (upper bound on boosting rounds)
nSamples = 20      # Number of random samples to train each classifier
percent = 0.2      # Fraction of the data held out as test data (0.2 = 20 %)

# Returns the voting weights, the weak-classifier parameters, the held-out
# test split and the test error recorded per boosting round.
alphaK, para, testX, testY, error = adaboostCross(X, Y, kMax, nSamples, percent)
示例#2
0
def adaboostCross(X, Y, K, nSamples, percent):
    """Train AdaBoost on simple threshold classifiers with a held-out test split.

    A random ``percent`` share of the data is removed up front and used only
    to measure the error of the boosted ensemble after every round.

    Parameters
    ----------
    X : ndarray, (numSamples x numDims)
        Training examples.
    Y : ndarray, (numSamples x 1) or (numSamples,)
        Training labels; the update rule ``Y * cY`` assumes values in {-1, +1}.
    K : int
        Maximal number of weak classifiers (training may stop earlier).
    nSamples : int
        Number of examples drawn (with replacement, weighted by ``w``) to
        train each weak classifier.
    percent : float
        Fraction of the data set that is used as test data.

    Returns
    -------
    alphaK : ndarray, (K,)
        Voting weight of each weak classifier.
    para : ndarray, (K x 2)
        Column 0 is the feature index ``j`` (1-based, see the ``j - 1`` below),
        column 1 the threshold ``theta`` of each weak classifier.
    testX, testY : ndarray
        The held-out examples and labels, indexed straight out of ``X``/``Y``.
    error : ndarray, (K,)
        Test error of the ensemble after each round.  Entries for the round
        that triggers the early stop and for all later rounds stay 0, as do
        the corresponding rows of ``para`` and entries of ``alphaK`` (except
        the stopping round, whose ``alphaK`` is set to 1).
    """
    # Randomly split off the test set; everything else is training data.
    N = len(X)
    numb = round(N * percent)
    pos = choice(N, numb, False)
    restpos = np.setdiff1d(np.arange(N), pos)

    testX = X[pos]
    testY = Y[pos]
    X = X[restpos]
    Y = Y[restpos]

    n = N - numb

    # Work with an explicit column vector for the labels so that element-wise
    # products with the (n x 1) predictions never broadcast to (n x n), and a
    # flat copy of the test labels for the error count.
    y_col = np.reshape(Y, (n, 1))
    y_test = np.reshape(testY, -1)

    # Parameters of the weak classifiers and their voting weights.
    j = np.zeros(K)
    theta = np.zeros(K)
    alphaK = np.zeros(K)
    error = np.zeros(K)

    # Uniform initial sample weights, kept as a column vector.
    w = np.ones((n, 1)) / n

    for k in range(K):
        # Draw the training subset according to the current weights.
        index = choice(n, nSamples, True, w.ravel())

        # Train the weak classifier C_k on the weighted draw.
        j[k], theta[k] = simpleClassifier(X[index, :], Y[index])

        # Predict on the whole training set: +1 above the threshold, else -1.
        cY = -np.ones((n, 1))
        cY[X[:, int(j[k] - 1)] > theta[k]] = 1

        # Weighted error of C_k (sum of weights of misclassified points).
        ek = np.sum(w * (y_col != cY))

        # Stop early once the weighted error drops below 0.1; the last
        # classifier gets a fixed vote of 1.
        if ek < 1.0e-01:
            alphaK[k] = 1
            break

        # Voting weight alpha_k of the weak classifier.
        alphaK[k] = 0.5 * np.log((1 - ek) / ek)

        # Re-weight: misclassified points gain weight, then normalise.
        w = w * np.exp(-alphaK[k] * (y_col * cY))
        w = w / np.sum(w)

        # Test error of the ensemble built from the first k + 1 classifiers.
        para = np.stack((j[:k + 1], theta[:k + 1]), axis=1)
        classlabels, _ = eval_adaBoost_simpleClassifier(
            testX, alphaK[:k + 1], para)
        wrong = np.count_nonzero(np.reshape(classlabels, -1) != y_test)
        error[k] = wrong / len(testY)

    para = np.stack((j, theta), axis=1)

    return alphaK, para, testX, testY, error
示例#3
0
def adaboostCross(X, Y, K, nSamples, percent):
    """Train AdaBoost on simple threshold classifiers with a held-out test split.

    A random ``percent`` share of the data is removed up front and used only
    to measure the error of the boosted ensemble after every round.

    Parameters
    ----------
    X : ndarray, (numSamples x numDims)
        Training examples (must be 2-D; its shape is unpacked).
    Y : ndarray, (numSamples x 1) or (numSamples,)
        Training labels; the update rule ``cY * train_Y`` assumes values in
        {-1, +1}.
    K : int
        Maximal number of weak classifiers (training may stop earlier).
    nSamples : int
        Number of examples drawn (with replacement, weighted by ``w``) to
        train each weak classifier.
    percent : float
        Fraction of the data set that is used as test data.

    Returns
    -------
    alphaK : ndarray, (K,)
        Voting weight of each weak classifier.
    para : ndarray, (K x 2)
        Column 0 is the feature index ``j`` (1-based, see the ``j - 1`` below),
        column 1 the threshold ``theta`` of each weak classifier.
    testX, testY : ndarray
        The held-out examples and labels, indexed straight out of ``X``/``Y``.
    error : ndarray, (K,)
        Test error of the ensemble after each round.  Entries for the round
        that triggers the early stop and for all later rounds stay 0.
    """
    N, _ = X.shape

    # Randomly split off the test set; everything else is training data.
    pos = choice(N, round(N * percent), False)
    restpos = np.setdiff1d(np.arange(N), pos)

    # Take the training size from the split itself: rounding N * (1 - percent)
    # separately can disagree with N - round(N * percent) by one.
    train_N = len(restpos)

    train_X = X[restpos, :]
    train_Y = Y[restpos]
    testX = X[pos]
    testY = Y[pos]

    # Column view of the training labels so products with the (train_N x 1)
    # predictions stay element-wise, and a flat view of the test labels.
    y_col = np.reshape(train_Y, (train_N, 1))
    y_test = np.reshape(testY, -1)

    error = np.zeros(K)
    j = np.zeros(K)
    theta = np.zeros(K)
    alphaK = np.zeros(K)

    # The weights regulate how likely each training point is to be drawn.
    w = np.ones((train_N, 1)) / train_N

    for k in range(K):
        # Draw the training subset according to the current weights.
        index = choice(train_N, nSamples, True, w.ravel())

        # Train the weak classifier C_k on the weighted draw.
        j[k], theta[k] = simpleClassifier(train_X[index, :], train_Y[index])

        # Predict on the whole training set: +1 above the threshold, else -1.
        cY = -np.ones((train_N, 1))
        cY[train_X[:, int(j[k] - 1)] > theta[k]] = 1

        # Weighted error of C_k, measured against the *training* labels
        # (not the unsplit Y, whose rows do not line up with train_X).
        ek = np.sum(w * (y_col != cY))

        # Stop early once the weighted error drops below 0.1; the last
        # classifier gets a fixed vote of 1.
        if ek < 1.0e-01:
            alphaK[k] = 1
            break

        # Voting weight alpha_k of the weak classifier.
        alphaK[k] = 0.5 * np.log((1 - ek) / ek)

        # Re-weight and normalise, otherwise the weights grow exponentially.
        w = w * np.exp(-alphaK[k] * cY * y_col)
        w = w / np.sum(w)

        # Test error of the ensemble built from the first k + 1 classifiers.
        para = np.stack((j[:k + 1], theta[:k + 1]), axis=1)
        classlabels, _ = eval_adaBoost_simpleClassifier(
            testX, alphaK[:k + 1], para)
        wrong = np.count_nonzero(np.reshape(classlabels, -1) != y_test)
        error[k] = wrong / len(testY)

    para = np.stack((j, theta), axis=1)

    return alphaK, para, testX, testY, error
示例#4
0
# c) Adaboost using the simple classifiers

kMax=50  # Number of weak classifiers
nSamples = 20      # Number of random samples to train each classifier

# Compute parameters of K classifiers and the voting weight for each classifier
alphaK, para = adaboostSimple(X, Y, kMax, nSamples)

# Sample test data from a regular grid to illustrate the decision regions
X_grid, Y_grid, x, y = generate_grid(X)
# Flatten both coordinate grids and pair them up: one grid point per row
X_ = np.vstack((X_grid.reshape((-1,)), Y_grid.reshape((-1,)))).T

# Compute discrete class predictions and the continuous ensemble output
# (named logits here) for every grid point
classLabels, logits = eval_adaBoost_simpleClassifier(X_, alphaK, para)

# Show decision surface
plot_decision_surface(X_, classLabels, X, Y, 'Decision Surface')

# Visualize logits
logits_r = np.reshape(logits, (len(x), len(y)))  # reshape into original shape
plot_logits(logits_r, 'Weighted sum of weak classifier results')


# d) Adaboost with cross-validation

kMax = 50          # Number of weak classifiers
nSamples = 20      # Number of random samples to train each classifier
percent = 0.2      # Fraction of the data held out as test data (0.2 = 20 %)
def adaboostCross(X, Y, K, nSamples, percent):
    """Train AdaBoost on simple threshold classifiers with a held-out test split.

    In every round ``nSamples`` training points are drawn uniformly without
    replacement; the weights ``W`` only weight the error on that draw and are
    scaled up for the points the new classifier gets wrong.  ``W`` is not
    re-normalised between rounds.

    Parameters
    ----------
    X : array_like, (numSamples x numDims)
        Training examples.
    Y : array_like, numSamples elements
        Training labels; reshaped to (numSamples x 1).  Predictions are
        +1 / -1, so labels are expected to use the same coding.
    K : int
        Number of weak classifiers to train.
    nSamples : int
        Number of training examples drawn in each round.
    percent : float
        Fraction of the data set that is used as test data.

    Returns
    -------
    alphaK : ndarray, (K x 1)
        Voting weight of each weak classifier.
    para : ndarray, (K x 2)
        Column 0 is the feature index ``j``, column 1 the threshold ``theta``
        returned by ``simpleClassifier``.
    testX : ndarray, (numTestSamples x numDims)
        Held-out examples.
    testY : ndarray, (numTestSamples x 1)
        Held-out labels.
    error : ndarray, (K,)
        Test error of the ensemble after each round.
    """
    numSamples, numDim = np.shape(X)
    test_num = int(numSamples * percent)
    training_num = numSamples - test_num
    if training_num < nSamples:
        # Only reported here; the without-replacement draw below will raise
        # as soon as more samples are requested than are available.
        print("Error: 测试集错误")

    X = np.reshape(X, (numSamples, numDim))
    Y = np.reshape(Y, (numSamples, 1))

    # Initialization: uniform weights, empty result containers.
    W = np.ones([training_num, 1]) * 1 / training_num
    alphaK = np.zeros([K, 1])
    para = np.zeros([K, 2])
    error = np.zeros([K, 1])

    # Randomly pick the test rows; all remaining rows (in their original
    # order) form the training set.
    choice_test = choice(numSamples, test_num, replace=False)
    testX = X[choice_test]
    testY = Y[choice_test]
    is_train = np.ones(numSamples, dtype=bool)
    is_train[choice_test] = False
    trainingX = X[is_train]
    trainingY = Y[is_train]

    # Keeps the log below finite when a classifier is perfect (error 0) or
    # completely wrong (error 1) on its draw.
    eps = 1e-10

    for i in range(K):
        # Draw this round's training subset and the matching weights.
        my_randomorder = choice(training_num, nSamples, replace=False)
        training_set = trainingX[my_randomorder]
        training_lables = trainingY[my_randomorder]
        W_nsample = W[my_randomorder]

        # Train the weak classifier and remember its parameters.
        # NOTE(review): j is used directly as a column index, i.e. this
        # assumes simpleClassifier returns a 0-based integer - confirm.
        j, theta = simpleClassifier(training_set, training_lables)
        para[i, 0] = j
        para[i, 1] = theta

        # Predict on the draw: -1 below the threshold, +1 otherwise.
        result_lables = np.ones([nSamples, 1])
        result_lables[training_set[:, j] < theta] = -1
        wrong = result_lables != training_lables

        # Weighted error on the draw, relative to the draw's total weight.
        training_error = np.sum(W_nsample[wrong]) / np.sum(W_nsample)
        training_error = np.clip(training_error, eps, 1 - eps)
        alphaK[i, 0] = 0.5 * np.log((1 - training_error) / training_error)

        # Scale up the weights of the misclassified points only.
        W[my_randomorder] = np.where(
            wrong, W_nsample * np.exp(alphaK[i, 0]), W_nsample)

        # Test error of the ensemble including the classifier just trained
        # (rows 0..i), so round 0 already evaluates one classifier.
        classLabels, _ = eval_adaBoost_simpleClassifier(
            testX, alphaK[:i + 1, :], para[:i + 1, :])
        mismatches = np.reshape(classLabels, (test_num, 1)) != testY
        error[i, 0] = np.sum(mismatches) / test_num

    error = error[:, 0]
    return alphaK, para, testX, testY, error