Example #1
0
def trainAdaBoost(x, y, num_of_trees):
    """Train one AdaBoost ensemble of decision stumps per class (one-vs-all).

    For every unique class label in ``y`` (iterated in reversed order), the
    labels are binarized one-vs-all and ``num_of_trees`` depth-1 trees
    (stumps) are boosted on them.

    Returns a list with one ensemble per class label; each ensemble is a
    list of ``(alpha, stump)`` pairs.
    """
    n_samples, _ = np.shape(x)
    # Reversed unique class labels (e.g. poor / median / excellent)
    class_names = np.flip(np.unique(y))

    list_of_ensembles = []
    for class_name in class_names:
        y_binary = convertOneVsAllToBinary(y, class_name)
        # Uniform per-sample weights to start this one-vs-all problem
        weights = np.ones((n_samples, 1), dtype=float)

        ensemble = []
        for _ in range(num_of_trees):
            stump = Tree(x, y_binary, 1, weights)
            stump.train(x, y_binary, 'boosting', weights)

            y_pred = testTree(x, stump).astype(int)
            epsilon = calculateEpsilon(y_binary, y_pred, weights)
            alpha = calculateAlpha(epsilon)
            ensemble.append((alpha, stump))

            # Re-weight samples so the next stump focuses on mistakes
            weights = reweight(y_binary, y_pred, weights)

        list_of_ensembles.append(ensemble)

    return list_of_ensembles
Example #2
0
def trainTree(x, y, depth, weights=None):
    """Build and train a single decision tree of the given depth.

    Optional per-sample ``weights`` are forwarded to the boosting-style
    trainer. Returns the trained ``Tree`` instance.
    """
    tree = Tree(x, y, depth)
    tree.train(x, y, 'boosting', weights)
    return tree
Example #3
0
def trainBaggingEnsemble(x, y, depth, num_of_trees):
    """Train a bagging ensemble of decision trees.

    Each tree is trained on a fresh random 75% subsample (drawn without
    replacement) of the rows of ``x``/``y``.

    Returns the list of trained ``Tree`` objects.
    """
    sample_fraction = 3 / 4
    rows, _ = np.shape(x)
    num_of_training_data = math.ceil(rows * sample_fraction)
    row_indexes = np.arange(rows)

    ensemble = []
    for _ in range(num_of_trees):
        # Shuffle only the index array and take the first 75% of it.
        # The original code rebound x/y to fully shuffled copies each
        # iteration, copying the whole dataset per tree for no benefit.
        np.random.shuffle(row_indexes)
        sample = row_indexes[:num_of_training_data]
        training_x = x[sample, :]
        training_y = y[sample, :]

        dTree = Tree(training_x, training_y, depth)
        dTree.train(training_x, training_y, 'bagging')
        ensemble.append(dTree)

    return ensemble