Пример #1
0
def q6():
    """ Bagging - sample with replacement """
    spamData = hw3.pandas_to_data(hw3.load_and_normalize_spambase())
    y, X = hw4.split_truth_from_data(spamData)
    bagged = bag.Bagging(max_rounds=100, sample_size=1000, learner=lambda: DecisionTreeClassifier(max_depth=3))
    bagged.fit(X, y)
    kf_fold = hw4.partition_folds(spamData, .4)
    test_y, test_X = hw4.split_truth_from_data(kf_fold[0])
    test_pred = bagged.predict(test_X)
    test_y = bagged._check_y(test_y)
    test_pred = bagged._check_y(test_pred)
    test_error = float(sum([0 if py == ty else 1 for py, ty in zip(test_pred, test_y)]))/len(test_y)
    print 'Final testing error: {}'.format(test_error)
Пример #2
0
def q2():
    """Boosting on UCI datasets"""
    crx = dl.data_q3_crx()
    #crx = dl.data_q3_vote()
    num_points = len(crx)
    for i in xrange(5, 85, 5):
        percent = float(i)/100
        all_folds = hw4.partition_folds(crx, percent)
        kf_train = all_folds[0]
        kf_test = all_folds[1]
        y, X = hw4.split_truth_from_data(kf_train)
        y_test, X_test = hw4.split_truth_from_data(kf_test)
        adaboost = run_adaboost(X, y, X_test, y_test, 'q2_crx')
        yt_pred = adaboost.predict(X_test)
        yt_pred = adaboost._check_y(yt_pred)
        y_test = adaboost._check_y(y_test)
        round_err = float(np.sum([1 if yt!=yp else 0 for yt, yp in zip(yt_pred, y_test)]))/len(y_test)
        last_round = adaboost.local_errors.keys()[-1]
        #print 'Error at {}%: Train: {} Test: {}'.format(percent, adaboost.adaboost_error[last_round], round_err)
        print 'Error at {}%: Test: {}'.format(percent, round_err)