示例#1
0
# Label column name mapped to the class values it can take.
label = {'y': ['yes', 'no']}

# Number of independent runs, and number of trees grown within each run.
num_run = 100
T = 1000

# test_py[r] accumulates the +1/-1 predictions of every tree grown in run r.
test_py = np.zeros((num_run, test_size), dtype=int)
# Accumulates the +1/-1 predictions of the first tree (t == 0) of every run.
test_py_first = np.zeros(test_size, dtype=int)

# The run index is called `run` (not `iter`) so the builtin iter() stays usable.
for run in range(num_run):
    # 1000 rows drawn without replacement; the run index is the seed, so each
    # run is reproducible.
    train_subset = train_data.sample(n=1000, replace=False, random_state=run)
    for t in range(T):
        print('iter: ', run, 't: ', t)
        # Bootstrap sample: 1% of the run's subset, drawn with replacement and
        # seeded by the tree index.
        sampled = train_subset.sample(frac=0.01, replace=True, random_state=t)
        # ID3 learner. NOTE(review): the meaning of feature_selection=0 and
        # subset=6 is defined in the project's `dt` module -- confirm there.
        dt_generator = dt.ID3(feature_selection=0, max_depth=17, subset=6)
        decision_tree = dt_generator.generate_decision_tree(
            sampled, features, label)
        # Predict on the test set and recode the labels: 'yes' -> 1, 'no' -> -1.
        py = dt_generator.classify(decision_tree, test_data)
        py = np.array(py.tolist())
        # The replacements are written into the label array itself and only
        # become integers in astype(int); presumably every prediction is
        # 'yes' or 'no', otherwise astype(int) raises.
        py[py == 'yes'] = 1
        py[py == 'no'] = -1
        py = py.astype(int)
        test_py[run] += py
        if t == 0:
            test_py_first += py

# Ground-truth labels of the test set (still as strings).
true_value = np.array(test_data['y'].tolist())
示例#2
0
文件: car.py 项目: GregDobby/CS6350
    'maint': ['vhigh', 'high', 'med', 'low'],
    'doors': ['2', '3', '4', '5more'],
    'persons': ['2', '4', 'more'],
    'lug_boot': ['small', 'med', 'big'],
    'safety': ['low', 'med', 'high']
}

# Label column name mapped to the class values it can take.
label = {'label': ['unacc', 'acc', 'good', 'vgood']}

# Accuracy tables indexed as [feature_selection][max_depth - 1].
train_acc = [[0] * 6 for _ in range(3)]
test_acc = [[0] * 6 for _ in range(3)]

for feature_selection in range(3):
    for max_depth in range(6):
        # ID3 learner; the loop index is 0-based, so the depth limit passed
        # to the learner runs from 1 to 6.
        dt_generator = dt.ID3(feature_selection=feature_selection,
                              max_depth=max_depth + 1)
        decision_tree = dt_generator.generate_decision_tree(
            train_data, features, label)

        # Training accuracy: store the predictions in the 'plabel' column,
        # then count matching rows with one vectorized comparison instead of
        # a per-row apply().
        train_data['plabel'] = dt_generator.classify(decision_tree, train_data)
        train_hits = (train_data['label'] == train_data['plabel']).sum()
        train_acc[feature_selection][max_depth] = train_hits / train_size

        # Test accuracy, computed the same way on the held-out data.
        test_data['plabel'] = dt_generator.classify(decision_tree, test_data)
        test_hits = (test_data['label'] == test_data['plabel']).sum()
        test_acc[feature_selection][max_depth] = test_hits / test_size