Пример #1
0
def test_max_depth(titanic_training_data):
    """Check the split feature recorded on nodes 0 through 4.

    The classifier is created with ``max_depth=5`` and ``max_features=1`` and
    built from items 0 and 1 of the ``titanic_training_data`` fixture.
    """
    classifier = twigy.DecisionTreeClassifier(max_depth=5, max_features=1)
    features = titanic_training_data[0]
    labels = titanic_training_data[1]
    classifier.build_tree(features, labels)

    # Expected ``split_feature`` for node ids 0..4, checked in order.
    expected_split_features = (1, 7, 6, 2, 7)
    for node_id, expected in enumerate(expected_split_features):
        assert classifier.tree.nodes[node_id].split_feature == expected
Пример #2
0
def test_result(titanic_training_data):
    """Check the node count and the contents of nodes 0, 22 and 49.

    The classifier is created with ``max_depth=5`` and built from items 0 and
    1 of the ``titanic_training_data`` fixture. Impurities are compared with
    ``pytest.approx``; every other attribute is compared exactly.
    """
    model = twigy.DecisionTreeClassifier(max_depth=5)
    features = titanic_training_data[0]
    labels = titanic_training_data[1]
    model.build_tree(features, labels)

    nodes = model.tree.nodes
    assert len(nodes) == 53

    # Node 0: sample count, split, child ids, impurity and value.
    root = nodes[0]
    assert root.n_samples == 891
    assert root.split_feature == 5
    assert root.threshold == 1.5
    assert root.left_child_id == 1
    assert root.right_child_id == 26
    assert root.impurity == pytest.approx(0.473013)
    assert root.value == [549, 342]

    # Node 22: only sample count, impurity and value are checked.
    node_22 = nodes[22]
    assert node_22.n_samples == 32
    assert node_22.impurity == pytest.approx(0.0)
    assert node_22.value == [32, 0]

    # Node 49: same set of attributes as node 0.
    node_49 = nodes[49]
    assert node_49.n_samples == 21
    assert node_49.split_feature == 2
    assert node_49.threshold == 0.5
    assert node_49.left_child_id == 50
    assert node_49.right_child_id == 51
    assert node_49.impurity == pytest.approx(0.17233559)
    assert node_49.value == [19, 2]
Пример #3
0
def test_normal_min_impurity_split(titanic_training_data):
    """Check the node count for ``max_depth=5`` and ``min_impurity_split=0.1``.

    The tree is built from items 0 and 1 of the ``titanic_training_data``
    fixture and is expected to contain 47 nodes.
    """
    classifier = twigy.DecisionTreeClassifier(
        max_depth=5,
        min_impurity_split=0.1,
    )
    features = titanic_training_data[0]
    labels = titanic_training_data[1]
    classifier.build_tree(features, labels)

    node_count = len(classifier.tree.nodes)
    assert node_count == 47
Пример #4
0
def test_max_depth(titanic_training_data):
    """Check that a classifier created with ``max_depth=1`` has 3 nodes.

    The tree is built from items 0 and 1 of the ``titanic_training_data``
    fixture.
    """
    classifier = twigy.DecisionTreeClassifier(max_depth=1)
    features = titanic_training_data[0]
    labels = titanic_training_data[1]
    classifier.build_tree(features, labels)

    node_count = len(classifier.tree.nodes)
    assert node_count == 3
Пример #5
0
import numpy as np
import twigy
import pandas as pd

# Load the Titanic CSV; X holds every column except 'Survived' and y holds
# the 'Survived' column flattened to one dimension.
titanic_data = pd.read_csv("./test/testdata/titanic_data.csv")
X = titanic_data.drop(columns=['Survived']).to_numpy()
y = np.ravel(titanic_data[['Survived']].to_numpy())

# Create a decision tree classifier with default arguments, build it from
# the data above and print the resulting tree.
tree_clf = twigy.DecisionTreeClassifier()
tree_clf.build_tree(X, y)
tree_clf.print_tree()

# Run the tree's predict_classes on the training data itself.
preds = tree_clf.predict_classes(X)

# Create a random forest classifier with default arguments and build it from
# the same data (the forest is not printed).
forest_clf = twigy.RandomForestClassifier()
forest_clf.build_forest(X, y)

# Run the forest's predict_classes on the training data; this rebinds
# `preds`, replacing the decision tree's result.
preds = forest_clf.predict_classes(X)