Example #1
def test_decision_tree_predict():
    dataset = datasets.load_iris()
    x = dataset.data
    y = dataset.target
    clf = DecisionTree()
    clf.fit(x, y)
    y_pred = clf.predict(x)
    assert np.array_equal(y, y_pred)
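An unconstrained decision tree can keep splitting until every training sample is classified correctly, which is why the test expects predict to reproduce y exactly on the training set. A minimal cross-check of the same property using scikit-learn's DecisionTreeClassifier (a stand-in, not the DecisionTree under test):

import numpy as np
from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier

x, y = datasets.load_iris(return_X_y=True)
clf = DecisionTreeClassifier().fit(x, y)
# With no depth limit the tree memorizes the training data.
assert np.array_equal(clf.predict(x), y)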
Example #2
def test_decision_tree_fit():
    tree = DecisionTree(3)
    x = np.array([[1, 2], [2, 3], [4, 5]])
    y = np.array([0, 1, 1])
    tree.fit(x, y)
    assert tree.root.left.predict == 0
    assert tree.root.right.predict == 1
    assert tree.root.left.left is None
    assert tree.root.left.right is None
    assert tree.root.right.left is None
    assert tree.root.right.right is None
    assert tree.root.max_depth == 3
    assert tree.root.left.max_depth == 2
    assert tree.root.right.max_depth == 2
    assert tree.root.feature == 1
    assert tree.root.threshold == 3
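Two equally pure splits exist for this toy set, (feature 0, threshold 2) and (feature 1, threshold 3), so the assertions also pin down how the implementation breaks ties. A sketch of the exhaustive split search such a fit typically performs (hypothetical helpers gini and best_split, with later ties winning so the asserted pair is returned):

import numpy as np

def gini(labels):
    # Gini impurity: 1 minus the sum of squared class frequencies.
    if len(labels) == 0:
        return 0.0
    _, counts = np.unique(labels, return_counts=True)
    p = counts / counts.sum()
    return 1.0 - float(np.sum(p ** 2))

def best_split(x, y):
    best = (None, None, float('inf'))
    for feature in range(x.shape[1]):
        for threshold in np.unique(x[:, feature]):
            left = y[x[:, feature] < threshold]
            right = y[x[:, feature] >= threshold]
            score = (len(left) * gini(left) + len(right) * gini(right)) / len(y)
            if score <= best[2]:  # "<=" lets a later, equally good split win
                best = (feature, threshold, score)
    return best

x = np.array([[1, 2], [2, 3], [4, 5]])
y = np.array([0, 1, 1])
feature, threshold, _ = best_split(x, y)
print(feature, threshold)  # 1 3, matching the asserted feature and threshold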
Example #3
def test_decision_tree_init():
    tree = DecisionTree(3)
    assert type(tree.root) is TreeNode
    assert tree.root.max_depth == 3
    assert tree.root.feature is None
    assert tree.root.threshold is None
    assert tree.root.predict is None
    assert tree.root.left is None
    assert tree.root.right is None
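These assertions imply constructors along the following lines (an assumed shape, not the project's actual source): each node carries its remaining depth budget plus yet-unset split and prediction fields, and the tree simply wraps a root node.

class TreeNode:
    def __init__(self, max_depth):
        self.max_depth = max_depth  # remaining depth budget at this node
        self.feature = None         # split feature index, filled in by fit()
        self.threshold = None       # split threshold, filled in by fit()
        self.predict = None         # leaf prediction, filled in by fit()
        self.left = None
        self.right = None


class DecisionTree:
    def __init__(self, max_depth):
        self.root = TreeNode(max_depth)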
Example #4
    def fit(self, X, y):
        """
            The fit function fits the Random Forest model based on the training data. 

            X_train is a matrix or 2-D numpy array, represnting training instances. 
            Each training instance is a feature vector. 

            y_train contains the corresponding labels.
        """
        for i in range(self.n_trees):
            # Re-seed the global RNG from fresh OS entropy so each tree
            # is trained with an independent random stream.
            np.random.seed()
            temp_clf = DecisionTree(
                max_depth=self.max_depth,
                size_allowed=self.size_allowed,
                n_features=self.n_features,
                n_split=self.n_split,
            )
            temp_clf.fit(X, y)
            self.trees.append(temp_clf)
        return self
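A hedged sketch of the matching predict method (hypothetical; it assumes each fitted DecisionTree exposes predict(X) returning one non-negative integer label per row, as the tests above suggest):

    def predict(self, X):
        # One row of predictions per tree: shape (n_trees, n_samples).
        votes = np.array([tree.predict(X) for tree in self.trees])
        # Majority vote per sample across the ensemble.
        return np.array([np.bincount(col).argmax() for col in votes.T])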
Example #5
def test_gradient_boosting_predict():
    clf = GradientBoosting(learning_rate=0.1, n_estimators=20, max_depth=5)
    dataset = datasets.load_iris()
    x = dataset.data
    y = dataset.target
    clf.fit(x, y)
    assert len(clf.predict(x)) == len(y)
    assert clf.learning_rate == 0.1
    assert clf.n_estimators == 20
    assert clf.max_depth == 5
    assert len(clf.trees) == clf.n_estimators

    clf = GradientBoosting(learning_rate=0.1, n_estimators=1, max_depth=5000)
    dataset = datasets.load_iris()
    x = dataset.data
    y = dataset.target
    pred_gb = clf.fit(x, y)

    tree = DecisionTree()
    pred_t = tree.fit(x, y)
    assert np.array_equal(pred_gb, pred_t)
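The second half exercises a degenerate configuration: with n_estimators=1 and an effectively unlimited max_depth, the boosted ensemble is dominated by a single deep tree, so whatever fit returns here (the test treats it as comparable output) is expected to coincide with a standalone DecisionTree's.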
Example #6
def main():
    data_path = 'gielda.txt'
    decision_tree = DecisionTree(data_path, separator=',')
    decision_tree.create_decision_table()
    info_a1 = decision_tree.calculate_info_for_selected_column(0)
    gain_a2 = decision_tree.calculate_gain_for_selected_column(1)
    print('Info a1 = {} \nGain a2 = {}'.format(info_a1, gain_a2))
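A hedged sketch of the two quantities being printed, using the standard ID3 definitions (the class's own formulas may differ): Info(A) is the expected entropy of the decision column after partitioning the table by attribute A, and Gain(A) is the whole-table entropy minus Info(A).

import math
from collections import Counter

def entropy(labels):
    total = len(labels)
    return -sum((c / total) * math.log2(c / total)
                for c in Counter(labels).values())

def info_for_column(rows, col, decision_col=-1):
    # Partition decisions by the attribute's value, then take the
    # size-weighted average of each group's entropy.
    groups = {}
    for row in rows:
        groups.setdefault(row[col], []).append(row[decision_col])
    total = len(rows)
    return sum(len(g) / total * entropy(g) for g in groups.values())

def gain_for_column(rows, col, decision_col=-1):
    decisions = [row[decision_col] for row in rows]
    return entropy(decisions) - info_for_column(rows, col, decision_col)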
Example #7
    def train(self,
              training_set,
              learning_rate,
              learn_threshold,
              gradient_step_learning_rate,
              classifier_train_kwargs,
              classifier_init_args,
              classifier_init_kwargs,
              loss_function_derivative,  # d(L(ŷ, y)) / d(ŷ)
              iteration_count,
              max_descent_iterations=500000,
              verbose=False):
        # Model 0 is the usual constant initial guess: the mean label.
        self.classifiers = [None]
        self.coefficients = np.zeros(1)
        self.coefficients[0] = training_set.y.mean()
        self.previous_model_leaves_count = 1
        for iteration in range(1, iteration_count + 1):
            if verbose:
                print("Started iteration " + str(iteration))
            self.classifiers.append(DecisionTree(*classifier_init_args, **classifier_init_kwargs))
            current_training_set = self.get_pseudo_residuals(loss_function_derivative, training_set)
            self.classifiers[-1].train(current_training_set, **classifier_train_kwargs)
            new_leaves_count = (self.classifiers[-1].root.set_leaf_index(self.previous_model_leaves_count)
                                - self.previous_model_leaves_count)
            self.coefficients = np.hstack((self.coefficients, np.zeros(new_leaves_count)))
            if verbose:
                print("Tree is trained, calculating coefficient...")
            self.coefficient_gradient_descent(training_set,
                                              iteration,
                                              loss_function_derivative,
                                              learning_rate,
                                              gradient_step_learning_rate,
                                              learn_threshold,
                                              max_descent_iterations)
            self.previous_model_leaves_count += new_leaves_count
            if verbose:
                print("")
        if verbose:
            print("Finished training")
Example #8
def test_decision_tree_inf_criteria():
    tree = DecisionTree(3)
    result = tree._inf_criteria(np.array([1, 1, 1, 1, 3, 11, 2]))
    assert result == pytest.approx(11.551, 0.001)
    assert tree._inf_criteria(np.array([])) == 0
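The expected 11.551 is consistent with _inf_criteria computing the population variance, i.e. the MSE impurity used for regression trees: the mean of [1, 1, 1, 1, 3, 11, 2] is 20/7 ≈ 2.857, and the mean squared deviation from it is ≈ 11.551. A sketch of such a criterion (an assumption, not the project's source):

import numpy as np

def _inf_criteria(values):
    # Population variance (MSE impurity); defined as 0 for an empty node.
    if len(values) == 0:
        return 0.0
    return float(np.mean((values - values.mean()) ** 2))

print(_inf_criteria(np.array([1, 1, 1, 1, 3, 11, 2])))  # ≈ 11.551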