Exemplo n.º 1
0
                "node %s." % (
                    actual_node.depth * "\t",
                    actual_node.id,
                    actual_node.parent_id,
                    actual_node.left_node_id,
                    actual_node.feature,
                    actual_node.threshold,
                    actual_node.right_node_id,
                ))
            stack.append(actual_node.left_node)
            stack.append(actual_node.right_node)

    spear_train += 1 - zero_one_loss(node, X, labels) / len(labels)
    spear_valid += 1 - zero_one_loss(node, X_valid, y_valid) / len(y_valid)

    clf = DecisionTreeClassifier(random_state=0,
                                 max_depth=3,
                                 min_samples_leaf=4)
    clf.fit(X, labels)
    clf_train += clf.score(X, labels)
    clf_valid += clf.score(X_valid, y_valid)

    L = ClassificationTree(oblique=False)
    L.initialize_from_CART(X, labels, clf)

    L.print_tree_structure()
print("clf train: ", clf_train / 30)
print("spearman train: ", spear_train / 30)
print("clf valid: ", clf_valid / 30)
print("spearman valid: ", spear_valid / 30)
Exemplo n.º 2
0
    #x = data[8]
    #print (T.predict_label(x.reshape((1, -1)), 0))
    #print (clf.predict(x.reshape((1, -1))))
    #print ("x--->", x)
    #print(T.get_path_to(x, 0))
    #T.print_tree_structure()

    #print ("T acc -> ", 1-T.misclassification_loss(data, label, T.tree[0]))
    #print ("clf acc -> ", clf.score(data, label))
    #node_id = 4
    #x = data[T.tree[node_id].data_idxs]
    #print (x)
    #print (T.predict_label(x, node_id))
    #for (id, node) in T.tree.items():
    #print("Prima: node ", id, " items -->", node.data_idxs)
    T.print_tree_structure()
    tao = TAO(T)
    tao.evolve(X_train, y_train)
    T.compute_prob(X_train, y_train)
    tao_auc_train += T.auc(X_train, y_train)
    tao_auc_valid += T.auc(X_valid, y_valid)

    L = ClassificationTree(oblique=False)
    L.initialize_from_CART(X_train, y_train, clf)
    ls = LocalSearch(L)
    ls.evolve(X_train, y_train, alfa=1000000, max_iteration=10)
    L.compute_prob(X_train, y_train)
    ls_auc_train += L.auc(X_train, y_train)
    ls_auc_valid += L.auc(X_valid, y_valid)
    clf_train_score += clf.score(X_train, y_train)
    tao_train_score += 1 - T.misclassification_loss(
Exemplo n.º 3
0
def test(n_runs, ls_train, ls_test, svm_train, svm_test, random_train, random_test, cart_train, tao_train, global_train, cart_test, tao_test, global_test):
    """Benchmark several classifiers over ``n_runs`` random splits of the cancer data.

    Each run loads ``cancer_train.npy`` / ``cancer_label.npy``, shuffles them,
    splits 50% / 25% / 25% into train / validation / test, then fits and scores:
    a CART decision tree, a TAO-refined tree, a linear SVM, a random forest and
    a genetically optimized tree.  Train and test accuracies are appended
    **in place** to the corresponding list arguments; nothing is returned.

    NOTE(review): the LocalSearch (LS) variant is currently disabled, so
    ``ls_train`` and ``ls_test`` are never written to — they are kept in the
    signature for interface compatibility with existing callers.
    """
    for run in range(n_runs):
        # Hyper-parameters shared by all models in this run.
        depth = 3
        oblique = False
        n_trees = 200
        n_iter = 5

        data = np.load('cancer_train.npy')
        y = np.load('cancer_label.npy')
        print("Run -> ", run)

        # Shuffle, then split: [0, 50%) train, [50%, 75%) validation, [75%, 100%) test.
        idx = np.random.permutation(len(data))
        data = data[idx]
        y = y[idx]
        train_split = 0.50
        valid_split = 0.75
        train_id = int(len(data) * train_split)
        valid_id = int(len(data) * valid_split)
        X = data[0:train_id]
        labels = y[0:train_id]

        X_valid = data[train_id:valid_id]
        y_valid = y[train_id:valid_id]

        X_test = data[valid_id:]
        y_test = y[valid_id:]

        # CART baseline.
        clf = DecisionTreeClassifier(random_state=0, max_depth=depth,
                                     min_samples_leaf=4)
        clf.fit(X, labels)

        # TAO: start from the CART tree and refine it.
        T = ClassificationTree(oblique=oblique)
        T.initialize_from_CART(X, labels, clf)
        tao = TAO(T)
        tao.evolve(X, labels)
        T.print_tree_structure()

        # Linear SVM baseline.
        svm = LinearSVC(tol=1e-6, max_iter=10000, dual=False)
        svm.fit(X, labels)

        # Random-forest baseline.
        random_for = RandomForestClassifier(n_estimators=n_trees,
                                            max_depth=depth,
                                            random_state=0,
                                            min_samples_leaf=4)
        random_for.fit(X, labels)

        # Genetic global optimization; only the best tree is scored below,
        # so the returned loss is deliberately ignored.
        best_t, _best_loss = genetic_tree_optimization(
            n_trees, n_iter, depth, X, labels, oblique, X_valid, y_valid,
            CR=0, l=0)
        best_t.print_tree_structure()

        # Train accuracies (1 - misclassification loss == accuracy).
        cart_train.append(clf.score(X, labels))
        tao_train.append(1 - ClassificationTree.misclassification_loss(
            T.tree[0], X, labels, range(len(labels)), oblique))
        global_train.append(1 - ClassificationTree.misclassification_loss(
            best_t.tree[0], X, labels, range(len(labels)), oblique))
        svm_train.append(svm.score(X, labels))
        random_train.append(random_for.score(X, labels))

        # Test accuracies.
        cart_test.append(clf.score(X_test, y_test))
        tao_test.append(1 - ClassificationTree.misclassification_loss(
            T.tree[0], X_test, y_test, range(len(y_test)), oblique))
        global_test.append(1 - ClassificationTree.misclassification_loss(
            best_t.tree[0], X_test, y_test, range(len(y_test)), oblique))
        svm_test.append(svm.score(X_test, y_test))
        random_test.append(random_for.score(X_test, y_test))