Exemplo n.º 1
0
    # Train a decision tree on the 'prognosis' label, report its accuracy and
    # cross-validation scores, export the tree as Graphviz .dot files, and print
    # the most important features. `df`, `x` and `cols` are defined outside this
    # fragment.
    y = df['prognosis']

    # Hold out 33% of the rows for testing; the fixed seed keeps the split
    # identical from run to run.
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.33, random_state=42)

    print("DecisionTree")
    dt = DecisionTreeClassifier()
    clf_dt = dt.fit(x_train, y_train)
    print("Acurracy: ", clf_dt.score(x_test, y_test))

    print("cross result========")
    # NOTE(review): cross-validation runs on the held-out test split only, so
    # these scores presumably come from fresh models trained on parts of the
    # test data rather than from the tree fitted above -- confirm this is
    # intended (cross-validating on the training split is the usual choice).
    scores = cross_val_score(dt, x_test, y_test, cv=3)
    print(scores)
    print(scores.mean())

    export_graphviz(dt, out_file='diseaseData/tree.dot', feature_names=cols)

    # Feature indices ordered from most to least important.
    importances = dt.feature_importances_
    indices = np.argsort(importances)[::-1]

    # Print the feature ranking
    print("Feature ranking:")
    # Slicing caps the ranking at five entries and, unlike a fixed range(5),
    # does not raise IndexError when the data has fewer than five features.
    for rank, idx in enumerate(indices[:5], start=1):
        print(
            "%d. feature %d - %s (%f)" %
            (rank, idx, cols[idx], importances[idx]))

    # NOTE(review): max_depth=5 presumably limits the depth of the exported
    # drawing; it does not select the five top-ranked features -- confirm the
    # "top5" file name matches the intent.
    export_graphviz(dt,
                    out_file='diseaseData/tree-top5.dot',
                    feature_names=cols,
                    max_depth=5)
    # NOTE(review): nothing in this fragment renders the .dot file to PNG, the
    # path lacks the 'diseaseData/' prefix used above, and the resulting object
    # is discarded -- verify the image exists and is actually displayed.
    Image(filename='tree-top5.png')