# Example #1
# 0
from sklearn import tree
from utilities import load_magic04, load_wine, scale_data, train_model, tune_hyperparameters, model_complexity, learning_curve

# Decision-tree experiment: fit a default tree, search for better
# hyperparameters, then refit with the winning settings and export both trees.
#
# The wine data is used here; calling load_magic04() instead of load_wine()
# runs the same pipeline on the other dataset this script imports a loader for.
df, factors, response = load_wine()
df_train, df_test = scale_data(df, response)

# Baseline tree with scikit-learn's default settings. None is passed in the
# position where the final run below passes df_test.
classifier = tree.DecisionTreeClassifier()
train_model(classifier, df_train, None, factors, response)
tree.export_graphviz(classifier, out_file="tree_initial.dot")

# Hyperparameter grid handed to tune_hyperparameters. Other grids that have
# been tried with this script:
#   "criterion": ["entropy", "gini"]
#   "min_samples_leaf": range(1, 20)
#   "min_samples_split": range(2, 20)
search_grid = {
    "max_depth": range(1, 20),
    "max_leaf_nodes": range(50, 150, 10),
}
best_params = tune_hyperparameters(
    classifier, df_train, factors, response, search_grid)

# Complexity study over max_depth only: the leaf-node limit is pinned to the
# tuned value and max_depth is the parameter named for the sweep.
depth_probe = tree.DecisionTreeClassifier(
    max_leaf_nodes=best_params["max_leaf_nodes"])
model_complexity(
    depth_probe, df_train, factors, response,
    {"max_depth": range(1, 20)}, "max_depth")

# Final tree built from both tuned settings; this run also receives df_test
# and the "Final " label.
tuned_settings = {
    name: best_params[name] for name in ("max_depth", "max_leaf_nodes")
}
classifier = tree.DecisionTreeClassifier(**tuned_settings)
train_model(classifier, df_train, df_test, factors, response, "Final ")
tree.export_graphviz(classifier, out_file="tree_pruned.dot")

learning_curve(classifier, df_train, factors, response)
# Streamlit page: pick a dataset and a classifier in the sidebar, optionally
# scale the features, then show the accuracy and a plot of the data.
#
# NOTE(review): `st`, `np` and `utilities` are not imported in the visible part
# of this file -- presumably `import streamlit as st`, `import numpy as np` and
# `import utilities` are expected; confirm they are in scope before this runs.

# Sidebar select box for the dataset.
dataset_name = st.sidebar.selectbox("Select Dataset",
                                    ("Iris", "Breast Cancer", "Wine"))

# Sidebar select box for the classifier. From here on `classifier` holds the
# selected option (one of the strings below), not an estimator object.
classifier = st.sidebar.selectbox("Select Classifiers",
                                  ("KNN", "SVM", "Random Forest"))

# Sidebar checkbox that controls the optional scaling step further down.
scaling = st.sidebar.checkbox("Scaling?")

# Get the data for the selected dataset and report its shape and class count.
X, y = utilities.get_dataset(dataset_name)
st.write("Shape of the data:", X.shape)
st.write("Number of Classes:", len(np.unique(y)))

# Add parameter widgets to the UI based on the selected classifier.
params = utilities.add_parameter_ui(classifier)

# Build the classifier object from the selected name and its parameters.
clf = utilities.get_classifier(classifier, params)

# Scale the features only if the checkbox is ticked.
# NOTE(review): earlier in this file scale_data is called with two arguments
# and unpacked into a pair; here it takes one argument and returns a single
# value -- confirm which signature utilities.scale_data actually has.
if scaling:
    X = utilities.scale_data(X)

# Run the classification and display the resulting accuracy.
accuracy = utilities.classification(X, y, clf)
# Label spelling corrected: the page previously displayed "Classifer".
st.write("**Classifier:** ", classifier)
st.write("**Accuracy:** ", accuracy)

# Plot the components of the data.
utilities.plot_data(X, y)