# Decision-tree experiment: fit an unconstrained tree, search for pruning
# hyperparameters, then refit and evaluate the pruned tree.
#
# The data loading, scaling, training, tuning and plotting helpers all come
# from the project-local `utilities` module; their exact behaviour is not
# visible from this script.
from sklearn import tree
from utilities import (
    learning_curve,
    load_magic04,
    load_wine,
    model_complexity,
    scale_data,
    train_model,
    tune_hyperparameters,
)

# Candidate values explored for each pruning hyperparameter.
MAX_DEPTH_CANDIDATES = range(1, 20)
MAX_LEAF_NODES_CANDIDATES = range(50, 150, 10)

# Dataset selection. To run against the MAGIC04 data instead, swap the loader:
#     df, factors, response = load_magic04()
df, factors, response = load_wine()
df_train, df_test = scale_data(df, response)

# Baseline: a tree with default settings. `None` is passed in the position
# where the final fit below passes `df_test`.
classifier = tree.DecisionTreeClassifier()
train_model(classifier, df_train, None, factors, response)
tree.export_graphviz(classifier, out_file="tree_initial.dot")

# Hyperparameter search over depth and leaf-count limits.
# Other grid entries that have been tried here:
#     "criterion": ["entropy", "gini"]
#     "max_leaf_nodes": range(50, 150, 10)
#     "max_depth": range(1, 20)
#     "min_samples_leaf": range(1, 20)
#     "min_samples_split": range(2, 20)
search_grid = {
    "max_depth": MAX_DEPTH_CANDIDATES,
    "max_leaf_nodes": MAX_LEAF_NODES_CANDIDATES,
}
best_params = tune_hyperparameters(
    classifier, df_train, factors, response, search_grid
)

# Model-complexity analysis: hold max_leaf_nodes at its tuned value and vary
# max_depth over the same candidate range.
leaf_limited_tree = tree.DecisionTreeClassifier(
    max_leaf_nodes=best_params["max_leaf_nodes"]
)
model_complexity(
    leaf_limited_tree,
    df_train,
    factors,
    response,
    {"max_depth": MAX_DEPTH_CANDIDATES},
    "max_depth",
)

# Final model: apply both tuned limits, train with the held-out split supplied,
# and export the pruned tree as a Graphviz DOT file.
classifier = tree.DecisionTreeClassifier(
    max_depth=best_params["max_depth"],
    max_leaf_nodes=best_params["max_leaf_nodes"],
)
train_model(classifier, df_train, df_test, factors, response, "Final ")
tree.export_graphviz(classifier, out_file="tree_pruned.dot")

learning_curve(classifier, df_train, factors, response)
# Streamlit page body: pick a dataset and classifier in the sidebar, then
# train/evaluate and plot the result.
# NOTE(review): `st`, `np` and `utilities` are not imported in this section;
# presumably they are imported above it -- confirm.

# Sidebar controls. The widget labels are left untouched on purpose:
# Streamlit derives a widget's identity from its label and options.
dataset_name = st.sidebar.selectbox(
    "Select Dataset",
    ("Iris", "Breast Cancer", "Wine"),
)
classifier = st.sidebar.selectbox(
    "Select Classifiers",
    ("KNN", "SVM", "Random Forest"),
)
scaling = st.sidebar.checkbox("Scaling?")

# Load the chosen dataset and show its shape and number of distinct labels.
X, y = utilities.get_dataset(dataset_name)
st.write("Shape of the data:", X.shape)
st.write("Number of Classes:", len(np.unique(y)))

# Add classifier-specific parameter widgets to the UI and collect their values.
params = utilities.add_parameter_ui(classifier)

# Build the estimator for the selected classifier name and parameters.
clf = utilities.get_classifier(classifier, params)

# Scale the features only when the sidebar checkbox is ticked.
if scaling:
    X = utilities.scale_data(X)

# Make predictions and get the accuracy.
accuracy = utilities.classification(X, y, clf)
# Label spelling corrected from "Classifer" to "Classifier".
st.write("**Classifier:** ", classifier)
st.write("**Accuracy:** ", accuracy)

# Plot the components of the data.
utilities.plot_data(X, y)