def test_init(self):
    """Constructor validation: a missing df or model raises ValueError,
    while a complete argument set yields a ModelUtils instance."""
    # df is None -> rejected
    with self.assertRaises(ValueError):
        ModelUtils(df=None, model=self.tree_clf,
                   columns_lst=self.columns_lst,
                   predicted_lbl=self.prd_lbl,
                   actual_lbl=self.actl_lbl)
    # model is None -> rejected
    with self.assertRaises(ValueError):
        ModelUtils(df=self.iris_df, model=None,
                   columns_lst=self.columns_lst,
                   predicted_lbl=self.prd_lbl,
                   actual_lbl=self.actl_lbl)
    # model omitted entirely -> rejected
    with self.assertRaises(ValueError):
        ModelUtils(df=self.iris_df,
                   predicted_lbl=self.prd_lbl,
                   actual_lbl=self.actl_lbl)
    # all required arguments supplied -> instance is built
    mu = ModelUtils(df=self.iris_df, model=self.tree_clf,
                    predicted_lbl=self.prd_lbl,
                    actual_lbl=self.actl_lbl)
    self.assertIsInstance(mu, ModelUtils)
def main():
    """Titanic example 03: prepare the data, grid-search a decision tree,
    evaluate the best model, and write a Kaggle submission file."""
    common = common_titanic_things(example_number='03')
    df = common.load_data("train.csv")
    create_age_estimator(common)
    # prepare the data
    df = prep_data(df, common)
    # write the prepared data for later exploration
    df.to_csv(common.output_csv_name("data.csv"))
    # NOTE(review): a leftover debug `exit()` used to stop the run here,
    # making everything below unreachable; removed so the full pipeline runs.
    # create clf and its hyper-parameter search space
    tree_clf = DecisionTreeClassifier()
    param_grid = {
        'criterion': ['gini', 'entropy'],
        'max_depth': [3, 4, 5, 6],
        'max_features': [0.2, 0.5, 0.7, 0.9, 1.0],
        'min_impurity_decrease': [0.0, 0.1, 0.5],
        'min_samples_leaf': [3, 5, 8, 10, 12],
        'min_samples_split': [10],
        'min_weight_fraction_leaf': [0.0],
        'presort': [True],
        'random_state': [123456],  # fixed seed for reproducible trees
    }
    clf_gs = GridSearchCV(tree_clf, param_grid=param_grid, cv=4)
    # split and train
    mu = ModelUtils(df=df, model=clf_gs, predicted_lbl=common.prd_lbl,
                    actual_lbl=common.actl_lbl, is_verbose=True)
    mu.is_verbose = True
    print(mu.df.head())
    mu.split_and_train()
    # test model on the held-out split
    train_result_df = mu.test_model()
    # evaluate tested results using plot_confusion_matrix
    print(mu.confusion_matrix_as_dataframe())
    evp = EvaluationPlots(df=train_result_df, actual_lbl=common.actl_lbl,
                          predicted_lbl=common.prd_lbl)
    evp.plot_confusion_matrix(confusion_matrix=mu.confusion_matrix(),
                              classes_lst=mu.model.classes_,
                              title="Titanic-confusion_matrix")
    # plt.savefig("confusion_matrix.png", bbox_inches='tight')
    cr = mu.classification_report(y_pred=train_result_df[common.prd_lbl],
                                  y_true=train_result_df[common.actl_lbl])
    print(cr)
    evp.plot_classification_report(cr)
    plt.show()
    common.prepare_kaggle_file(mu, prep_data)
def __init__(self, df, lm=None, lm_name="", predicted_lbl=None,
             actual_lbl=None, columns_lst=None, test_size=0.3,
             random_state=123456):
    """Thin adapter around ModelUtils.__init__, mapping the linear-model
    style argument names (`lm`, `lm_name`) onto ModelUtils' `model` and
    `model_name` parameters.

    :param df: input DataFrame handed straight to ModelUtils
    :param lm: the model object (passed as `model`)
    :param lm_name: display name for the model (passed as `model_name`)
    :param predicted_lbl: column name for predictions
    :param actual_lbl: column name holding the ground truth
    :param columns_lst: feature columns; None means "use an empty list"
    :param test_size: fraction reserved for the test split
    :param random_state: seed forwarded for reproducible splits
    """
    # NOTE(review): the default used to be the mutable `columns_lst=[]`,
    # which is shared across calls; use a None sentinel and build a fresh
    # list per call instead. Behavior for callers is unchanged.
    if columns_lst is None:
        columns_lst = []
    ModelUtils.__init__(self, df=df, model=lm, model_name=lm_name,
                        predicted_lbl=predicted_lbl, actual_lbl=actual_lbl,
                        columns_lst=columns_lst, test_size=test_size,
                        random_state=random_state)
def main():
    """Titanic example 01: train a single decision tree, evaluate it, and
    produce a Kaggle submission file."""
    common = common_titanic_things(example_number='01')
    raw_df = common.load_data("train.csv")
    # feature preparation
    data_df = prep_data(raw_df, common)
    # naively re-balance the classes by oversampling survivors
    survivors_sample = data_df[data_df.Survived == 1].sample(
        n=120, random_state=123456)
    data_df = pd.concat([data_df, survivors_sample])
    # persist the prepared data for later exploration
    data_df.to_csv(common.output_csv_name("data.csv"))
    # a single, lightly-regularized decision tree
    clf = DecisionTreeClassifier(max_depth=5, min_samples_split=10,
                                 min_samples_leaf=10)
    # split and train
    mu = ModelUtils(df=data_df, model=clf,
                    predicted_lbl=common.prd_lbl,
                    actual_lbl=common.actl_lbl)
    mu.is_verbose = True
    print(mu.df.head())
    mu.split_and_train()
    # score the held-out split
    result_df = mu.test_model()
    # evaluate the results via confusion matrix + classification report
    print(mu.confusion_matrix_as_dataframe())
    evp = EvaluationPlots(df=result_df,
                          actual_lbl=common.actl_lbl,
                          predicted_lbl=common.prd_lbl)
    evp.plot_confusion_matrix(confusion_matrix=mu.confusion_matrix(),
                              classes_lst=mu.model.classes_,
                              title="Titanic-confusion_matrix")
    # plt.savefig("confusion_matrix.png", bbox_inches='tight')
    cr = mu.classification_report(y_pred=result_df[common.prd_lbl],
                                  y_true=result_df[common.actl_lbl])
    print(cr)
    evp.plot_classification_report(cr)
    common.prepare_kaggle_file(mu, prep_data)
    plt.show()
def setUp(self):
    """Build the shared fixtures: iris/boston frames, a small decision
    tree, the label names, and a ready-made ModelUtils instance."""
    self.iris_df = DatasetsTools(datasets.load_iris).data_as_df(
        target_column_name="IrisClass")
    self.boton_df = DatasetsTools(datasets.load_boston).data_as_df()
    self.tree_clf = DecisionTreeClassifier(max_depth=5,
                                           min_samples_split=10,
                                           min_samples_leaf=10)
    self.prd_lbl = "PrdictedIrisClass"
    self.actl_lbl = "IrisClass"
    # feature columns = every frame column except the trailing target
    self.columns_lst = list(self.iris_df)[:-1]
    self.mu = ModelUtils(df=self.iris_df, model=self.tree_clf,
                         columns_lst=self.columns_lst,
                         predicted_lbl=self.prd_lbl,
                         actual_lbl=self.actl_lbl)
from MachineLearningUtils.ModelsUtils import ModelUtils from MachineLearningUtils.UsefulPlots import EvaluationPlots # load iris data into DataFrame prd_lbl, actl_lbl = "PrdictedIrisClass", "IrisClass" iris_df = DatasetsTools( datasets.load_iris).data_as_df(target_column_name="IrisClass") # set clf tree_clf = DecisionTreeClassifier(max_depth=5, min_samples_split=10, min_samples_leaf=10) # simple usage mu = ModelUtils(df=iris_df, model=tree_clf, predicted_lbl=prd_lbl, actual_lbl=actl_lbl) mu.split_and_train() results_df = mu.test_model() # evaluate results using plot_confusion_matrix print(mu.confusion_matrix_as_dataframe()) evp = EvaluationPlots(df=results_df, actual_lbl=actl_lbl, predicted_lbl=prd_lbl) evp.plot_confusion_matrix(confusion_matrix=mu.confusion_matrix(), classes_lst=mu.model.classes_, title="Iris-confusion_matrix") # plt.savefig("confusion_matrix.png", bbox_inches='tight') cr = mu.classification_report(y_pred=results_df[prd_lbl],