Example #1
0
 def test_init(self):
     """ModelUtils construction: rejects missing df/model, accepts a full set."""
     # Constructing without a DataFrame must raise.
     self.assertRaises(
         ValueError,
         lambda: ModelUtils(df=None,
                            model=self.tree_clf,
                            columns_lst=self.columns_lst,
                            predicted_lbl=self.prd_lbl,
                            actual_lbl=self.actl_lbl))
     # Passing an explicit None model must raise as well.
     self.assertRaises(
         ValueError,
         lambda: ModelUtils(df=self.iris_df,
                            model=None,
                            columns_lst=self.columns_lst,
                            predicted_lbl=self.prd_lbl,
                            actual_lbl=self.actl_lbl))
     # Omitting the model keyword entirely must raise too.
     self.assertRaises(
         ValueError,
         lambda: ModelUtils(df=self.iris_df,
                            predicted_lbl=self.prd_lbl,
                            actual_lbl=self.actl_lbl))
     # A valid argument set yields a ModelUtils instance.
     instance = ModelUtils(df=self.iris_df,
                           model=self.tree_clf,
                           predicted_lbl=self.prd_lbl,
                           actual_lbl=self.actl_lbl)
     self.assertIsInstance(instance, ModelUtils)
Example #2
0
def main():
    """Grid-search a decision tree on the prepared Titanic data and report.

    Loads train.csv, builds the age estimator, prepares the features, fits a
    GridSearchCV-wrapped DecisionTreeClassifier, prints/plots the evaluation
    artifacts, and writes the Kaggle submission file.
    """
    common = common_titanic_things(example_number='03')
    df = common.load_data("train.csv")
    create_age_estimator(common)
    # prepare the data
    df = prep_data(df, common)

    # write the data for later exploration
    df.to_csv(common.output_csv_name("data.csv"))

    # BUG FIX: a leftover debugging exit() here terminated the process right
    # after the CSV dump, making all of the training/evaluation below dead
    # code. It has been removed so the pipeline actually runs.

    # create clf and its hyper-parameter search space
    tree_clf = DecisionTreeClassifier()
    param_grid = {
        'criterion': ['gini', 'entropy'],
        'max_depth': [3, 4, 5, 6],
        'max_features': [0.2, 0.5, 0.7, 0.9, 1.0],
        'min_impurity_decrease': [0.0, 0.1, 0.5],
        'min_samples_leaf': [3, 5, 8, 10, 12],
        'min_samples_split': [10],
        'min_weight_fraction_leaf': [0.0],
        'presort': [True],
        'random_state': [123456],
    }
    clf_gs = GridSearchCV(tree_clf, param_grid=param_grid, cv=4)
    # split and train
    mu = ModelUtils(df=df,
                    model=clf_gs,
                    predicted_lbl=common.prd_lbl,
                    actual_lbl=common.actl_lbl,
                    is_verbose=True)
    mu.is_verbose = True
    print(mu.df.head())
    mu.split_and_train()

    # test model
    train_result_df = mu.test_model()

    # evaluate tested results using plot_confusion_matrix
    print(mu.confusion_matrix_as_dataframe())
    evp = EvaluationPlots(df=train_result_df,
                          actual_lbl=common.actl_lbl,
                          predicted_lbl=common.prd_lbl)
    evp.plot_confusion_matrix(confusion_matrix=mu.confusion_matrix(),
                              classes_lst=mu.model.classes_,
                              title="Titanic-confusion_matrix")
    cr = mu.classification_report(y_pred=train_result_df[common.prd_lbl],
                                  y_true=train_result_df[common.actl_lbl])
    print(cr)
    evp.plot_classification_report(cr)
    plt.show()
    common.prepare_kaggle_file(mu, prep_data)
    def __init__(self,
                 df,
                 lm=None,
                 lm_name="",
                 predicted_lbl=None,
                 actual_lbl=None,
                 columns_lst=None,
                 test_size=0.3,
                 random_state=123456):
        """Wrap a linear model *lm* in a ModelUtils.

        Arguments mirror ModelUtils.__init__: *lm* is forwarded as the model
        and *lm_name* as its display name; the remaining keywords pass
        through unchanged.
        """
        # BUG FIX: the default was the mutable literal [], which is shared
        # across all instances; use None as the sentinel and materialize a
        # fresh list per call (same value reaches the parent as before).
        if columns_lst is None:
            columns_lst = []
        ModelUtils.__init__(self,
                            df=df,
                            model=lm,
                            model_name=lm_name,
                            predicted_lbl=predicted_lbl,
                            actual_lbl=actual_lbl,
                            columns_lst=columns_lst,
                            test_size=test_size,
                            random_state=random_state)
Example #4
0
def main():
    """Decision-tree Titanic example: train, evaluate, and emit a Kaggle file."""
    common = common_titanic_things(example_number='01')
    df = common.load_data("train.csv")

    # prepare the features
    df = prep_data(df, common)

    # crude class balancing: duplicate a sample of 120 survivors
    survivors_sample = df[df.Survived == 1].sample(n=120, random_state=123456)
    df = pd.concat([df, survivors_sample])

    # persist the prepared data for later exploration
    df.to_csv(common.output_csv_name("data.csv"))

    # build the classifier
    classifier = DecisionTreeClassifier(max_depth=5,
                                        min_samples_split=10,
                                        min_samples_leaf=10)

    # split the data and fit the model
    model_utils = ModelUtils(df=df,
                             model=classifier,
                             predicted_lbl=common.prd_lbl,
                             actual_lbl=common.actl_lbl)
    model_utils.is_verbose = True
    print(model_utils.df.head())
    model_utils.split_and_train()

    # score the held-out split
    results = model_utils.test_model()

    # report: confusion matrix (text + plot) and classification report
    print(model_utils.confusion_matrix_as_dataframe())
    plots = EvaluationPlots(df=results,
                            actual_lbl=common.actl_lbl,
                            predicted_lbl=common.prd_lbl)
    plots.plot_confusion_matrix(confusion_matrix=model_utils.confusion_matrix(),
                                classes_lst=model_utils.model.classes_,
                                title="Titanic-confusion_matrix")
    report = model_utils.classification_report(
        y_pred=results[common.prd_lbl],
        y_true=results[common.actl_lbl])
    print(report)
    plots.plot_classification_report(report)
    common.prepare_kaggle_file(model_utils, prep_data)
    plt.show()
Example #5
0
 def setUp(self):
     """Build the shared fixtures: iris/boston frames, a tree clf, labels."""
     iris_tools = DatasetsTools(datasets.load_iris)
     self.iris_df = iris_tools.data_as_df(target_column_name="IrisClass")
     self.boton_df = DatasetsTools(datasets.load_boston).data_as_df()
     self.tree_clf = DecisionTreeClassifier(max_depth=5,
                                            min_samples_split=10,
                                            min_samples_leaf=10)
     self.prd_lbl = "PrdictedIrisClass"
     self.actl_lbl = "IrisClass"
     # Feature columns are all iris columns except the trailing target.
     self.columns_lst = list(self.iris_df)[:-1]
     self.mu = ModelUtils(df=self.iris_df,
                          model=self.tree_clf,
                          columns_lst=self.columns_lst,
                          predicted_lbl=self.prd_lbl,
                          actual_lbl=self.actl_lbl)
Example #6
0
from MachineLearningUtils.ModelsUtils import ModelUtils
from MachineLearningUtils.UsefulPlots import EvaluationPlots

# Load the iris dataset into a DataFrame with a named target column.
prd_lbl = "PrdictedIrisClass"
actl_lbl = "IrisClass"
iris_df = DatasetsTools(datasets.load_iris).data_as_df(
    target_column_name="IrisClass")

# Configure the classifier.
tree_clf = DecisionTreeClassifier(max_depth=5,
                                  min_samples_split=10,
                                  min_samples_leaf=10)

# Simplest usage: wrap everything in ModelUtils, then split/train/test.
mu = ModelUtils(df=iris_df,
                model=tree_clf,
                predicted_lbl=prd_lbl,
                actual_lbl=actl_lbl)
mu.split_and_train()
results_df = mu.test_model()

# Evaluate the predictions with a plotted confusion matrix.
print(mu.confusion_matrix_as_dataframe())
evp = EvaluationPlots(df=results_df,
                      actual_lbl=actl_lbl,
                      predicted_lbl=prd_lbl)
evp.plot_confusion_matrix(confusion_matrix=mu.confusion_matrix(),
                          classes_lst=mu.model.classes_,
                          title="Iris-confusion_matrix")

cr = mu.classification_report(y_pred=results_df[prd_lbl],