Example #1
import warnings

import utils  # assumed: the test-helper module these tests use
from auto_ml import Predictor


def test_unmarked_categorical_column_throws_warning():
    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()

    column_descriptions = {
        'survived': 'output',
        # 'sex': 'categorical' is deliberately omitted: this is the
        # categorical column we are "forgetting" to mark
        'embarked': 'categorical',
        'pclass': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='classifier',
                             column_descriptions=column_descriptions)

    with warnings.catch_warnings(record=True) as caught_w:
        ml_predictor.train(df_titanic_train)
        print('we should throw a warning here to give the user useful '
              'feedback on the unmarked categorical column')
        assert len(caught_w) == 1

    ml_predictor.predict(df_titanic_test)

    # We want to make sure the above does not throw an error
    assert True
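
The warning check above relies on the standard-library warnings.catch_warnings(record=True) pattern. For reference, here is a minimal self-contained sketch of that pattern; might_warn() and its message are made-up stand-ins for Predictor.train():

import warnings

def might_warn():
    # Hypothetical stand-in for the training call that should warn.
    warnings.warn('unmarked categorical column: sex')

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')  # record every warning, regardless of active filters
    might_warn()

assert len(caught) == 1
assert 'categorical' in str(caught[0].message)
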
Example #2
import datetime

import utils  # assumed: the same test-helper module as above
from auto_ml import Predictor


def test_unexpected_datetime_column_handled_without_errors():
    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()

    column_descriptions = {
        'survived': 'output',
        'sex': 'categorical',
        'embarked': 'categorical',
        'pclass': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='classifier',
                             column_descriptions=column_descriptions)

    ml_predictor.train(df_titanic_train)

    # Take a single random row of the test set as a plain dict
    test_dict = df_titanic_test.sample(frac=0.1).to_dict('records')[0]

    # Add columns the model has never seen, holding datetime values
    test_dict['unexpected_column'] = datetime.date.today()
    test_dict['another_unexpected_column'] = datetime.datetime.today()

    ml_predictor.predict(test_dict)

    # We want to make sure the above does not throw an error
    assert True
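
The test only asserts that predict() does not raise; how the unexpected datetime columns are handled internally is up to the library. One plausible strategy, sketched here with hypothetical names (this is not auto_ml's actual implementation), is to expand datetime values into plain numeric features:

import datetime

def coerce_datetimes(row):
    # Expand any datetime-valued entries into numeric features so downstream
    # models never see a raw date object. Sketch only; names are hypothetical.
    out = {}
    for key, value in row.items():
        if isinstance(value, (datetime.date, datetime.datetime)):
            out[f'{key}_year'] = value.year
            out[f'{key}_month'] = value.month
            out[f'{key}_day'] = value.day
        else:
            out[key] = value
    return out

row = {'fare': 7.25, 'boarded': datetime.date(1912, 4, 10)}
print(coerce_datetimes(row))
# {'fare': 7.25, 'boarded_year': 1912, 'boarded_month': 4, 'boarded_day': 10}
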
Example #3
                'LGBMClassifier',
                'CatBoostClassifier',
                'XGBClassifier',
            ],  # select all of the classifiers
            # optimize_final_model=True,  # fails with an error :(
            # feature_learning=True,
            # fl_data=X_test.copy(),
            ml_for_analytics=False,
            verbose=False,
        )

        print(f'PREDICT ON FOLD {count}')
        predictions = automl.predict_proba(X_test)
        y_test_predict_proba = predictions[:, 1]  # probability of the positive class
        print('AUC: ', roc_auc_score(y_test, y_test_predict_proba))
        y_test_predict = automl.predict(X_test)

        END_EXPERIMENT = time.time()

        # preds = pd.DataFrame(predictions)
        # preds['Y'] = y_test.reset_index(drop=True)
        # preds.to_csv(f'./result/predicts/{DATASET_NAME}_{MODEL_NAME}_predict_proba_exp_{EXPERIMENT}.csv', index=False)

        metrics.append({
            'AUC': round(roc_auc_score(y_test, y_test_predict_proba), 4),
            'log_loss': round(log_loss(y_test, y_test_predict_proba), 4),
            'Accuracy': round(accuracy_score(y_test, y_test_predict), 4),
            'Time_min': (END_EXPERIMENT - START_EXPERIMENT) // 60,