def test_cohort_filter_classification_outcome(self):
    """Run the error analyzer with a classification-outcome cohort filter.

    The expected validation data is limited to rows where the pipeline's
    prediction differs from the true label, with the prediction column
    dropped before it is handed to ``run_error_analyzer``.
    """
    (train_x, test_x, train_y, test_y,
     numeric_cols, categorical_cols) = create_simple_titanic_data()
    model = create_titanic_pipeline(train_x, train_y)
    all_features = categorical_cols + numeric_cols

    # the indexes 1, 2 correspond to false positives and false negatives
    outcome_filter = {
        'arg': [1, 2],
        'column': CLASSIFICATION_OUTCOME,
        'method': 'includes',
    }

    predictions = model.predict(test_x)
    expected = create_validation_data(test_x, test_y, predictions)
    # Keep only the misclassified rows, then drop the prediction column.
    misclassified = expected[PRED_Y] != expected[TRUE_Y]
    expected = expected.loc[misclassified].drop(columns=PRED_Y)

    run_error_analyzer(expected,
                       model,
                       test_x,
                       test_y,
                       all_features,
                       categorical_cols,
                       ModelTask.CLASSIFICATION,
                       filters=[outcome_filter])
def test_importances_titanic(self):
    """Fit the titanic pipeline and pass it through ``run_error_analyzer``.

    Feature names are the categorical columns followed by the numeric
    columns; the categorical columns are also passed as the categorical
    feature list.
    """
    (train_x, test_x, train_y, test_y,
     numeric_cols, categorical_cols) = create_simple_titanic_data()
    model = create_titanic_pipeline(train_x, train_y)
    all_features = categorical_cols + numeric_cols
    run_error_analyzer(model,
                       test_x,
                       test_y,
                       all_features,
                       categorical_cols)
def test_matrix_filter_titanic(self):
    """Fit the titanic pipeline and pass it through ``run_error_analyzer``.

    Feature names are the categorical columns followed by the numeric
    columns; the categorical columns double as the categorical features.
    """
    # NOTE(review): another definition named test_matrix_filter_titanic
    # exists in this file; if both live in the same scope, the later one
    # replaces the earlier and only one of them runs -- confirm.
    titanic = create_simple_titanic_data()
    train_x, test_x, train_y, test_y, numeric_cols, categorical_cols = titanic
    model = create_titanic_pipeline(train_x, train_y)
    all_features = categorical_cols + numeric_cols
    run_error_analyzer(model,
                       test_x,
                       test_y,
                       all_features,
                       categorical_cols)
def test_matrix_filter_titanic(self):
    """Run the error analyzer on the titanic pipeline as classification.

    Feature names are the categorical columns followed by the numeric
    columns, and ``ModelTask.CLASSIFICATION`` is passed as the model task.
    """
    # NOTE(review): another definition named test_matrix_filter_titanic
    # exists in this file; if both live in the same scope, the later one
    # replaces the earlier and only one of them runs -- confirm.
    titanic = create_simple_titanic_data()
    train_x, test_x, train_y, test_y, numeric_cols, categorical_cols = titanic
    model = create_titanic_pipeline(train_x, train_y)
    all_features = categorical_cols + numeric_cols
    run_error_analyzer(model,
                       test_x,
                       test_y,
                       all_features,
                       categorical_cols,
                       model_task=ModelTask.CLASSIFICATION)
def test_cohort_filter_excludes(self):
    """Run the error analyzer with an ``excludes`` filter on EMBARKED.

    The expected validation data is restricted to the test rows whose
    EMBARKED value is ``'Q'``.
    """
    (train_x, test_x, train_y, test_y,
     numeric_cols, categorical_cols) = create_simple_titanic_data()
    model = create_titanic_pipeline(train_x, train_y)
    all_features = categorical_cols + numeric_cols

    # the indexes other than 0, 2 correspond to Q
    embarked_filter = {
        'arg': [0, 2],
        'column': EMBARKED,
        'method': 'excludes',
    }

    expected = create_validation_data(test_x, test_y)
    is_q = test_x[EMBARKED].isin(['Q'])
    expected = expected.loc[is_q]

    run_error_analyzer(expected,
                       model,
                       test_x,
                       test_y,
                       all_features,
                       categorical_cols,
                       ModelTask.CLASSIFICATION,
                       filters=[embarked_filter])
def test_invalid_comparison_titanic(self, analyzer_type):
    """Expect a TypeError when a string column is used as a tree feature.

    A string index column is appended to both the train and test data and
    listed among the tree features; ``run_error_analyzer`` must then raise
    a ``TypeError`` whose message names the ``string_index`` column.
    """
    titanic = create_simple_titanic_data()
    train_x, test_x, train_y, test_y, numeric_cols, categorical_cols = titanic

    string_cols = [STRING_INDEX]
    all_features = categorical_cols + numeric_cols + string_cols

    # Create a bad dummy string categorical feature
    train_x = add_string_index_col(train_x)
    test_x = add_string_index_col(test_x)
    model = create_titanic_pipeline(train_x, train_y)

    # The string column is deliberately left out of the categorical list
    # but included among the tree features.
    tree_features = string_cols + numeric_cols
    expected_message = (
        'Column string_index of type string is incorrectly treated '
        'as numeric with threshold value'
    )

    with pytest.raises(TypeError) as ve:
        run_error_analyzer(model,
                           test_x,
                           test_y,
                           all_features,
                           analyzer_type,
                           categorical_cols,
                           tree_features,
                           model_task=ModelTask.CLASSIFICATION)
    # Checked after the context manager exits so the assertion actually runs.
    assert expected_message in str(ve.value)