def test_saving_basic_ensemble_classifier():
    """Round-trip a basic titanic ensemble through save()/dill and check its Brier score."""
    np.random.seed(0)
    df_train, df_test = utils.get_titanic_binary_classification_dataset()
    predictor = utils.make_titanic_ensemble(df_train)

    saved_file = predictor.save(str(random.random()))
    with open(saved_file, 'rb') as read_file:
        restored_pipeline = dill.load(read_file)
    os.remove(saved_file)

    raw_probas = restored_pipeline.predict_proba(df_test)
    positive_probas = [row[1] for row in raw_probas]
    # print(positive_probas)

    test_score = utils.calculate_brier_score_loss(df_test.survived, positive_probas)
    print('test_score')
    print(test_score)

    # Very rough ensembles don't do as well on this problem as a standard
    # GradientBoostingClassifier does; a score around -.22 is typical.
    # Make sure our score is good, but not unreasonably good.
    assert -0.225 < test_score < -0.17
def test_optimize_final_model_classification():
    """Train with calibrate_final_model=True using a held-out calibration split."""
    np.random.seed(0)
    df_train, df_test = utils.get_titanic_binary_classification_dataset()

    # Take a third of our test data (a tenth of our overall data) for calibration.
    df_test, df_calibration = train_test_split(df_test, test_size=0.33, random_state=42)

    descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }
    predictor = Predictor(type_of_estimator='classifier', column_descriptions=descriptions)
    predictor.train(
        df_train,
        calibrate_final_model=True,
        X_test=df_calibration,
        y_test=df_calibration.survived,
    )

    test_score = predictor.score(df_test, df_test.survived)
    print('test_score')
    print(test_score)
    assert -0.215 < test_score < -0.17
def test_unmarked_categorical_column_throws_warning():
    """A categorical column omitted from column_descriptions should raise a warning."""
    df_train, df_test = utils.get_titanic_binary_classification_dataset()

    descriptions = {
        'survived': 'output',
        # 'sex' is the column we are "forgetting" to mark as categorical.
        'embarked': 'categorical',
        'pclass': 'categorical',
    }
    predictor = Predictor(type_of_estimator='classifier', column_descriptions=descriptions)

    with warnings.catch_warnings(record=True) as caught_w:
        predictor.train(df_train)
        print('we should be throwing a warning for the user to give them useful feedback on the unlabeled categorical column')
        assert len(caught_w) == 1

    # Prediction must still work despite the unmarked column.
    predictor.predict(df_test)

    # We want to make sure the above does not throw an error.
    assert True
def test_verify_features_does_not_work_by_default():
    """verify_features() should warn and return None when not enabled at train time.

    Fixes: the bare `except:` around the keras-file cleanup (which would even
    swallow KeyboardInterrupt) is narrowed to OSError, the only exception a
    missing file can raise from os.remove; `== None` replaced with `is None`.
    """
    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()
    ml_predictor = utils.train_basic_binary_classifier(df_titanic_train)

    file_name = ml_predictor.save(str(random.random()))
    with open(file_name, 'rb') as read_file:
        saved_ml_pipeline = dill.load(read_file)
    os.remove(file_name)
    try:
        # Deep-learning saves also write a companion keras .h5 file; remove it if present.
        keras_file_name = file_name[:-5] + '_keras_deep_learning_model.h5'
        os.remove(keras_file_name)
    except OSError:
        # The file simply wasn't written for this (non-keras) model.
        pass

    with warnings.catch_warnings(record=True) as w:
        results = saved_ml_pipeline.named_steps['final_model'].verify_features(
            df_titanic_test)
        print('Here are the caught warnings:')
        print(w)
        assert len(w) == 1
        assert results is None
def test_linear_model_analytics_classification(model_name=None):
    """Train a RidgeClassifier and check it lands in a reasonable score band."""
    np.random.seed(0)
    df_train, df_test = utils.get_titanic_binary_classification_dataset()

    descriptions = {
        'survived': 'output',
        'sex': 'categorical',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }
    predictor = Predictor(type_of_estimator='classifier', column_descriptions=descriptions)
    predictor.train(df_train, model_names='RidgeClassifier')

    test_score = predictor.score(df_test, df_test.survived)
    print('test_score')
    print(test_score)

    # Linear models aren't super great on this dataset...
    assert -0.21 < test_score < -0.17
def test_include_bad_y_vals_train_classification():
    """Training should tolerate missing values in the output column.

    Bug fix: the original used chained indexing
    (`df_titanic_train.iloc[1]['survived'] = None`), which assigns into a
    temporary copy returned by `.iloc[1]` and silently leaves the DataFrame
    unchanged (pandas SettingWithCopy). Single-step `.loc` indexing actually
    writes the missing labels the test intends to create.
    """
    np.random.seed(0)
    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()

    column_descriptions = {
        'survived': 'output',
        'sex': 'categorical',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }
    ml_predictor = Predictor(type_of_estimator='classifier',
                             column_descriptions=column_descriptions)

    # Blank out a few labels in one indexing step so the writes take effect.
    # Positions 1, 8 and 26 are mapped to their index labels explicitly.
    ml_predictor_bad_rows = df_titanic_train.index[[1, 8, 26]]
    df_titanic_train.loc[ml_predictor_bad_rows, 'survived'] = None

    ml_predictor.train(df_titanic_train)

    test_score = ml_predictor.score(df_titanic_test.to_dict('records'),
                                    df_titanic_test.survived)
    print('test_score')
    print(test_score)
    assert -0.17 < test_score < -0.135
def test_list_of_single_model_name_classification():
    """model_names given as a one-element list should work like a bare string."""
    np.random.seed(0)
    single_model = 'GradientBoostingClassifier'
    df_train, df_test = utils.get_titanic_binary_classification_dataset()

    descriptions = {
        'survived': 'output',
        'sex': 'categorical',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }
    predictor = Predictor(type_of_estimator='classifier', column_descriptions=descriptions)
    predictor.train(df_train, model_names=[single_model])

    test_score = predictor.score(df_test, df_test.survived)
    print('test_score')
    print(test_score)
    assert -0.16 < test_score < -0.135
def test_unexpected_datetime_column_handled_without_errors():
    """Predicting on a row with surprise date/datetime columns must not crash."""
    df_train, df_test = utils.get_titanic_binary_classification_dataset()

    descriptions = {
        'survived': 'output',
        'sex': 'categorical',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }
    predictor = Predictor(type_of_estimator='classifier', column_descriptions=descriptions)
    predictor.train(df_train)

    # Grab one random test row as a dict and graft unexpected datetime fields onto it.
    sample_row = df_test.sample(frac=0.1).to_dict('records')[0]
    sample_row['unexpected_column'] = datetime.date.today()
    sample_row['anoter_unexpected_column'] = datetime.datetime.today()

    predictor.predict(sample_row)

    # We want to make sure the above does not throw an error.
    assert True
def test_save_and_load_keras_deep_learning_model():
    """A DeepLearningClassifier pipeline reloaded via load_keras_model still scores well.

    Bug fix: this function was named `test_verify_features_does_not_work_by_default`,
    duplicating an earlier test in this file — the later definition shadowed the
    earlier one, so only one of the two ever ran under pytest. Renamed to describe
    what it actually tests (keras model save/load), restoring both tests.
    """
    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()

    column_descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }
    ml_predictor = Predictor(type_of_estimator='classifier',
                             column_descriptions=column_descriptions)
    ml_predictor.train(df_titanic_train,
                       perform_feature_scaling=False,
                       model_names=['DeepLearningClassifier'])

    file_name = ml_predictor.save(str(random.random()))
    saved_ml_pipeline = utils_models.load_keras_model(file_name)
    os.remove(file_name)

    test_score = saved_ml_pipeline.score(df_titanic_test, df_titanic_test.survived)
    assert -0.25 < test_score < -0.17
def test_pass_in_list_of_dictionaries_predict_classification(model_name=None):
    """score()/predict should accept a list of dicts (records) instead of a DataFrame.

    Cleanup: the original built `list_titanic_train = df_titanic_train.to_dict('records')`
    but never used it — training correctly runs on the DataFrame, and only the
    scoring input is converted to records. The dead local is removed.
    """
    np.random.seed(0)
    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()

    column_descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }
    ml_predictor = Predictor(type_of_estimator='classifier',
                             column_descriptions=column_descriptions)
    ml_predictor.train(df_titanic_train, model_names=model_name)

    # Score on the records (list-of-dicts) form of the test data.
    test_score = ml_predictor.score(df_titanic_test.to_dict('records'),
                                    df_titanic_test.survived)
    print('test_score')
    print(test_score)
    assert -0.215 < test_score < -0.17
def ensemble_classifier_basic_test(model_name=None):
    """Train an LGBM + RandomForest ensemble via ensemble_config and score it."""
    np.random.seed(0)
    df_train, df_test = utils.get_titanic_binary_classification_dataset()

    descriptions = {
        'survived': 'output',
        'sex': 'categorical',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }
    ensemble_config = [
        {'model_name': 'LGBMClassifier'},
        {'model_name': 'RandomForestClassifier'},
    ]

    predictor = Predictor(type_of_estimator='classifier', column_descriptions=descriptions)
    predictor.train(df_train, ensemble_config=ensemble_config)

    test_score = predictor.score(df_test, df_test.survived)
    print('test_score')
    print(test_score)
    assert -0.15 < test_score < -0.131
def test_categorical_ensemble_basic_classifier():
    """train_categorical_ensemble split on 'pclass' should score reasonably."""
    np.random.seed(0)
    df_train, df_test = utils.get_titanic_binary_classification_dataset()

    descriptions = {
        'survived': 'output',
        'pclass': 'categorical',
        'embarked': 'categorical',
    }
    predictor = Predictor(type_of_estimator='classifier', column_descriptions=descriptions)
    predictor.train_categorical_ensemble(df_train,
                                         categorical_column='pclass',
                                         optimize_final_model=False)

    test_score = predictor.score(df_test, df_test.survived)
    print('test_score')
    print(test_score)

    # Small sample sizes mean there's a fair bit of noise here.
    assert -0.226 < test_score < -0.17
def test_throws_warning_when_fl_data_equals_df_train():
    """Passing the training frame itself as fl_data must emit a warning.

    Cleanup: the exception binding `as e` was never used and is removed;
    the intent comment is clarified (train() is expected to fail here, and
    the KeyError is deliberately tolerated because the warning is what we test).
    """
    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()

    column_descriptions = {
        'survived': 'output',
        'sex': 'categorical',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }
    ml_predictor = Predictor(type_of_estimator='classifier',
                             column_descriptions=column_descriptions)

    with warnings.catch_warnings(record=True) as w:
        try:
            ml_predictor.train(df_titanic_train,
                               feature_learning=True,
                               fl_data=df_titanic_train)
        except KeyError:
            # train() is expected to error out after warning; only the warning matters here.
            pass

    for thing in w:
        print(thing)
    assert len(w) >= 1
    assert True
def test_perform_feature_scaling_true_classification_lgbm():
    """perform_feature_scaling=True with LGBMClassifier should still score well.

    Bug fix: this function shared the name `test_perform_feature_scaling_true_classification`
    with the parametrized variant defined later in the file; the later definition
    shadowed this one, so this test never ran. Renamed with an `_lgbm` suffix so
    both are collected.
    """
    np.random.seed(0)
    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()

    column_descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }
    ml_predictor = Predictor(type_of_estimator='classifier',
                             column_descriptions=column_descriptions)
    ml_predictor.train(df_titanic_train,
                       perform_feature_scaling=True,
                       model_names=['LGBMClassifier'])

    test_score = ml_predictor.score(df_titanic_test, df_titanic_test.survived)
    print('test_score')
    print(test_score)
    assert -0.215 < test_score < -0.17
def test_perform_feature_scaling_true_classification(model_name=None):
    """Feature scaling enabled, parametrized over model_name; deep learning gets a looser bound."""
    np.random.seed(0)
    df_train, df_test = utils.get_titanic_binary_classification_dataset()

    descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }
    predictor = Predictor(type_of_estimator='classifier', column_descriptions=descriptions)
    predictor.train(df_train, perform_feature_scaling=True, model_names=model_name)

    test_score = predictor.score(df_test, df_test.survived)
    print('test_score')
    print(test_score)

    # Deep learning is noisier on this dataset, so relax its lower bound.
    lower_bound = -0.235 if model_name == 'DeepLearningClassifier' else -0.215
    assert lower_bound < test_score < -0.17
def test_select_from_multiple_classification_models_using_X_test_and_y_test():
    """Model selection across many candidates, scored against held-out X_test/y_test."""
    np.random.seed(0)
    df_train, df_test = utils.get_titanic_binary_classification_dataset()

    descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }
    candidate_models = [
        'LogisticRegression',
        'RandomForestClassifier',
        'RidgeClassifier',
        'GradientBoostingClassifier',
        'ExtraTreesClassifier',
        'AdaBoostClassifier',
        'SGDClassifier',
        'Perceptron',
        'PassiveAggressiveClassifier',
    ]

    predictor = Predictor(type_of_estimator='classifier', column_descriptions=descriptions)
    predictor.train(df_train,
                    model_names=candidate_models,
                    X_test=df_test,
                    y_test=df_test.survived)

    test_score = predictor.score(df_test, df_test.survived)
    print('test_score')
    print(test_score)
    assert -0.215 < test_score < -0.17
def test_X_test_and_y_test_classification():
    """Passing X_test/y_test directly into train() should work end-to-end."""
    np.random.seed(0)
    df_train, df_test = utils.get_titanic_binary_classification_dataset()

    descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }
    predictor = Predictor(type_of_estimator='classifier', column_descriptions=descriptions)
    predictor.train(df_train, X_test=df_test, y_test=df_test.survived)

    test_score = predictor.score(df_test, df_test.survived)
    print('test_score')
    print(test_score)
    assert -0.215 < test_score < -0.17
def test_optimize_entire_pipeline_classification():
    """optimize_entire_pipeline=True with a deep learning model should score acceptably."""
    np.random.seed(0)
    df_train, df_test = utils.get_titanic_binary_classification_dataset()

    descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }
    predictor = Predictor(type_of_estimator='classifier', column_descriptions=descriptions)
    predictor.train(df_train,
                    optimize_entire_pipeline=True,
                    model_names=['DeepLearningClassifier'])

    test_score = predictor.score(df_test, df_test.survived)
    print('test_score')
    print(test_score)
    assert -0.25 < test_score < -0.17
def test_include_bad_y_vals_predict_classification(model_name=None):
    """Scoring should tolerate NaN/inf/None labels in the test set.

    Bug fix: `DataFrame.ix` was deprecated in pandas 0.20 and removed in 1.0,
    so these lines crash on any modern pandas. `.loc` with the same integer
    labels is the direct replacement — .ix resolved integer keys as labels
    first, which `.loc` matches on the titanic frame's default integer index.
    """
    np.random.seed(0)
    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()

    column_descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }
    ml_predictor = Predictor(type_of_estimator='classifier',
                             column_descriptions=column_descriptions)

    # Poison a few test labels with the usual suspects.
    df_titanic_test.loc[1, 'survived'] = float('nan')
    df_titanic_test.loc[8, 'survived'] = float('inf')
    df_titanic_test.loc[26, 'survived'] = None

    ml_predictor.train(df_titanic_train, model_names=model_name)

    test_score = ml_predictor.score(df_titanic_test.to_dict('records'),
                                    df_titanic_test.survived)
    print('test_score')
    print(test_score)
    assert -0.215 < test_score < -0.17
def categorical_ensembling_classification(model_name=None):
    """Categorical ensembling split on 'embarked', with model-specific score bounds."""
    np.random.seed(0)
    df_train, df_test = utils.get_titanic_binary_classification_dataset()

    descriptions = {
        'survived': 'output',
        'sex': 'categorical',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }
    predictor = Predictor(type_of_estimator='classifier', column_descriptions=descriptions)
    predictor.train_categorical_ensemble(df_train,
                                         model_names=model_name,
                                         categorical_column='embarked')

    test_score = predictor.score(df_test, df_test.survived)
    print('test_score')
    print(test_score)

    # Noisier models get looser lower bounds.
    per_model_lower_bounds = {
        'DeepLearningClassifier': -0.215,
        'CatBoostClassifier': -0.25,
    }
    lower_bound = per_model_lower_bounds.get(model_name, -0.18)
    assert lower_bound < test_score < -0.145
def test_all_algos_classification(model_name=None):
    """Train across the full roster of classifier types in one selection run."""
    np.random.seed(0)
    df_train, df_test = utils.get_titanic_binary_classification_dataset()

    descriptions = {
        'survived': 'output',
        'sex': 'categorical',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }
    all_models = [
        'LogisticRegression',
        'RandomForestClassifier',
        'RidgeClassifier',
        'GradientBoostingClassifier',
        'ExtraTreesClassifier',
        'AdaBoostClassifier',
        'SGDClassifier',
        'Perceptron',
        'PassiveAggressiveClassifier',
        'DeepLearningClassifier',
        'XGBClassifier',
        'LGBMClassifier',
        'LinearSVC',
    ]

    predictor = Predictor(type_of_estimator='classifier', column_descriptions=descriptions)
    predictor.train(df_train, model_names=all_models)

    test_score = predictor.score(df_test, df_test.survived)
    print('test_score')
    print(test_score)

    # Linear models aren't super great on this dataset...
    assert -0.215 < test_score < -0.131
def optimize_final_model_classification(model_name=None):
    """optimize_final_model=True, parametrized over model_name; deep learning gets slack."""
    np.random.seed(0)
    df_train, df_test = utils.get_titanic_binary_classification_dataset()

    descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }
    predictor = Predictor(type_of_estimator='classifier', column_descriptions=descriptions)
    predictor.train(df_train, optimize_final_model=True, model_names=model_name)

    test_score = predictor.score(df_test, df_test.survived)
    print('test_score')
    print(test_score)

    # Small sample sizes mean there's a fair bit of noise here.
    lower_bound = -0.25 if model_name == 'DeepLearningClassifier' else -0.215
    assert lower_bound < test_score < -0.17
def test_compare_all_models_classification(model_name=None):
    """compare_all_models=True should still produce a well-scoring final model."""
    np.random.seed(0)
    df_train, df_test = utils.get_titanic_binary_classification_dataset()

    descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }
    predictor = Predictor(type_of_estimator='classifier', column_descriptions=descriptions)
    predictor.train(df_train, compare_all_models=True, model_names=model_name)

    test_score = predictor.score(df_test, df_test.survived)
    print('test_score')
    print(test_score)
    assert -0.215 < test_score < -0.17
def test_verify_features_finds_no_missing_features_when_none_are_missing():
    """With identical train/predict columns, verify_features reports empty diffs both ways."""
    np.random.seed(0)
    df_train, df_test = utils.get_titanic_binary_classification_dataset()

    descriptions = {
        'survived': 'output',
        'sex': 'categorical',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }
    predictor = Predictor(type_of_estimator='classifier', column_descriptions=descriptions)
    predictor.train(df_train, verify_features=True)

    # Round-trip through save()/dill so we exercise the persisted pipeline.
    saved_file = predictor.save(str(random.random()))
    with open(saved_file, 'rb') as read_file:
        restored_pipeline = dill.load(read_file)
    os.remove(saved_file)

    missing_features = restored_pipeline.named_steps['final_model'].verify_features(df_test)
    print('missing_features')
    print(missing_features)
    print("len(missing_features['prediction_not_training'])")
    print(len(missing_features['prediction_not_training']))
    print("len(missing_features['training_not_prediction'])")
    print(len(missing_features['training_not_prediction']))

    assert len(missing_features['prediction_not_training']) == 0
    assert len(missing_features['training_not_prediction']) == 0
def test_already_transformed_X():
    """Exercise reuse of a trained transformation pipeline and pre-transformed X.

    Three stages: (1) train normally while capturing the transformation
    pipeline; (2) train a fresh predictor reusing that pipeline; (3) train a
    third predictor from the pipeline plus an already-transformed X/y pair.
    """
    np.random.seed(0)
    df_train, df_test = utils.get_titanic_binary_classification_dataset()

    # Take a third of our test data (a tenth of our overall data) for calibration.
    df_test, df_calibration = train_test_split(df_test, test_size=0.33, random_state=42)

    descriptions = {
        'survived': 'output',
        'sex': 'categorical',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }

    # Stage 1: pass in return_trans_pipeline, and get the trans pipeline.
    predictor = Predictor(type_of_estimator='classifier', column_descriptions=descriptions)
    trans_pipeline = predictor.train(df_train,
                                     model_names='LogisticRegression',
                                     return_transformation_pipeline=True)

    # Get transformed X through transform_only.
    X_train_transformed = predictor.transform_only(df_train)

    # Stage 2: a new predictor fed the trained trans pipeline should work.
    predictor = Predictor(type_of_estimator='classifier', column_descriptions=descriptions)
    predictor.train(df_train, trained_transformation_pipeline=trans_pipeline)

    test_score = predictor.score(df_test, df_test.survived)
    print('test_score')
    print(test_score)
    assert -0.14 < test_score < -0.12

    # Stage 3: both a trans pipeline and a previously transformed X should work too.
    predictor = Predictor(type_of_estimator='classifier', column_descriptions=descriptions)
    predictor.train(None,
                    trained_transformation_pipeline=trans_pipeline,
                    transformed_X=X_train_transformed,
                    transformed_y=df_train.survived)

    test_score = predictor.score(df_test, df_test.survived)
    print('test_score')
    print(test_score)
    assert -0.14 < test_score < -0.12
def test_binary_classification():
    """Smoke test: a basic binary classifier scores in the expected band."""
    df_train, df_test = utils.get_titanic_binary_classification_dataset()
    predictor = utils.train_basic_binary_classifier(df_train)

    test_score = predictor.score(df_test, df_test.survived, verbose=0)

    # Right now we're getting a score of -.205.
    # Make sure our score is good, but not unreasonably good.
    assert -0.215 < test_score < -0.17
def test_bad_val_for_type_of_estimator():
    """Construct a Predictor with an invalid type_of_estimator value.

    NOTE(review): nothing here asserts that an exception is raised — as written,
    the test passes only if Predictor(...) does NOT raise. Presumably the intent
    was to check rejection of the bad value; confirm against Predictor's contract.
    """
    np.random.seed(0)
    df_train, df_test = utils.get_titanic_binary_classification_dataset()

    descriptions = {
        # 'survived': 'output' is intentionally absent here too.
        'embarked': 'categorical',
        'pclass': 'categorical',
    }
    Predictor(type_of_estimator='invalid_type_of_estimator',
              column_descriptions=descriptions)
def test_basic_ensemble_classifier():
    """A basic titanic ensemble scores in the expected (roughly -.22) band."""
    df_train, df_test = utils.get_titanic_binary_classification_dataset()
    predictor = utils.make_titanic_ensemble(df_train)

    test_score = predictor.score(df_test, df_test.survived, verbose=0)

    # Very rough ensembles don't do as well on this problem as a standard
    # GradientBoostingClassifier does; around -.22 is typical.
    # Make sure our score is good, but not unreasonably good.
    assert -0.225 < test_score < -0.17
def test_missing_output_col_in_column_descriptions():
    """Construct a Predictor whose column_descriptions has no 'output' column.

    NOTE(review): nothing here asserts that an exception is raised — as written,
    the test passes only if Predictor(...) does NOT raise on the missing output
    column. Confirm whether construction or a later train() call is meant to fail.
    """
    np.random.seed(0)
    df_train, df_test = utils.get_titanic_binary_classification_dataset()

    descriptions = {
        # 'survived': 'output' is deliberately omitted.
        'embarked': 'categorical',
        'pclass': 'categorical',
    }
    Predictor(type_of_estimator='classifier', column_descriptions=descriptions)
def test_binary_classification_predict_on_Predictor_instance():
    """predict() straight off the trained Predictor gives sane accuracy."""
    np.random.seed(0)
    df_train, df_test = utils.get_titanic_binary_classification_dataset()
    predictor = utils.train_basic_binary_classifier(df_train)

    predictions = predictor.predict(df_test)
    test_score = accuracy_score(predictions, df_test.survived)

    # Make sure our score is good, but not unreasonably good.
    print(test_score)
    assert .77 < test_score < .805