def test_AUTO_stopping_metric_with_no_sorting_metric_regression():
    """Regression AutoML run with no explicit stopping or sort metric.

    Trains on the 'regression' dataset with DeepLearning and GLM excluded,
    passes the expected leaderboard metrics (sort metric "rmse") to
    ``check_leaderboard``, then checks the ``stopping_metric`` property of
    the base models: ``None`` for models whose name contains 'XGBoost_1',
    "deviance" for every other base model.
    """
    print(
        "Check leaderboard with AUTO stopping metric and no sorting metric for regression"
    )
    dataset = import_dataset('regression', split=False)
    excluded = ["DeepLearning", "GLM"]
    automl = H2OAutoML(
        project_name="py_aml_lb_test_auto_stopping_metric_no_sorting_regression",
        seed=automl_seed,
        max_models=10,
        nfolds=2,
        stopping_rounds=1,
        stopping_tolerance=0.5,
        exclude_algos=excluded,
    )
    automl.train(y=dataset.target, training_frame=dataset.train)

    expected_metrics = ["rmse", "mse", "mae", "rmsle", "mean_residual_deviance"]
    check_leaderboard(automl, excluded, expected_metrics, "rmse")

    base_models = get_partitioned_model_names(automl.leaderboard).base
    xgb_first = [name for name in base_models if 'XGBoost_1' in name]
    remaining = [name for name in base_models if name not in xgb_first]
    # If stopping_rounds == 0, the actual value of stopping_metric is set to None.
    check_model_property(xgb_first, 'stopping_metric', True, None)
    check_model_property(remaining, 'stopping_metric', True, "deviance")
def test_AUTO_stopping_metric_with_no_sorting_metric_binary():
    """Binary AutoML run with no explicit stopping or sort metric.

    Trains on the 'binary' dataset with DeepLearning, GLM and StackedEnsemble
    excluded, passes the expected leaderboard metrics (sort metric "auc",
    final argument ``True``) to ``check_leaderboard``, then checks the
    ``stopping_metric`` property of the base models: ``None`` for models
    whose name contains 'XGBoost_1', "logloss" for every other base model.
    """
    print(
        "Check leaderboard with AUTO stopping metric and no sorting metric for binary"
    )
    dataset = import_dataset('binary', split=False)
    excluded = ["DeepLearning", "GLM", "StackedEnsemble"]
    automl_settings = dict(
        project_name="py_aml_lb_test_auto_stopping_metric_no_sorting_binary",
        seed=automl_seed,
        max_models=10,
        nfolds=2,
        stopping_rounds=1,
        stopping_tolerance=0.5,
        exclude_algos=excluded,
    )
    automl = H2OAutoML(**automl_settings)
    automl.train(y=dataset.target, training_frame=dataset.train)

    expected_metrics = [
        "auc", "logloss", "aucpr", "mean_per_class_error", "rmse", "mse",
    ]
    check_leaderboard(automl, excluded, expected_metrics, "auc", True)

    base_models = get_partitioned_model_names(automl.leaderboard).base
    xgb_first = [name for name in base_models if 'XGBoost_1' in name]
    remaining = [name for name in base_models if name not in xgb_first]
    # If stopping_rounds == 0, the actual value of stopping_metric is set to None.
    check_model_property(xgb_first, 'stopping_metric', True, None)
    check_model_property(remaining, 'stopping_metric', True, "logloss")
def test_leaderboard_with_all_algos():
    """Multiclass AutoML run with no algorithm excluded.

    Trains on the 'multiclass' dataset and passes an empty exclusion list,
    the expected leaderboard metrics and the sort metric
    "mean_per_class_error" to ``check_leaderboard``.
    """
    print("Check leaderboard for all algorithms")
    dataset = import_dataset('multiclass', split=False)
    automl = H2OAutoML(
        project_name="py_aml_lb_test_all_algos",
        seed=automl_seed,
        max_models=12,
        nfolds=2,
        stopping_rounds=1,
        stopping_tolerance=0.5,
    )
    automl.train(y=dataset.target, training_frame=dataset.train)

    expected_metrics = ["mean_per_class_error", "logloss", "rmse", "mse"]
    check_leaderboard(automl, [], expected_metrics, "mean_per_class_error")
# Example #4
# 0
def test_leaderboard_for_regression():
    """Regression AutoML run checked against the default leaderboard sorting.

    Trains on the 'regression' dataset with GBM and DeepLearning excluded and
    passes the expected leaderboard metrics, with "mean_residual_deviance" as
    the sort metric, to ``check_leaderboard``.
    """
    print("Check leaderboard for Regression with default sorting")
    dataset = import_dataset('regression', split=False)
    excluded = ["GBM", "DeepLearning"]
    automl = H2OAutoML(
        project_name="py_aml_lb_test_default_regr_sort",
        seed=automl_seed,
        max_models=8,
        nfolds=2,
        stopping_rounds=1,
        stopping_tolerance=0.5,
        exclude_algos=excluded,
    )
    automl.train(y=dataset.target, training_frame=dataset.train)

    expected_metrics = ["mean_residual_deviance", "rmse", "mse", "mae", "rmsle"]
    check_leaderboard(automl, excluded, expected_metrics, "mean_residual_deviance")
def test_leaderboard_for_multiclass():
    """Multiclass AutoML run checked against the default leaderboard sorting.

    Trains on the 'multiclass' dataset with GBM and DeepLearning excluded and
    passes the expected leaderboard metrics, with "mean_per_class_error" as
    the sort metric, to ``check_leaderboard``.
    """
    print("Check leaderboard for multiclass with default sorting")
    dataset = import_dataset('multiclass', split=False)
    excluded = ["GBM", "DeepLearning"]
    automl_settings = dict(
        project_name="py_aml_lb_test_default_multiclass_sort",
        seed=automl_seed,
        max_models=8,
        nfolds=2,
        stopping_rounds=1,
        stopping_tolerance=0.5,
        exclude_algos=excluded,
    )
    automl = H2OAutoML(**automl_settings)
    automl.train(y=dataset.target, training_frame=dataset.train)

    expected_metrics = ["mean_per_class_error", "logloss", "rmse", "mse"]
    check_leaderboard(automl, excluded, expected_metrics, "mean_per_class_error")
def test_leaderboard_with_no_algos():
    """Binary AutoML run with every algorithm excluded.

    Uses ``all_algos`` as the exclusion list and a 10-second runtime budget,
    asserts that the resulting leaderboard has zero rows, then calls
    ``check_leaderboard`` with no expected metrics and ``None`` for both the
    sort metric and the final argument.
    """
    print("Check leaderboard for excluding all algos (empty leaderboard)")
    dataset = import_dataset('binary', split=False)
    excluded = all_algos
    automl = H2OAutoML(
        project_name="py_aml_lb_test_no_algo",
        seed=automl_seed,
        max_runtime_secs=10,
        nfolds=2,
        stopping_rounds=1,
        stopping_tolerance=0.5,
        exclude_algos=excluded,
    )
    automl.train(y=dataset.target, training_frame=dataset.train)

    # With nothing allowed to train, the leaderboard must stay empty.
    assert automl.leaderboard.nrows == 0
    check_leaderboard(automl, excluded, [], None, None)
# Example #7
# 0
def test_AUTO_stopping_metric_with_custom_sorting_metric_regression():
    """Regression AutoML run with ``sort_metric="rmse"`` and no stopping metric.

    Trains on the 'regression' dataset with DeepLearning and GLM excluded,
    passes the expected leaderboard metrics (sort metric "rmse") to
    ``check_leaderboard``, then checks that the ``stopping_metric`` property
    of every base model is "RMSE".
    """
    print("Check leaderboard with AUTO stopping metric and rmse sorting metric")
    dataset = import_dataset('regression', split=False)
    excluded = ["DeepLearning", "GLM"]
    automl = H2OAutoML(
        project_name="py_aml_lb_test_auto_stopping_metric_custom_sorting",
        seed=automl_seed,
        sort_metric="rmse",
        max_models=10,
        nfolds=2,
        stopping_rounds=1,
        stopping_tolerance=0.5,
        exclude_algos=excluded,
    )
    automl.train(y=dataset.target, training_frame=dataset.train)

    expected_metrics = ["rmse", "mean_residual_deviance", "mse", "mae", "rmsle"]
    check_leaderboard(automl, excluded, expected_metrics, "rmse")

    base_models = get_partitioned_model_names(automl.leaderboard).base
    check_model_property(base_models, 'stopping_metric', True, "RMSE")
def test_leaderboard_for_binary_with_custom_sorting():
    """Binary AutoML run with the leaderboard sorted by logloss.

    Trains on the 'binary' dataset with GLM, DeepLearning and DRF excluded
    and ``sort_metric="logloss"``, then passes the expected leaderboard
    metrics and the sort metric "logloss" to ``check_leaderboard``.
    """
    print("Check leaderboard for Binomial sort by logloss")
    dataset = import_dataset('binary', split=False)
    excluded = ["GLM", "DeepLearning", "DRF"]
    automl_settings = dict(
        project_name="py_aml_lb_test_custom_binom_sort",
        seed=automl_seed,
        sort_metric="logloss",
        max_models=8,
        nfolds=2,
        stopping_rounds=1,
        stopping_tolerance=0.5,
        exclude_algos=excluded,
    )
    automl = H2OAutoML(**automl_settings)
    automl.train(y=dataset.target, training_frame=dataset.train)

    expected_metrics = [
        "logloss", "auc", "aucpr", "mean_per_class_error", "rmse", "mse",
    ]
    check_leaderboard(automl, excluded, expected_metrics, "logloss")
def test_AUTO_stopping_metric_with_auc_sorting_metric():
    """Binary AutoML run with ``sort_metric='auc'`` and no stopping metric.

    Trains on the 'binary' dataset with DeepLearning, GLM and StackedEnsemble
    excluded, passes the expected leaderboard metrics (sort metric "auc",
    final argument ``True``) to ``check_leaderboard``, then checks that the
    ``stopping_metric`` property of every base model is "logloss".
    """
    print("Check leaderboard with AUTO stopping metric and auc sorting metric")
    dataset = import_dataset('binary', split=False)
    excluded = ["DeepLearning", "GLM", "StackedEnsemble"]
    automl = H2OAutoML(
        project_name="py_aml_lb_test_auto_stopping_metric_auc_sorting",
        seed=automl_seed,
        sort_metric='auc',
        max_models=10,
        nfolds=2,
        stopping_rounds=1,
        stopping_tolerance=0.5,
        exclude_algos=excluded,
    )
    automl.train(y=dataset.target, training_frame=dataset.train)

    expected_metrics = [
        "auc", "logloss", "aucpr", "mean_per_class_error", "rmse", "mse",
    ]
    check_leaderboard(automl, excluded, expected_metrics, "auc", True)

    base_models = get_partitioned_model_names(automl.leaderboard).base
    check_model_property(base_models, 'stopping_metric', True, "logloss")