Example #1
def test_custom_metric_config_experiment_with_kappa_filename():
    """Test config with metric defined in a file named kappa"""

    # Run experiment
    input_dir = join(_my_dir, "other")
    train_file = join(input_dir, "examples_train.jsonlines")
    test_file = join(input_dir, "examples_test.jsonlines")
    config_path = fill_in_config_paths_for_single_file(
        join(_my_dir, "configs", "test_custom_metrics_"
             "kappa.template.cfg"), train_file, test_file)
    run_configuration(config_path, local=True, quiet=True)

    # Check results for objective functions and output metrics

    # objective function f075_macro
    with open(
            join(_my_dir, 'output', ('test_custom_metrics_kappa_train_'
                                     'examples_train.jsonlines_test_'
                                     'examples_test.jsonlines_'
                                     'LogisticRegression.results.json'))) as f:
        result_dict = json.load(f)[0]

    test_objective_value = result_dict['score']
    test_output_metrics_dict = result_dict['additional_scores']
    test_accuracy_value = test_output_metrics_dict["balanced_accuracy"]
    test_dummy_metric_value = test_output_metrics_dict["dummy_metric"]

    # check that the values are as expected
    assert_almost_equal(test_objective_value, 0.9699, places=4)
    assert_almost_equal(test_accuracy_value, 0.9792, places=4)
    eq_(test_dummy_metric_value, 1.0)
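For context, here is a minimal sketch of what the custom metric file named after kappa might contain, assuming SKLL's convention of metric functions that take (y_true, y_pred) and return a float. The names f075_macro and dummy_metric come from the test above; their definitions here (a macro-averaged F-beta score with beta=0.75, and a constant 1.0 consistent with the eq_ assertion) are assumptions.

from sklearn.metrics import fbeta_score


def f075_macro(y_true, y_pred):
    # assumed definition: macro-averaged F-beta score with beta = 0.75
    return fbeta_score(y_true, y_pred, beta=0.75, average="macro")


def dummy_metric(y_true, y_pred):
    # assumed definition: constant metric, consistent with
    # eq_(test_dummy_metric_value, 1.0) in the test above
    return 1.0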
Example #2
def test_config_with_inverted_custom_metric():
    """Test config with a lower-is-better custom metric"""

    # run the first experiment that uses a lower-is-better custom metric
    # for grid search defined simply as 1 minus the macro-averaged F1 score
    input_dir = join(_my_dir, "other")
    train_file = join(input_dir, "examples_train.jsonlines")
    test_file = join(input_dir, "examples_test.jsonlines")
    config_path1 = fill_in_config_paths_for_single_file(
        join(_my_dir, "configs", "test_custom_"
             "metrics_kwargs1"
             ".template.cfg"), train_file, test_file)
    run_configuration(config_path1, local=True, quiet=True)

    # load the results
    with open(
            join(_my_dir, 'output', ('test_custom_metrics_kwargs1_train_'
                                     'examples_train.jsonlines_'
                                     'LogisticRegression.results.json'))) as f:
        result_dict1 = json.load(f)
        grid_score1 = result_dict1['grid_score']
        grid_results_dict1 = result_dict1['grid_search_cv_results']

    # now run the second experiment, which is identical except that
    # it uses the regular macro-averaged F1 score for grid search
    config_path2 = fill_in_config_paths_for_single_file(
        join(_my_dir, "configs", "test_custom_"
             "metrics_kwargs2"
             ".template.cfg"), train_file, test_file)
    run_configuration(config_path2, local=True, quiet=True)

    # load the results
    with open(
            join(_my_dir, 'output', ('test_custom_metrics_kwargs2_train_'
                                     'examples_train.jsonlines_'
                                     'LogisticRegression.results.json'))) as f:
        result_dict2 = json.load(f)
        grid_score2 = result_dict2['grid_score']
        grid_results_dict2 = result_dict2['grid_search_cv_results']

    # for both experiments, the ranking of the C hyperparameter should be
    # identical since when we defined one_minus_precision we set the
    # `greater_is_better` keyword argument to `False`
    assert_array_equal(grid_results_dict1['rank_test_score'],
                       grid_results_dict2['rank_test_score'])

    # furthermore, the final grid score and the mean scores for each
    # C hyperparameter value should follow the same 1 - X relationship,
    # except that the custom metric's scores are negated because of the
    # `greater_is_better=False` keyword argument we set when we defined it
    assert_almost_equal(1 - grid_score2, -1 * grid_score1, places=6)
    assert_array_almost_equal(
        1 - np.array(grid_results_dict2['mean_test_score']),
        -1 * np.array(grid_results_dict1['mean_test_score']),
        decimal=6)
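For reference, a hedged sketch of how a lower-is-better custom metric could be defined with scikit-learn's make_scorer, whose greater_is_better keyword is the one the comment above refers to. The function name one_minus_f1_macro and the exact way SKLL picks up the scorer from the custom metric file are assumptions, not taken from the test.

from sklearn.metrics import f1_score, make_scorer


def one_minus_f1_macro(y_true, y_pred):
    # lower is better: 1 minus the macro-averaged F1 score
    return 1.0 - f1_score(y_true, y_pred, average="macro")


# greater_is_better=False makes scikit-learn negate the score internally, which is
# why grid_score1 ends up negated relative to 1 - grid_score2 in the assertions above
one_minus_f1_macro_scorer = make_scorer(one_minus_f1_macro, greater_is_better=False)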
Example #3
def test_config_with_custom_prob_metric():
    """Test config with custom probabilistic metric"""

    # run the first experiment that uses a custom probabilistic metric
    # for grid search but with a learner that does not produce probabilities
    input_dir = join(_my_dir, "other")
    train_file = join(input_dir, "examples_train.jsonlines")
    test_file = join(input_dir, "examples_test.jsonlines")
    config_path = fill_in_config_paths_for_single_file(
        join(_my_dir, "configs", "test_custom_"
             "metrics_kwargs3"
             ".template.cfg"), train_file, test_file)

    # this should fail as expected
    assert_raises_regex(AttributeError,
                        r"has no attribute 'predict_proba'",
                        run_configuration,
                        config_path,
                        local=True,
                        quiet=True)

    # now run the second experiment that is identical except that
    # the learner now produces probabilities
    config_path = fill_in_config_paths_for_single_file(
        join(_my_dir, "configs", "test_custom_"
             "metrics_kwargs4"
             ".template.cfg"), train_file, test_file)
    # this should succeed and produce results
    run_configuration(config_path, local=True, quiet=True)

    # load the results and verify them
    with open(
            join(_my_dir, 'output', ('test_custom_metrics_kwargs4_train_'
                                     'examples_train.jsonlines_'
                                     'SVC.results.json'))) as f:
        result_dict = json.load(f)
        grid_score = result_dict['grid_score']

    ok_(grid_score > 0.95)
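For reference, a hedged sketch of how a probabilistic custom metric could be wrapped so that grid search has to call predict_proba on the learner, which is why the first run above fails with "has no attribute 'predict_proba'". The choice of ROC AUC and the use of needs_proba (replaced by response_method="predict_proba" in newer scikit-learn releases) are assumptions, not taken from the test.

from sklearn.metrics import make_scorer, roc_auc_score

# needs_proba=True is what makes scikit-learn call predict_proba during scoring;
# learners without predict_proba fail at that point with an AttributeError
proba_auc_scorer = make_scorer(roc_auc_score, needs_proba=True)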