def test_custom_metric_config_experiment_with_kappa_filename():
    """Test config with metric defined in a file named kappa"""

    # Run experiment
    input_dir = join(_my_dir, "other")
    train_file = join(input_dir, "examples_train.jsonlines")
    test_file = join(input_dir, "examples_test.jsonlines")
    config_path = fill_in_config_paths_for_single_file(
        join(_my_dir, "configs", "test_custom_metrics_kappa.template.cfg"),
        train_file,
        test_file)
    run_configuration(config_path, local=True, quiet=True)

    # Check results for objective functions and output metrics

    # objective function f075_macro
    with open(join(_my_dir,
                   'output',
                   ('test_custom_metrics_kappa_train_'
                    'examples_train.jsonlines_test_'
                    'examples_test.jsonlines_'
                    'LogisticRegression.results.json'))) as f:
        result_dict = json.load(f)[0]
        test_objective_value = result_dict['score']
        test_output_metrics_dict = result_dict['additional_scores']

    test_accuracy_value = test_output_metrics_dict["balanced_accuracy"]
    test_dummy_metric_value = test_output_metrics_dict["dummy_metric"]

    # check that the values are as expected
    assert_almost_equal(test_objective_value, 0.9699, places=4)
    assert_almost_equal(test_accuracy_value, 0.9792, places=4)
    eq_(test_dummy_metric_value, 1.0)
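
# A minimal, hypothetical sketch (assumed names; not the project's actual
# kappa.py) of what a custom metric file like the one referenced above could
# contain: plain functions that take the gold labels and the predictions and
# return a float, e.g. a macro-averaged F-beta score with beta=0.75 and a
# trivial dummy metric that always returns 1.0.
from sklearn.metrics import fbeta_score


def f075_macro(y_true, y_pred):
    """Macro-averaged F-beta score with beta=0.75."""
    return fbeta_score(y_true, y_pred, beta=0.75, average="macro")


def dummy_metric(y_true, y_pred):
    """Trivial metric whose constant value can be checked in the output."""
    return 1.0
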
def test_config_with_inverted_custom_metric():
    """Test config with a lower-is-better custom metric"""

    # run the first experiment that uses a lower-is-better custom metric
    # for grid search defined as simply 1 minus the macro-averaged F1 score
    input_dir = join(_my_dir, "other")
    train_file = join(input_dir, "examples_train.jsonlines")
    test_file = join(input_dir, "examples_test.jsonlines")
    config_path1 = fill_in_config_paths_for_single_file(
        join(_my_dir, "configs", "test_custom_metrics_kwargs1.template.cfg"),
        train_file,
        test_file)
    run_configuration(config_path1, local=True, quiet=True)

    # load the results
    with open(join(_my_dir,
                   'output',
                   ('test_custom_metrics_kwargs1_train_'
                    'examples_train.jsonlines_'
                    'LogisticRegression.results.json'))) as f:
        result_dict1 = json.load(f)
        grid_score1 = result_dict1['grid_score']
        grid_results_dict1 = result_dict1['grid_search_cv_results']

    # now run the second experiment that is identical except that
    # it uses the regular macro-averaged F1 score for grid search
    config_path2 = fill_in_config_paths_for_single_file(
        join(_my_dir, "configs", "test_custom_metrics_kwargs2.template.cfg"),
        train_file,
        test_file)
    run_configuration(config_path2, local=True, quiet=True)

    # load the results
    with open(join(_my_dir,
                   'output',
                   ('test_custom_metrics_kwargs2_train_'
                    'examples_train.jsonlines_'
                    'LogisticRegression.results.json'))) as f:
        result_dict2 = json.load(f)
        grid_score2 = result_dict2['grid_score']
        grid_results_dict2 = result_dict2['grid_search_cv_results']

    # for both experiments, the ranking of the C hyperparameter values should
    # be identical since when we defined one_minus_precision we set the
    # `greater_is_better` keyword argument to `False`
    assert_array_equal(grid_results_dict1['rank_test_score'],
                       grid_results_dict2['rank_test_score'])

    # furthermore, the final grid score and the mean scores for each
    # C hyperparameter value should follow the same 1-X relationship
    # except that our custom metric should be negated due to the
    # keyword argument that we set when we defined it
    assert_almost_equal(1 - grid_score2, -1 * grid_score1, places=6)
    assert_array_almost_equal(
        1 - np.array(grid_results_dict2['mean_test_score']),
        -1 * np.array(grid_results_dict1['mean_test_score']),
        decimal=6)
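
# A minimal, hypothetical sketch (assumed names; not the project's actual
# kwargs1 metric file) of how a lower-is-better metric like the one exercised
# above could be defined: the function returns 1 minus the macro-averaged F1
# score, and a scorer built from it with greater_is_better=False is negated by
# scikit-learn so that grid search can still maximize it, which is exactly the
# -1 * grid_score1 relationship asserted in the test.
from sklearn.metrics import f1_score, make_scorer


def one_minus_f1_macro(y_true, y_pred):
    """Return 1 minus the macro-averaged F1 score (lower is better)."""
    return 1 - f1_score(y_true, y_pred, average="macro")


# greater_is_better=False makes scikit-learn multiply the reported value
# by -1, so higher (i.e., less negative) is still better during grid search.
one_minus_f1_macro_scorer = make_scorer(one_minus_f1_macro,
                                        greater_is_better=False)
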
def test_config_with_custom_prob_metric():
    """Test config with custom probabilistic metric"""

    # run the first experiment that uses a custom probabilistic metric
    # for grid search but with a learner that does not produce probabilities
    input_dir = join(_my_dir, "other")
    train_file = join(input_dir, "examples_train.jsonlines")
    test_file = join(input_dir, "examples_test.jsonlines")
    config_path = fill_in_config_paths_for_single_file(
        join(_my_dir, "configs", "test_custom_metrics_kwargs3.template.cfg"),
        train_file,
        test_file)

    # this should fail as expected
    assert_raises_regex(AttributeError,
                        r"has no attribute 'predict_proba'",
                        run_configuration,
                        config_path,
                        local=True,
                        quiet=True)

    # now run the second experiment that is identical except that
    # the learner now produces probabilities
    config_path = fill_in_config_paths_for_single_file(
        join(_my_dir, "configs", "test_custom_metrics_kwargs4.template.cfg"),
        train_file,
        test_file)

    # this should succeed and produce results
    run_configuration(config_path, local=True, quiet=True)

    # load the results and verify them
    with open(join(_my_dir,
                   'output',
                   ('test_custom_metrics_kwargs4_train_'
                    'examples_train.jsonlines_'
                    'SVC.results.json'))) as f:
        result_dict = json.load(f)
        grid_score = result_dict['grid_score']

    ok_(grid_score > 0.95)
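
# A minimal, hypothetical sketch (assumed names; not the project's actual
# kwargs3/kwargs4 metric file) of a probability-based custom metric like the
# one exercised above: the function scores predicted probabilities rather than
# hard labels, so the wrapping scorer has to call predict_proba(), which is
# why the first run above fails for a learner that cannot produce
# probabilities.
from sklearn.metrics import make_scorer, roc_auc_score


def fake_prob_metric(y_true, y_prob):
    """Hypothetical metric computed on positive-class probabilities."""
    return roc_auc_score(y_true, y_prob)


# needs_proba=True tells scikit-learn to pass predict_proba() output to the
# metric; newer scikit-learn releases spell this response_method="predict_proba".
fake_prob_metric_scorer = make_scorer(fake_prob_metric, needs_proba=True)
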