def test_cbm_init(): with pytest.raises( ValueError, match="Parameters to CostBenefitMatrix must all be numeric values." ): CostBenefitMatrix(true_positive=None, true_negative=-1, false_positive=-7, false_negative=-2) with pytest.raises( ValueError, match="Parameters to CostBenefitMatrix must all be numeric values." ): CostBenefitMatrix(true_positive=1, true_negative=-1, false_positive=None, false_negative=-2) with pytest.raises( ValueError, match="Parameters to CostBenefitMatrix must all be numeric values." ): CostBenefitMatrix(true_positive=1, true_negative=None, false_positive=-7, false_negative=-2) with pytest.raises( ValueError, match="Parameters to CostBenefitMatrix must all be numeric values." ): CostBenefitMatrix(true_positive=3, true_negative=-1, false_positive=-7, false_negative=None)
def test_cbm_objective_function_floats(): y_true = pd.Series([0, 0, 0, 1, 1, 1, 1, 1, 1, 1]) y_predicted = pd.Series([0, 0, 1, 0, 0, 0, 0, 1, 1, 1]) cbm = CostBenefitMatrix(true_positive=5.1, true_negative=-1.2, false_positive=-6.7, false_negative=-0.1) assert np.isclose(cbm.objective_function(y_true, y_predicted), ((3 * 5.1) + (-1.2 * 2) + (1 * -6.7) + (4 * -0.1)) / 10)
def test_cbm_zero_input_lengths(): cbm = CostBenefitMatrix(true_positive=10, true_negative=-1, false_positive=-7, false_negative=-2) y_predicted = pd.Series([]) y_true = pd.Series([]) with pytest.raises(ValueError, match="Length of inputs is 0"): cbm.score(y_true, y_predicted)
def test_cbm_objective_function(data_type, make_data_type): y_true = pd.Series([0, 0, 0, 1, 1, 1, 1, 1, 1, 1]) y_predicted = pd.Series([0, 0, 1, 0, 0, 0, 0, 1, 1, 1]) y_true = make_data_type(data_type, y_true) y_predicted = make_data_type(data_type, y_predicted) cbm = CostBenefitMatrix(true_positive=10, true_negative=-1, false_positive=-7, false_negative=-2) assert np.isclose(cbm.objective_function(y_true, y_predicted), ((3 * 10) + (-1 * 2) + (1 * -7) + (4 * -2)) / 10)
def test_get_objective_return_instance_does_not_work_for_some_objectives(): with pytest.raises( ObjectiveCreationError, match= "In get_objective, cannot pass in return_instance=True for Cost Benefit Matrix" ): get_objective("Cost Benefit Matrix", return_instance=True) cbm = CostBenefitMatrix(0, 0, 0, 0) assert get_objective(cbm) == cbm
def test_calculate_percent_difference_negative_and_equal_numbers(): assert CostBenefitMatrix.calculate_percent_difference( score=5, baseline_score=5) == 0 assert CostBenefitMatrix.calculate_percent_difference( score=-5, baseline_score=-10) == 50 assert CostBenefitMatrix.calculate_percent_difference( score=-10, baseline_score=-5) == -100 assert CostBenefitMatrix.calculate_percent_difference( score=-5, baseline_score=10) == -150 assert CostBenefitMatrix.calculate_percent_difference( score=10, baseline_score=-5) == 300 # These values are not possible for LogLossBinary but we need them for 100% coverage # We might add an objective where lower is better that can take negative values in the future assert LogLossBinary.calculate_percent_difference( score=-5, baseline_score=-10) == -50 assert LogLossBinary.calculate_percent_difference(score=-10, baseline_score=-5) == 100 assert LogLossBinary.calculate_percent_difference(score=-5, baseline_score=10) == 150 assert LogLossBinary.calculate_percent_difference( score=10, baseline_score=-5) == -300
def test_cbm_different_input_lengths(): cbm = CostBenefitMatrix(true_positive=10, true_negative=-1, false_positive=-7, false_negative=-2) y_predicted = pd.Series([0, 0]) y_true = pd.Series([1]) with pytest.raises(ValueError, match="Inputs have mismatched dimensions"): cbm.score(y_true, y_predicted) y_true = pd.Series([0, 0]) y_predicted = pd.Series([1, 2, 0]) with pytest.raises(ValueError, match="Inputs have mismatched dimensions"): cbm.score(y_true, y_predicted)
def test_cbm_objective_automl(optimize_thresholds, X_y_binary): X, y = X_y_binary cbm = CostBenefitMatrix(true_positive=10, true_negative=-1, false_positive=-7, false_negative=-2) automl = AutoMLSearch(X_train=X, y_train=y, problem_type='binary', objective=cbm, max_iterations=2, optimize_thresholds=optimize_thresholds) automl.search() pipeline = automl.best_pipeline pipeline.fit(X, y) predictions = pipeline.predict(X, cbm) assert not np.isnan(predictions.to_series()).values.any() assert not np.isnan(pipeline.predict_proba(X).to_dataframe()).values.any() assert not np.isnan(pipeline.score(X, y, [cbm])['Cost Benefit Matrix'])
def test_cbm_input_contains_nan(X_y_binary): y_predicted = pd.Series([np.nan, 0, 0]) y_true = pd.Series([1, 2, 1]) cbm = CostBenefitMatrix(true_positive=10, true_negative=-1, false_positive=-7, false_negative=-2) with pytest.raises(ValueError, match="y_predicted contains NaN or infinity"): cbm.score(y_true, y_predicted) y_true = pd.Series([np.nan, 0, 0]) y_predicted = pd.Series([1, 2, 0]) with pytest.raises(ValueError, match="y_true contains NaN or infinity"): cbm.score(y_true, y_predicted)
def test_cbm_input_contains_inf(capsys): cbm = CostBenefitMatrix(true_positive=10, true_negative=-1, false_positive=-7, false_negative=-2) y_predicted = np.array([np.inf, 0, 0]) y_true = np.array([1, 0, 0]) with pytest.raises(ValueError, match="y_predicted contains NaN or infinity"): cbm.score(y_true, y_predicted) y_true = pd.Series([np.inf, 0, 0]) y_predicted = pd.Series([1, 0, 0]) with pytest.raises(ValueError, match="y_true contains NaN or infinity"): cbm.score(y_true, y_predicted)
def test_cbm_binary_more_than_two_unique_values(): cbm = CostBenefitMatrix(true_positive=10, true_negative=-1, false_positive=-7, false_negative=-2) y_predicted = pd.Series([0, 1, 2]) y_true = pd.Series([1, 0, 1]) with pytest.raises( ValueError, match="y_predicted contains more than two unique values"): cbm.score(y_true, y_predicted) y_true = pd.Series([0, 1, 2]) y_predicted = pd.Series([1, 0, 1]) with pytest.raises(ValueError, match="y_true contains more than two unique values"): cbm.score(y_true, y_predicted)