def test_different_input_lengths():
    """Check that ``FraudCost.score`` rejects inputs of unequal length.

    Both directions are exercised: predictions longer than the truth and
    (in the second case) three predictions against two true labels.
    """
    objective = FraudCost(amount_col="value")
    # Each pair is (y_true, y_predicted) with deliberately different sizes.
    mismatched_pairs = (
        (np.array([1]), np.array([0, 0])),
        (np.array([0, 0]), np.array([1, 2, 0])),
    )
    for actual, predicted in mismatched_pairs:
        with pytest.raises(ValueError, match="Inputs have mismatched dimensions"):
            objective.score(actual, predicted)
def test_input_contains_inf(capsys):
    """Check that ``FraudCost.score`` rejects infinity in either input.

    The expected error message names the argument that holds the infinite
    value.

    NOTE(review): the ``capsys`` fixture is requested but never used in the
    body; it is kept only so the test signature stays unchanged.
    """
    objective = FraudCost(amount_col="value")

    # Infinity among the predictions.
    finite_truth = np.array([1, 0, 0])
    infinite_prediction = np.array([np.inf, 0, 0])
    with pytest.raises(ValueError, match="y_predicted contains NaN or infinity"):
        objective.score(finite_truth, infinite_prediction)

    # Infinity among the true labels.
    infinite_truth = np.array([np.inf, 0, 0])
    finite_prediction = np.array([1, 0, 0])
    with pytest.raises(ValueError, match="y_true contains NaN or infinity"):
        objective.score(infinite_truth, finite_prediction)
def test_input_contains_nan(X_y_binary):
    """Check that ``FraudCost.score`` rejects NaN in either input.

    The expected error message names the argument that holds the NaN.

    NOTE(review): the ``X_y_binary`` fixture is requested but never used in
    the body; it is kept only so the test signature stays unchanged.
    """
    objective = FraudCost(amount_col="value")
    # Each case is (y_true, y_predicted, expected error message).
    nan_cases = (
        (
            np.array([1, 2, 1]),
            np.array([np.nan, 0, 0]),
            "y_predicted contains NaN or infinity",
        ),
        (
            # NOTE(review): the predictions here hold three distinct values;
            # the test still expects the y_true NaN error to be the one raised.
            np.array([np.nan, 0, 0]),
            np.array([1, 2, 0]),
            "y_true contains NaN or infinity",
        ),
    )
    for actual, predicted, message in nan_cases:
        with pytest.raises(ValueError, match=message):
            objective.score(actual, predicted)
def test_binary_more_than_two_unique_values():
    """Check that ``FraudCost.score`` rejects inputs with three distinct labels.

    The expected error message names whichever argument carries the third
    label value.
    """
    objective = FraudCost(amount_col="value")
    three_labels = [0, 1, 2]
    two_labels = [1, 0, 1]

    # Third label appears in the predictions.
    with pytest.raises(
        ValueError, match="y_predicted contains more than two unique values"
    ):
        objective.score(np.array(two_labels), np.array(three_labels))

    # Third label appears in the true values.
    with pytest.raises(
        ValueError, match="y_true contains more than two unique values"
    ):
        objective.score(np.array(three_labels), np.array(two_labels))
def test_fraud_objective_score(X_y_binary):
    """Exercise ``FraudCost.decision_function`` and ``FraudCost.score``.

    Covers predictions supplied as a ``pd.Series`` and as a NumPy array, a
    decision output wrapped in ``ww.DataColumn``, and one scenario whose
    decisions differ from the true labels and yield a non-zero score.

    NOTE(review): the ``X_y_binary`` fixture is requested but its data is not
    needed by this test; it is kept only so the test signature stays
    unchanged.
    """
    fraud_cost = FraudCost(amount_col="value")
    y_predicted = pd.Series([.1, .5, .5])
    y_true = pd.Series([True, False, True])
    extra_columns = pd.DataFrame({"value": [100, 5, 250]})

    # pd.Series predictions: decisions should equal y_true, so the score is 0.
    out = fraud_cost.decision_function(y_predicted, 5, extra_columns)
    assert isinstance(out, pd.Series)
    pd.testing.assert_series_equal(out, y_true, check_names=False)
    score = fraud_cost.score(y_true, out, extra_columns)
    assert score == 0.0

    # Same predictions passed as a NumPy array still produce a pd.Series.
    out = fraud_cost.decision_function(y_predicted.to_numpy(), 5, extra_columns)
    assert isinstance(out, pd.Series)
    pd.testing.assert_series_equal(out, y_true, check_names=False)
    score = fraud_cost.score(y_true, out, extra_columns)
    assert score == 0.0

    # score should also accept the decisions wrapped in a ww.DataColumn.
    out = ww.DataColumn(
        fraud_cost.decision_function(y_predicted, 5, extra_columns))
    pd.testing.assert_series_equal(out.to_series(), y_true,
                                   check_dtype=False, check_names=False)
    score = fraud_cost.score(y_true, out, extra_columns)
    assert score == 0.0

    # testing with other types of inputs
    y_predicted = np.array([.1, .5, .5])
    extra_columns = pd.DataFrame({"value": [100, 5, 250]})
    out = fraud_cost.decision_function(y_predicted, 5, extra_columns)
    pd.testing.assert_series_equal(out, y_true, check_names=False)
    score = fraud_cost.score(y_true, out, extra_columns)
    assert score == 0.0

    # Decisions that disagree with y_true give a non-zero score.
    y_predicted = pd.Series([.2, .01, .01])
    extra_columns = pd.DataFrame({"value": [100, 50, 50]})
    y_true = pd.Series([False, False, True])
    expected_y_pred = pd.Series([True, False, False])
    out = fraud_cost.decision_function(y_predicted, 10, extra_columns)
    pd.testing.assert_series_equal(out, expected_y_pred, check_names=False)
    score = fraud_cost.score(y_true, out, extra_columns)
    # Compare with a tolerance: the expected value is a non-trivial float, so
    # exact equality would depend on the rounding of the underlying arithmetic.
    assert np.isclose(score, 0.255)
def test_fraud_objective_score_list(X_y_binary):
    """Check that ``FraudCost`` accepts plain Python lists as inputs.

    ``decision_function`` is given a list of predictions and must return a
    ``pd.Series``; ``score`` is given a list of true labels and must return
    0.0 when the decisions equal those labels.

    NOTE(review): the ``X_y_binary`` fixture is requested but its data is not
    needed by this test; it is kept only so the test signature stays
    unchanged.
    """
    fraud_cost = FraudCost(amount_col="value")
    y_predicted = [.1, .5, .5]
    y_true = [True, False, True]
    extra_columns = pd.DataFrame({"value": [100, 5, 250]})

    out = fraud_cost.decision_function(y_predicted, 5, extra_columns)
    assert isinstance(out, pd.Series)
    pd.testing.assert_series_equal(out, pd.Series(y_true), check_names=False)

    score = fraud_cost.score(y_true, out, extra_columns)
    assert score == 0.0
def test_zero_input_lengths():
    """Check that ``FraudCost.score`` rejects empty inputs."""
    objective = FraudCost(amount_col="value")
    empty_prediction = np.array([])
    empty_truth = np.array([])
    with pytest.raises(ValueError, match="Length of inputs is 0"):
        objective.score(empty_truth, empty_prediction)