Пример #1
0
def test_mape_time_series_model():
    """Check ``MAPE().score`` against small hand-computed cases.

    Covers three things: a target containing 0 must raise ``ValueError``,
    plain numpy inputs give the expected percentage, and wrapping the inputs
    in ``pd.Series`` with arbitrary (even mismatched) indices gives the same
    score as the raw arrays.
    """
    obj = MAPE()

    # Series 1: the actual values contain zeros, which the test expects the
    # objective to reject.
    s1_actual = np.array([0, 0, 1, 1, 1, 1, 2, 0, 2])
    s1_predicted = np.array([0, 1, 0, 1, 1, 2, 1, 2, 0])

    # Series 2: every prediction is the negated actual value, so
    # |actual - predicted| / |actual| is 2 for each of the 4 points (sum 8).
    s2_actual = np.array([-1, -2, 1, 3])
    s2_predicted = np.array([1, 2, -1, -3])

    # Series 3: the per-point ratios are 1, 0, 0.5, 0.5, 2, 0 (sum 4) over
    # 6 points.
    s3_actual = np.array([1, 2, 4, 2, 1, 2])
    s3_predicted = np.array([0, 2, 2, 1, 3, 2])

    # NOTE: `match` is applied as a regular expression by pytest, so the
    # trailing "." matches any character, not only a literal period.
    with pytest.raises(
            ValueError,
            match=
            "Mean Absolute Percentage Error cannot be used when targets contain the value 0."
    ):
        obj.score(s1_actual, s1_predicted)
    # Expected values are consistent with mean(|actual - predicted| / |actual|) * 100.
    assert obj.score(s2_actual, s2_predicted) == pytest.approx(8 / 4 * 100)
    assert obj.score(s3_actual, s3_predicted) == pytest.approx(4 / 6 * 100)
    # Actual values as a Series with a negative integer index, predictions as
    # a bare array: the expected score is unchanged.
    assert obj.score(pd.Series(s3_actual, index=range(-12, -6)),
                     s3_predicted) == pytest.approx(4 / 6 * 100)
    # Both inputs as Series whose indices do not overlap (10..13 vs 20..23):
    # the expected score is still the same as for the raw arrays.
    assert obj.score(pd.Series(s2_actual, index=range(10, 14)),
                     pd.Series(s2_predicted,
                               index=range(20,
                                           24))) == pytest.approx(8 / 4 * 100)
    # NOTE(review): from here to the end of the function the code uses
    # `objective` and `X`, neither of which is assigned in this function or
    # passed in as a parameter. Unless they exist as module-level globals this
    # raises NameError. It looks like the tail of a separate parametrized
    # InvalidTargetDataCheck test whose `def` line and `X` fixture are
    # missing -- TODO confirm against the upstream file and split it out.
    y = pd.Series([2, 3, 0, 1, 1])

    invalid_targets_check = InvalidTargetDataCheck(problem_type="regression", objective=objective)

    # The expected result is a single error whose details count the target
    # values that are <= 0 (here only the 0 in `y`).
    assert invalid_targets_check.validate(X, y) == {
        "warnings": [],
        "errors": [DataCheckError(
            message=f"Target has non-positive values which is not supported for {objective}",
            data_check_name=invalid_targets_data_check_name,
            message_code=DataCheckMessageCode.TARGET_INCOMPATIBLE_OBJECTIVE,
            details={"Count of offending values": sum(val <= 0 for val in y.values.flatten())}).to_dict()],
        "actions": []
    }


@pytest.mark.parametrize("objective", [RootMeanSquaredLogError(), MeanSquaredLogError(), MAPE()])
def test_invalid_target_data_check_invalid_labels_for_nonnegative_objective_instances(objective):
    """Check that InvalidTargetDataCheck flags non-positive targets.

    Runs once per objective *instance* in the parametrize list. The data check
    is driven through ``DataChecks`` with the objective passed via the
    per-check parameter dict, and the result must contain exactly one error
    naming the objective (``objective.name``) and the number of target values
    that are <= 0.

    Args:
        objective: An objective instance supplied by ``pytest.mark.parametrize``.
    """
    X = pd.DataFrame({'column_one': [100, 200, 100, 200, 200, 100, 200, 100] * 25})
    # Two of every eight target values are -1, so 50 of the 200 are non-positive.
    y = pd.Series([2, 2, 3, 3, -1, -1, 1, 1] * 25)

    data_checks = DataChecks([InvalidTargetDataCheck], {"InvalidTargetDataCheck": {"problem_type": "multiclass",
                                                                                   "objective": objective}})

    # A stray, duplicated fragment of the DataCheckError(...) call used to
    # follow the "actions" entry and made this dict literal a syntax error;
    # the expected result has exactly these three keys.
    assert data_checks.validate(X, y) == {
        "warnings": [],
        "errors": [DataCheckError(
            message=f"Target has non-positive values which is not supported for {objective.name}",
            data_check_name=invalid_targets_data_check_name,
            message_code=DataCheckMessageCode.TARGET_INCOMPATIBLE_OBJECTIVE,
            details={"Count of offending values": sum(val <= 0 for val in y.values.flatten())}).to_dict()],
        "actions": []
    }


@pytest.mark.parametrize(
    "objective", [RootMeanSquaredLogError(),
                  MeanSquaredLogError(),
                  MAPE()])
def test_invalid_target_data_check_invalid_labels_for_nonnegative_objective_instances(
        objective):
    X = pd.DataFrame(
        {'column_one': [100, 200, 100, 200, 200, 100, 200, 100] * 25})
    y = pd.Series([2, 2, 3, 3, -1, -1, 1, 1] * 25)

    data_checks = DataChecks(
        [InvalidTargetDataCheck], {
            "InvalidTargetDataCheck": {
                "problem_type": "multiclass",
                "objective": objective
            }
        })

    assert data_checks.validate(X, y) == {