def test_tweedie_distribution_power():
    """Check validation of ``TweedieDistribution.power`` and its lower bound.

    The test expects:

    * a ``ValueError`` when constructing with ``power=0.5``;
    * a ``TypeError`` for a complex power, both when passed to the
      constructor and when assigned to ``power`` afterwards;
    * ``_lower_bound`` to be a ``DistributionBoundary`` that is not inclusive
      for a default-constructed instance, and to have value ``0.0`` and be
      inclusive once ``power`` is set to 1.
    """
    msg = "distribution is only defined for power<=0 and power>=1"
    with pytest.raises(ValueError, match=msg):
        TweedieDistribution(power=0.5)

    with pytest.raises(TypeError, match="must be a real number"):
        TweedieDistribution(power=1j)

    # Build the instance outside the ``raises`` block so that only the
    # assignment itself can satisfy the expected TypeError.
    dist = TweedieDistribution()
    with pytest.raises(TypeError, match="must be a real number"):
        dist.power = 1j

    # Start again from a fresh instance for the boundary checks.
    dist = TweedieDistribution()
    assert isinstance(dist._lower_bound, DistributionBoundary)
    assert dist._lower_bound.inclusive is False

    # Changing the power must be reflected in the lower bound.
    dist.power = 1
    assert dist._lower_bound.value == 0.0
    assert dist._lower_bound.inclusive is True
def test_tweedie_regression_family(regression_data):
    """Check that ``family`` and ``power`` of TweedieRegressor stay in sync.

    The ``family`` attribute must always be a ``TweedieDistribution`` and the
    ``power`` attribute must follow it, both right after construction and
    after a new family has been assigned. Assigning something that is not a
    ``TweedieDistribution`` is expected to raise a ``TypeError``.

    The ``regression_data`` fixture is requested but not read in the body.
    """

    def _assert_family_matches(regressor, expected_power):
        # Same three checks, in the same order, for every state of the
        # estimator that the test inspects.
        assert isinstance(regressor.family, TweedieDistribution)
        assert regressor.family.power == expected_power
        assert regressor.power == expected_power

    initial_power = 2.0
    regressor = TweedieRegressor(power=initial_power)
    _assert_family_matches(regressor, initial_power)

    # Swap in a family with a different power and check it propagates.
    replacement_power = 0
    regressor.family = TweedieDistribution(power=replacement_power)
    _assert_family_matches(regressor, replacement_power)

    # Anything that is not a TweedieDistribution must be rejected.
    with pytest.raises(
        TypeError,
        match="TweedieRegressor.family must be of type TweedieDistribution!",
    ):
        regressor.family = None
def test_invalid_distribution_bound():
    """Check that a non-``DistributionBoundary`` lower bound is rejected.

    ``_lower_bound`` is overwritten with a plain integer; ``in_y_range`` is
    then expected to raise a ``TypeError``.
    """
    expected_message = "must be of type DistributionBoundary"
    tweedie = TweedieDistribution()
    # Deliberately corrupt the private bound with a bare number.
    tweedie._lower_bound = 0
    with pytest.raises(TypeError, match=expected_message):
        tweedie.in_y_range([-1, 0, 1])
from sklearn._loss.glm_distribution import ( TweedieDistribution, NormalDistribution, PoissonDistribution, GammaDistribution, InverseGaussianDistribution, DistributionBoundary, ) @pytest.mark.parametrize( "family, expected", [ (NormalDistribution(), [True, True, True]), (PoissonDistribution(), [False, True, True]), (TweedieDistribution(power=1.5), [False, True, True]), (GammaDistribution(), [False, False, True]), (InverseGaussianDistribution(), [False, False, True]), (TweedieDistribution(power=4.5), [False, False, True]), ], ) def test_family_bounds(family, expected): """Test the valid range of distributions at -1, 0, 1.""" result = family.in_y_range([-1, 0, 1]) assert_array_equal(result, expected) def test_invalid_distribution_bound(): dist = TweedieDistribution() dist._lower_bound = 0 with pytest.raises(TypeError,
[ (PoissonRegressor(), True), (GammaRegressor(), True), (TweedieRegressor(power=1.5), True), (TweedieRegressor(power=0), False), ], ) def test_tags(estimator, value): assert estimator._get_tags()["requires_positive_y"] is value # TODO(1.3): remove @pytest.mark.parametrize( "est, family", [ (PoissonRegressor(), "poisson"), (GammaRegressor(), "gamma"), (TweedieRegressor(), TweedieDistribution()), (TweedieRegressor(power=2), TweedieDistribution(power=2)), (TweedieRegressor(power=3), TweedieDistribution(power=3)), ], ) def test_family_deprecation(est, family): """Test backward compatibility of the family property.""" with pytest.warns(FutureWarning, match="`family` was deprecated"): if isinstance(family, str): assert est.family == family else: assert est.family.__class__ == family.__class__ assert est.family.power == family.power
X, y, sample_weight=sample_weight_1 ) glm2 = GeneralizedLinearRegressor(**glm_params).fit(X2, y2, sample_weight=None) assert_allclose(glm1.coef_, glm2.coef_) @pytest.mark.parametrize("fit_intercept", [True, False]) @pytest.mark.parametrize( "family", [ NormalDistribution(), PoissonDistribution(), GammaDistribution(), InverseGaussianDistribution(), TweedieDistribution(power=1.5), TweedieDistribution(power=4.5), ], ) def test_glm_log_regression(fit_intercept, family): """Test GLM regression with log link on a simple dataset.""" coef = [0.2, -0.1] X = np.array([[1, 1, 1, 1, 1], [0, 1, 2, 3, 4]]).T y = np.exp(np.dot(X, coef)) glm = GeneralizedLinearRegressor( alpha=0, family=family, link="log", fit_intercept=fit_intercept, tol=1e-7 ) if fit_intercept: res = glm.fit(X[:, 1:], y) assert_allclose(res.coef_, coef[1:], rtol=1e-6) assert_allclose(res.intercept_, coef[0], rtol=1e-6)