def test_asymmetric_error(quantile):
    """Check QuantileRegressor on exponentially distributed (asymmetric) targets.

    The targets are drawn from an exponential distribution whose scale is
    chosen so that its quantile at level ``quantile`` equals
    ``X @ coef + intercept``. The unpenalized fit must recover that linear
    model within loose tolerances, and the L1-penalized fit must agree with a
    direct Nelder-Mead minimization of the penalized pinball loss.
    """
    n_samples = 1000
    rng = np.random.RandomState(42)
    # First feature is non-negative, second one takes values in {0, -1}.
    # The draws are kept in this order so the random stream is consumed
    # exactly as before.
    positive_feature = np.abs(rng.randn(n_samples)[:, None])
    binary_feature = -rng.randint(2, size=(n_samples, 1))
    X = np.concatenate((positive_feature, binary_feature), axis=1)

    intercept = 1.23
    coef = np.array([0.5, -2])
    true_quantile = X @ coef + intercept
    # The exponential scale below is only valid if this is strictly positive.
    assert np.min(true_quantile) > 0

    # For an exponential distribution with rate lambda, e.g. exp(-lambda * x),
    # the quantile at level q is:
    #   quantile(q) = - log(1 - q) / lambda
    #   scale = 1/lambda = -quantile(q) / log(1 - q)
    y = rng.exponential(
        scale=-true_quantile / np.log(1 - quantile), size=n_samples
    )

    # This test can be made to pass with any solver but in the interest
    # of sparing continuous integration resources, the test is performed
    # with the fastest solver only.
    model = QuantileRegressor(quantile=quantile, alpha=0, solver="highs")
    model.fit(X, y)

    assert model.intercept_ == approx(intercept, rel=0.2)
    assert_allclose(model.coef_, coef, rtol=0.6)
    fraction_above = np.mean(model.predict(X) > y)
    assert_allclose(fraction_above, quantile, atol=1e-2)

    # Now compare to Nelder-Mead optimization with L1 penalty.
    alpha = 0.01
    model.set_params(alpha=alpha)
    model.fit(X, y)
    model_coef = np.r_[model.intercept_, model.coef_]

    def penalized_pinball(params):
        # params[0] is the intercept, params[1:] are the feature weights;
        # only the feature weights are penalized.
        loss = mean_pinball_loss(y, X @ params[1:] + params[0], alpha=quantile)
        l1_norm = np.sum(np.abs(params[1:]))
        return loss + alpha * l1_norm

    res = minimize(
        fun=penalized_pinball,
        x0=[1, 0, -1],
        method="Nelder-Mead",
        tol=1e-12,
        options={"maxiter": 2000},
    )

    assert penalized_pinball(model_coef) == approx(penalized_pinball(res.x))
    assert_allclose(model.intercept_, res.x[0])
    assert_allclose(model.coef_, res.x[1:])
    penalized_fraction_above = np.mean(model.predict(X) > y)
    assert_allclose(penalized_fraction_above, quantile, atol=1e-2)
def test_quantile_sample_weight():
    """Check that unequal sample weights shift the estimated median.

    Observations above the mean of ``y`` get weight 100, the rest weight 1.
    The unweighted fraction of targets below the prediction must then exceed
    0.5, while the weighted fraction must stay close to 0.5.
    """
    n_samples = 1000
    X, y = make_regression(
        n_samples=n_samples, n_features=5, random_state=0, noise=10.0
    )
    # When we increase the weight of the upper observations, the estimate of
    # the quantile should go up.
    sample_weight = np.where(y > y.mean(), 100.0, 1.0)

    quant = QuantileRegressor(
        quantile=0.5, alpha=1e-8, solver_options={"lstsq": False}
    )
    quant.fit(X, y, sample_weight=sample_weight)

    is_below = y < quant.predict(X)
    assert np.mean(is_below) > 0.5
    assert np.average(is_below, weights=sample_weight) == approx(0.5, abs=3e-2)
def test_quantile_estimates_calibration(q):
    """Check that a fraction ``q`` of the targets lies below the prediction.

    An unpenalized model is fitted at quantile level ``q`` and the empirical
    fraction of training targets below its predictions must match ``q``
    within an absolute tolerance of 0.01.
    """
    X, y = make_regression(
        n_samples=1000, n_features=20, random_state=0, noise=1.0
    )
    quant = QuantileRegressor(
        quantile=q, alpha=0, solver_options={"lstsq": False}
    )
    quant.fit(X, y)

    fraction_below = np.mean(y < quant.predict(X))
    assert fraction_below == approx(q, abs=1e-2)
def test_quantile_equals_huber_for_low_epsilon(fit_intercept):
    """Compare QuantileRegressor with HuberRegressor at epsilon just above 1.

    Both models are fitted with the same ``alpha`` and ``fit_intercept``;
    their coefficients (and intercepts, when fitted) must agree within an
    absolute tolerance of 0.1.
    """
    X, y = make_regression(
        n_samples=100, n_features=20, random_state=0, noise=1.0
    )
    shared_params = {"alpha": 1e-4, "fit_intercept": fit_intercept}

    huber = HuberRegressor(epsilon=1 + 1e-4, **shared_params)
    huber.fit(X, y)
    quant = QuantileRegressor(**shared_params)
    quant.fit(X, y)

    assert_allclose(huber.coef_, quant.coef_, atol=1e-1)
    if fit_intercept:
        assert huber.intercept_ == approx(quant.intercept_, abs=1e-1)
        # Check that we still predict the expected fraction; this is only
        # asserted when an intercept is fitted.
        fraction_below = np.mean(y < quant.predict(X))
        assert fraction_below == approx(0.5, abs=1e-1)
def test_sparse_input(sparse_format, solver, fit_intercept):
    """Check that fitting on sparse X matches fitting on the dense X.

    ``sparse_format`` is called on the dense design matrix to build the
    sparse input. The dense reference model uses the default solver while
    the sparse model uses ``solver``.
    """
    X, y = make_regression(
        n_samples=100, n_features=20, random_state=1, noise=1.0
    )
    X_sparse = sparse_format(X)
    shared_params = {"alpha": 1e-4, "fit_intercept": fit_intercept}

    quant_dense = QuantileRegressor(**shared_params)
    quant_dense.fit(X, y)
    quant_sparse = QuantileRegressor(solver=solver, **shared_params)
    quant_sparse.fit(X_sparse, y)

    assert_allclose(quant_sparse.coef_, quant_dense.coef_, rtol=1e-2)
    if fit_intercept:
        assert quant_sparse.intercept_ == approx(quant_dense.intercept_)
        # Check that we still predict the expected fraction; this is only
        # asserted when an intercept is fitted.
        fraction_below = np.mean(y < quant_sparse.predict(X_sparse))
        assert 0.45 <= fraction_below <= 0.55
def get_calibration(train_approximation_distances, train_target_distances,
                    val_approximation_distances, val_target_distances,
                    quantile):
    """Fit a one-feature quantile regression and measure its calibration.

    A ``QuantileRegressor`` (no regularisation, ``highs`` solver) is fitted to
    predict the target distances from the approximation distances on the
    training split. It is then evaluated on the validation split.

    Parameters
    ----------
    train_approximation_distances : array with a ``reshape`` method
        Training inputs; reshaped to a single feature column.
    train_target_distances : array-like
        Training targets passed to ``fit`` unchanged.
    val_approximation_distances : array with a ``reshape`` method
        Validation inputs; reshaped to a single feature column.
    val_target_distances : array-like
        Validation targets, one per validation input.
    quantile : float
        Quantile level handed to ``QuantileRegressor``.

    Returns
    -------
    calibration : float
        Fraction of validation targets that are less than or equal to their
        predicted value.
    predicted_target_distances : numpy.ndarray
        1-D array of predictions for the validation inputs.

    Raises
    ------
    ValueError
        If the number of validation targets differs from the number of
        predictions.
    """
    # Function-scope import so the helper does not depend on a module-level
    # numpy alias being present.
    import numpy as np

    qr = QuantileRegressor(quantile=quantile, alpha=0, solver='highs')
    qr.fit(train_approximation_distances.reshape(-1, 1), train_target_distances)

    # Flatten instead of squeeze: squeezing a single prediction yields a 0-d
    # array, which cannot be iterated or compared element-wise by position.
    predicted_target_distances = np.asarray(
        qr.predict(val_approximation_distances.reshape(-1, 1))
    ).reshape(-1)
    val_targets = np.asarray(val_target_distances).reshape(-1)

    # A mismatch used to be truncated silently while still dividing by the
    # full number of targets, which produced a meaningless fraction.
    if val_targets.size != predicted_target_distances.size:
        raise ValueError(
            "val_target_distances has {} entries but {} predictions were "
            "made".format(val_targets.size, predicted_target_distances.size)
        )

    num_leq = np.count_nonzero(val_targets <= predicted_target_distances)
    return num_leq / val_targets.size, predicted_target_distances