def test_init_raw_predictions_shapes():
    """Check the shape and dtype of the initial raw predictions.

    ``get_init_raw_predictions`` is expected to return a float64 array of
    shape ``(n_samples, K)``, where ``K = 1`` for regression and binary
    classification losses and ``K = n_classes`` for the multiclass loss.
    """
    rng = np.random.RandomState(0)
    n_samples = 100
    X = rng.normal(size=(n_samples, 5))

    def check_init_output(loss_obj, target, n_columns):
        # Fit the loss' own init estimator, then validate its raw output.
        fitted = loss_obj.init_estimator().fit(X, target)
        init_pred = loss_obj.get_init_raw_predictions(target, fitted)
        assert init_pred.shape == (n_samples, n_columns)
        assert init_pred.dtype == np.float64

    # Regression losses: one output column.
    y_continuous = rng.normal(size=n_samples)
    regression_losses = (
        LeastSquaresError(n_classes=1),
        LeastAbsoluteError(n_classes=1),
        QuantileLossFunction(n_classes=1),
        HuberLossFunction(n_classes=1),
    )
    for regression_loss in regression_losses:
        check_init_output(regression_loss, y_continuous, 1)

    # Binary classification losses: still a single output column.
    y_binary = rng.randint(0, 2, size=n_samples)
    binary_losses = (
        BinomialDeviance(n_classes=2),
        ExponentialLoss(n_classes=2),
    )
    for binary_loss in binary_losses:
        check_init_output(binary_loss, y_binary, 1)

    # Multiclass loss: one output column per class.
    for n_classes in range(3, 5):
        y_multiclass = rng.randint(0, n_classes, size=n_samples)
        check_init_output(
            MultinomialDeviance(n_classes=n_classes), y_multiclass, n_classes
        )
def test_lad_equals_quantiles(seed, alpha):
    """Compare the quantile loss against LAD and the mean pinball loss.

    When ``alpha`` is 0.5, the least-absolute-deviation loss must equal twice
    the quantile loss, both with and without sample weights. For any
    ``alpha``, the weighted quantile loss must match ``mean_pinball_loss``.

    Parameters
    ----------
    seed : int
        Seed for the random state that generates targets and predictions.
    alpha : float
        Quantile passed to ``QuantileLossFunction`` and
        ``mean_pinball_loss``.
    """
    lad = LeastAbsoluteError()
    quantile = QuantileLossFunction(alpha=alpha)
    is_median = alpha == 0.5

    n_samples = 50
    rng = np.random.RandomState(seed)
    raw_predictions = rng.normal(size=n_samples)
    y_true = rng.normal(size=n_samples)

    # Unweighted comparison.
    unweighted_lad = lad(y_true, raw_predictions)
    unweighted_quantile = quantile(y_true, raw_predictions)
    if is_median:
        assert unweighted_lad == approx(2 * unweighted_quantile)

    # Weighted comparison, using smoothly increasing weights in [0, 1].
    weights = np.linspace(0, 1, n_samples) ** 2
    weighted_lad = lad(y_true, raw_predictions, sample_weight=weights)
    weighted_quantile = quantile(
        y_true, raw_predictions, sample_weight=weights
    )
    if is_median:
        assert weighted_lad == approx(2 * weighted_quantile)

    # The quantile loss must agree with the pinball loss metric.
    weighted_pinball = mean_pinball_loss(
        y_true, raw_predictions, sample_weight=weights, alpha=alpha
    )
    assert weighted_pinball == approx(weighted_quantile)
def test_quantile_loss_function():
    """Non-regression test for the ``QuantileLossFunction`` object.

    A sign problem used to occur when the function was evaluated for
    negative values of ``ytrue - ypred``; the inputs below cover a negative,
    a zero and a positive residual.
    """
    # NOTE(review): the positional arguments are presumably
    # (n_classes, alpha) -- confirm against the class signature.
    quantile_loss = QuantileLossFunction(1, 0.9)

    targets = np.asarray([-1.0, 0.0, 1.0])
    predictions = np.zeros_like(targets)
    observed = quantile_loss(targets, predictions)

    per_sample_expected = np.asarray([0.1, 0.0, 0.9])
    expected = per_sample_expected.mean()
    np.testing.assert_allclose(observed, expected)
def test_lad_equals_quantile_50(seed):
    """Check that LAD equals twice the quantile loss at ``alpha = 0.5``.

    The relation is verified both without sample weights and with
    non-uniform sample weights.

    Parameters
    ----------
    seed : int
        Seed for the random state that generates targets and predictions.
    """
    lad = LeastAbsoluteError(n_classes=1)
    median_loss = QuantileLossFunction(n_classes=1, alpha=0.5)

    n_samples = 50
    rng = np.random.RandomState(seed)
    raw_predictions = rng.normal(size=n_samples)
    y_true = rng.normal(size=n_samples)

    # Unweighted losses.
    assert_almost_equal(
        lad(y_true, raw_predictions),
        2 * median_loss(y_true, raw_predictions),
    )

    # Weighted losses, using smoothly increasing weights in [0, 1].
    weights = np.linspace(0, 1, n_samples) ** 2
    weighted_lad = lad(y_true, raw_predictions, sample_weight=weights)
    weighted_median = median_loss(
        y_true, raw_predictions, sample_weight=weights
    )
    assert_almost_equal(weighted_lad, 2 * weighted_median)
def test_init_raw_predictions_values():
    """Check the values returned by ``get_init_raw_predictions``.

    For each loss, the loss' own init estimator is fitted and the resulting
    baseline raw predictions are compared with the closed-form value
    expected for that loss.
    """
    rng = np.random.RandomState(0)
    n_samples = 100
    X = rng.normal(size=(n_samples, 5))
    y = rng.normal(size=n_samples)

    def baseline_predictions(loss_obj, target):
        # Fit the loss' init estimator and return its raw predictions.
        fitted = loss_obj.init_estimator().fit(X, target)
        return loss_obj.get_init_raw_predictions(target, fitted)

    # Least squares loss: the baseline is the mean of all targets.
    least_squares = LeastSquaresError(n_classes=1)
    assert_almost_equal(baseline_predictions(least_squares, y), y.mean())

    # Least absolute and Huber losses: the baseline is the median of all
    # targets.
    for loss_class in (LeastAbsoluteError, HuberLossFunction):
        robust_loss = loss_class(n_classes=1)
        assert_almost_equal(
            baseline_predictions(robust_loss, y), np.median(y)
        )

    # Quantile loss: the baseline is the alpha-quantile of all targets.
    for alpha in (0.1, 0.5, 0.9):
        quantile_loss = QuantileLossFunction(n_classes=1, alpha=alpha)
        assert_almost_equal(
            baseline_predictions(quantile_loss, y),
            np.percentile(y, alpha * 100),
        )

    y = rng.randint(0, 2, size=n_samples)

    # Binomial deviance: the baseline must equal link_function(p), where p
    # is the proba of the positive class. We want predict_proba() to return
    # p, and by definition
    #   p = inverse_link_function(raw_prediction) = sigmoid(raw_prediction)
    # so we want raw_prediction = link_function(p) = log(p / (1 - p)).
    binomial = BinomialDeviance(n_classes=2)
    binomial_baseline = baseline_predictions(binomial, y)
    positive_rate = y.mean()
    assert_almost_equal(
        binomial_baseline, np.log(positive_rate / (1 - positive_rate))
    )

    # Exponential loss: half of the log-odds of the positive class.
    exponential = ExponentialLoss(n_classes=2)
    exponential_baseline = baseline_predictions(exponential, y)
    positive_rate = y.mean()
    assert_almost_equal(
        exponential_baseline,
        0.5 * np.log(positive_rate / (1 - positive_rate)),
    )

    # Multinomial deviance: column k holds the log of the frequency of
    # class k.
    for n_classes in range(3, 5):
        y = rng.randint(0, n_classes, size=n_samples)
        multinomial = MultinomialDeviance(n_classes=n_classes)
        multinomial_baseline = baseline_predictions(multinomial, y)
        for k in range(n_classes):
            class_rate = (y == k).mean()
            assert_almost_equal(
                multinomial_baseline[:, k], np.log(class_rate)
            )