def test_init_raw_predictions_shapes():
    """Check shape and dtype of get_init_raw_predictions output.

    The returned array must be float64 with shape (n_samples, K), where
    K == 1 for regression and binary classification, and K == n_classes
    for multiclass classification.
    """
    rng = np.random.RandomState(0)
    n_samples = 100
    X = rng.normal(size=(n_samples, 5))

    # Regression losses: a single output column is expected.
    y = rng.normal(size=n_samples)
    regression_losses = (
        LeastSquaresError(n_classes=1),
        LeastAbsoluteError(n_classes=1),
        QuantileLossFunction(n_classes=1),
        HuberLossFunction(n_classes=1),
    )
    for loss in regression_losses:
        init_estimator = loss.init_estimator().fit(X, y)
        raw_predictions = loss.get_init_raw_predictions(y, init_estimator)
        assert raw_predictions.shape == (n_samples, 1)
        assert raw_predictions.dtype == np.float64

    # Binary classification losses: still a single output column.
    y = rng.randint(0, 2, size=n_samples)
    binary_losses = (BinomialDeviance(n_classes=2), ExponentialLoss(n_classes=2))
    for loss in binary_losses:
        init_estimator = loss.init_estimator().fit(X, y)
        raw_predictions = loss.get_init_raw_predictions(y, init_estimator)
        assert raw_predictions.shape == (n_samples, 1)
        assert raw_predictions.dtype == np.float64

    # Multiclass: one output column per class.
    for n_classes in range(3, 5):
        y = rng.randint(0, n_classes, size=n_samples)
        loss = MultinomialDeviance(n_classes=n_classes)
        init_estimator = loss.init_estimator().fit(X, y)
        raw_predictions = loss.get_init_raw_predictions(y, init_estimator)
        assert raw_predictions.shape == (n_samples, n_classes)
        assert raw_predictions.dtype == np.float64
def test_lad_equals_quantiles(seed, alpha):
    """Check relationships between LAD, quantile, and pinball losses.

    For alpha == 0.5 the quantile loss is exactly half the LAD loss
    (both unweighted and sample-weighted). For any alpha, the weighted
    quantile loss must agree with ``mean_pinball_loss``.
    """
    lad = LeastAbsoluteError()
    ql = QuantileLossFunction(alpha=alpha)

    n_samples = 50
    rng = np.random.RandomState(seed)
    raw_predictions = rng.normal(size=(n_samples))
    y_true = rng.normal(size=(n_samples))

    # Unweighted comparison: quantile loss at 0.5 is half of LAD.
    lad_loss = lad(y_true, raw_predictions)
    ql_loss = ql(y_true, raw_predictions)
    if alpha == 0.5:
        assert lad_loss == approx(2 * ql_loss)

    # Same check with non-uniform sample weights.
    weights = np.linspace(0, 1, n_samples) ** 2
    lad_weighted_loss = lad(y_true, raw_predictions, sample_weight=weights)
    ql_weighted_loss = ql(y_true, raw_predictions, sample_weight=weights)
    if alpha == 0.5:
        assert lad_weighted_loss == approx(2 * ql_weighted_loss)

    # Quantile loss must agree with the public pinball-loss metric.
    pbl_weighted_loss = mean_pinball_loss(
        y_true, raw_predictions, sample_weight=weights, alpha=alpha
    )
    assert pbl_weighted_loss == approx(ql_weighted_loss)
def test_lad_equals_quantile_50(seed):
    """Quantile loss with alpha=0.5 must equal half the LAD loss."""
    lad = LeastAbsoluteError(n_classes=1)
    ql = QuantileLossFunction(n_classes=1, alpha=0.5)

    n_samples = 50
    rng = np.random.RandomState(seed)
    raw_predictions = rng.normal(size=(n_samples))
    y_true = rng.normal(size=(n_samples))

    # Unweighted: LAD == 2 * quantile loss at the median.
    lad_loss = lad(y_true, raw_predictions)
    ql_loss = ql(y_true, raw_predictions)
    assert_almost_equal(lad_loss, 2 * ql_loss)

    # The equivalence must also hold under sample weighting.
    weights = np.linspace(0, 1, n_samples) ** 2
    lad_weighted_loss = lad(y_true, raw_predictions, sample_weight=weights)
    ql_weighted_loss = ql(y_true, raw_predictions, sample_weight=weights)
    assert_almost_equal(lad_weighted_loss, 2 * ql_weighted_loss)