def test_binomial_deviance():
    """Check the binomial deviance loss against the ESLII formulations."""
    loss = BinomialDeviance(2)

    # With a raw prediction of zero the deviance must not depend on whether
    # the true label is 0 or 1.
    dev_label_0 = loss(np.array([0.0]), np.array([0.0]))
    dev_label_1 = loss(np.array([1.0]), np.array([0.0]))
    assert dev_label_0 == dev_label_1

    # Confident predictions that agree with the labels give (almost) zero
    # deviance.
    all_positive = loss(np.array([1.0, 1.0, 1.0]),
                        np.array([100.0, 100.0, 100.0]))
    assert_almost_equal(all_positive, 0.0)

    mixed_labels = loss(np.array([1.0, 0.0, 0.0]),
                        np.array([100.0, -100.0, -100.0]))
    assert_almost_equal(mixed_labels, 0)

    def reference_deviance(y, pred):
        # Alternative definition of the deviance (from ESLII); labels in
        # {0, 1} are mapped to signs in {-1, +1} first.
        sign = 2.0 * y - 1
        return np.mean(np.logaddexp(0.0, -2.0 * sign * pred))

    def reference_negative_gradient(y, pred):
        # Alternative closed form of the negative gradient.
        sign = 2 * y - 1
        return sign / (1 + np.exp(2 * sign * pred))

    # Every combination of constant label / constant raw prediction,
    # three samples each.
    cases = [
        (np.full(3, label), np.full(3, raw))
        for label, raw in (
            (1.0, 100.0),
            (0.0, 100.0),
            (0.0, -100.0),
            (1.0, -100.0),
        )
    ]

    # The loss value has to agree with the alternative definition ...
    for y, pred in cases:
        assert_almost_equal(loss(y, pred), reference_deviance(y, pred))

    # ... and so does the negative gradient.
    for y, pred in cases:
        assert_almost_equal(loss.negative_gradient(y, pred),
                            reference_negative_gradient(y, pred))
def test_binomial_deviance():
    """Compare BinomialDeviance with the alternative ESLII definitions."""
    deviance = BinomialDeviance(2)

    # pred = 0 yields the same binomial deviance for y = 0 and y = 1.
    at_zero_for_y0 = deviance(np.array([0.0]), np.array([0.0]))
    at_zero_for_y1 = deviance(np.array([1.0]), np.array([0.0]))
    assert_equal(at_zero_for_y0, at_zero_for_y1)

    # Strongly confident, correct predictions: the loss is (almost) zero.
    assert_almost_equal(
        deviance(np.array([1.0, 1.0, 1.0]), np.array([100.0, 100.0, 100.0])),
        0.0,
    )
    assert_almost_equal(
        deviance(np.array([1.0, 0.0, 0.0]),
                 np.array([100.0, -100.0, -100.0])),
        0,
    )

    def alt_dev(y, pred):
        # Alternative definition of deviance (from ESLII).
        signed = 2.0 * y - 1
        return np.mean(np.logaddexp(0.0, -2.0 * signed * pred))

    def alt_ng(y, pred):
        # Alternative expression for the negative gradient.
        signed = 2 * y - 1
        return signed / (1 + np.exp(2 * signed * pred))

    # (label, raw prediction) pairs, each broadcast to three samples.
    label_and_raw = (
        (1.0, 100.0),
        (0.0, 100.0),
        (0.0, -100.0),
        (1.0, -100.0),
    )
    test_data = [
        (np.full(3, label), np.full(3, raw)) for label, raw in label_and_raw
    ]

    # Check that the loss matches the alternative definition of deviance.
    for datum in test_data:
        expected = alt_dev(*datum)
        assert_almost_equal(deviance(*datum), expected)

    # Check the negative gradient against the alternative expression.
    for datum in test_data:
        expected = alt_ng(*datum)
        assert_almost_equal(deviance.negative_gradient(*datum), expected)
def test_init_raw_predictions_shapes():
    """Check shape and dtype of the arrays from get_init_raw_predictions.

    The result must be a float64 array of shape (n_samples, K), where K is 1
    for regression and binary classification, and K = n_classes for
    multiclass classification.
    """
    rng = np.random.RandomState(0)
    n_samples = 100
    X = rng.normal(size=(n_samples, 5))

    def check_raw_predictions(loss, target, n_columns):
        # Fit the loss' own init estimator and inspect the raw predictions
        # the loss derives from it.
        fitted_init = loss.init_estimator().fit(X, target)
        raw = loss.get_init_raw_predictions(target, fitted_init)
        assert raw.shape == (n_samples, n_columns)
        assert raw.dtype == np.float64

    # Regression losses: one column.
    y = rng.normal(size=n_samples)
    regression_losses = (
        LeastSquaresError(n_classes=1),
        LeastAbsoluteError(n_classes=1),
        QuantileLossFunction(n_classes=1),
        HuberLossFunction(n_classes=1),
    )
    for loss in regression_losses:
        check_raw_predictions(loss, y, 1)

    # Binary classification losses: still one column.
    y = rng.randint(0, 2, size=n_samples)
    binary_losses = (
        BinomialDeviance(n_classes=2),
        ExponentialLoss(n_classes=2),
    )
    for loss in binary_losses:
        check_raw_predictions(loss, y, 1)

    # Multiclass classification: one column per class.
    for n_classes in range(3, 5):
        y = rng.randint(0, n_classes, size=n_samples)
        check_raw_predictions(
            MultinomialDeviance(n_classes=n_classes), y, n_classes
        )
def test_binomial_deviance():
    """Check the binomial deviance loss and its negative gradient.

    Both are compared against alternative definitions taken from ESLII.
    """
    bd = BinomialDeviance(2)

    # pred has the same BD for y in {0, 1}
    assert bd(np.array([0.0]), np.array([0.0])) == bd(
        np.array([1.0]), np.array([0.0])
    )

    assert bd(np.array([1.0, 1, 1]), np.array([100.0, 100, 100])) == approx(0)
    assert bd(np.array([1.0, 0, 0]), np.array([100.0, -100, -100])) == approx(0)

    # check if same results as alternative definition of deviance, from ESLII
    # Eq. (10.18): -loglike = log(1 + exp(-2*z*f))
    # Note:
    # - We use y = {0, 1}, ESL (10.18) uses z in {-1, 1}, hence y=2*y-1
    # - ESL 2*f = pred_raw, hence the factor 2 of ESL disappears.
    # - Deviance = -2*loglike + .., hence a factor of 2 in front.
    def alt_dev(y, raw_pred):
        z = 2 * y - 1
        return 2 * np.mean(np.log(1 + np.exp(-z * raw_pred)))

    # product() returns a one-shot iterator. Materialize it so that BOTH
    # loops below see every (y, raw_pred) combination; otherwise the first
    # loop exhausts it and the gradient check silently asserts nothing.
    test_data = list(
        product(
            (np.array([0.0, 0, 0]), np.array([1.0, 1, 1])),
            (np.array([-5.0, -5, -5]), np.array([3.0, 3, 3])),
        )
    )

    for datum in test_data:
        assert bd(*datum) == approx(alt_dev(*datum))

    # check the negative gradient against alternative formula from ESLII
    # Note: negative_gradient is half the negative gradient.
    def alt_ng(y, raw_pred):
        z = 2 * y - 1
        return z / (1 + np.exp(z * raw_pred))

    for datum in test_data:
        assert bd.negative_gradient(*datum) == approx(alt_ng(*datum))
def test_init_raw_predictions_values():
    """Check that get_init_raw_predictions() returns the expected values.

    Each loss is checked against the baseline value it is expected to
    produce from the targets.
    """
    rng = np.random.RandomState(0)
    n_samples = 100
    X = rng.normal(size=(n_samples, 5))

    def baseline_for(loss, target):
        # Fit the loss' own init estimator, then ask the loss for the raw
        # predictions it derives from that estimator.
        fitted_init = loss.init_estimator().fit(X, target)
        return loss.get_init_raw_predictions(target, fitted_init)

    y = rng.normal(size=n_samples)

    # Least squares: the baseline prediction is the mean of all targets.
    baseline = baseline_for(LeastSquaresError(n_classes=1), y)
    assert_almost_equal(baseline, y.mean())

    # Least absolute error and Huber: the baseline prediction is the median
    # of all targets.
    for Loss in (LeastAbsoluteError, HuberLossFunction):
        baseline = baseline_for(Loss(n_classes=1), y)
        assert_almost_equal(baseline, np.median(y))

    # Quantile loss: the baseline prediction is the alpha-quantile of all
    # targets.
    for alpha in (.1, .5, .9):
        baseline = baseline_for(
            QuantileLossFunction(n_classes=1, alpha=alpha), y
        )
        assert_almost_equal(baseline, np.percentile(y, alpha * 100))

    y = rng.randint(0, 2, size=n_samples)

    # Binomial deviance: the baseline prediction must equal link_function(p),
    # where p is the proba of the positive class. We want predict_proba() to
    # return p, and by definition
    #   p = inverse_link_function(raw_prediction) = sigmoid(raw_prediction)
    # so we want raw_prediction = link_function(p) = log(p / (1 - p)).
    baseline = baseline_for(BinomialDeviance(n_classes=2), y)
    positive_rate = y.mean()
    log_odds = np.log(positive_rate / (1 - positive_rate))
    assert_almost_equal(baseline, log_odds)

    # Exponential loss: half of the log-odds of the positive class.
    baseline = baseline_for(ExponentialLoss(n_classes=2), y)
    positive_rate = y.mean()
    half_log_odds = .5 * np.log(positive_rate / (1 - positive_rate))
    assert_almost_equal(baseline, half_log_odds)

    # Multinomial deviance: column k holds the log of the frequency of
    # class k.
    for n_classes in range(3, 5):
        y = rng.randint(0, n_classes, size=n_samples)
        baseline = baseline_for(MultinomialDeviance(n_classes=n_classes), y)
        for k in range(n_classes):
            class_frequency = (y == k).mean()
            assert_almost_equal(baseline[:, k], np.log(class_frequency))