def test_weighted_percentile_2d():
    # A 2D input must give the same answer as processing each column on
    # its own, whether the weights are shared (1D) or per-column (2D).
    rng = np.random.RandomState(0)
    first_col = rng.randint(10, size=10)
    shared_weights = rng.choice(5, size=10)
    second_col = rng.randint(20, size=10)
    x_2d = np.column_stack((first_col, second_col))
    n_columns = x_2d.shape[1]

    # Case 1: 2D array with a single 1D weight vector applied to all columns.
    result = _weighted_percentile(x_2d, shared_weights)
    expected = []
    for col in range(n_columns):
        expected.append(_weighted_percentile(x_2d[:, col], shared_weights))
    assert_allclose(result, expected)

    # Case 2: array and sample_weight are both 2D (one weight column each).
    extra_weights = rng.choice(5, size=10)
    w_2d = np.column_stack((shared_weights, extra_weights))

    result = _weighted_percentile(x_2d, w_2d)
    expected = []
    for col in range(n_columns):
        expected.append(_weighted_percentile(x_2d[:, col], w_2d[:, col]))
    assert_allclose(result, expected)
def test_weighted_percentile_zero_weight_zero_percentile():
    # Samples carrying zero weight must never be returned, including at the
    # extreme percentiles 0 and 100.
    values = np.array([0, 1, 2, 3, 4, 5])
    weights = np.array([0, 0, 1, 1, 1, 0])

    # (requested percentile, expected value among the weighted samples)
    expectations = ((0, 2), (50, 3), (100, 4))
    for percentile, expected in expectations:
        result = _weighted_percentile(values, weights, percentile)
        assert approx(result) == expected
def test_dummy_regressor_sample_weight(n_samples=10):
    # The fitted constant must equal the weighted statistic computed
    # directly from (y, sample_weight) for each weight-aware strategy.
    rng = np.random.RandomState(seed=1)

    X = [[0]] * n_samples
    y = rng.rand(n_samples)
    sample_weight = rng.rand(n_samples)

    # Weighted mean.
    mean_reg = DummyRegressor(strategy="mean")
    mean_reg.fit(X, y, sample_weight)
    assert mean_reg.constant_ == np.average(y, weights=sample_weight)

    # Weighted median (50th percentile).
    median_reg = DummyRegressor(strategy="median")
    median_reg.fit(X, y, sample_weight)
    assert median_reg.constant_ == _weighted_percentile(
        y, sample_weight, 50.0)

    # Weighted 95th percentile.
    quantile_reg = DummyRegressor(strategy="quantile", quantile=0.95)
    quantile_reg.fit(X, y, sample_weight)
    assert quantile_reg.constant_ == _weighted_percentile(
        y, sample_weight, 95.0)
def test_dummy_regressor_sample_weight(n_samples=10):
    # Each weight-aware strategy must store the matching weighted statistic
    # of y as its fitted constant.
    rng = np.random.RandomState(seed=1)

    X = [[0]] * n_samples
    y = rng.rand(n_samples)
    sample_weight = rng.rand(n_samples)

    # (estimator under test, statistic it is expected to reproduce)
    cases = (
        (DummyRegressor(strategy="mean"),
         np.average(y, weights=sample_weight)),
        (DummyRegressor(strategy="median"),
         _weighted_percentile(y, sample_weight, 50.0)),
        (DummyRegressor(strategy="quantile", quantile=0.95),
         _weighted_percentile(y, sample_weight, 95.0)),
    )
    for regressor, expected in cases:
        est = regressor.fit(X, y, sample_weight)
        assert_equal(est.constant_, expected)
def test_weighted_percentile_zero_weight():
    # All samples share the value 1.0 and every weight is zero; the
    # percentile is still expected to be that shared value.
    n_samples = 102
    values = np.full(n_samples, 1.0, dtype=np.float64)
    weights = np.zeros(n_samples, dtype=np.float64)

    result = _weighted_percentile(values, weights, 50)
    assert result == 1.0
def test_weighted_percentile_equal():
    # All samples are 0.0 and only the last one has its weight zeroed, so
    # the weighted median is expected to be 0.
    n_samples = 102
    values = np.zeros(n_samples, dtype=np.float64)
    weights = np.ones(n_samples, dtype=np.float64)
    weights[n_samples - 1] = 0.0

    result = _weighted_percentile(values, weights, 50)
    assert result == 0
def test_weighted_percentile_zero_weight():
    # Constant data (1.0 everywhere) combined with all-zero weights: the
    # 50th percentile is expected to be the constant itself.
    size = 102
    constant_values = np.ones(size, dtype=np.float64)
    zero_weights = np.zeros(size, dtype=np.float64)

    median = _weighted_percentile(constant_values, zero_weights, 50)
    assert median == 1.0
def test_weighted_percentile_equal():
    # Identical samples (all 0.0) with unit weights except a zero weight on
    # the final sample: the 50th percentile is expected to be 0.
    size = 102
    equal_values = np.full(size, 0.0, dtype=np.float64)
    weights = np.concatenate((np.ones(size - 1, dtype=np.float64), [0.0]))

    median = _weighted_percentile(equal_values, weights, 50)
    assert median == 0
def median_absolute_error(y_true, y_pred, *, multioutput='uniform_average',
                          sample_weight=None):
    """Median absolute error regression loss.

    Median absolute error output is non-negative floating point. The best
    value is 0.0. Read more in the :ref:`User Guide <median_absolute_error>`.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
            (n_outputs,), default='uniform_average'
        Defines aggregating of multiple output values. Array-like value
        defines weights used to average errors.

        'raw_values' :
            Returns a full set of errors in case of multioutput input.

        'uniform_average' :
            Errors of all outputs are averaged with uniform weight.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

        .. versionadded:: 0.24

    Returns
    -------
    loss : float or ndarray of floats
        If multioutput is 'raw_values', then median absolute error is
        returned for each output separately.
        If multioutput is 'uniform_average' or an ndarray of weights, then
        the weighted average of all output errors is returned.

    Examples
    --------
    >>> from sklearn.metrics import median_absolute_error
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> median_absolute_error(y_true, y_pred)
    0.5
    >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
    >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
    >>> median_absolute_error(y_true, y_pred)
    0.75
    >>> median_absolute_error(y_true, y_pred, multioutput='raw_values')
    array([0.5, 1. ])
    >>> median_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7])
    0.85
    """
    y_type, y_true, y_pred, multioutput = _check_reg_targets(
        y_true, y_pred, multioutput)

    # One error per output: reduce over the sample axis (axis 0) in a single
    # vectorised call instead of looping over individual samples.
    absolute_errors = np.abs(y_pred - y_true)
    if sample_weight is None:
        output_errors = np.median(absolute_errors, axis=0)
    else:
        output_errors = _weighted_percentile(absolute_errors,
                                             sample_weight=sample_weight)

    if isinstance(multioutput, str):
        if multioutput == 'raw_values':
            return output_errors
        elif multioutput == 'uniform_average':
            # pass None as weights to np.average: uniform mean
            multioutput = None

    # At this point multioutput is either None (uniform mean) or the
    # array-like of per-output weights supplied by the caller.
    return np.average(output_errors, weights=multioutput)
def test_weighted_percentile():
    # Layout: 50 zeros, a single 1, 50 twos, then a huge outlier. The
    # outlier gets zero weight, and the weighted median is expected to be
    # the middle value 1.
    values = np.concatenate((
        np.zeros(50, dtype=np.float64),
        [1.0],
        np.full(50, 2.0, dtype=np.float64),
        [100000.0],
    ))
    weights = np.ones(values.shape[0], dtype=np.float64)
    weights[-1] = 0.0

    result = _weighted_percentile(values, weights, 50)
    assert result == 1
def test_weighted_median_equal_weights():
    # With uniform weights the weighted 50th percentile must coincide with
    # the ordinary median.
    rng = np.random.RandomState(0)
    # Use an odd number of samples, as _weighted_percentile takes the lower
    # weighted percentile.
    data = rng.randint(10, size=11)
    uniform_weights = np.ones(data.shape)

    expected = np.median(data)
    observed = _weighted_percentile(data, uniform_weights)
    assert expected == approx(observed)
def test_weighted_percentile():
    # 102 samples: indices 0-49 are 0, index 50 is 1, indices 51-100 are 2
    # and the last one is an outlier whose weight is set to zero. The
    # weighted median is expected to be 1.
    size = 102
    data = np.full(size, 2.0, dtype=np.float64)
    data[:50] = 0
    data[50] = 1
    data[size - 1] = 100000

    sample_weight = np.ones(size, dtype=np.float64)
    sample_weight[size - 1] = 0.0

    median = _weighted_percentile(data, sample_weight, 50)
    assert median == 1
def test_weighted_median_integer_weights():
    # Integer weights act as repetition counts: the weighted 50th percentile
    # must match the plain median of the data with each sample repeated
    # `weight` times.
    rng = np.random.RandomState(0)
    data = rng.randint(20, size=10)
    counts = rng.choice(5, size=10)

    # Reference value: materialise the repetitions and take the median.
    expanded = np.repeat(data, counts)
    expected = np.median(expanded)

    observed = _weighted_percentile(data, counts)
    assert expected == approx(observed)
def fit(self, X, y, sample_weight=None):
    """Fit the dummy regressor by computing the constant it will predict.

    Parameters
    ----------
    X : {array-like, object with finite length or shape}
        Training data; only its length is checked against ``y`` and
        ``sample_weight``.

    y : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights. Used by the "mean", "std", "median" and "quantile"
        strategies.

    Returns
    -------
    self : object

    Raises
    ------
    ValueError
        If the strategy is unknown, ``y`` is empty, the quantile is not a
        scalar, or the constant does not match the number of outputs.
    TypeError
        If the "constant" strategy is used without a constant.
    """
    allowed_strategies = ("mean", "median", "quantile", "constant", "std")
    if self.strategy not in allowed_strategies:
        raise ValueError("Unknown strategy type: %s, expected one of %s."
                         % (self.strategy, allowed_strategies))

    y = check_array(y, ensure_2d=False)
    if len(y) == 0:
        raise ValueError("y must not be empty.")

    # Work on a 2D (n_samples, n_outputs) view internally, remembering
    # whether the caller supplied a 2D target.
    self.output_2d_ = y.ndim == 2
    if y.ndim == 1:
        y = np.reshape(y, (-1, 1))
    self.n_outputs_ = y.shape[1]

    check_consistent_length(X, y, sample_weight)

    if self.strategy == "mean":
        self.constant_ = np.average(y, axis=0, weights=sample_weight)

    elif self.strategy == "std":
        if sample_weight is None:
            self.constant_ = np.std(y, axis=0)
        else:
            # Honour sample_weight like the other strategies: population
            # (ddof=0) standard deviation around the weighted mean.
            weighted_mean = np.average(y, axis=0, weights=sample_weight)
            weighted_var = np.average((y - weighted_mean) ** 2, axis=0,
                                      weights=sample_weight)
            self.constant_ = np.sqrt(weighted_var)

    elif self.strategy == "median":
        if sample_weight is None:
            self.constant_ = np.median(y, axis=0)
        else:
            self.constant_ = [
                _weighted_percentile(y[:, k], sample_weight, percentile=50.)
                for k in range(self.n_outputs_)
            ]

    elif self.strategy == "quantile":
        if self.quantile is None or not np.isscalar(self.quantile):
            raise ValueError("Quantile must be a scalar in the range "
                             "[0.0, 1.0], but got %s." % self.quantile)

        percentile = self.quantile * 100.0
        if sample_weight is None:
            self.constant_ = np.percentile(y, axis=0, q=percentile)
        else:
            self.constant_ = [
                _weighted_percentile(y[:, k], sample_weight,
                                     percentile=percentile)
                for k in range(self.n_outputs_)
            ]

    elif self.strategy == "constant":
        if self.constant is None:
            raise TypeError("Constant target value has to be specified "
                            "when the constant strategy is used.")

        # Validate into a local so the ``constant`` hyper-parameter set in
        # the constructor is left untouched by fit.
        constant = check_array(self.constant,
                               accept_sparse=['csr', 'csc', 'coo'],
                               ensure_2d=False, ensure_min_samples=0)

        if self.output_2d_ and constant.shape[0] != y.shape[1]:
            raise ValueError("Constant target value should have "
                             "shape (%d, 1)." % y.shape[1])

        self.constant_ = constant

    # Always store the fitted constant as a (1, n_outputs) row.
    self.constant_ = np.reshape(self.constant_, (1, -1))
    return self
def fit(self, X, y, sample_weight=None):
    """Fit the random regressor.

    Parameters
    ----------
    X : {array-like, object with finite length or shape}
        Training data, requires length = n_samples

    y : array-like, shape = [n_samples] or [n_samples, n_outputs]
        Target values.

    sample_weight : array-like of shape = [n_samples], optional
        Sample weights.

    Returns
    -------
    self : object

    Raises
    ------
    ValueError
        If the strategy is unknown, ``y`` is empty, the quantile is not a
        scalar, or the constant does not match the number of outputs.
    TypeError
        If the "constant" strategy is used without a constant.
    """
    allowed_strategies = ("mean", "median", "quantile", "constant",
                          "median_nonzero")
    if self.strategy not in allowed_strategies:
        raise ValueError("Unknown strategy type: %s, expected one of %s."
                         % (self.strategy, allowed_strategies))

    y = check_array(y, ensure_2d=False)
    if len(y) == 0:
        raise ValueError("y must not be empty.")

    # Work on a 2D (n_samples, n_outputs) view internally, remembering
    # whether the caller supplied a 2D target.
    self.output_2d_ = y.ndim == 2
    if y.ndim == 1:
        y = np.reshape(y, (-1, 1))
    self.n_outputs_ = y.shape[1]

    check_consistent_length(X, y, sample_weight)

    if self.strategy == "mean":
        self.constant_ = np.average(y, axis=0, weights=sample_weight)

    elif self.strategy == "median":
        if sample_weight is None:
            self.constant_ = np.median(y, axis=0)
        else:
            self.constant_ = [_weighted_percentile(y[:, k], sample_weight,
                                                   percentile=50.)
                              for k in range(self.n_outputs_)]

    elif self.strategy == "median_nonzero":
        # Median of the strictly positive targets, computed per output.
        # A boolean mask over the whole 2D array would flatten it (making
        # ``[:, k]`` invalid), so each column is masked on its own and the
        # same mask is applied to the weights to keep them aligned.
        weights = (None if sample_weight is None
                   else np.asarray(sample_weight))
        constants = []
        for k in range(self.n_outputs_):
            column = y[:, k]
            positive = column > 0
            if weights is None:
                constants.append(np.median(column[positive]))
            else:
                constants.append(
                    _weighted_percentile(column[positive],
                                         weights[positive],
                                         percentile=50.))
        self.constant_ = constants

    elif self.strategy == "quantile":
        if self.quantile is None or not np.isscalar(self.quantile):
            raise ValueError("Quantile must be a scalar in the range "
                             "[0.0, 1.0], but got %s." % self.quantile)

        percentile = self.quantile * 100.0
        if sample_weight is None:
            self.constant_ = np.percentile(y, axis=0, q=percentile)
        else:
            self.constant_ = [_weighted_percentile(y[:, k], sample_weight,
                                                   percentile=percentile)
                              for k in range(self.n_outputs_)]

    elif self.strategy == "constant":
        if self.constant is None:
            raise TypeError("Constant target value has to be specified "
                            "when the constant strategy is used.")

        # Validate into a local so the ``constant`` hyper-parameter set in
        # the constructor is left untouched by fit.
        constant = check_array(self.constant,
                               accept_sparse=['csr', 'csc', 'coo'],
                               ensure_2d=False, ensure_min_samples=0)

        if self.output_2d_ and constant.shape[0] != y.shape[1]:
            raise ValueError(
                "Constant target value should have "
                "shape (%d, 1)." % y.shape[1])

        self.constant_ = constant

    # Always store the fitted constant as a (1, n_outputs) row.
    self.constant_ = np.reshape(self.constant_, (1, -1))
    return self