def test_multimetric_scorer_calls_method_once(
    scorers,
    expected_predict_count,
    expected_predict_proba_count,
    expected_decision_func_count,
):
    X, y = np.array([[1], [1], [0], [0], [0]]), np.array([0, 1, 1, 1, 0])

    mock_est = Mock()
    fit_func = Mock(return_value=mock_est)
    predict_func = Mock(return_value=y)

    pos_proba = np.random.rand(X.shape[0])
    proba = np.c_[1 - pos_proba, pos_proba]
    predict_proba_func = Mock(return_value=proba)
    decision_function_func = Mock(return_value=pos_proba)

    mock_est.fit = fit_func
    mock_est.predict = predict_func
    mock_est.predict_proba = predict_proba_func
    mock_est.decision_function = decision_function_func

    scorer_dict, _ = _check_multimetric_scoring(LogisticRegression(), scorers)
    multi_scorer = _MultimetricScorer(**scorer_dict)
    results = multi_scorer(mock_est, X, y)

    assert set(scorers) == set(results)  # compare dict keys

    assert predict_func.call_count == expected_predict_count
    assert predict_proba_func.call_count == expected_predict_proba_count
    assert decision_function_func.call_count == expected_decision_func_count

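# The test above expects its arguments from pytest parametrization. A minimal,
# illustrative parametrization is sketched below; the scorer sets and expected
# call counts are assumptions chosen to match _MultimetricScorer's caching
# behaviour (each underlying estimator method is invoked at most once per
# scoring pass), not values taken from the source:
#
#   @pytest.mark.parametrize(
#       "scorers, expected_predict_count, "
#       "expected_predict_proba_count, expected_decision_func_count",
#       [
#           # accuracy -> predict once; neg_log_loss -> predict_proba once
#           ({"acc": "accuracy", "nll": "neg_log_loss"}, 1, 1, 0),
#           # accuracy -> predict once; roc_auc -> decision_function once
#           ({"acc": "accuracy", "ra": "roc_auc"}, 1, 0, 1),
#       ],
#   )
#   def test_multimetric_scorer_calls_method_once(...):
#       ...
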
def test_multimetric_scorer_sanity_check():
    # scoring dictionary returned is the same as calling each scorer separately
    scorers = {
        'a1': 'accuracy', 'a2': 'accuracy',
        'll1': 'neg_log_loss', 'll2': 'neg_log_loss',
        'ra1': 'roc_auc', 'ra2': 'roc_auc'
    }

    X, y = make_classification(random_state=0)

    clf = DecisionTreeClassifier()
    clf.fit(X, y)

    scorer_dict, _ = _check_multimetric_scoring(clf, scorers)
    multi_scorer = _MultimetricScorer(**scorer_dict)

    result = multi_scorer(clf, X, y)

    separate_scores = {
        name: get_scorer(name)(clf, X, y)
        for name in ['accuracy', 'neg_log_loss', 'roc_auc']
    }

    for key, value in result.items():
        score_name = scorers[key]
        assert_allclose(value, separate_scores[score_name])

def test_multimetric_scorer_sanity_check():
    # scoring dictionary returned is the same as calling each scorer separately
    scorers = {
        "a1": "accuracy",
        "a2": "accuracy",
        "ll1": "neg_log_loss",
        "ll2": "neg_log_loss",
        "ra1": "roc_auc",
        "ra2": "roc_auc",
    }

    X, y = make_classification(random_state=0)

    clf = DecisionTreeClassifier()
    clf.fit(X, y)

    scorer_dict = _check_multimetric_scoring(clf, scorers)
    multi_scorer = _MultimetricScorer(**scorer_dict)

    result = multi_scorer(clf, X, y)

    separate_scores = {
        name: get_scorer(name)(clf, X, y)
        for name in ["accuracy", "neg_log_loss", "roc_auc"]
    }

    for key, value in result.items():
        score_name = scorers[key]
        assert_allclose(value, separate_scores[score_name])

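# The two sanity-check variants above differ only in how the result of
# _check_multimetric_scoring is unpacked: older scikit-learn releases returned
# a (scorer_dict, is_multimetric) tuple, newer ones return the dict alone.
# A minimal, version-tolerant sketch (the helper name is hypothetical, not
# taken from the source):
def _get_scorer_dict(estimator, scoring):
    checked = _check_multimetric_scoring(estimator, scoring)
    if isinstance(checked, tuple):
        # older API: (scorer_dict, is_multimetric)
        checked = checked[0]
    return checked
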
def _score_weighted(estimator, x_test, y_test, scorer, sample_weights=None):
    """Expand :func:`sklearn.model_selection._validation._score`."""
    if isinstance(scorer, dict):
        # will cache method calls if needed. scorer() returns a dict
        scorer = _MultimetricScorer(**scorer)
    if y_test is None:
        scores = scorer(estimator, x_test, sample_weight=sample_weights)
    else:
        scores = scorer(estimator, x_test, y_test, sample_weight=sample_weights)

    error_msg = ("scoring must return a number, got %s (%s) "
                 "instead. (scorer=%s)")
    if isinstance(scores, dict):
        for name, score in scores.items():
            if hasattr(score, 'item'):
                with suppress(ValueError):
                    # e.g. unwrap memmapped scalars
                    score = score.item()
            if not isinstance(score, numbers.Number):
                raise ValueError(error_msg % (score, type(score), name))
            scores[name] = score
    else:  # scalar
        if hasattr(scores, 'item'):
            with suppress(ValueError):
                # e.g. unwrap memmapped scalars
                scores = scores.item()
        if not isinstance(scores, numbers.Number):
            raise ValueError(error_msg % (scores, type(scores), scorer))
    return scores

def _skl_score(estimator, X_test, y_test, scorer):
    """Compute the score(s) of an estimator on a given test set.

    Will return a dict of floats if `scorer` is a dict, otherwise a single
    float is returned.
    """
    if isinstance(scorer, dict):
        # will cache method calls if needed. scorer() returns a dict
        scorer = _MultimetricScorer(**scorer)
    if y_test is None:
        scores = scorer(estimator, X_test)
    else:
        scores = scorer(estimator, X_test, y_test)

    error_msg = ("scoring must return a number, got %s (%s) "
                 "instead. (scorer=%s)")
    if isinstance(scores, dict):
        for name, score in scores.items():
            if hasattr(score, "item"):
                with suppress(ValueError):
                    # e.g. unwrap memmapped scalars
                    score = score.item()
            if not isinstance(score, numbers.Number):
                raise ValueError(error_msg % (score, type(score), name))
            scores[name] = score
    else:  # scalar
        if hasattr(scores, "item"):
            with suppress(ValueError):
                # e.g. unwrap memmapped scalars
                scores = scores.item()
        if not isinstance(scores, numbers.Number):
            raise ValueError(error_msg % (scores, type(scores), scorer))
    return scores

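# A minimal usage sketch for the two scoring helpers above. The private import
# path and the dict-returning _check_multimetric_scoring below match recent
# scikit-learn layouts but may differ between versions, so treat them as
# assumptions rather than a stable API:
#
#   from sklearn.datasets import make_classification
#   from sklearn.linear_model import LogisticRegression
#   from sklearn.metrics import get_scorer
#   from sklearn.metrics._scorer import _check_multimetric_scoring
#
#   X, y = make_classification(random_state=0)
#   clf = LogisticRegression().fit(X, y)
#   scorer_dict = _check_multimetric_scoring(clf, {"acc": "accuracy", "auc": "roc_auc"})
#   _skl_score(clf, X, y, scorer_dict)             # -> {"acc": ..., "auc": ...}
#   _skl_score(clf, X, y, get_scorer("accuracy"))  # -> single float
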
def scoring(self, scoring):
    # Scorer
    scoring = _check_multimetric_scoring(self.estimator, scoring)
    # If scoring is a tuple (older versions of scikit-learn), take only the
    # first element (the scorer dict)
    if isinstance(scoring, tuple):
        scoring = scoring[0]
    # This is a dict of scorers
    self._scoring_dict = scoring
    # Make it efficient: wrap in a single callable that returns a dict
    # (with the metrics) and caches shared predictions
    scoring = _MultimetricScorer(**scoring)
    self._scoring = scoring

def test_multimetric_scorer_calls_method_once_regressor_threshold():
    predict_called_cnt = 0

    class MockDecisionTreeRegressor(DecisionTreeRegressor):
        def predict(self, X):
            nonlocal predict_called_cnt
            predict_called_cnt += 1
            return super().predict(X)

    X, y = np.array([[1], [1], [0], [0], [0]]), np.array([0, 1, 1, 1, 0])

    # no decision function
    clf = MockDecisionTreeRegressor()
    clf.fit(X, y)

    scorers = {'neg_mse': 'neg_mean_squared_error', 'r2': 'roc_auc'}
    scorer_dict, _ = _check_multimetric_scoring(clf, scorers)
    scorer = _MultimetricScorer(**scorer_dict)
    scorer(clf, X, y)

    assert predict_called_cnt == 1

def test_multimetric_scorer_calls_method_once_classifier_no_decision():
    predict_proba_call_cnt = 0

    class MockKNeighborsClassifier(KNeighborsClassifier):
        def predict_proba(self, X):
            nonlocal predict_proba_call_cnt
            predict_proba_call_cnt += 1
            return super().predict_proba(X)

    X, y = np.array([[1], [1], [0], [0], [0]]), np.array([0, 1, 1, 1, 0])

    # no decision function
    clf = MockKNeighborsClassifier(n_neighbors=1)
    clf.fit(X, y)

    scorers = ['roc_auc', 'neg_log_loss']
    scorer_dict, _ = _check_multimetric_scoring(clf, scorers)
    scorer = _MultimetricScorer(**scorer_dict)
    scorer(clf, X, y)

    assert predict_proba_call_cnt == 1

def fit(self, estimator, x, y=None, sample_weight=None):
    x = check_array(x, allow_multivariate=False)
    y = check_array(y, ensure_2d=False)
    random_state = check_random_state(self.random_state)
    if x.shape[0] != y.shape[0]:
        raise ValueError(
            "expected the same number of samples (%d) and labels (%d)"
            % (x.shape[0], y.shape[0])
        )

    # resolve the number of intervals
    if self.n_interval == "sqrt":
        n_interval = math.ceil(math.sqrt(x.shape[-1]))
    elif self.n_interval == "log":
        n_interval = math.ceil(math.log2(x.shape[-1]))
    elif isinstance(self.n_interval, numbers.Integral):
        n_interval = self.n_interval
    elif isinstance(self.n_interval, numbers.Real):
        if not 0 < self.n_interval <= 1:
            raise ValueError(
                "n_interval (%r) not in range (0, 1]" % self.n_interval
            )
        n_interval = math.floor(x.shape[-1] * self.n_interval)
    else:
        raise ValueError("unsupported n_interval, got %r" % self.n_interval)

    # resolve the scoring callable; dict-like scoring is wrapped in a
    # _MultimetricScorer so predictions are shared between metrics
    if callable(self.scoring):
        scoring = self.scoring
    elif self.scoring is None or isinstance(self.scoring, str):
        scoring = check_scoring(estimator, self.scoring)
    else:
        scoring_dict = _check_multimetric_scoring(estimator, self.scoring)
        scoring = _MultimetricScorer(**scoring_dict)

    if isinstance(self.domain, str):
        domain = _PERMUTATION_DOMAIN.get(self.domain, None)
        if domain is None:
            raise ValueError("domain (%s) is not supported" % self.domain)
        self.domain_ = domain()
    else:
        self.domain_ = self.domain

    x_transform = self.domain_.transform(x=x)
    self.intervals_ = list(
        self.domain_.intervals(x_transform.shape[-1], n_interval)
    )

    scores = []
    for i, (start, end) in enumerate(self.intervals_):
        if self.verbose:
            print(
                f"Running iteration {i + 1} of "
                f"{len(self.intervals_)}. {start}:{end}"
            )
        x_perm_transform = x_transform.copy()
        rep_scores = []
        for rep in range(self.n_repeat):
            self.domain_.randomize(
                x_perm_transform, start, end, random_state=random_state
            )
            x_perm_inverse = self.domain_.inverse_transform(x_perm_transform)
            if sample_weight is not None:
                score = scoring(
                    estimator, x_perm_inverse, y, sample_weight=sample_weight
                )
            else:
                score = scoring(estimator, x_perm_inverse, y)
            rep_scores.append(score)

        if isinstance(rep_scores[0], dict):
            scores.append(_aggregate_score_dicts(rep_scores))
        else:
            scores.append(rep_scores)

    if sample_weight is not None:
        self.baseline_score_ = scoring(estimator, x, y, sample_weight=sample_weight)
    else:
        self.baseline_score_ = scoring(estimator, x, y)

    if self.verbose:
        print(f"Baseline score is: {self.baseline_score_}")

    if isinstance(self.baseline_score_, dict):
        self.importances_ = {
            name: _unpack_scores(
                self.baseline_score_[name],
                np.array([scores[i][name] for i in range(n_interval)]),
            )
            for name in self.baseline_score_
        }
    else:
        self.importances_ = _unpack_scores(self.baseline_score_, np.array(scores))
    return self