Пример #1
0
def _check_fit_params(X, fit_params, indices=None):
    """Check and validate the parameters passed during `fit`.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Data array.

    fit_params : dict
        Dictionary containing the parameters passed at fit.

    indices : array-like of shape (n_samples,), default=None
        Indices to be selected if the parameter has the same size as
        `X`.

    Returns
    -------
    fit_params_validated : dict
        Validated parameters. We ensure that the values support
        indexing.
    """
    try:
        from sklearn.utils.validation import \
            _check_fit_params as _sklearn_check_fit_params
        return _sklearn_check_fit_params(X, fit_params, indices)
    except ImportError:
        from sklearn.model_selection import _validation

        fit_params_validated = \
            {k: _validation._index_param_value(X, v, indices)
             for k, v in fit_params.items()}
        return fit_params_validated
Пример #2
0
Файл: ifs.py Проект: teopir/ifqi
def _my_fit_and_predict(estimator, X, y, train, test, verbose, fit_params,
                        method):
    fit_params = fit_params if fit_params is not None else {}
    fit_params = dict([(k, _index_param_value(X, v, train))
                       for k, v in fit_params.items()])

    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, Y_test = _safe_split(estimator, X, y, test, train)

    if y_train is None:
        estimator.fit(X_train, **fit_params)
    else:
        estimator.fit(X_train, y_train, **fit_params)
    func = getattr(estimator, method)
    predictions = func(X_test)
    if method in ['decision_function', 'predict_proba', 'predict_log_proba']:
        n_classes = len(set(y))
        predictions_ = np.zeros((X_test.shape[0], n_classes))
        if method == 'decision_function' and len(estimator.classes_) == 2:
            predictions_[:, estimator.classes_[-1]] = predictions
        else:
            predictions_[:, estimator.classes_] = predictions
        predictions = predictions_
    scores = r2_score(y_true=Y_test, y_pred=predictions,
                      multioutput='raw_values')
    return predictions, test, scores
Пример #3
0
def _my_fit_and_predict(estimator, X, y, train, test, verbose, fit_params,
                        method):
    fit_params = fit_params if fit_params is not None else {}
    fit_params = dict([(k, _index_param_value(X, v, train))
                       for k, v in fit_params.items()])

    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, Y_test = _safe_split(estimator, X, y, test, train)

    if y_train is None:
        estimator.fit(X_train, **fit_params)
    else:
        estimator.fit(X_train, y_train, **fit_params)
    func = getattr(estimator, method)
    predictions = func(X_test)
    if method in ['decision_function', 'predict_proba', 'predict_log_proba']:
        n_classes = len(set(y))
        predictions_ = np.zeros((X_test.shape[0], n_classes))
        if method == 'decision_function' and len(estimator.classes_) == 2:
            predictions_[:, estimator.classes_[-1]] = predictions
        else:
            predictions_[:, estimator.classes_] = predictions
        predictions = predictions_
    scores = r2_score(y_true=Y_test,
                      y_pred=predictions,
                      multioutput='raw_values')
    return predictions, test, scores
Пример #4
0
def _fe_fit_and_score(estimator,
                      X,
                      y,
                      scorers,
                      train,
                      test,
                      verbose,
                      parameters,
                      fit_params,
                      return_train_score=False,
                      return_parameters=False,
                      fold_specific_X_extractor=None):
    if verbose > 1:
        if parameters is None:
            msg = ''
        else:
            msg = '%s' % (', '.join('%s=%s' % (k, v)
                                    for k, v in parameters.items()))
        print("[CV] %s %s" % (msg, (64 - len(msg)) * '.'))

    # Adjust length of sample weights
    fit_params = fit_params if fit_params is not None else {}
    fit_params = dict([(k, _index_param_value(X, v, train))
                       for k, v in fit_params.items()])

    if parameters is not None:
        estimator.set_params(**parameters)

    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, y_test = _safe_split(estimator, X, y, test, train)
    if fold_specific_X_extractor:
        # extend by fold-specific features
        X_train_additional = fold_specific_X_extractor(train)
        if X_train_additional is not None:
            X_train = np.concatenate([X_train, X_train_additional], axis=1)
    #
    if y_train is None:
        estimator.fit(X_train, **fit_params)
    else:
        estimator.fit(X_train, y_train, **fit_params)
    #
    test_scores = [
        _score(estimator, X_test, y_test, scorer) for scorer in scorers
    ]
    if return_train_score:
        train_scores = [
            _score(estimator, X_train, y_train, scorer) for scorer in scorers
        ]
    #
    if verbose > 2:
        msg += ", scores=%s" % test_scores
    if verbose > 1:
        print("[CV] %s %s" % ((64 - len(msg)) * '.', msg))

    ret = [train_scores, test_scores] if return_train_score else [test_scores]

    if return_parameters:
        ret.append(parameters)
    return ret
Пример #5
0
    def _base_est_fit_predict(self, X, y, **fit_params):
        """Fit the base estimators on CV training folds, and return their
        out-of-sample predictions on the test folds as features for the
        meta-estimator. Also return the fit_params for the meta-estimator.
        """
        y = y.squeeze()
        # Construct CV iterator
        cv = self._check_cv(y=y)
        # Extract CV indices since we need them twice, and un-seeded CV
        # generators with `shuffle=True` split differently each time.
        train_inds = []
        test_inds = []
        for train, test in cv.split(X, y):
            train_inds.append(train)
            test_inds.append(test)

        fit_params_ests = self._extract_fit_params(**fit_params)
        _fit_predict = self._get_fit_predict_function()

        _jobs = []

        # Loop over CV folds to get out-of-sample predictions, which become the
        # features for the meta-estimator.
        for train, test in zip(train_inds, test_inds):
            for name, est in self.estimator_list[:-1]:
                # adapted from sklearn.model_selection._fit_and_predict
                # Adjust length of sample weights
                fit_params_est_adjusted = dict([
                    (k, _index_param_value(X, v, train))
                    for k, v in fit_params_ests[name].items()
                ])

                # Fit estimator on training set and score out-of-sample
                _jobs.append(
                    delayed(_fit_predict)(clone(est), X[train], y[train],
                                          X[test], **fit_params_est_adjusted))

        _out = Parallel(n_jobs=self.n_jobs,
                        verbose=self.verbose,
                        pre_dispatch=self.pre_dispatch)(_jobs)

        # Extract the results from joblib
        Xmeta, ymeta = None, None
        for test in test_inds:
            ybase = np.empty((y[test].shape[0], 0))
            for name, est in self.estimator_list[:-1]:
                # Build design matrix out of out-of-sample predictions
                ybase = np.hstack((ybase, _out.pop(0)))

            # Append the test outputs to what will eventually be the features
            # for the meta-estimator.
            if Xmeta is not None:
                ymeta = np.concatenate((ymeta, y[test]))
                Xmeta = np.vstack((Xmeta, ybase))
            else:
                Xmeta = ybase
                ymeta = y[test]

        return Xmeta, ymeta, fit_params_ests[self.meta_estimator_name]
Пример #6
0
def _omic_fit_and_predict(estimator, X, y, train, test, verbose, fit_params,
                          lbl_type):

    #TODO: _index_param_values is deprecated in newer versions of scikit-learn
    fit_params = fit_params if fit_params is not None else {}
    fit_params = dict([(k, _index_param_value(X, v, train))
                       for k, v in fit_params.items()])

    X_train, y_train = _omic_safe_split(estimator, X, y, train)
    X_test, _ = _omic_safe_split(estimator, X, y, test, train)

    if y_train is None:
        estimator.fit(X_train, **fit_params)
    else:
        estimator.fit(X_train, y_train, **fit_params)

    return estimator.parse_preds(estimator.predict_omic(X_test, lbl_type))
Пример #7
0
def _fit_and_predict(estimator, X, y, train, test, class_ratio, verbose,
                     fit_params, method):
    from sklearn.utils.metaestimators import _safe_split
    from sklearn.model_selection._validation import _index_param_value
    from imblearn.under_sampling import RandomUnderSampler

    # Adjust length of sample weights
    fit_params = fit_params if fit_params is not None else {}
    fit_params = dict([(k, _index_param_value(X, v, train))
                       for k, v in fit_params.items()])

    rus = RandomUnderSampler(ratio=class_ratio,
                             return_indices=True,
                             random_state=42)
    if len(X.shape) < 2:
        X0 = X[0][train]
        y_train = y[train]

        idxs = rus.fit_sample(X0, y_train)
        X_train = np.empty(shape=(len(idxs), X.shape[0], X0.shape[1]))
        X_test = np.empty(shape=(len(test), X.shape[0], X0.shape[1]))
        for i in range(X.shape[0]):
            X_train[:, i, :] = X[i][train][idxs].toarray()
            X_test[:, i, :] = X[i][test].toarray()

        y_train = to_categorical(y_train[idxs])
    else:
        X_train, y_train = _safe_split(estimator, X, y, train)
        X_test, _ = _safe_split(estimator, X, y, test, train)

        idxs = rus.fit_sample(X_train, y_train)
        X_train = X_train[idxs]
        y_train = y_train[idxs]

    clf = estimator.fit(X_train, y_train, **fit_params)

    func = getattr(estimator, method)

    logger.info('-- predict_proba()')
    predictions = func(X_test)
    return predictions, test, pd.DataFrame(clf.cv_results_)
Пример #8
0
def _fit_and_score_keras(estimator,
                         X,
                         y,
                         scorer,
                         train,
                         test,
                         verbose,
                         parameters,
                         fit_params,
                         return_train_score=False,
                         return_parameters=False,
                         return_n_test_samples=False,
                         return_times=False,
                         error_score='raise',
                         session=None):
    """Fit estimator and compute scores for a given dataset split for KerasClassifier and KerasRegressor.

    Parameters
    ----------
    estimator : estimator object implementing 'fit'
        The object to use to fit the data.

    X : array-like of shape at least 2D
        The data to fit.

    y : array-like, optional, default: None
        The target variable to try to predict in the case of
        supervised learning.

    scorer : A single callable or dict mapping scorer name to the callable
        If it is a single callable, the return value for ``train_scores`` and
        ``test_scores`` is a single float.

        For a dict, it should be one mapping the scorer name to the scorer
        callable object / function.

        The callable object / fn should have signature
        ``scorer(estimator, X, y)``.

    train : array-like, shape (n_train_samples,)
        Indices of training samples.

    test : array-like, shape (n_test_samples,)
        Indices of test samples.

    verbose : integer
        The verbosity level.

    error_score : 'raise' (default) or numeric
        Value to assign to the score if an error occurs in estimator fitting.
        If set to 'raise', the error is raised. If a numeric value is given,
        FitFailedWarning is raised. This parameter does not affect the refit
        step, which will always raise the error.

    parameters : dict or None
        Parameters to be set on the estimator.

    fit_params : dict or None
        Parameters that will be passed to ``estimator.fit``.

    return_train_score : boolean, optional, default: False
        Compute and return score on training set.

    return_parameters : boolean, optional, default: False
        Return parameters that has been used for the estimator.

    return_n_test_samples : boolean, optional, default: False
        Whether to return the ``n_test_samples``

    return_times : boolean, optional, default: False
        Whether to return the fit/score times.

    session : Keras backend with a tensorflow session attached
        The keras backend session for applying K.clear_session()
        after the classifier or regressor has been train and scored
        given the split. This is mainly required to avoid posible
        Out Of Memory errors with tensorflow not deallocating the
        GPU memory after each iteration of the Cross Validation.

    Returns
    -------
    train_scores : dict of scorer name -> float, optional
        Score on training set (for all the scorers),
        returned only if `return_train_score` is `True`.

    test_scores : dict of scorer name -> float, optional
        Score on testing set (for all the scorers).

    n_test_samples : int
        Number of test samples.

    fit_time : float
        Time spent for fitting in seconds.

    score_time : float
        Time spent for scoring in seconds.

    parameters : dict or None, optional
        The parameters that have been evaluated.
    """
    if verbose > 1:
        if parameters is None:
            msg = ''
        else:
            msg = '%s' % (', '.join('%s=%s' % (k, v)
                                    for k, v in parameters.items()))
        print("[CV] %s %s" % (msg, (64 - len(msg)) * '.'))

    # Adjust length of sample weights
    fit_params = fit_params if fit_params is not None else {}
    fit_params = dict([(k, _index_param_value(X, v, train))
                       for k, v in fit_params.items()])

    test_scores = {}
    train_scores = {}
    if parameters is not None:
        estimator.set_params(**parameters)

    start_time = time.time()

    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, y_test = _safe_split(estimator, X, y, test, train)

    is_multimetric = not callable(scorer)
    n_scorers = len(scorer.keys()) if is_multimetric else 1

    try:
        if y_train is None:
            estimator.fit(X_train, **fit_params)
        else:
            estimator.fit(X_train, y_train, **fit_params)

    except Exception as e:
        # Note fit time as time until error
        fit_time = time.time() - start_time
        score_time = 0.0
        if error_score == 'raise':
            raise
        elif isinstance(error_score, numbers.Number):
            if is_multimetric:
                test_scores = dict(
                    zip(scorer.keys(), [
                        error_score,
                    ] * n_scorers))
                if return_train_score:
                    train_scores = dict(
                        zip(scorer.keys(), [
                            error_score,
                        ] * n_scorers))
            else:
                test_scores = error_score
                if return_train_score:
                    train_scores = error_score
            warnings.warn(
                "Classifier fit failed. The score on this train-test"
                " partition for these parameters will be set to %f. "
                "Details: \n%r" % (error_score, e), FitFailedWarning)
        else:
            raise ValueError("error_score must be the string 'raise' or a"
                             " numeric value. (Hint: if using 'raise', please"
                             " make sure that it has been spelled correctly.)")

    else:
        fit_time = time.time() - start_time
        # _score will return dict if is_multimetric is True
        test_scores = _score(estimator, X_test, y_test, scorer, is_multimetric)
        score_time = time.time() - start_time - fit_time
        if return_train_score:
            train_scores = _score(estimator, X_train, y_train, scorer,
                                  is_multimetric)

    if verbose > 2:
        if is_multimetric:
            for scorer_name, score in test_scores.items():
                msg += ", %s=%s" % (scorer_name, score)
        else:
            msg += ", score=%s" % test_scores
    if verbose > 1:
        total_time = score_time + fit_time
        end_msg = "%s, total=%s" % (msg, logger.short_format_time(total_time))
        print("[CV] %s %s" % ((64 - len(end_msg)) * '.', end_msg))

    ret = [train_scores, test_scores] if return_train_score else [test_scores]

    if return_n_test_samples:
        ret.append(_num_samples(X_test))
    if return_times:
        ret.extend([fit_time, score_time])
    if return_parameters:
        ret.append(parameters)
    # The estimator is erased
    del estimator
    # We assign the keras backend
    K = session
    # Clean the session
    K.clear_session()
    # The garbage collector is called in order to ensure that the estimator is erased from memory
    for i in range(15):
        gc.collect()
    return ret
Пример #9
0
def _fit_and_score(estimator, X, y, dy, scorer, train, test, verbose,
                   parameters, fit_params, return_train_score=False,
                   return_parameters=False, return_n_test_samples=False,
                   return_times=False, return_estimator=False,
                   error_score='raise-deprecating'):
    """Fit estimator and compute scores for a given dataset split.
    Parameters
    ----------
    estimator : estimator object implementing 'fit'
        The object to use to fit the data.
    X : array-like of shape at least 2D
        The data to fit.
    y : array-like, optional, default: None
        The target variable to try to predict in the case of
        supervised learning.
    scorer : A single callable or dict mapping scorer name to the callable
        If it is a single callable, the return value for ``train_scores`` and
        ``test_scores`` is a single float.
        For a dict, it should be one mapping the scorer name to the scorer
        callable object / function.
        The callable object / fn should have signature
        ``scorer(estimator, X, y)``.
    train : array-like, shape (n_train_samples,)
        Indices of training samples.
    test : array-like, shape (n_test_samples,)
        Indices of test samples.
    verbose : integer
        The verbosity level.
    error_score : 'raise' | 'raise-deprecating' or numeric
        Value to assign to the score if an error occurs in estimator fitting.
        If set to 'raise', the error is raised.
        If set to 'raise-deprecating', a FutureWarning is printed before the
        error is raised.
        If a numeric value is given, FitFailedWarning is raised. This parameter
        does not affect the refit step, which will always raise the error.
        Default is 'raise-deprecating' but from version 0.22 it will change
        to np.nan.
    parameters : dict or None
        Parameters to be set on the estimator.
    fit_params : dict or None
        Parameters that will be passed to ``estimator.fit``.
    return_train_score : boolean, optional, default: False
        Compute and return score on training set.
    return_parameters : boolean, optional, default: False
        Return parameters that has been used for the estimator.
    return_n_test_samples : boolean, optional, default: False
        Whether to return the ``n_test_samples``
    return_times : boolean, optional, default: False
        Whether to return the fit/score times.
    return_estimator : boolean, optional, default: False
        Whether to return the fitted estimator.
    Returns
    -------
    train_scores : dict of scorer name -> float, optional
        Score on training set (for all the scorers),
        returned only if `return_train_score` is `True`.
    test_scores : dict of scorer name -> float, optional
        Score on testing set (for all the scorers).
    n_test_samples : int
        Number of test samples.
    fit_time : float
        Time spent for fitting in seconds.
    score_time : float
        Time spent for scoring in seconds.
    parameters : dict or None, optional
        The parameters that have been evaluated.
    estimator : estimator object
        The fitted estimator
    """
    if verbose > 1:
        if parameters is None:
            msg = ''
        else:
            msg = '%s' % (', '.join('%s=%s' % (k, v)
                          for k, v in parameters.items()))
        print("[CV] %s %s" % (msg, (64 - len(msg)) * '.'))

    # Adjust length of sample weights
    fit_params = fit_params if fit_params is not None else {}
    fit_params = dict([(k, _index_param_value(X, v, train))
                      for k, v in fit_params.items()])

    train_scores = {}
    if parameters is not None:
        estimator.set_params(**parameters)

    start_time = time.time()

    X_train, y_train, dy_train = _safe_split(estimator, X, y, dy, train)
    X_test, y_test, dy_test = _safe_split(estimator, X, y, dy, test, train)

    is_multimetric = not callable(scorer)
    n_scorers = len(scorer.keys()) if is_multimetric else 1

    try:
        if y_train is None:
            estimator.fit(X_train, **fit_params)
        else:
            estimator.fit(X_train, y_train, dy_train, **fit_params)

    except Exception as e:
        # Note fit time as time until error
        fit_time = time.time() - start_time
        score_time = 0.0
        if error_score == 'raise':
            raise
        elif error_score == 'raise-deprecating':
            warnings.warn("From version 0.22, errors during fit will result "
                          "in a cross validation score of NaN by default. Use "
                          "error_score='raise' if you want an exception "
                          "raised or error_score=np.nan to adopt the "
                          "behavior from version 0.22.",
                          FutureWarning)
            raise
        elif isinstance(error_score, numbers.Number):
            if is_multimetric:
                test_scores = dict(zip(scorer.keys(),
                                   [error_score, ] * n_scorers))
                if return_train_score:
                    train_scores = dict(zip(scorer.keys(),
                                        [error_score, ] * n_scorers))
            else:
                test_scores = error_score
                if return_train_score:
                    train_scores = error_score
            warnings.warn("Estimator fit failed. The score on this train-test"
                          " partition for these parameters will be set to %f. "
                          "Details: \n%s" %
                          (error_score, format_exception_only(type(e), e)[0]),
                          FitFailedWarning)
        else:
            raise ValueError("error_score must be the string 'raise' or a"
                             " numeric value. (Hint: if using 'raise', please"
                             " make sure that it has been spelled correctly.)")

    else:
        fit_time = time.time() - start_time
        # _score will return dict if is_multimetric is True
        test_scores = _score(estimator, X_test, y_test, dy_test, scorer, is_multimetric)
        score_time = time.time() - start_time - fit_time
        if return_train_score:
            train_scores = _score(estimator, X_train, y_train, dy_train, scorer,
                                  is_multimetric)

    if verbose > 2:
        if is_multimetric:
            for scorer_name, score in test_scores.items():
                msg += ", %s=%s" % (scorer_name, score)
        else:
            msg += ", score=%s" % test_scores
    if verbose > 1:
        total_time = score_time + fit_time
        end_msg = "%s, total=%s" % (msg, logger.short_format_time(total_time))
        print("[CV] %s %s" % ((64 - len(end_msg)) * '.', end_msg))

    ret = [train_scores, test_scores] if return_train_score else [test_scores]

    if return_n_test_samples:
        ret.append(_num_samples(X_test))
    if return_times:
        ret.extend([fit_time, score_time])
    if return_parameters:
        ret.append(parameters)
    if return_estimator:
        ret.append(estimator)
    return ret
Пример #10
0
def _fit_and_score(estimator, X, y, scorer, train, test, verbose,
                   parameters, fit_params, return_train_score=False,
                   return_parameters=False, return_n_test_samples=False,
                   return_times=False, error_score='raise'):
    """
    Fit estimator and compute scores for a given dataset split.
    """
    if verbose > 1:
        if parameters is None:
            msg = ''
        else:
            msg = '%s' % (', '.join('%s=%s' % (k, v)
                                    for k, v in parameters.items()))
        LOG.info("[CV] %s %s", msg, (64 - len(msg)) * '.')

    # Adjust length of sample weights
    fit_params = fit_params if fit_params is not None else {}
    fit_params = dict([(k, _index_param_value(X, v, train))
                       for k, v in fit_params.items()])

    if parameters is not None:
        estimator.set_params(**parameters)

    start_time = time.time()

    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, y_test = _safe_split(estimator, X, y, test, train)

    try:
        if y_train is None:
            estimator.fit(X_train, **fit_params)
        else:
            estimator.fit(X_train, y_train, **fit_params)

    except Exception as e:
        # Note fit time as time until error
        fit_time = time.time() - start_time
        score_time = 0.0
        if error_score == 'raise':
            raise
        elif isinstance(error_score, numbers.Number):
            test_score = error_score
            if return_train_score:
                train_score = error_score
            warnings.warn("Classifier fit failed. The score on this train-test"
                          " partition for these parameters will be set to %f. "
                          "Details: \n%r" % (error_score, e), FitFailedWarning)
        else:
            raise ValueError("error_score must be the string 'raise' or a"
                             " numeric value. (Hint: if using 'raise', please"
                             " make sure that it has been spelled correctly.)")

    else:
        fit_time = time.time() - start_time
        test_score = [_score(estimator, X_test, y_test, s) for s in scorer]
        score_time = time.time() - start_time - fit_time
        if return_train_score:
            train_score = [_score(estimator, X_train, y_train, s)
                           for s in scorer]

    if verbose > 2:
        msg += ", score=".join(('%f' % ts for ts in test_score))
    if verbose > 1:
        total_time = score_time + fit_time
        end_msg = "%s, total=%s" % (msg, logger.short_format_time(total_time))
        LOG.info("[CV] %s %s", (64 - len(end_msg)) * '.', end_msg)

    ret = [train_score, test_score] if return_train_score else [test_score]

    if return_n_test_samples:
        ret.append(_num_samples(X_test))
    if return_times:
        ret.extend([fit_time, score_time])
    if return_parameters:
        ret.append(parameters)
    return ret
Пример #11
0
    def fit(
            self,
            X,  # type: TwoDimArrayLikeType
            y=None,  # type: Optional[Union[OneDimArrayLikeType, TwoDimArrayLikeType]]
            groups=None,  # type: Optional[OneDimArrayLikeType]
            **fit_params  # type: Any
    ):
        # type: (...) -> 'OptunaSearchCV'
        """Run fit with all sets of parameters.

        Args:
            X:
                Training data.

            y:
                Target variable.

            groups:
                Group labels for the samples used while splitting the dataset
                into train/test set.

            **fit_params:
                Parameters passed to ``fit`` on the estimator.

        Returns:
            self:
                Return self.
        """

        self._check_params()

        random_state = check_random_state(self.random_state)
        max_samples = self.subsample
        n_samples = _num_samples(X)
        old_level = logger.getEffectiveLevel()

        if self.verbose > 1:
            logger.setLevel(DEBUG)
        elif self.verbose > 0:
            logger.setLevel(INFO)
        else:
            logger.setLevel(WARNING)

        self.sample_indices_ = np.arange(n_samples)

        if type(max_samples) is float:
            max_samples = int(max_samples * n_samples)

        if max_samples < n_samples:
            self.sample_indices_ = random_state.choice(self.sample_indices_,
                                                       max_samples,
                                                       replace=False)

            self.sample_indices_.sort()

        X_res = safe_indexing(X, self.sample_indices_)
        y_res = safe_indexing(y, self.sample_indices_)
        groups_res = safe_indexing(groups, self.sample_indices_)
        fit_params_res = fit_params

        if fit_params_res is not None:
            fit_params_res = {
                key: _index_param_value(X, value, self.sample_indices_)
                for key, value in fit_params.items()
            }

        classifier = is_classifier(self.estimator)
        cv = check_cv(self.cv, y_res, classifier)

        self.n_splits_ = cv.get_n_splits(X_res, y_res, groups=groups_res)
        self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)

        if self.study is None:
            seed = random_state.randint(0, np.iinfo('int32').max)
            sampler = samplers.TPESampler(seed=seed)

            self.study_ = study_module.create_study(direction='maximize',
                                                    sampler=sampler)

        else:
            self.study_ = self.study

        objective = _Objective(self.estimator, self.param_distributions, X_res,
                               y_res, cv, self.enable_pruning,
                               self.error_score, fit_params_res, groups_res,
                               self.max_iter, self.return_train_score,
                               self.scorer_)

        logger.info('Searching the best hyperparameters using {} '
                    'samples...'.format(_num_samples(self.sample_indices_)))

        self.study_.optimize(objective,
                             n_jobs=self.n_jobs,
                             n_trials=self.n_trials,
                             timeout=self.timeout)

        logger.info('Finished hyperparemeter search!')

        if self.refit:
            self._refit(X, y, **fit_params)

        logger.setLevel(old_level)

        return self
Пример #12
0
def _fit_and_score_clean_test(estimator,
                              X,
                              y,
                              scorer,
                              train,
                              test,
                              verbose,
                              parameters,
                              fit_params,
                              return_train_score=False,
                              return_parameters=False,
                              return_n_test_samples=False,
                              return_times=False,
                              error_score='raise'):
    if verbose > 1:
        if parameters is None:
            msg = ''
        else:
            msg = '%s' % (', '.join('%s=%s' % (k, v)
                                    for k, v in parameters.items()))
        print("[CV] %s %s" % (msg, (64 - len(msg)) * '.'))

    # Adjust length of sample weights
    fit_params = fit_params if fit_params is not None else {}
    # MOD Get y_clean if available
    fit_params = fit_params.copy()
    y_clean = fit_params.pop('y_clean', None)
    fit_params = dict([(k, _index_param_value(X, v, train))
                       for k, v in fit_params.items()])

    test_scores = {}
    train_scores = {}
    if parameters is not None:
        estimator.set_params(**parameters)

    start_time = time.time()

    X_train, y_train = _safe_split(estimator, X, y, train)
    # MOD use y_clean for test if available
    X_test, y_test = _safe_split(estimator, X,
                                 y_clean if y_clean is not None else y, test,
                                 train)

    is_multimetric = not callable(scorer)
    n_scorers = len(scorer.keys()) if is_multimetric else 1

    try:
        if y_train is None:
            estimator.fit(X_train, **fit_params)
        else:
            estimator.fit(X_train, y_train, **fit_params)

    except Exception as e:
        # Note fit time as time until error
        fit_time = time.time() - start_time
        score_time = 0.0
        if error_score == 'raise':
            raise
        elif isinstance(error_score, numbers.Number):
            if is_multimetric:
                test_scores = dict(
                    zip(scorer.keys(), [
                        error_score,
                    ] * n_scorers))
                if return_train_score:
                    train_scores = dict(
                        zip(scorer.keys(), [
                            error_score,
                        ] * n_scorers))
            else:
                test_scores = error_score
                if return_train_score:
                    train_scores = error_score
            warnings.warn(
                "Classifier fit failed. The score on this train-test"
                " partition for these parameters will be set to %f. "
                "Details: \n%r" % (error_score, e), FitFailedWarning)
        else:
            raise ValueError("error_score must be the string 'raise' or a"
                             " numeric value. (Hint: if using 'raise', please"
                             " make sure that it has been spelled correctly.)")

    else:
        fit_time = time.time() - start_time
        # _score will return dict if is_multimetric is True
        test_scores = _score(estimator, X_test, y_test, scorer, is_multimetric)
        score_time = time.time() - start_time - fit_time
        if return_train_score:
            train_scores = _score(estimator, X_train, y_train, scorer,
                                  is_multimetric)

    if verbose > 2:
        if is_multimetric:
            for scorer_name, score in test_scores.items():
                msg += ", %s=%s" % (scorer_name, score)
        else:
            msg += ", score=%s" % test_scores
    if verbose > 1:
        total_time = score_time + fit_time
        end_msg = "%s, total=%s" % (msg, logger.short_format_time(total_time))
        print("[CV] %s %s" % ((64 - len(end_msg)) * '.', end_msg))

    ret = [train_scores, test_scores] if return_train_score else [test_scores]

    if return_n_test_samples:
        ret.append(_num_samples(X_test))
    if return_times:
        ret.extend([fit_time, score_time])
    if return_parameters:
        ret.append(parameters)
    return ret
Пример #13
0
def _fit_and_score(
    estimator,
    X,
    y,
    scorer,
    train,
    test,
    verbose,
    parameters,
    fit_params,
    return_train_score=False,
    return_parameters=False,
    return_n_test_samples=False,
    return_times=False,
    error_score="raise",
):
    """
    Fit estimator and compute scores for a given dataset split.
    """
    if verbose > 1:
        if parameters is None:
            msg = ""
        else:
            msg = "%s" % (", ".join("%s=%s" % (k, v)
                                    for k, v in parameters.items()))
        LOG.info("[CV] %s %s", msg, (64 - len(msg)) * ".")

    # Adjust length of sample weights
    fit_params = fit_params if fit_params is not None else {}
    fit_params = dict([(k, _index_param_value(X, v, train))
                       for k, v in fit_params.items()])

    if parameters is not None:
        estimator.set_params(**parameters)

    start_time = time.time()

    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, y_test = _safe_split(estimator, X, y, test, train)

    try:
        if y_train is None:
            estimator.fit(X_train, **fit_params)
        else:
            estimator.fit(X_train, y_train, **fit_params)

    except Exception as e:
        # Note fit time as time until error
        fit_time = time.time() - start_time
        score_time = 0.0
        if error_score == "raise":
            raise
        elif isinstance(error_score, numbers.Number):
            test_score = error_score
            if return_train_score:
                train_score = error_score
            warnings.warn(
                "Classifier fit failed. The score on this train-test"
                " partition for these parameters will be set to %f. "
                "Details: \n%r" % (error_score, e),
                FitFailedWarning,
            )
        else:
            raise ValueError("error_score must be the string 'raise' or a"
                             " numeric value. (Hint: if using 'raise', please"
                             " make sure that it has been spelled correctly.)")

    else:
        fit_time = time.time() - start_time
        test_score = [_score(estimator, X_test, y_test, s) for s in scorer]
        score_time = time.time() - start_time - fit_time
        if return_train_score:
            train_score = [
                _score(estimator, X_train, y_train, s) for s in scorer
            ]

    if verbose > 2:
        msg += ", score=".join(("%f" % ts for ts in test_score))
    if verbose > 1:
        total_time = score_time + fit_time
        end_msg = "%s, total=%s" % (msg, logger.short_format_time(total_time))
        LOG.info("[CV] %s %s", (64 - len(end_msg)) * ".", end_msg)

    ret = [train_score, test_score] if return_train_score else [test_score]

    if return_n_test_samples:
        ret.append(_num_samples(X_test))
    if return_times:
        ret.extend([fit_time, score_time])
    if return_parameters:
        ret.append(parameters)
    return ret
Пример #14
0
def _model_fit_and_score(estimator_str,
                         X,
                         y,
                         scorer,
                         train,
                         test,
                         verbose,
                         parameters,
                         fit_params,
                         return_train_score=False,
                         return_parameters=False,
                         return_n_test_samples=False,
                         return_times=False,
                         error_score='raise'):
    """

    """
    if verbose > 1:
        msg = '[CV model=%s]' % estimator_str.upper()
        if parameters is not None:
            msg += ' %s' % (', '.join('%s=%s' % (k, v)
                                      for k, v in parameters.items()))
        LOG.info("%s %s", msg, (89 - len(msg)) * '.')

    estimator = _clf_build(estimator_str)

    # Adjust length of sample weights
    fit_params = fit_params if fit_params is not None else {}
    fit_params = dict([(k, _index_param_value(X, v, train))
                       for k, v in fit_params.items()])

    if parameters is not None:
        estimator.set_params(**parameters)

    start_time = time.time()

    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, y_test = _safe_split(estimator, X, y, test, train)

    try:
        if y_train is None:
            estimator.fit(X_train, **fit_params)
        else:
            estimator.fit(X_train, y_train, **fit_params)

    except Exception as e:
        # Note fit time as time until error
        fit_time = time.time() - start_time
        score_time = 0.0
        if error_score == 'raise':
            raise
        elif isinstance(error_score, numbers.Number):
            test_score = error_score
            if return_train_score:
                train_score = error_score
            warnings.warn(
                "Classifier fit failed. The score on this train-test"
                " partition for these parameters will be set to %f. "
                "Details: \n%r" % (error_score, e), FitFailedWarning)
        else:
            raise ValueError("error_score must be the string 'raise' or a"
                             " numeric value. (Hint: if using 'raise', please"
                             " make sure that it has been spelled correctly.)")

    else:
        fit_time = time.time() - start_time
        scorer = check_scoring(estimator, scoring=scorer)
        test_score = _score(estimator, X_test, y_test, scorer)
        score_time = time.time() - start_time - fit_time
        if return_train_score:
            train_score = _score(estimator, X_train, y_train, scorer)

    if verbose > 2:
        msg += ", score=%f" % test_score
    if verbose > 1:
        total_time = score_time + fit_time
        end_msg = "%s, total=%s" % (msg, logger.short_format_time(total_time))
        LOG.info(end_msg)

    ret = [train_score, test_score] if return_train_score else [test_score]

    if return_n_test_samples:
        ret.append(_num_samples(X_test))
    if return_times:
        ret.extend([fit_time, score_time])
    if return_parameters:
        ret.append((estimator_str, parameters))
    return ret
Пример #15
0
def _fit_and_score(estimator, X, y, scorer, train, test, verbose,
                   parameters, fit_params, return_train_score=False,
                   return_parameters=False, return_n_test_samples=False,
                   return_times=False, return_estimator=False,
                   error_score=np.nan):
    """override the sklearn.model_selection._validation._fit_and_score

    Parameters
    ----------
    estimator : estimator object implementing 'fit'
        The object to use to fit the data.
    X : array-like of shape at least 2D
        The data to fit.
    y : array-like, optional, default: None
        The target variable to try to predict in the case of
        supervised learning.
    scorer : A single callable or dict mapping scorer name to the callable
        If it is a single callable, the return value for ``train_scores`` and
        ``test_scores`` is a single float.
        For a dict, it should be one mapping the scorer name to the scorer
        callable object / function.
        The callable object / fn should have signature
        ``scorer(estimator, X, y)``.
    train : array-like, shape (n_train_samples,)
        Indices of training samples.
    test : array-like, shape (n_test_samples,)
        Indices of test samples.
    verbose : integer
        The verbosity level.
    error_score : 'raise' or numeric
        Value to assign to the score if an error occurs in estimator fitting.
        If set to 'raise', the error is raised.
        If a numeric value is given, FitFailedWarning is raised. This parameter
        does not affect the refit step, which will always raise the error.
    parameters : dict or None
        Parameters to be set on the estimator.
    fit_params : dict or None
        Parameters that will be passed to ``estimator.fit``.
    return_train_score : boolean, optional, default: False
        Compute and return score on training set.
    return_parameters : boolean, optional, default: False
        Return parameters that has been used for the estimator.
    return_n_test_samples : boolean, optional, default: False
        Whether to return the ``n_test_samples``
    return_times : boolean, optional, default: False
        Whether to return the fit/score times.
    return_estimator : boolean, optional, default: False
        Whether to return the fitted estimator.
    Returns
    -------
    train_scores : dict of scorer name -> float, optional
        Score on training set (for all the scorers),
        returned only if `return_train_score` is `True`.
    test_scores : dict of scorer name -> float, optional
        Score on testing set (for all the scorers).
    n_test_samples : int
        Number of test samples.
    fit_time : float
        Time spent for fitting in seconds.
    score_time : float
        Time spent for scoring in seconds.
    parameters : dict or None, optional
        The parameters that have been evaluated.
    estimator : estimator object
        The fitted estimator
    """
    if verbose > 1:
        if parameters is None:
            msg = ''
        else:
            msg = '%s' % (', '.join('%s=%s' % (k, v)
                          for k, v in parameters.items()))
        print("[CV] %s %s" % (msg, (64 - len(msg)) * '.'))

    # Adjust length of sample weights
    fit_params = fit_params if fit_params is not None else {}
    fit_params = {k: _index_param_value(X, v, train)
                  for k, v in fit_params.items()}

    train_scores = {}
    if parameters is not None:
        estimator.set_params(**parameters)

    start_time = time.time()

    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, y_test = _safe_split(estimator, X, y, test, train)

    is_multimetric = not callable(scorer)
    n_scorers = len(scorer.keys()) if is_multimetric else 1

    ##########################################################
    # Changes on sklearn.model_selection._search
    estimator_params = estimator.get_params()

    try:
        if y_train is None:
            estimator.fit(X_train, **fit_params)
        else:
            for param in estimator_params.keys():
                # suppose work for both pipeline or single
                # keras model
                if param.endswith('validation_data'):
                    fit_params.update(
                        {param: (X_test, y_test)})
                    break
            estimator.fit(X_train, y_train, **fit_params)
    # Changes end
    ##########################################################

    except Exception as e:
        # Note fit time as time until error
        fit_time = time.time() - start_time
        score_time = 0.0
        if error_score == 'raise':
            raise
        elif isinstance(error_score, numbers.Number):
            if is_multimetric:
                test_scores = dict(zip(scorer.keys(),
                                   [error_score, ] * n_scorers))
                if return_train_score:
                    train_scores = dict(zip(scorer.keys(),
                                        [error_score, ] * n_scorers))
            else:
                test_scores = error_score
                if return_train_score:
                    train_scores = error_score
            warnings.warn("Estimator fit failed. The score on this train-test"
                          " partition for these parameters will be set to %f. "
                          "Details: \n%s" %
                          (error_score, format_exception_only(type(e), e)[0]),
                          FitFailedWarning)
        else:
            raise ValueError("error_score must be the string 'raise' or a"
                             " numeric value. (Hint: if using 'raise', please"
                             " make sure that it has been spelled correctly.)")

    else:
        fit_time = time.time() - start_time
        # _score will return dict if is_multimetric is True
        test_scores = _score(estimator, X_test, y_test, scorer, is_multimetric)
        score_time = time.time() - start_time - fit_time
        if return_train_score:
            train_scores = _score(estimator, X_train, y_train, scorer,
                                  is_multimetric)
    if verbose > 2:
        if is_multimetric:
            for scorer_name in sorted(test_scores):
                msg += ", %s=" % scorer_name
                if return_train_score:
                    msg += "(train=%.3f," % train_scores[scorer_name]
                    msg += " test=%.3f)" % test_scores[scorer_name]
                else:
                    msg += "%.3f" % test_scores[scorer_name]
        else:
            msg += ", score="
            msg += ("%.3f" % test_scores if not return_train_score else
                    "(train=%.3f, test=%.3f)" % (train_scores, test_scores))

    if verbose > 1:
        total_time = score_time + fit_time
        print(_message_with_time('CV', msg, total_time))

    ret = [train_scores, test_scores] if return_train_score else [test_scores]

    if return_n_test_samples:
        ret.append(_num_samples(X_test))
    if return_times:
        ret.extend([fit_time, score_time])
    if return_parameters:
        ret.append(parameters)
    if return_estimator:
        ret.append(estimator)
    return ret
Пример #16
0
def _fit_and_score_with_extra_data(estimator,
                                   X,
                                   y,
                                   scorer,
                                   train,
                                   test,
                                   verbose,
                                   parameters,
                                   fit_params,
                                   return_train_score=False,
                                   return_parameters=False,
                                   return_n_test_samples=False,
                                   return_times=False,
                                   error_score='raise',
                                   return_estimator=False):
    """Fit estimator and compute scores for a given dataset split. Allows for scorers that hold more information than
    just a vanilla scorer (ie, Frankenscorer!)

    Parameters
    ----------
    estimator : estimator object implementing 'fit'
        The object to use to fit the data.

    X : array-like of shape at least 2D
        The data to fit.

    y : array-like, optional, default: None
        The target variable to try to predict in the case of
        supervised learning.

    scorer : callable
        A scorer callable object / function with signature
        ``scorer(estimator, X, y)``. which returns a (dict, score) where the dict is relevant scoring data

    train : array-like, shape (n_train_samples,)
        Indices of training samples.

    test : array-like, shape (n_test_samples,)
        Indices of test samples.

    verbose : integer
        The verbosity level.

    error_score : 'raise' (default) or numeric
        Value to assign to the score if an error occurs in estimator fitting.
        If set to 'raise', the error is raised. If a numeric value is given,
        FitFailedWarning is raised. This parameter does not affect the refit
        step, which will always raise the error.

    parameters : dict or None
        Parameters to be set on the estimator.

    fit_params : dict or None
        Parameters that will be passed to ``estimator.fit``.

    return_train_score : boolean, optional, default: False
        Compute and return score on training set.

    return_parameters : boolean, optional, default: False
        Return parameters that has been used for the estimator.

    return_estimator : boolean, optional, default: False
        Return the fit estimator

    Returns
    -------
    train_score_data : dict, optional
        Dictionary of scores on training set, returned only if `return_train_score` is `True`

    train_score : float, optional
        Score on training set, returned only if `return_train_score` is `True`.

    test_score_data : dict
        Dictionary of scores on test set

    test_score : float
        Score on test set.

    n_test_samples : int
        Number of test samples.

    fit_time : float
        Time spent for fitting in seconds.

    score_time : float
        Time spent for scoring in seconds.

    parameters : dict or None, optional
        The parameters that have been evaluated.

    estimator : fit estimator, optional
        The estimator that is fit
    """
    if verbose > 1:
        if parameters is None:
            msg = ''
        else:
            msg = '%s' % (', '.join('%s=%s' % (k, v)
                                    for k, v in parameters.items()))
        print("[CV] %s %s" % (msg, (64 - len(msg)) * '.'))

    # Adjust length of sample weights
    fit_params = fit_params if fit_params is not None else {}
    fit_params = dict([(k, _index_param_value(X, v, train))
                       for k, v in fit_params.items()])

    if parameters is not None:
        estimator.set_params(**parameters)

    start_time = time.time()

    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, y_test = _safe_split(estimator, X, y, test, train)

    try:
        if y_train is None:
            estimator.fit(X_train, **fit_params)
        else:
            estimator.fit(X_train, y_train, **fit_params)

    except Exception as e:
        # Note fit time as time until error
        fit_time = time.time() - start_time
        score_time = 0.0
        if error_score == 'raise':
            raise
        elif isinstance(error_score, numbers.Number):
            test_score = error_score
            test_score_data = {}
            if return_train_score:
                train_score = error_score
                train_score_data = {}
            warnings.warn(
                "Classifier fit failed. The score on this train-test"
                " partition for these parameters will be set to %f. "
                "Details: \n%r" % (error_score, e), FitFailedWarning)
        else:
            raise ValueError("error_score must be the string 'raise' or a"
                             " numeric value. (Hint: if using 'raise', please"
                             " make sure that it has been spelled correctly.)")

    else:
        fit_time = time.time() - start_time
        test_score_data, test_score = _score_no_number_check(
            estimator, X_test, y_test, scorer)
        score_time = time.time() - start_time - fit_time
        if return_train_score:
            train_score_data, train_score = _score_no_number_check(
                estimator, X_train, y_train, scorer)

    if verbose > 2:
        msg += ", score=%s score_data=%s" % (test_score, test_score_data)
    if verbose > 1:
        total_time = score_time + fit_time
        end_msg = "%s, total=%s" % (msg, logger.short_format_time(total_time))
        print("[CV] %s %s" % ((64 - len(end_msg)) * '.', end_msg))

    ret = [train_score_data, train_score, test_score_data, test_score] if return_train_score \
        else [test_score_data, test_score]

    if return_n_test_samples:
        ret.append(_num_samples(X_test))
    if return_times:
        ret.extend([fit_time, score_time])
    if return_parameters:
        ret.append(parameters)
    if return_estimator:
        ret.append(estimator)
    return ret
def monkeypatch_fit_and_score(estimator, X, y, scorer, train, test, verbose,
                              parameters, fit_params, return_train_score=False,
                              return_parameters=False, return_n_test_samples=False,
                              return_times=False, return_estimator=False,
                              error_score='raise-deprecating'):
    if verbose > 1:
        if parameters is None:
            msg = ''
        else:
            msg = '%s' % (', '.join('%s=%s' % (k, v)
                          for k, v in parameters.items()))
        print("[CV] %s %s" % (msg, (64 - len(msg)) * '.'))

    # Adjust length of sample weights
    fit_params = fit_params if fit_params is not None else {}
    fit_params = dict([(k, _index_param_value(X, v, train))
                      for k, v in fit_params.items()])

    test_scores = {}
    train_scores = {}
    if parameters is not None:
        estimator.set_params(**parameters)

    start_time = time.time()

    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, y_test = _safe_split(estimator, X, y, test, train)

    is_multimetric = not callable(scorer)
    n_scorers = len(scorer.keys()) if is_multimetric else 1

    # ===================================================================
    # BEGIN MONKEYPATCH MODIFICATION
    # ===================================================================

    try:
        if isinstance(estimator, Pipeline):
            pipe = estimator
            est_name, estimator = pipe.steps.pop()

            fit_params_est = {}
            fit_param_keys = fit_params.keys()

            for pname in fit_param_keys:
                step, param = pname.split('__', 1)
                if step == est_name:
                    fit_params_est[param] = fit_params.pop(pname)

        else:
            pipe = None

        if y_train is None:
            if pipe is not None:
                X_train = pipe.fit_transform(X_train, **fit_params)
                X_test = pipe.transform(X_test, **fit_params)
                fit_params = fit_params_est
            estimator.fit(X_train, **fit_params)
        else:
            if pipe is not None:
                X_train = pipe.fit_transform(X_train, y_train, **fit_params)
                X_test = pipe.transform(X_test, **fit_params)
                fit_params = fit_params_est
            estimator.fit(X_train, y_train, **fit_params)

    # ===================================================================
    # END MONKEYPATCH MODIFICATION
    # ===================================================================

    except Exception as e:
        # Note fit time as time until error
        fit_time = time.time() - start_time
        score_time = 0.0
        if error_score == 'raise':
            raise
        elif isinstance(error_score, numbers.Number):
            if is_multimetric:
                test_scores = dict(zip(scorer.keys(),
                                   [error_score, ] * n_scorers))
                if return_train_score:
                    train_scores = dict(zip(scorer.keys(),
                                        [error_score, ] * n_scorers))
            else:
                test_scores = error_score
                if return_train_score:
                    train_scores = error_score
            warnings.warn("Classifier fit failed. The score on this train-test"
                          " partition for these parameters will be set to %f. "
                          "Details: \n%r" % (error_score, e), FitFailedWarning)
        else:
            raise ValueError("error_score must be the string 'raise' or a"
                             " numeric value. (Hint: if using 'raise', please"
                             " make sure that it has been spelled correctly.)")

    else:
        fit_time = time.time() - start_time
        # _score will return dict if is_multimetric is True
        test_scores = _score(estimator, X_test, y_test, scorer, is_multimetric)
        score_time = time.time() - start_time - fit_time
        if return_train_score:
            train_scores = _score(estimator, X_train, y_train, scorer,
                                  is_multimetric)

    if verbose > 2:
        if is_multimetric:
            for scorer_name, score in test_scores.items():
                msg += ", %s=%s" % (scorer_name, score)
        else:
            msg += ", score=%s" % test_scores
    if verbose > 1:
        total_time = score_time + fit_time
        end_msg = "%s, total=%s" % (msg, logger.short_format_time(total_time))
        print("[CV] %s %s" % ((64 - len(end_msg)) * '.', end_msg))

    ret = [train_scores, test_scores] if return_train_score else [test_scores]

    if return_n_test_samples:
        ret.append(_num_samples(X_test))
    if return_times:
        ret.extend([fit_time, score_time])
    if return_parameters:
        ret.append(parameters)
    return ret
Пример #18
0
def nested_fit_and_score(
        estimator, X, y, scorer, train, test, verbose=1,
        parameters=None, fit_params=None, return_train_score=False,
        return_times=False, error_score='raise'):
    """

    """
    from sklearn.externals.joblib.logger import short_format_time

    # Adjust length of sample weights
    fit_params = fit_params if fit_params is not None else {}
    fit_params = dict([(k, _index_param_value(X, v, train))
                      for k, v in fit_params.items()])

    if parameters is not None:
        estimator.set_params(**parameters)

    start_time = time.time()

    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, y_test = _safe_split(estimator, X, y, test, train)

    if verbose > 1:
        LOG.info('CV iteration: Xtrain=%d, Ytrain=%d/%d -- Xtest=%d, Ytest=%d/%d.',
                 len(X_train), len(X_train) - sum(y_train), sum(y_train),
                 len(X_test), len(X_test) - sum(y_test), sum(y_test))

    try:
        if y_train is None:
            estimator.fit(X_train, **fit_params)
        else:
            estimator.fit(X_train, y_train, **fit_params)

    except Exception as e:
        # Note fit time as time until error
        fit_time = time.time() - start_time
        score_time = 0.0
        if error_score == 'raise':
            raise
        elif isinstance(error_score, numbers.Number):
            test_score = error_score
            if return_train_score:
                train_score = error_score
            LOG.warn("Classifier fit failed. The score on this train-test"
                     " partition for these parameters will be set to %f. "
                     "Details: \n%r", error_score, e)
        else:
            raise ValueError("error_score must be the string 'raise' or a"
                             " numeric value. (Hint: if using 'raise', please"
                             " make sure that it has been spelled correctly.)")

    else:
        fit_time = time.time() - start_time

        test_score = None
        score_time = 0.0
        if len(set(y_test)) > 1:
            test_score = _score(estimator, X_test, y_test, scorer)
            score_time = time.time() - start_time - fit_time
        else:
            LOG.warn('Test set has no positive labels, scoring has been skipped '
                     'in this loop.')

        if return_train_score:
            train_score = _score(estimator, X_train, y_train, scorer)

        acc_score = _score(estimator, X_test, y_test,
                           check_scoring(estimator, scoring='accuracy'))

    if verbose > 0:
        total_time = score_time + fit_time
        if test_score is not None:
            LOG.info('Iteration took %s, score=%f, accuracy=%f.',
                     short_format_time(total_time), test_score, acc_score)
        else:
            LOG.info('Iteration took %s, score=None, accuracy=%f.',
                     short_format_time(total_time), acc_score)

    ret = {
        'test': {'score': test_score, 'accuracy': acc_score}
    }

    if return_train_score:
        ret['train'] = {'score': train_score}

    if return_times:
        ret['times'] = [fit_time, score_time]

    return ret, estimator
Пример #19
0
def _fit_and_score(estimator,
                   X,
                   y,
                   scorer,
                   train,
                   test,
                   verbose,
                   parameters,
                   fit_params,
                   return_train_score=False,
                   return_parameters=False,
                   return_n_test_samples=False,
                   return_times=False,
                   error_score='raise',
                   to_evaluate=None):
    """
    Fit estimator and compute scores for a given dataset split.
    """

    #if verbose > 1:
    if parameters is None:
        msg = ''
    else:
        msg = '%s' % (', '.join('%s=%s' % (k, v)
                                for k, v in parameters.items()))
    LOG.info("[CV] %s %s", msg, (64 - len(msg)) * '.')

    # Adjust length of sample weights
    fit_params = fit_params if fit_params is not None else {}
    fit_params = dict([(k, _index_param_value(X, v, train))
                       for k, v in fit_params.items()])

    if parameters is not None:
        estimator.set_params(**parameters)

    start_time = time.time()

    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, y_test = _safe_split(estimator, X, y, test, train)
    try:
        if y_train is None:
            estimator.fit(X_train, **fit_params)
        else:
            estimator.fit(X_train, y_train, **fit_params)

    except Exception as e:
        # Note fit time as time until error
        fit_time = time.time() - start_time
        score_time = 0.0
        if error_score == 'raise':
            raise
        elif isinstance(error_score, numbers.Number):
            test_score = error_score
            if return_train_score:
                train_score = error_score
            warnings.warn(
                "Classifier fit failed. The score on this train-test"
                " partition for these parameters will be set to {}. "
                "Details: \n{} \n model: {}".format(error_score, e, estimator),
                FitFailedWarning)
        else:
            raise ValueError("error_score must be the string 'raise' or a"
                             " numeric value. (Hint: if using 'raise', please"
                             " make sure that it has been spelled correctly.)")

    else:
        fit_time = time.time() - start_time
        test_score = {s: _score(estimator, X_test, y_test, s) for s in scorer}

        score_time = time.time() - start_time - fit_time
        if return_train_score:
            train_score = [
                _score(estimator, X_train, y_train, s) for s in scorer
            ]

    #if verbose > 2:
    #msg += ", score=".join(('%f' % ts for ts in test_score))
    msg += ", score=".format(test_score)
    #if verbose > 1:
    total_time = score_time + fit_time
    end_msg = "%s, total=%s" % (msg, logger.short_format_time(total_time))
    LOG.info("[CV] %s %s", (64 - len(end_msg)) * '.', end_msg)

    ret = [train_score, test_score] if return_train_score else [test_score]
    ################################################################################################################
    if to_evaluate:
        res_evaluation = dict()
        for ds_name, ds_vals in to_evaluate.items():
            for s in scorer:
                res_evaluation[ds_name + "_" + s] = _score(
                    estimator, ds_vals["x"], ds_vals["y"], s)

    ################################################################################################################
    if return_n_test_samples:
        ret.append(_num_samples(X_test))

    if return_times:
        ret.extend([fit_time, score_time])

    if return_parameters:
        ret.append(parameters)

    if to_evaluate:
        ret.append(res_evaluation)
        """
        for k, v in res_evaluation.items():
            ret.append({k: v})
        """
    return np.squeeze(ret)
Пример #20
0
 def _check_fit_params(X, fit_params, indices=None):
     return {
         k: _index_param_value(X, v, indices)
         for k, v in fit_params.items()
     }
Пример #21
0
def _fit_and_predict(estimator, X, y, train, test, verbose, fit_params,
                     method):
    """Fit estimator and predict values for a given dataset split.

    Read more in the :ref:`User Guide <cross_validation>`.

    Parameters
    ----------
    estimator : estimator object implementing 'fit' and 'predict'
        The object to use to fit the data.

    X : array-like of shape at least 2D
        The data to fit.

    y : array-like, optional, default: None
        The target variable to try to predict in the case of
        supervised learning.

    train : array-like, shape (n_train_samples,)
        Indices of training samples.

    test : array-like, shape (n_test_samples,)
        Indices of test samples.

    verbose : integer
        The verbosity level.

    fit_params : dict or None
        Parameters that will be passed to ``estimator.fit``.

    method : string
        Invokes the passed method name of the passed estimator.

    Returns
    -------
    predictions : sequence
        Result of calling 'estimator.method'

    test : array-like
        This is the value of the test parameter
    """
    # Adjust length of sample weights
    fit_params = fit_params if fit_params is not None else {}
    fit_params = dict([(k, _index_param_value(X, v, train))
                       for k, v in fit_params.items()])

    if "indices" in inspect.signature(
            estimator.fit).parameters:  # the different part
        fit_params["indices"] = train

    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, _ = _safe_split(estimator, X, y, test, train)

    if y_train is None:
        estimator.fit(X_train, **fit_params)
    else:
        estimator.fit(X_train, y_train, **fit_params)
    func = getattr(estimator, method)
    predictions = func(X_test)
    if method in ['decision_function', 'predict_proba', 'predict_log_proba']:
        n_classes = len(set(y))
        predictions_ = np.zeros((X_test.shape[0], n_classes))
        if method == 'decision_function' and len(estimator.classes_) == 2:
            predictions_[:, estimator.classes_[-1]] = predictions
        else:
            predictions_[:, estimator.classes_] = predictions
        predictions = predictions_
    return predictions, test
Пример #22
0
def _fit_and_score(  # noqa
    estimator,
    X,
    y,
    scorer,
    train,
    test,
    verbose,
    parameters,
    fit_params,
    return_train_score=False,
    return_parameters=False,
    return_n_test_samples=False,
    return_times=False,
    return_estimator=False,
    error_score="raise-deprecating",
):
    if verbose > 1:
        if parameters is None:
            msg = ""
        else:
            msg = "%s" % (", ".join("%s=%s" % (k, v) for k, v in parameters.items()))
        print("[CV] %s %s" % (msg, (64 - len(msg)) * "."))

    # Adjust length of sample weights
    fit_params = fit_params if fit_params is not None else {}
    fit_params = dict(
        [(k, _index_param_value(X, v, train)) for k, v in fit_params.items()]
    )

    train_scores = {}
    if parameters is not None:
        estimator.set_params(**parameters)

    start_time = time.time()

    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, y_test = _safe_split(estimator, X, y, test, train)

    is_multimetric = not callable(scorer)
    n_scorers = len(scorer.keys()) if is_multimetric else 1

    try:
        if y_train is None:
            estimator.fit(X_train, **fit_params)
        else:
            estimator.fit(X_train, y_train, **fit_params)

    except Exception as e:
        # Note fit time as time until error
        fit_time = time.time() - start_time
        score_time = 0.0
        if error_score == "raise":
            raise
        elif error_score == "raise-deprecating":
            warnings.warn(
                "From version 0.22, errors during fit will result "
                "in a cross validation score of NaN by default. Use "
                "error_score='raise' if you want an exception "
                "raised or error_score=np.nan to adopt the "
                "behavior from version 0.22.",
                FutureWarning,
            )
            raise
        elif isinstance(error_score, numbers.Number):
            if is_multimetric:
                test_scores = dict(zip(scorer.keys(), [error_score] * n_scorers))
                if return_train_score:
                    train_scores = dict(zip(scorer.keys(), [error_score] * n_scorers))
            else:
                test_scores = error_score
                if return_train_score:
                    train_scores = error_score
            warnings.warn(
                "Estimator fit failed. The score on this train-test"
                " partition for these parameters will be set to %f. "
                "Details: \n%s" % (error_score, format_exception_only(type(e), e)[0]),
                FitFailedWarning,
            )
        else:
            raise ValueError(
                "error_score must be the string 'raise' or a"
                " numeric value. (Hint: if using 'raise', please"
                " make sure that it has been spelled correctly.)"
            )

    else:
        fit_time = time.time() - start_time
        # _score will return dict if is_multimetric is True
        # **PATCH** - send train / test indices when use_index is True
        use_index = any(is_use_index(key) for key in scorer.keys())

        test_scores = _score(
            estimator,
            X_test,
            y_test,
            scorer,
            is_multimetric,
            use_or_not(use_index, test),
        )
        score_time = time.time() - start_time - fit_time
        if return_train_score:
            train_scores = _score(
                estimator,
                X_train,
                y_train,
                scorer,
                is_multimetric,
                use_or_not(use_index, train),
            )

    if verbose > 2:
        if is_multimetric:
            for scorer_name, score in test_scores.items():
                msg += ", %s=%s" % (scorer_name, score)
        else:
            msg += ", score=%s" % test_scores
    if verbose > 1:
        total_time = score_time + fit_time
        end_msg = "%s, total=%s" % (msg, logger.short_format_time(total_time))
        print("[CV] %s %s" % ((64 - len(end_msg)) * ".", end_msg))

    ret = [train_scores, test_scores] if return_train_score else [test_scores]

    if return_n_test_samples:
        ret.append(_num_samples(X_test))
    if return_times:
        ret.extend([fit_time, score_time])
    if return_parameters:
        ret.append(parameters)
    if return_estimator:
        ret.append(estimator)
    return ret
Пример #23
0
def nested_fit_and_score(estimator,
                         X,
                         y,
                         scorer,
                         train,
                         test,
                         verbose=1,
                         parameters=None,
                         fit_params=None,
                         return_train_score=False,
                         return_times=False,
                         error_score='raise'):
    """

    """
    from sklearn.externals.joblib.logger import short_format_time

    # Adjust length of sample weights
    fit_params = fit_params if fit_params is not None else {}
    fit_params = dict([(k, _index_param_value(X, v, train))
                       for k, v in fit_params.items()])

    if parameters is not None:
        estimator.set_params(**parameters)

    start_time = time.time()

    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, y_test = _safe_split(estimator, X, y, test, train)

    if verbose > 1:
        LOG.info(
            'CV iteration: Xtrain=%d, Ytrain=%d/%d -- Xtest=%d, Ytest=%d/%d.',
            len(X_train),
            len(X_train) - sum(y_train), sum(y_train), len(X_test),
            len(X_test) - sum(y_test), sum(y_test))

    try:
        if y_train is None:
            estimator.fit(X_train, **fit_params)
        else:
            estimator.fit(X_train, y_train, **fit_params)

    except Exception as e:
        # Note fit time as time until error
        fit_time = time.time() - start_time
        score_time = 0.0
        if error_score == 'raise':
            raise
        elif isinstance(error_score, numbers.Number):
            test_score = error_score
            if return_train_score:
                train_score = error_score
            LOG.warning(
                "Classifier fit failed. The score on this train-test"
                " partition for these parameters will be set to %f. "
                "Details: \n%r", error_score, e)
        else:
            raise ValueError("error_score must be the string 'raise' or a"
                             " numeric value. (Hint: if using 'raise', please"
                             " make sure that it has been spelled correctly.)")

    else:
        fit_time = time.time() - start_time

        test_score = None
        score_time = 0.0
        if len(set(y_test)) > 1:
            test_score = _score(estimator, X_test, y_test, scorer)
            score_time = time.time() - start_time - fit_time
        else:
            LOG.warning(
                'Test set has no positive labels, scoring has been skipped '
                'in this loop.')

        if return_train_score:
            train_score = _score(estimator, X_train, y_train, scorer)

        acc_score = _score(estimator, X_test, y_test,
                           check_scoring(estimator, scoring='accuracy'))

    if verbose > 0:
        total_time = score_time + fit_time
        if test_score is not None:
            LOG.info('Iteration took %s, score=%f, accuracy=%f.',
                     short_format_time(total_time), test_score, acc_score)
        else:
            LOG.info('Iteration took %s, score=None, accuracy=%f.',
                     short_format_time(total_time), acc_score)

    ret = {'test': {'score': test_score, 'accuracy': acc_score}}

    if return_train_score:
        ret['train'] = {'score': train_score}

    if return_times:
        ret['times'] = [fit_time, score_time]

    return ret, estimator
Пример #24
0
 def _check_fit_params(X, fit_params, train):
     return {
         k: _index_param_value(X, v, train)
         for k, v in fit_params.items()
     }
Пример #25
0
def _fit_and_score(estimator,
                   X,
                   y,
                   scorer,
                   train,
                   test,
                   verbose,
                   parameters,
                   fit_params,
                   return_train_score=False,
                   return_parameters=False,
                   return_n_test_samples=False,
                   return_times=False,
                   error_score='raise'):
    """Fit estimator and compute scores for a given dataset split."""
    #  This code is adapted from sklearn
    from sklearn.model_selection._validation import _index_param_value
    from sklearn.utils.metaestimators import _safe_split
    from sklearn.utils.validation import _num_samples

    if verbose > 1:
        if parameters is None:
            msg = ''
        else:
            msg = '%s' % (', '.join('%s=%s' % (k, v)
                                    for k, v in parameters.items()))
        print("[CV] %s %s" % (msg, (64 - len(msg)) * '.'))

    # Adjust length of sample weights
    fit_params = fit_params if fit_params is not None else {}
    fit_params = dict([(k, _index_param_value(X, v, train))
                       for k, v in fit_params.items()])

    if parameters is not None:
        estimator.set_params(**parameters)

    start_time = time.time()

    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, y_test = _safe_split(estimator, X, y, test, train)

    try:
        if y_train is None:
            estimator.fit(X_train, **fit_params)
        else:
            estimator.fit(X_train, y_train, **fit_params)

    except Exception as e:
        # Note fit time as time until error
        fit_time = time.time() - start_time
        score_time = 0.0
        if error_score == 'raise':
            raise
        elif isinstance(error_score, numbers.Number):
            test_score = error_score
            if return_train_score:
                train_score = error_score
            warn("Classifier fit failed. The score on this train-test"
                 " partition for these parameters will be set to %f. "
                 "Details: \n%r" % (error_score, e))
        else:
            raise ValueError("error_score must be the string 'raise' or a"
                             " numeric value. (Hint: if using 'raise', please"
                             " make sure that it has been spelled correctly.)")

    else:
        fit_time = time.time() - start_time
        test_score = _score(estimator, X_test, y_test, scorer)
        score_time = time.time() - start_time - fit_time
        if return_train_score:
            train_score = _score(estimator, X_train, y_train, scorer)

    if verbose > 2:
        msg += ", score=%f" % test_score
    if verbose > 1:
        total_time = score_time + fit_time
        end_msg = "%s, total=%s" % (msg, logger.short_format_time(total_time))
        print("[CV] %s %s" % ((64 - len(end_msg)) * '.', end_msg))

    ret = [train_score, test_score] if return_train_score else [test_score]

    if return_n_test_samples:
        ret.append(_num_samples(X_test))
    if return_times:
        ret.extend([fit_time, score_time])
    if return_parameters:
        ret.append(parameters)
    return ret
Пример #26
0
def _fit_and_score(estimator, X, y, scorer, train, test, verbose,
                   parameters, fit_params, return_train_score=False,
                   return_parameters=False, return_n_test_samples=False,
                   return_times=False, error_score='raise',
                   return_estimator=False, return_idx=False):
    """Fit estimator and compute scores for a given dataset split.

    Parameters
    ----------
    estimator : estimator object implementing 'fit'
        The object to use to fit the data.

    X : array-like of shape at least 2D
        The data to fit.

    y : array-like, optional, default: None
        The target variable to try to predict in the case of
        supervised learning.

    scorer : A single callable or dict mapping scorer name to the callable
        If it is a single callable, the return value for ``train_scores`` and
        ``test_scores`` is a single float.

        For a dict, it should be one mapping the scorer name to the scorer
        callable object / function.

        The callable object / fn should have signature
        ``scorer(estimator, X, y)``.

    train : array-like, shape (n_train_samples,)
        Indices of training samples.

    test : array-like, shape (n_test_samples,)
        Indices of test samples.

    verbose : integer
        The verbosity level.

    error_score : 'raise' (default) or numeric
        Value to assign to the score if an error occurs in estimator fitting.
        If set to 'raise', the error is raised. If a numeric value is given,
        FitFailedWarning is raised. This parameter does not affect the refit
        step, which will always raise the error.

    parameters : dict or None
        Parameters to be set on the estimator.

    fit_params : dict or None
        Parameters that will be passed to ``estimator.fit``.

    return_train_score : boolean, optional, default: False
        Compute and return score on training set.

    return_parameters : boolean, optional, default: False
        Return parameters that has been used for the estimator.

    return_n_test_samples : boolean, optional, default: False
        Whether to return the ``n_test_samples``

    return_times : boolean, optional, default: False
        Whether to return the fit/score times.

    Returns
    -------
    train_scores : dict of scorer name -> float, optional
        Score on training set (for all the scorers),
        returned only if `return_train_score` is `True`.

    test_scores : dict of scorer name -> float, optional
        Score on testing set (for all the scorers).

    n_test_samples : int
        Number of test samples.

    fit_time : float
        Time spent for fitting in seconds.

    score_time : float
        Time spent for scoring in seconds.

    parameters : dict or None, optional
        The parameters that have been evaluated.

    """
    if verbose > 1:
        if parameters is None:
            msg = ''
        else:
            msg = '%s' % (', '.join('%s=%s' % (k, v)
                          for k, v in parameters.items()))
        print("[CV] %s %s" % (msg, (64 - len(msg)) * '.'))

    # Adjust length of sample weights
    fit_params = fit_params if fit_params is not None else {}
    fit_params = dict([(k, _index_param_value(X, v, train))
                      for k, v in fit_params.items()])

    test_scores = {}
    train_scores = {}
    if parameters is not None:
        estimator.set_params(**parameters)

    start_time = time.time()

    # do it for each patient
    X_train, y_train, X_test, y_test = _safe_split_multi(
        estimator, X, y, train, test)

    is_multimetric = not callable(scorer)
    n_scorers = len(scorer.keys()) if is_multimetric else 1

    try:
        if y_train is None:
            estimator.fit(X_train, **fit_params)
        else:
            estimator.fit(X_train, y_train, **fit_params)

    except Exception as e:
        # Note fit time as time until error
        fit_time = time.time() - start_time
        score_time = 0.0
        if error_score == 'raise':
            raise
        elif isinstance(error_score, numbers.Number):
            if is_multimetric:
                test_scores = dict(zip(scorer.keys(),
                                   [error_score, ] * n_scorers))
                if return_train_score:
                    train_scores = dict(zip(scorer.keys(),
                                        [error_score, ] * n_scorers))
            else:
                test_scores = error_score
                if return_train_score:
                    train_scores = error_score
            warnings.warn("Classifier fit failed. The score on this train-test"
                          " partition for these parameters will be set to %f. "
                          "Details: \n%r" % (error_score, e), FitFailedWarning)
        else:
            raise ValueError("error_score must be the string 'raise' or a"
                             " numeric value. (Hint: if using 'raise', please"
                             " make sure that it has been spelled correctly.)")

    else:
        fit_time = time.time() - start_time
        # _score will return dict if is_multimetric is True
        test_scores = _score(estimator, X_test, y_test, scorer, is_multimetric)
        score_time = time.time() - start_time - fit_time
        if return_train_score:
            train_scores = _score(estimator, X_train, y_train, scorer,
                                  is_multimetric)

    if verbose > 2:
        if is_multimetric:
            for scorer_name, score in test_scores.items():
                msg += ", %s=%s" % (scorer_name, score)
        else:
            msg += ", score=%s" % test_scores
    if verbose > 1:
        total_time = score_time + fit_time
        end_msg = "%s, total=%s" % (msg, logger.short_format_time(total_time))
        print("[CV] %s %s" % ((64 - len(end_msg)) * '.', end_msg))

    ret = [train_scores, test_scores] if return_train_score else [test_scores]

    if return_n_test_samples:
        ret.append(_num_samples(X_test))
    if return_times:
        ret.extend([fit_time, score_time])
    if return_parameters:
        ret.append(parameters)
    if return_estimator:
        ret.append(estimator)
    if return_idx:
        ret.extend([train, test])
    return ret