Example #1
    def predict(self, X):
        """Predict regression target for X.
        The predicted regression target of an input sample is computed as the
        mean predicted regression targets of the trees in the forest.
        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_samples, n_features]
            The input samples. Internally, its dtype will be converted to
            ``dtype=np.float32``. If a sparse matrix is provided, it will be
            converted into a sparse ``csr_matrix``.
        Returns
        -------
        y : array of shape = [n_samples] or [n_samples, n_outputs]
            The predicted values.
        """
        self.check_is_fitted()
        # Check data
        X = check_X(X, enforce_univariate=True)
        X = self._validate_X_predict(X)

        # Assign chunk of trees to jobs
        n_jobs, _, _ = _partition_estimators(self.n_estimators, self.n_jobs)

        # Parallel loop
        y_hat = Parallel(n_jobs=n_jobs, verbose=self.verbose)(
            delayed(e.predict)(X, check_input=True) for e in self.estimators_)

        return np.sum(y_hat, axis=0) / len(self.estimators_)
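Note: the snippets throughout this section call the private `_partition_estimators`
to split the ensemble across joblib workers. For orientation, a sketch along the
lines of the scikit-learn implementation is shown below (`effective_n_jobs` comes
from joblib).

import numpy as np
from joblib import effective_n_jobs

def _partition_estimators(n_estimators, n_jobs):
    """Split n_estimators estimators across n_jobs jobs as evenly as possible."""
    # Never use more jobs than there are estimators
    n_jobs = min(effective_n_jobs(n_jobs), n_estimators)

    # Give each job floor(n/k) estimators, plus one extra for the first n % k jobs
    n_estimators_per_job = np.full(n_jobs, n_estimators // n_jobs, dtype=int)
    n_estimators_per_job[:n_estimators % n_jobs] += 1
    starts = np.cumsum(n_estimators_per_job)

    return n_jobs, n_estimators_per_job.tolist(), [0] + starts.tolist()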
Example #2
    def predict_proba(self, X):
        """Predict class probabilities for X.
        The predicted class probabilities of an input sample are computed as
        the mean predicted class probabilities of the trees in the forest. The
        class probability of a single tree is the fraction of samples of the
        same
        class in a leaf.
        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_samples, n_features]
            The input samples. Internally, its dtype will be converted to
            ``dtype=np.float32``. If a sparse matrix is provided, it will be
            converted into a sparse ``csr_matrix``.
        Returns
        -------
        p : array of shape = [n_samples, n_classes], or a list of n_outputs
            such arrays if n_outputs > 1.
            The class probabilities of the input samples. The order of the
            classes corresponds to that in the attribute `classes_`.
        """
        # Check data
        self.check_is_fitted()
        X = check_X(X, enforce_univariate=True)

        X = self._validate_X_predict(X)

        # Assign chunk of trees to jobs
        n_jobs, _, _ = _partition_estimators(self.n_estimators, self.n_jobs)

        all_proba = Parallel(n_jobs=n_jobs,
                             verbose=self.verbose)(delayed(e.predict_proba)(X)
                                                   for e in self.estimators_)

        return np.sum(all_proba, axis=0) / len(self.estimators_)
Example #3
    def predict_proba(self, X):
        """Predict class probabilities for X by averaging over the trees."""

        check_is_fitted(self)

        X = self._validate_X_predict(X)

        n_jobs, _, _ = _partition_estimators(self.n_estimators, self.n_jobs)

        all_proba = [
            np.zeros((X.shape[0], j), dtype=np.float64)
            for j in np.atleast_1d(self.n_classes_)
        ]

        lock = threading.Lock()
        Parallel(n_jobs=n_jobs,
                 verbose=self.verbose,
                 **_joblib_parallel_args(require="sharedmem"))(
                     delayed(_accumulate_prediction)(e.predict_proba, X,
                                                     all_proba, lock)
                     for e in self.estimators_)

        for proba in all_proba:
            proba /= len(self.estimators_)

        if len(all_proba) == 1:
            return all_proba[0]
        else:
            return all_proba
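Note: the shared-memory reduction above (and in several later examples) relies on a
private helper that is not shown. A minimal sketch consistent with scikit-learn's
`_accumulate_prediction` follows; the `check_input=False` keyword assumes the wrapped
estimators are scikit-learn trees.

def _accumulate_prediction(predict, X, out, lock):
    """Add one estimator's prediction into the shared output buffer(s)."""
    prediction = predict(X, check_input=False)
    with lock:
        if len(out) == 1:
            out[0] += prediction
        else:
            for i in range(len(out)):
                out[i] += prediction[i]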
Example #4
    def _predict(self, predict_fn, X):
        check_is_fitted(self, 'estimators_')
        # Check data
        X = self._validate_X_predict(X)

        # Assign chunk of trees to jobs
        n_jobs, _, _ = _partition_estimators(self.n_estimators, self.n_jobs)

        # avoid storing the output of every estimator by summing them here
        if predict_fn == "predict":
            y_hat = np.zeros((X.shape[0]), dtype=np.float64)
        else:
            y_hat = np.zeros((X.shape[0], self.n_outputs_), dtype=np.float64)

        def _get_fn(est, name):
            fn = getattr(est, name)
            if name in ("predict_cumulative_hazard_function",
                        "predict_survival_function"):
                fn = partial(fn, return_array=True)
            return fn

        # Parallel loop
        lock = threading.Lock()
        Parallel(n_jobs=n_jobs,
                 verbose=self.verbose,
                 **_joblib_parallel_args(require="sharedmem"))(
                     delayed(_accumulate_prediction)(_get_fn(e, predict_fn), X,
                                                     [y_hat], lock)
                     for e in self.estimators_)

        y_hat /= len(self.estimators_)

        return y_hat
Example #5
    def predict(self, X):
        """Predict regression target for X.
        The predicted regression target of an input sample is computed as the
        mean predicted regression targets of the estimators in the ensemble.
        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The training input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.
        Returns
        -------
        y : ndarray of shape (n_samples,)
            The predicted values.
        """
        check_is_fitted(self)
        # Check data
        X = check_array(X,
                        accept_sparse=['csr', 'csc'],
                        dtype=None,
                        force_all_finite=False)

        # Parallel loop
        n_jobs, n_estimators, starts = _partition_estimators(
            self.n_estimators, self.n_jobs)

        all_y_hat = Parallel(n_jobs=n_jobs, verbose=self.verbose)(
            delayed(_parallel_predict_regression)(
                self.estimators_[starts[i]:starts[i + 1]],
                self.estimators_features_[starts[i]:starts[i + 1]], X)
            for i in range(n_jobs))

        # Reduce
        y_hat = sum(all_y_hat) / self.n_estimators

        return y_hat
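Note: the per-job helper used above is not shown. A sketch consistent with
scikit-learn's private bagging helper follows; each estimator predicts on its own
sampled feature subset, the job returns the running sum, and the caller divides by
the estimator count.

def _parallel_predict_regression(estimators, estimators_features, X):
    """Sum the predictions of one job's chunk of estimators."""
    return sum(estimator.predict(X[:, features])
               for estimator, features in zip(estimators, estimators_features))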
Example #6
    def predict_log_proba(self, X):
        """Predict class log-probabilities for X.

        The predicted class log-probabilities of an input sample are computed as
        the log of the mean predicted class probabilities of the base
        estimators in the ensemble.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The training input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        Returns
        -------
        p : ndarray of shape (n_samples, n_classes)
            The class log-probabilities of the input samples. The order of the
            classes corresponds to that in the attribute :term:`classes_`.
        """
        check_is_fitted(self)
        if hasattr(self.base_estimator_, "predict_log_proba"):
            # Check data
            X = check_array(X,
                            accept_sparse=["csr", "csc"],
                            dtype=None,
                            force_all_finite=False)

            if self.n_features_ != X.shape[1]:
                raise ValueError("Number of features of the model must "
                                 "match the input. Model n_features is {0} "
                                 "and input n_features is {1} "
                                 "".format(self.n_features_, X.shape[1]))

            # Partition the estimators
            n_jobs, n_estimators, starts = _partition_estimators(
                self.n_estimators, self.n_jobs)

            all_log_proba = [
                _parallel_predict_log_proba(
                    self.estimators_[starts[i]:starts[i + 1]],
                    self.estimators_features_[starts[i]:starts[i + 1]],
                    X,
                    self.n_classes_,
                ) for i in range(n_jobs)
            ]

            # Reduce
            log_proba = all_log_proba[0]

            for j in range(1, len(all_log_proba)):
                log_proba = np.logaddexp(log_proba, all_log_proba[j])

            log_proba -= np.log(self.n_estimators)

            return log_proba

        else:
            return np.log(self.predict_proba(X))
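Note: the reduce step above computes the log of the *mean* probability without
leaving log space: `np.logaddexp` accumulates log(p_1 + ... + p_n) stably, and
subtracting log(n_estimators) turns the sum into a mean. A quick sanity check:

import numpy as np

log_p = np.log(np.array([0.2, 0.4]))     # two estimators' log-probabilities
acc = np.logaddexp(log_p[0], log_p[1])   # log(0.2 + 0.4), computed stably
acc -= np.log(2)                         # log of the mean probability
assert np.isclose(acc, np.log(0.3))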
Example #7
    def predict_proba(self, X):
        """Predict class probabilities for X.

        The predicted class probabilities of an input sample are computed as
        the mean predicted class probabilities of the trees in the forest. The
        class probability of a single tree is the fraction of samples of the
        same class in a leaf.

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_samples, n_features]
            The input samples. Internally, it will be converted to
            ``dtype=np.float32`` and if a sparse matrix is provided
            to a sparse ``csr_matrix``.

        Returns
        -------
        p : array of shape = [n_samples, n_classes], or a list of n_outputs
            such arrays if n_outputs > 1.
            The class probabilities of the input samples. The order of the
            classes corresponds to that in the attribute `classes_`.
        """
        # Check data
        if self.scaling:
            X = self._scale(X)
        X = self._validate_X_predict(X)

        # Assign chunk of trees to jobs
        n_jobs, _, _ = _partition_estimators(self.n_estimators, self.n_jobs)

        # Parallel loop
        all_proba = Parallel(n_jobs=n_jobs, verbose=self.verbose,
                             backend="threading")(
            delayed(_parallel_helper)(e, 'predict_proba', X,
                                      check_input=False)
            for e in self.estimators_)

        # Reduce (weighted average of the per-estimator probabilities)
        if self.n_outputs_ == 1:
            proba = self.estimator_weights[0] * all_proba[0]
            for j in range(1, len(all_proba)):
                proba += self.estimator_weights[j] * all_proba[j]

            proba /= np.sum(self.estimator_weights)

        else:
            proba = [self.estimator_weights[0] * p for p in all_proba[0]]
            for j in range(1, len(all_proba)):
                for k in range(self.n_outputs_):
                    proba[k] += self.estimator_weights[j] * all_proba[j][k]

            for k in range(self.n_outputs_):
                proba[k] /= np.sum(self.estimator_weights)

        return proba
Example #8
    def predict_proba(self, X):
        """Predict class probabilities for X.

        The predicted class probabilities of an input sample are computed as
        the mean predicted class probabilities of the base estimators in the
        ensemble. If base estimators do not implement a ``predict_proba``
        method, then it resorts to voting and the predicted class probabilities
        of an input sample represent the proportion of estimators predicting
        each class.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The training input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        Returns
        -------
        p : ndarray of shape (n_samples, n_classes)
            The class probabilities of the input samples. The order of the
            classes corresponds to that in the attribute :term:`classes_`.
        """
        check_is_fitted(self)
        # Check data
        X = check_array(X,
                        accept_sparse=["csr", "csc"],
                        dtype=None,
                        force_all_finite=False)

        if self.n_features_ != X.shape[1]:
            raise ValueError("Number of features of the model must "
                             "match the input. Model n_features is {0} and "
                             "input n_features is {1}."
                             "".format(self.n_features_, X.shape[1]))

        # Partition the estimators
        n_jobs, n_estimators, starts = _partition_estimators(
            self.n_estimators, self.n_jobs)

        all_proba = [
            _parallel_predict_proba(
                self.estimators_[starts[i]:starts[i + 1]],
                self.estimators_features_[starts[i]:starts[i + 1]],
                X,
                self.n_classes_,
            ) for i in range(n_jobs)
        ]

        # Reduce
        proba = sum(all_proba) / self.n_estimators

        return proba
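Note: the per-job helper used here is not shown. A simplified sketch in the spirit
of scikit-learn's bagging code follows; it ignores the corner case where an
estimator was fitted on only a subset of the classes, and assumes integer-encoded
class predictions.

import numpy as np

def _parallel_predict_proba(estimators, estimators_features, X, n_classes):
    """Sum predicted probabilities over one job's chunk of estimators."""
    n_samples = X.shape[0]
    proba = np.zeros((n_samples, n_classes))

    for estimator, features in zip(estimators, estimators_features):
        if hasattr(estimator, "predict_proba"):
            proba += estimator.predict_proba(X[:, features])
        else:
            # Resort to voting: each estimator casts one unit of probability mass
            predictions = estimator.predict(X[:, features])
            for i in range(n_samples):
                proba[i, predictions[i]] += 1

    return proba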
Example #9
    def predict_log_proba(self, X):
        """Predict class log-probabilities for X.

        The predicted class log-probabilities of an input sample are computed as
        the log of the mean predicted class probabilities of the base
        estimators in the ensemble.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape = [n_samples, n_features]
            The training input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        Returns
        -------
        p : array of shape = [n_samples, n_classes]
            The class log-probabilities of the input samples. The order of the
            classes corresponds to that in the attribute `classes_`.
        """
        check_is_fitted(self, "classes_")
        if hasattr(self.base_estimator_, "predict_log_proba"):
            # Check data
            X = check_array(X, accept_sparse=['csr', 'csc'])

            if self.n_features_ != X.shape[1]:
                raise ValueError("Number of features of the model must "
                                 "match the input. Model n_features is {0} "
                                 "and input n_features is {1} "
                                 "".format(self.n_features_, X.shape[1]))

            # Parallel loop
            n_jobs, n_estimators, starts = _partition_estimators(
                self.n_estimators, self.n_jobs)

            all_log_proba = Parallel(n_jobs=n_jobs, verbose=self.verbose)(
                delayed(_parallel_predict_log_proba)(
                    self.estimators_[starts[i]:starts[i + 1]],
                    self.estimators_features_[starts[i]:starts[i + 1]],
                    X, self.n_classes_)
                for i in range(n_jobs))

            # Reduce
            log_proba = all_log_proba[0]

            for j in range(1, len(all_log_proba)):  # pragma: no cover
                log_proba = np.logaddexp(log_proba, all_log_proba[j])

            log_proba -= np.log(self.n_estimators)

            return log_proba
        # else, the base estimator has no predict_log_proba, so...
        return np.log(self.predict_proba(X))
Example #10
    def predict_proba(self, X):
        """
        Find probability estimates for each class for all cases in X.

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_instances, n_columns]
            The input samples. If a Pandas data frame is passed it must have a
            single column (i.e., univariate classification). RISE has no
            bespoke method for multivariate classification as yet.

        Local variables
        ---------------
        n_instances : int
            Number of cases to classify.
        n_columns : int
            Number of attributes in X, must match `series_length` determined
            in `fit`.

        Returns
        -------
        output : array of shape = [n_instances, n_classes]
            The class probabilities of all cases.
        """
        # Check data
        self.check_is_fitted()
        X = check_X(X, enforce_univariate=True, coerce_to_numpy=True)
        X = X.squeeze(1)

        n_instances, n_columns = X.shape
        if n_columns != self.series_length:
            raise ValueError(
                "The number of attributes in the test data does not match "
                "the series length seen in fit."
            )

        # Assign chunk of trees to jobs
        n_jobs, _, _ = _partition_estimators(self.n_estimators, self.n_jobs)

        # Parallel loop
        all_proba = Parallel(n_jobs=n_jobs)(
            delayed(_predict_proba_for_estimator)(
                X,
                self.estimators_[i],
                self.intervals[i],
                self.lags[i],
            )
            for i in range(self.n_estimators)
        )

        return np.sum(all_proba, axis=0) / self.n_estimators
Example #11
    def predict_proba(self, X):
        """Predict class probabilities for X.

        The predicted class probabilities of an input sample are computed as
        the mean predicted class probabilities of the base estimators in the
        ensemble. If base estimators do not implement a ``predict_proba``
        method, then it resorts to voting and the predicted class probabilities
        of an input sample represent the proportion of estimators predicting
        each class.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape = [n_samples, n_features]
            The training input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        Returns
        -------
        p : array of shape = [n_samples, n_classes]
            The class probabilities of the input samples. 
        """

        check_is_fitted(self)
        # Check data
        X = check_array(
            X, accept_sparse=['csr', 'csc'], dtype=None,
            force_all_finite=False
        )
        if self.n_features_ != X.shape[1]:
            raise ValueError("Number of features of the model must "
                             "match the input. Model n_features is {0} and "
                             "input n_features is {1}."
                             "".format(self.n_features_, X.shape[1]))
        
        # Parallel loop
        n_jobs, _, starts = _partition_estimators(self.n_estimators,
                                                  self.n_jobs)

        all_proba = Parallel(n_jobs=n_jobs, verbose=self.verbose,
                             **self._parallel_args())(
            delayed(_parallel_predict_proba)(
                self.estimators_[starts[i]:starts[i + 1]],
                self.estimators_features_[starts[i]:starts[i + 1]],
                X,
                self.n_classes_)
            for i in range(n_jobs))

        # Reduce
        proba = sum(all_proba) / len(self.estimators_)

        return proba
Example #12
    def predict(self, X, eval_MSE=False):
        """Predict regression target for `X`.
        The predicted regression target of an input sample is computed as the
        mean predicted regression targets of the trees in the forest.
        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples. Internally, its dtype will be converted to
            ``dtype=np.float32``. If a sparse matrix is provided, it will be
            converted into a sparse ``csr_matrix``.
        Returns
        -------
        y : ndarray of shape (n_samples,) or (n_samples, n_outputs)
            The predicted values.
        """
        check_is_fitted(self)
        # Check data
        X = self._check_X(X)
        X = self._validate_X_predict(X)

        # Assign chunk of trees to jobs
        n_jobs, _, _ = _partition_estimators(self.n_estimators, self.n_jobs)

        # storing the output of every estimator since those are required to estimate the MSE
        if self.n_outputs_ > 1:
            y_hat_all = np.zeros(
                (X.shape[0], self.n_outputs_, self.n_estimators),
                dtype=np.float64)
        else:
            y_hat_all = np.zeros((X.shape[0], self.n_estimators),
                                 dtype=np.float64)

        for i, e in enumerate(self.estimators_):
            y_hat_all[..., i] = e.predict(X, check_input=False)

        # TODO: this actually takes much longer than the sequential execution
        # which might be caused by the overheads in spawning the threads.
        # Parallel loop
        # Parallel(n_jobs=n_jobs, verbose=self.verbose, backend="threading")(
        #     delayed(_save_prediction)(e.predict, X, i, y_hat_all) \
        #         for i, e in enumerate(self.estimators_)
        # )

        # Average over the estimator axis (the last one in both layouts)
        y_hat = np.mean(y_hat_all, axis=-1)
        if eval_MSE:
            # TODO: implement the jackknife estimate of variance
            _MSE_hat = np.std(y_hat_all, axis=-1, ddof=1)**2.0

        return (y_hat, _MSE_hat) if eval_MSE else y_hat
Example #13
    def decision_function(self, X):
        """Average of the decision functions of the base classifiers.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The training input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        Returns
        -------
        score : ndarray of shape (n_samples, k)
            The decision function of the input samples. The columns correspond
            to the classes in sorted order, as they appear in the attribute
            ``classes_``. Regression and binary classification are special
            cases with ``k == 1``, otherwise ``k == n_classes``.

        """
        check_is_fitted(self)

        # Check data
        X = check_array(X,
                        accept_sparse=["csr", "csc"],
                        dtype=None,
                        force_all_finite=False)

        if self.n_features_ != X.shape[1]:
            raise ValueError("Number of features of the model must "
                             "match the input. Model n_features is {0} and "
                             "input n_features is {1} "
                             "".format(self.n_features_, X.shape[1]))

        # Partition the estimators
        n_jobs, n_estimators, starts = _partition_estimators(
            self.n_estimators, self.n_jobs)

        all_decisions = [
            _parallel_decision_function(
                self.estimators_[starts[i]:starts[i + 1]],
                self.estimators_features_[starts[i]:starts[i + 1]],
                X,
            ) for i in range(n_jobs)
        ]

        # Reduce
        decisions = sum(all_decisions) / self.n_estimators

        return decisions
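Note: the per-job helper is again not shown; a sketch consistent with scikit-learn's
private bagging helper is:

def _parallel_decision_function(estimators, estimators_features, X):
    """Sum the decision functions of one job's chunk of estimators."""
    return sum(estimator.decision_function(X[:, features])
               for estimator, features in zip(estimators, estimators_features))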
Example #14
def predict(self, X):
    """
    Predict regression target for X.

    The predicted regression target of an input sample is computed as the
    mean predicted regression targets of the trees in the forest.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The input samples. Internally, its dtype will be converted to
        ``dtype=np.float32``. If a sparse matrix is provided, it will be
        converted into a sparse ``csr_matrix``.

    Returns
    -------
    y : ndarray of shape (n_samples,) or (n_samples, n_outputs)
        The predicted values.
    """
    check_is_fitted(self)
    # Check data
    X = self._validate_X_predict(X)

    # Assign chunk of trees to jobs
    n_jobs, _, _ = _partition_estimators(self.n_estimators, self.n_jobs)

    # avoid storing the output of every estimator by summing them here
    if self.n_outputs_ > 1:
        y_hat = np.zeros((X.shape[0], self.n_outputs_), dtype=np.float64)
    else:
        y_hat = np.zeros((X.shape[0]), dtype=np.float64)

    # Parallel loop
    lock = threading.Lock()
    # <<< sklearn
    # Parallel(n_jobs=n_jobs, verbose=self.verbose,
    #          **_joblib_parallel_args(require="sharedmem"))(
    #     delayed(_accumulate_prediction)(e.predict, X, [y_hat], lock)
    #     for e in self.estimators_)
    # >>> monkey patch
    for e in self.estimators_:
        _accumulate_prediction(e.predict, X, [y_hat], lock)
    # ---------------
    y_hat /= len(self.estimators_)

    return y_hat
Example #15
    def predict_proba_trees(self, X):
        check_is_fitted(self)
        # Check data
        X = self._validate_X_predict(X)
        # TODO: we can also avoid data binning for predictions...
        X_binned = self._bin_data(X, is_training_data=False)
        n_samples = X.shape[0]
        n_estimators = len(self.trees)
        n_jobs, _, _ = _partition_estimators(self.n_estimators, self.n_jobs)
        # One (n_samples, n_classes) probability matrix per tree, not n_features
        probas = np.empty((n_estimators, n_samples, self.n_classes_))

        lock = threading.Lock()
        Parallel(
            n_jobs=n_jobs,
            verbose=self.verbose,
            **_joblib_parallel_args(require="sharedmem"),
        )(delayed(_get_tree_prediction)(e.predict_proba, X_binned, probas,
                                        lock, tree_idx)
          for tree_idx, e in enumerate(self.trees))
        return probas
Example #16
    def predict(self, X):
        """
        Predict confidence-interval bounds of the regression target for X.

        The prediction of every tree in the forest is collected, and the lower
        and upper percentiles across trees are returned as a confidence
        interval.

        Parameters
        ----------
        X : array-like or sparse matrix of shape (n_samples, n_features)
            The input samples. Internally, its dtype will be converted to
            ``dtype=np.float32``. If a sparse matrix is provided, it will be
            converted into a sparse ``csr_matrix``.

        Returns
        -------
        y : ndarray
            The lower and upper confidence-interval bounds of the predictions,
            stacked along the last axis.
        """
        check_is_fitted(self)
        # Check data
        X = self._validate_X_predict(X)

        # Assign chunk of trees to jobs
        n_jobs, _, _ = _partition_estimators(self.n_estimators, self.n_jobs)

        # Parallel loop
        # Store the output of every estimator in order to compute confidence intervals
        y_hat = Parallel(n_jobs=n_jobs,
                         verbose=self.verbose,
                         **_joblib_parallel_args(require="sharedmem"))(
                             delayed(_accumulate_prediction)(
                                 e.predict, X, self.minimum_value)
                             for e in self.forest.estimators_)

        y_hat_below = np.percentile(y_hat,
                                    self.confidence_interval_lower,
                                    axis=0)
        y_hat_above = np.percentile(y_hat,
                                    self.confidence_interval_upper,
                                    axis=0)

        return np.dstack((y_hat_below, y_hat_above))
Example #17
    def predict(self,
                X: Union[Solution, List, np.ndarray],
                eval_MSE=False) -> np.ndarray:
        """Predict regression target for `X`.
        The predicted regression target of an input sample is computed as the
        mean predicted regression targets of the trees in the forest.
        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples. Internally, its dtype will be converted to
            ``dtype=np.float32``. If a sparse matrix is provided, it will be
            converted into a sparse ``csr_matrix``.
        Returns
        -------
        y : ndarray of shape (n_samples,) or (n_samples, n_outputs)
            The predicted values.
        """
        check_is_fitted(self)
        # check data
        X = self._check_X(X)
        X = self._validate_X_predict(X)
        # assign chunk of trees to jobs
        n_jobs, _, _ = _partition_estimators(self.n_estimators, self.n_jobs)
        # storing the output of every estimator since those are required to estimate the MSE
        y_hat_all = (np.zeros(
            (X.shape[0], self.n_outputs_, self.n_estimators), dtype=np.float64)
                     if self.n_outputs_ > 1 else np.zeros(
                         (X.shape[0], self.n_estimators), dtype=np.float64))
        # parallel loop
        Parallel(n_jobs=n_jobs, verbose=self.verbose, backend="threading")(
            delayed(_save_prediction)(e.predict, X, i, y_hat_all)
            for i, e in enumerate(self.estimators_))
        y_hat = np.mean(y_hat_all, axis=-1)

        if eval_MSE:
            # TODO: implement the jackknife estimate of variance
            MSE_hat = np.std(y_hat_all, axis=-1, ddof=1)**2.0
        return (y_hat, MSE_hat) if eval_MSE else y_hat
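Note: a small numeric illustration of the reduction above, with made-up per-tree
predictions. The mean over the estimator axis gives the point prediction, and the
ddof=1 variance across trees is what the snippet reports as the MSE estimate.

import numpy as np

# Hypothetical predictions: 3 samples x 4 trees
y_hat_all = np.array([[1.0, 1.2, 0.8, 1.0],
                      [2.0, 2.0, 2.0, 2.0],
                      [0.0, 1.0, 2.0, 3.0]])

y_hat = np.mean(y_hat_all, axis=-1)                    # [1.0, 2.0, 1.5]
MSE_hat = np.std(y_hat_all, axis=-1, ddof=1) ** 2.0    # [0.0267, 0.0, 1.667]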
Example #18
def _forest_predict_var(forest, X_test, n_jobs):
    """Helper function to accumulate predictions and their variances.

    Parameters
    ----------
    forest : RandomForestRegressor
        Regressor object.

    X_test : ndarray of shape (n_test_samples, n_features)
        The design matrix for testing data.

    n_jobs : int or None, optional (default=None)
        The number of jobs to run in parallel. ``None`` means 1. ``-1`` means
        use all processors.

    Returns
    -------
    list of ndarray
        ``[y_hat, y_var]``: the per-sample mean prediction and the variance
        of the predictions across trees.
    """
    check_is_fitted(forest)
    X_test = forest._validate_X_predict(X_test)

    n_jobs, _, _ = _partition_estimators(forest.n_estimators, n_jobs)

    y_hat = np.zeros((X_test.shape[0]), dtype=np.float64)
    y_var = np.zeros((X_test.shape[0]), dtype=np.float64)

    # Parallel loop
    lock = threading.Lock()
    Parallel(n_jobs=n_jobs,
             verbose=forest.verbose,
             **_joblib_parallel_args(require='sharedmem'))(
                 delayed(_accumulate_predictions_and_var)(e.predict, X_test,
                                                          [y_hat, y_var], lock)
                 for e in forest.estimators_)

    y_hat /= len(forest.estimators_)
    y_var /= len(forest.estimators_)
    y_var -= y_hat**2

    return [y_hat, y_var]
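Note: the accumulation helper is not shown. The final reduction (divide both buffers
by the tree count, then subtract the squared mean) only works if `y_var` accumulates
squared predictions, so a minimal sketch consistent with it would be:

def _accumulate_predictions_and_var(predict, X, out, lock):
    """Accumulate the sum and sum of squares of one tree's predictions."""
    prediction = predict(X, check_input=False)  # check_input assumes tree estimators
    with lock:
        out[0] += prediction        # running sum -> mean after division
        out[1] += prediction ** 2   # running sum of squares -> E[y^2]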
Example #19
    def _predict_proba(self, X):
        """Find probability estimates for each class for all cases in X.

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_instances, n_columns]
            The input samples. If a Pandas data frame is passed it must have a
            single column (i.e., univariate classification). RISE has no
            bespoke method for multivariate classification as yet.

        Local variables
        ---------------
        n_instances : int
            Number of cases to classify.
        n_columns : int
            Number of attributes in X, must match `series_length` determined
            in `fit`.

        Returns
        -------
        output : array of shape = [n_instances, n_classes]
            The class probabilities of all cases.
        """
        X = X.squeeze(1)
        # Assign chunk of trees to jobs
        n_jobs, _, _ = _partition_estimators(self.n_estimators, self.n_jobs)

        # Parallel loop
        all_proba = Parallel(n_jobs=n_jobs)(
            delayed(_predict_proba_for_estimator)(
                X,
                self.estimators_[i],
                self.intervals[i],
                self.lags[i],
            ) for i in range(self.n_estimators))

        return np.sum(all_proba, axis=0) / self.n_estimators
Example #20
    def _fit(self, X, y, max_samples=None, max_depth=None, sample_weight=None):
        """Build a Bagging ensemble of estimators from the training
           set (X, y).

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape = [n_samples, n_features]
            The training input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        y : array-like, shape = [n_samples]
            The target values (1 for positive, 0 for unlabeled).

        max_samples : int or float, optional (default=None)
            Argument to use instead of self.max_samples.

        max_depth : int, optional (default=None)
            Override value used when constructing base estimator. Only
            supported if the base estimator has a max_depth parameter.

        sample_weight : array-like, shape = [n_samples] or None
            Sample weights. If None, then samples are equally weighted.
            Note that this is supported only if the base estimator supports
            sample weighting.

        Returns
        -------
        self : object
            Returns self.
        """
        random_state = check_random_state(self.random_state)

        self.y = y
        
        # Convert data
        X, y = check_X_y(X, y, ['csr', 'csc'])
        if sample_weight is not None:
            sample_weight = check_array(sample_weight, ensure_2d=False)
            check_consistent_length(y, sample_weight)

        # Remap output
        n_samples, self.n_features_ = X.shape
        self._n_samples = n_samples
        y = self._validate_y(y)

        # Check parameters
        self._validate_estimator()

        if max_depth is not None:
            self.base_estimator_.max_depth = max_depth

        # Validate max_samples
        if max_samples is None:
            max_samples = self.max_samples
        elif not isinstance(max_samples, (numbers.Integral, np.integer)):
            max_samples = int(max_samples * sum(y < 1))

        if not (0 < max_samples <= sum(y < 1)):
            raise ValueError("max_samples must be positive"
                             " and no larger than the number of unlabeled points")

        # Store validated integer row sampling value
        self._max_samples = max_samples

        # Validate max_features
        if isinstance(self.max_features, (numbers.Integral, np.integer)):
            max_features = self.max_features
        else:  # float
            max_features = int(self.max_features * self.n_features_)

        if not (0 < max_features <= self.n_features_):
            raise ValueError("max_features must be in (0, n_features]")

        # Store validated integer feature sampling value
        self._max_features = max_features

        # Other checks
        if not self.bootstrap and self.oob_score:
            raise ValueError("Out of bag estimation only available"
                             " if bootstrap=True")

        if self.warm_start and self.oob_score:
            raise ValueError("Out of bag estimate only available"
                             " if warm_start=False")

        if hasattr(self, "oob_score_") and self.warm_start:
            del self.oob_score_

        if not self.warm_start or not hasattr(self, 'estimators_'):
            # Free allocated memory, if any
            self.estimators_ = []
            self.estimators_features_ = []

        n_more_estimators = self.n_estimators - len(self.estimators_)

        if n_more_estimators < 0:
            raise ValueError('n_estimators=%d must be larger or equal to '
                             'len(estimators_)=%d when warm_start==True'
                             % (self.n_estimators, len(self.estimators_)))

        elif n_more_estimators == 0:
            warn("Warm-start fitting without increasing n_estimators does not "
                 "fit new trees.")
            return self

        # Parallel loop
        n_jobs, n_estimators, starts = _partition_estimators(n_more_estimators,
                                                             self.n_jobs)
        total_n_estimators = sum(n_estimators)

        # Advance random state to state after training
        # the first n_estimators
        if self.warm_start and len(self.estimators_) > 0:
            random_state.randint(MAX_INT, size=len(self.estimators_))

        seeds = random_state.randint(MAX_INT, size=n_more_estimators)
        self._seeds = seeds

        all_results = Parallel(n_jobs=n_jobs, verbose=self.verbose)(
            delayed(_parallel_build_estimators)(
                n_estimators[i],
                self,
                X,
                y,
                sample_weight,
                seeds[starts[i]:starts[i + 1]],
                total_n_estimators,
                verbose=self.verbose)
            for i in range(n_jobs))

        # Reduce
        self.estimators_ += list(itertools.chain.from_iterable(
            t[0] for t in all_results))
        self.estimators_features_ += list(itertools.chain.from_iterable(
            t[1] for t in all_results))

        if self.oob_score:
            self._set_oob_score(X, y)

        return self
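Note: the "advance random state" step in this and the following examples exploits a
property of NumPy's legacy RandomState: integer draws are consumed sequentially, so
drawing size=k and then size=m integers reproduces a single size=k+m draw. A
warm-started ensemble therefore seeds its new estimators exactly as a one-shot fit
would have. Illustration:

import numpy as np

MAX_INT = np.iinfo(np.int32).max

one_shot = np.random.RandomState(0).randint(MAX_INT, size=5)

rs = np.random.RandomState(0)
rs.randint(MAX_INT, size=3)         # advance past the 3 already-fitted estimators
warm = rs.randint(MAX_INT, size=2)  # seeds for the 2 new estimators

assert (one_shot[3:] == warm).all()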
Example #21
    def _fit(self, X, y, max_samples=None, max_depth=None, sample_weight=None):
        """Build a Bagging ensemble of estimators from the training set (X, y).

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The training input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        y : array-like of shape (n_samples,)
            The target values (class labels in classification, real numbers in
            regression).

        max_samples : int or float, default=None
            Argument to use instead of self.max_samples.

        max_depth : int, default=None
            Override value used when constructing base estimator. Only
            supported if the base estimator has a max_depth parameter.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights. If None, then samples are equally weighted.
            Note that this is supported only if the base estimator supports
            sample weighting.

        Returns
        -------
        self : object
        """
        random_state = check_random_state(self.random_state)

        # Convert data (X is required to be 2d and indexable)
        X, y = self._validate_data(
            X,
            y,
            accept_sparse=["csr", "csc"],
            dtype=None,
            force_all_finite=False,
            multi_output=True,
        )
        if sample_weight is not None:  # pragma: no cover
            sample_weight = _check_sample_weight(sample_weight, X, dtype=None)

        # Remap output
        n_samples, self.n_features_ = X.shape
        self._n_samples = n_samples
        y = self._validate_y(y)

        # Check parameters
        self._validate_estimator()

        if max_depth is not None:  # pragma: no cover
            self.base_estimator_.max_depth = max_depth

        # Validate max_samples
        if max_samples is None:  # pragma: no cover
            max_samples = self.max_samples
        elif not isinstance(max_samples, numbers.Integral):  # pragma: no cover
            max_samples = int(max_samples * X.shape[0])

        if not (0 < max_samples <= X.shape[0]):  # pragma: no cover
            raise ValueError("max_samples must be in (0, n_samples]")

        # Store validated integer row sampling value
        self._max_samples = max_samples

        # Validate max_features
        if isinstance(self.max_features, numbers.Integral):
            max_features = self.max_features
        elif isinstance(self.max_features, float):  # pragma: no cover
            max_features = self.max_features * self.n_features_
        else:  # pragma: no cover
            raise ValueError("max_features must be int or float")

        if not (0 < max_features <= self.n_features_):  # pragma: no cover
            raise ValueError("max_features must be in (0, n_features]")

        max_features = max(1, int(max_features))

        # Store validated integer feature sampling value
        self._max_features = max_features

        # Other checks
        if not self.bootstrap and self.oob_score:  # pragma: no cover
            raise ValueError("Out of bag estimation only available"
                             " if bootstrap=True")

        if self.warm_start and self.oob_score:  # pragma: no cover
            raise ValueError("Out of bag estimate only available"
                             " if warm_start=False")

        if hasattr(self, "oob_score_") and self.warm_start:  # pragma: no cover
            del self.oob_score_

        if not self.warm_start or not hasattr(
                self, "estimators_"):  # pragma: no cover
            # Free allocated memory, if any
            self.estimators_ = []
            self.estimators_features_ = []

        n_more_estimators = self.n_estimators - len(self.estimators_)

        if n_more_estimators < 0:  # pragma: no cover
            raise ValueError("n_estimators=%d must be larger or equal to "
                             "len(estimators_)=%d when warm_start==True" %
                             (self.n_estimators, len(self.estimators_)))

        elif n_more_estimators == 0:  # pragma: no cover
            warn("Warm-start fitting without increasing n_estimators does not "
                 "fit new trees.")
            return self

        # Partition the estimators
        n_jobs, n_estimators, starts = _partition_estimators(
            n_more_estimators, self.n_jobs)
        total_n_estimators = sum(n_estimators)

        # Advance random state to state after training
        # the first n_estimators
        if self.warm_start and len(self.estimators_) > 0:  # pragma: no cover
            random_state.randint(MAX_INT, size=len(self.estimators_))

        seeds = random_state.randint(MAX_INT, size=n_more_estimators)
        self._seeds = seeds

        all_results = [
            _parallel_build_estimators(
                n_estimators[i],
                self,
                X,
                y,
                sample_weight,
                seeds[starts[i]:starts[i + 1]],
                total_n_estimators,
                verbose=self.verbose,
            ) for i in range(n_jobs)
        ]

        # Reduce
        self.estimators_ += list(
            itertools.chain.from_iterable(t[0] for t in all_results))
        self.estimators_features_ += list(
            itertools.chain.from_iterable(t[1] for t in all_results))

        if self.oob_score:
            self._set_oob_score(X, y)

        return self
Example #22
    def _fit(self, X, y, max_samples=None, max_depth=None, sample_weight=None):
        """Build a Sequentially Bootstrapped Bagging ensemble of estimators from the training
           set (X, y).
        Parameters
        ----------
        X : (array-like, sparse matrix) of shape = [n_samples, n_features]
            The training input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.
        y : (array-like), shape = [n_samples]
            The target values (class labels in classification, real numbers in
            regression).
        max_samples : (int or float), optional (default=None)
            Argument to use instead of self.max_samples.
        max_depth : (int), optional (default=None)
            Override value used when constructing base estimator. Only
            supported if the base estimator has a max_depth parameter.
        sample_weight : (array-like), shape = [n_samples] or None
            Sample weights. If None, then samples are equally weighted.
            Note that this is supported only if the base estimator supports
            sample weighting.
        Returns
        -------
        self : (object)
        """
        random_state = check_random_state(self.random_state)
        self.X_time_index = X.index  # Remember X index for future sampling

        # Generate subsample ind_matrix (we need this during subsampling cross_validation)
        subsampled_ind_mat = self.ind_mat[
            :, self.timestamp_int_index_mapping.loc[self.X_time_index]]

        # Convert data (X is required to be 2d and indexable)
        X, y = check_X_y(X,
                         y, ['csr', 'csc'],
                         dtype=None,
                         force_all_finite=False,
                         multi_output=True)
        if sample_weight is not None:
            sample_weight = check_array(sample_weight, ensure_2d=False)
            check_consistent_length(y, sample_weight)

        # Remap output
        n_samples, self.n_features_ = X.shape
        self._n_samples = n_samples
        y = self._validate_y(y)

        # Check parameters
        self._validate_estimator()

        # Validate max_samples (fall back to self.max_samples when not given)
        if max_samples is None:
            max_samples = self.max_samples
        if not isinstance(max_samples, (numbers.Integral, np.integer)):
            max_samples = int(max_samples * X.shape[0])

        if not (0 < max_samples <= X.shape[0]):
            raise ValueError("max_samples must be in (0, n_samples]")

        # Store validated integer row sampling value
        self._max_samples = max_samples

        # Validate max_features
        if isinstance(self.max_features, (numbers.Integral, np.integer)):
            max_features = self.max_features
        elif isinstance(self.max_features, float):
            max_features = self.max_features * self.n_features_
        else:
            raise ValueError("max_features must be int or float")

        if not (0 < max_features <= self.n_features_):
            raise ValueError("max_features must be in (0, n_features]")

        max_features = max(1, int(max_features))

        # Store validated integer feature sampling value
        self._max_features = max_features

        if self.warm_start and self.oob_score:
            raise ValueError("Out of bag estimate only available"
                             " if warm_start=False")

        if not self.warm_start or not hasattr(self, 'estimators_'):
            # Free allocated memory, if any
            self.estimators_ = []
            self.estimators_features_ = []
            self.sequentially_bootstrapped_samples_ = []

        n_more_estimators = self.n_estimators - len(self.estimators_)

        if n_more_estimators < 0:
            raise ValueError('n_estimators=%d must be larger or equal to '
                             'len(estimators_)=%d when warm_start==True' %
                             (self.n_estimators, len(self.estimators_)))

        elif n_more_estimators == 0:
            warn("Warm-start fitting without increasing n_estimators does not "
                 "fit new trees.")
            return self

        # Parallel loop
        n_jobs, n_estimators, starts = _partition_estimators(
            n_more_estimators, self.n_jobs)
        total_n_estimators = sum(n_estimators)

        # Advance random state to state after training
        # the first n_estimators
        if self.warm_start and len(self.estimators_) > 0:
            random_state.randint(MAX_INT, size=len(self.estimators_))

        seeds = random_state.randint(MAX_INT, size=n_more_estimators)
        self._seeds = seeds

        # pylint: disable=C0330
        all_results = Parallel(
            n_jobs=n_jobs,
            verbose=self.verbose,
        )(delayed(_parallel_build_estimators)(n_estimators[i],
                                              self,
                                              X,
                                              y,
                                              subsampled_ind_mat,
                                              sample_weight,
                                              seeds[starts[i]:starts[i + 1]],
                                              total_n_estimators,
                                              verbose=self.verbose)
          for i in range(n_jobs))

        # Reduce
        self.estimators_ += list(
            itertools.chain.from_iterable(t[0] for t in all_results))
        self.estimators_features_ += list(
            itertools.chain.from_iterable(t[1] for t in all_results))
        self.sequentially_bootstrapped_samples_ += list(
            itertools.chain.from_iterable(t[2] for t in all_results))

        if self.oob_score:
            self._set_oob_score(X, y)

        return self
Example #23
    def fit(self, X, y, sample_weight=None):
        """Build a Bagging ensemble of estimators from the training set (X, y)."""
        random_state = check_random_state(self.random_state)
        self._max_samples = int(self.max_samples * X.shape[0])
        # Convert data (X is required to be 2d and indexable)
        X, y = check_X_y(X,
                         y, ['csr', 'csc'],
                         dtype=None,
                         force_all_finite=False,
                         multi_output=True)
        if sample_weight is not None:
            sample_weight = _check_sample_weight(sample_weight, X, dtype=None)

        # Remap output
        n_samples, self.n_features_ = X.shape
        self._n_samples = n_samples
        y = self._validate_y(y)

        # Check parameters
        self._validate_estimator()

        # Validate max_features
        if isinstance(self.max_features, numbers.Integral):
            max_features = self.max_features
        elif isinstance(self.max_features, float):
            max_features = self.max_features * self.n_features_
        else:
            raise ValueError("max_features must be int or float")

        if not (0 < max_features <= self.n_features_):
            raise ValueError("max_features must be in (0, n_features]")

        max_features = max(1, int(max_features))

        # Store validated integer feature sampling value
        self._max_features = max_features

        # Other checks
        if not self.bootstrap and self.oob_score:
            raise ValueError("Out of bag estimation only available"
                             " if bootstrap=True")

        if self.warm_start and self.oob_score:
            raise ValueError("Out of bag estimate only available"
                             " if warm_start=False")

        if hasattr(self, "oob_score_") and self.warm_start:
            del self.oob_score_

        if not self.warm_start or not hasattr(self, 'estimators_'):
            # Free allocated memory, if any
            self.estimators_ = []
            self.estimators_features_ = []

        n_more_estimators = self.n_estimators - len(self.estimators_)

        if n_more_estimators < 0:
            raise ValueError('n_estimators=%d must be larger or equal to '
                             'len(estimators_)=%d when warm_start==True' %
                             (self.n_estimators, len(self.estimators_)))

        elif n_more_estimators == 0:
            warn("Warm-start fitting without increasing n_estimators does not "
                 "fit new trees.")
            return self

        # Parallel loop
        n_jobs, n_estimators, starts = _partition_estimators(
            n_more_estimators, self.n_jobs)
        total_n_estimators = sum(n_estimators)

        # Advance random state to state after training
        # the first n_estimators
        if self.warm_start and len(self.estimators_) > 0:
            random_state.randint(MAX_INT, size=len(self.estimators_))

        seeds = random_state.randint(MAX_INT, size=n_more_estimators)
        self._seeds = seeds

        all_results = Parallel(
            n_jobs=n_jobs, verbose=self.verbose, **self._parallel_args())(
                delayed(_parallel_build_estimators)(
                    n_estimators[i],
                    self,
                    X,
                    y,
                    sample_weight,
                    seeds[starts[i]:starts[i + 1]],
                    total_n_estimators,
                    verbose=self.verbose)
                for i in range(n_jobs))

        # Reduce
        self.estimators_ += list(
            itertools.chain.from_iterable(t[0] for t in all_results))
        self.estimators_features_ += list(
            itertools.chain.from_iterable(t[1] for t in all_results))

        if self.oob_score:
            self._set_oob_score(X, y)

        return self
Example #24
    def predict_proba(self, X):
        """
        Predict class probabilities for X.

        The predicted class probabilities of an input sample are computed as
        the mean predicted class probabilities of the trees in the forest.
        The class probability of a single tree is the fraction of samples of
        the same class in a leaf.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples. Internally, its dtype will be converted to
            ``dtype=np.float32``. If a sparse matrix is provided, it will be
            converted into a sparse ``csr_matrix``.

        Returns
        -------
        p : ndarray of shape (n_samples, n_classes), or a list of n_outputs
            such arrays if n_outputs > 1.
            The class probabilities of the input samples. The order of the
            classes corresponds to that in the attribute :term:`classes_`.
        """
        # TODO: this is copy/pasted from scikit-learn. Simplify it to the
        #  binary classification case, and bin the features before predicting

        check_is_fitted(self)
        # Check data
        X = self._validate_X_predict(X, check_input=True)

        # TODO: we can also avoid data binning for predictions...
        # Bin the data
        X_binned = self._bin_data(X, is_training_data=False)

        # Assign chunk of trees to jobs
        n_jobs, _, _ = _partition_estimators(self.n_estimators, self.n_jobs)

        # TODO: the multi-output case is not handled yet, only a binary label
        # avoid storing the output of every estimator by summing them here
        # all_proba = [
        #     np.zeros((X.shape[0], j), dtype=np.float64)
        #     for j in np.atleast_1d(self.n_classes_)
        # ]

        all_proba = np.zeros((X_binned.shape[0], self.n_classes_))

        lock = threading.Lock()
        Parallel(
            n_jobs=n_jobs,
            verbose=self.verbose,
            **_joblib_parallel_args(require="sharedmem"),
        )(delayed(_accumulate_prediction)(e.predict_proba, X_binned,
                                          [all_proba], lock)
          for e in self.trees)

        # for proba in all_proba:
        #     proba /= len(self.trees)
        all_proba /= len(self.trees)

        # if len(all_proba) == 1:
        #     return all_proba[0]
        # else:
        #     return all_proba
        return all_proba
Example #25
    def _fit(
        self,
        X,
        y,
        *,
        sample_weight=None,
        sampler_kwargs: dict = {},
        max_samples=None,
        eval_datasets: dict = None,
        eval_metrics: dict = None,
        train_verbose: bool or int or dict,
    ):
        """Build a Bagging ensemble of estimators from the training set (X, y).

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The training input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        y : array-like of shape (n_samples,)
            The target values (class labels in classification, real numbers in
            regression).

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights. If None, then samples are equally weighted.
            Note that this is supported only if the base estimator supports
            sample weighting.
        
        sampler_kwargs : dict, default={}
            The kwargs to use as additional parameters when instantiating a
            new sampler. If none are given, default parameters are used.
        
        max_samples : int or float, default=None
            Argument to use instead of self.max_samples.
        
        %(eval_datasets)s
        
        %(eval_metrics)s
        
        %(train_verbose)s

        Returns
        -------
        self : object
        """

        # Check data, sampler_kwargs and random_state
        check_target_type(y)

        self.sampler_kwargs_ = check_type(sampler_kwargs, 'sampler_kwargs',
                                          dict)

        random_state = check_random_state(self.random_state)

        # Convert data (X is required to be 2d and indexable)
        check_x_y_args = {
            'accept_sparse': ['csr', 'csc'],
            'dtype': None,
            'force_all_finite': False,
            'multi_output': True,
        }
        X, y = self._validate_data(X, y, **check_x_y_args)

        # Check evaluation data
        self.eval_datasets_ = check_eval_datasets(eval_datasets, X, y,
                                                  **check_x_y_args)

        # Check evaluation metrics
        self.eval_metrics_ = check_eval_metrics(eval_metrics)

        # Check verbose
        self.train_verbose_ = check_train_verbose(train_verbose,
                                                  self.n_estimators,
                                                  **self._properties)
        self._init_training_log_format()

        if sample_weight is not None:
            sample_weight = _check_sample_weight(sample_weight, X, dtype=None)

        # Remap output
        n_samples, self.n_features_in_ = X.shape
        self._n_samples = n_samples
        y = self._validate_y(y)

        # Check parameters
        self._validate_estimator()

        # Validate max_samples
        if max_samples is None:
            max_samples = self.max_samples
        if not isinstance(max_samples, numbers.Integral):
            max_samples = int(max_samples * X.shape[0])

        if not (0 < max_samples <= X.shape[0]):
            raise ValueError("max_samples must be in (0, n_samples]")

        # Store validated integer row sampling value
        self._max_samples = max_samples

        # Validate max_features
        if isinstance(self.max_features, numbers.Integral):
            max_features = self.max_features
        elif isinstance(self.max_features, float):
            max_features = self.max_features * self.n_features_in_
        else:
            raise ValueError("max_features must be int or float")

        if not (0 < max_features <= self.n_features_in_):
            raise ValueError("max_features must be in (0, n_features]")

        max_features = max(1, int(max_features))

        # Store validated integer feature sampling value
        self._max_features = max_features

        # Other checks
        if not self.bootstrap and self.oob_score:
            raise ValueError("Out of bag estimation only available"
                             " if bootstrap=True")

        if self.warm_start and self.oob_score:
            raise ValueError("Out of bag estimate only available"
                             " if warm_start=False")

        if hasattr(self, "oob_score_") and self.warm_start:
            del self.oob_score_

        if not self.warm_start or not hasattr(self, 'estimators_'):
            # Free allocated memory, if any
            self.estimators_ = []
            self.estimators_features_ = []
            self.estimators_n_training_samples_ = []

        n_more_estimators = self.n_estimators - len(self.estimators_)

        if n_more_estimators < 0:
            raise ValueError('n_estimators=%d must be larger or equal to '
                             'len(estimators_)=%d when warm_start==True' %
                             (self.n_estimators, len(self.estimators_)))

        elif n_more_estimators == 0:
            warn("Warm-start fitting without increasing n_estimators does not "
                 "fit new trees.")
            return self

        # Parallel loop
        n_jobs, n_estimators, starts = _partition_estimators(
            n_more_estimators, self.n_jobs)
        total_n_estimators = sum(n_estimators)

        # Advance random state to state after training
        # the first n_estimators
        if self.warm_start and len(self.estimators_) > 0:
            random_state.randint(MAX_INT, size=len(self.estimators_))

        seeds = random_state.randint(MAX_INT, size=n_more_estimators)
        self._seeds = seeds

        all_results = Parallel(
            n_jobs=n_jobs, verbose=self.verbose, **self._parallel_args())(
                delayed(_parallel_build_estimators)(
                    n_estimators[i],
                    self,
                    X,
                    y,
                    sample_weight,
                    seeds[starts[i]:starts[i + 1]],
                    total_n_estimators,
                    verbose=self.verbose)
                for i in range(n_jobs))

        # Reduce
        self.estimators_ += list(
            itertools.chain.from_iterable(t[0] for t in all_results))
        self.estimators_features_ += list(
            itertools.chain.from_iterable(t[1] for t in all_results))
        self.estimators_n_training_samples_ += list(
            itertools.chain.from_iterable(t[2] for t in all_results))

        if self.oob_score:
            self._set_oob_score(X, y)

        # Print training information to console.
        self._training_log_to_console()

        return self