import numpy
from scipy.sparse import csc_matrix, csr_matrix, issparse

# ``VerboseReporter`` and ``_random_sample_mask`` come from scikit-learn's
# gradient boosting internals; the exact import path depends on the
# scikit-learn version (e.g. ``sklearn.ensemble._gb`` and
# ``sklearn.ensemble._gradient_boosting``).


def _fit_stages(self, X, y, raw_predictions, sample_weight, random_state,
                begin_at_stage=0, monitor=None, X_idx_sorted=None):
    """Iteratively fit the stages.

    For each stage, compute the progress (OOB and train score) and
    delegate the actual fitting to ``_fit_stage``.

    Returns the number of stages fit; this may differ from
    ``n_estimators`` due to early stopping.
    """
    n_samples = X.shape[0]
    do_oob = self.subsample < 1.0
    sample_mask = numpy.ones((n_samples,), dtype=bool)
    n_inbag = max(1, int(self.subsample * n_samples))
    loss_ = self.loss_

    if self.verbose:
        verbose_reporter = VerboseReporter(verbose=self.verbose)
        verbose_reporter.init(self, begin_at_stage)

    # materialize both sparse layouts once so each stage can pick the
    # representation it needs
    X_csc = csc_matrix(X) if issparse(X) else None
    X_csr = csr_matrix(X) if issparse(X) else None

    # per-estimator scaling factors used by DART-style dropout
    if self.dropout_rate > 0.:
        scale = numpy.ones(self.n_estimators, dtype=float)
    else:
        scale = None

    # perform boosting iterations
    i = begin_at_stage
    for i in range(begin_at_stage, self.n_estimators):

        # subsampling
        if do_oob:
            sample_mask = _random_sample_mask(n_samples, n_inbag,
                                              random_state)
            # OOB score before adding this stage
            y_oob_sample = y[~sample_mask]
            old_oob_score = loss_(y_oob_sample,
                                  raw_predictions[~sample_mask],
                                  sample_weight[~sample_mask])

        # fit next stage of trees
        raw_predictions = self._fit_stage(
            i, X, y, raw_predictions, sample_weight, sample_mask,
            random_state, scale, X_idx_sorted, X_csc, X_csr)

        # track deviance (= loss)
        if do_oob:
            self.train_score_[i] = loss_(y[sample_mask],
                                         raw_predictions[sample_mask],
                                         sample_weight[sample_mask])
            self.oob_improvement_[i] = (
                old_oob_score
                - loss_(y_oob_sample,
                        raw_predictions[~sample_mask],
                        sample_weight[~sample_mask]))
        else:
            # no need to fancy index w/ no subsampling
            self.train_score_[i] = loss_(y, raw_predictions, sample_weight)

        if self.verbose > 0:
            verbose_reporter.update(i, self)

        if monitor is not None:
            early_stopping = monitor(i, self, locals())
            if early_stopping:
                break

    if self.dropout_rate > 0.:
        self.scale_ = scale

    return i + 1
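
# A minimal sketch of an early-stopping ``monitor`` callback. Assuming the
# public ``fit`` forwards ``monitor`` to ``_fit_stages`` (as scikit-learn's
# gradient boosting does), the callback receives the stage index, the
# estimator, and the stage's local variables, and boosting stops as soon as
# it returns a truthy value. The function name and stopping rule below are
# illustrative, not part of the library.
def stop_after_oob_decline(i, est, local_vars):
    """Stop once OOB improvement was negative for five stages in a row.

    Requires ``subsample < 1.0`` so that ``oob_improvement_`` is tracked.
    """
    window = est.oob_improvement_[max(0, i - 4):i + 1]
    return i >= 4 and bool((window < 0).all())

# Hypothetical usage:
#   model.fit(X_train, y_train, monitor=stop_after_oob_decline)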
def _fit(self, X, event, time, sample_weight, random_state):  # noqa: C901
    """Fit the componentwise gradient boosting stages."""
    n_samples = X.shape[0]
    # account for intercept
    Xi = numpy.column_stack((numpy.ones(n_samples), X))
    y = numpy.fromiter(zip(event, time),
                       dtype=[('event', bool), ('time', numpy.float64)])
    y_pred = numpy.zeros(n_samples)

    do_oob = self.subsample < 1.0
    if do_oob:
        n_inbag = max(1, int(self.subsample * n_samples))

    do_dropout = self.dropout_rate > 0
    if do_dropout:
        scale = numpy.ones(int(self.n_estimators), dtype=float)

    if self.verbose:
        verbose_reporter = VerboseReporter(verbose=self.verbose)
        verbose_reporter.init(self, 0)

    for num_iter in range(int(self.n_estimators)):
        if do_oob:
            sample_mask = _random_sample_mask(n_samples, n_inbag,
                                              random_state)
            subsample_weight = sample_weight * sample_mask.astype(
                numpy.float64)

            # OOB score before adding this stage
            old_oob_score = self.loss_(y[~sample_mask],
                                       y_pred[~sample_mask],
                                       sample_weight[~sample_mask])
        else:
            subsample_weight = sample_weight

        residuals = self.loss_.negative_gradient(
            y, y_pred, sample_weight=sample_weight)

        best_learner = _fit_stage_componentwise(Xi, residuals,
                                                subsample_weight)
        self.estimators_.append(best_learner)

        if do_dropout:
            # DART-style dropout: drop a random subset of the existing
            # learners, rescale the survivors, and rebuild the prediction
            drop_model, n_dropped = _sample_binomial_plus_one(
                self.dropout_rate, num_iter + 1, random_state)

            scale[num_iter] = 1. / (n_dropped + 1.)

            y_pred[:] = 0
            for m in range(num_iter + 1):
                if drop_model[m] == 1:
                    scale[m] *= n_dropped / (n_dropped + 1.)
                else:
                    y_pred += (self.learning_rate * scale[m]
                               * self.estimators_[m].predict(Xi))
        else:
            y_pred += self.learning_rate * best_learner.predict(Xi)

        # track deviance (= loss)
        if do_oob:
            self.train_score_[num_iter] = self.loss_(
                y[sample_mask], y_pred[sample_mask],
                sample_weight[sample_mask])
            self.oob_improvement_[num_iter] = (
                old_oob_score
                - self.loss_(y[~sample_mask], y_pred[~sample_mask],
                             sample_weight[~sample_mask]))
        else:
            # no need to fancy index w/ no subsampling
            self.train_score_[num_iter] = self.loss_(
                y, y_pred, sample_weight)

        if self.verbose > 0:
            verbose_reporter.update(num_iter, self)
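
# ``_fit`` above depends on the helper ``_sample_binomial_plus_one``. The
# sketch below is an assumption, inferred from how its return values are
# used: a 0/1 drop mask over the ``size`` existing learners plus the number
# of dropped learners, with at least one learner always dropped (hence the
# "plus one"); it is not necessarily the library's exact implementation.
def _sample_binomial_plus_one(p, size, random_state):
    # draw a Bernoulli(p) drop decision for each existing learner
    drop_model = random_state.binomial(1, p=p, size=size)
    n_dropped = numpy.sum(drop_model)
    if n_dropped == 0:
        # guarantee at least one dropped learner
        idx = random_state.randint(0, size)
        drop_model[idx] = 1
        n_dropped = 1
    return drop_model, n_dropped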