def _fit_cv(self, X, y, model_no):
    # 'cv' only reports the cross-validation score across folds - no models or predictions are stored.
    evals = []
    i = 0
    for train_index, test_index in self.folds_strategy.split(X, y):
        # Loop over the different folds.
        X_train, X_test, y_train, y_test = split_folds(
            train_index, test_index, X, y)
        # Fit on each fold for each model.
        # Try passing the fold's test split as a validation set - useful for XGBoost early stopping;
        # fall back to a plain fit if the estimator does not accept the extra arguments.
        try:
            self.base_estimators[model_no].fit(X_train, y_train, X_test,
                                               y_test)
        except TypeError:
            self.base_estimators[model_no].fit(X_train, y_train)
        if self.estimator_type == 'regression':
            predicted_y = self.base_estimators[model_no].predict(X_test)
        elif self.estimator_type == 'classification':
            predicted_y = self.base_estimators[model_no].predict_proba(X_test)
            if self.num_classes == 2 and 'sklearn' in str(
                    type(self.base_estimators[model_no])):
                predicted_y = predicted_y[:, 1]
        # Evaluate the fold.
        if self.feval is not None:
            fold_score = self.feval(y_test, predicted_y)
            evals.append(fold_score)
            print('Fold {}: {}'.format(i + 1, evals[i]))
        i += 1
    print('CV Mean: ', np.mean(evals), ' Std: ', np.std(evals))
    return
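
# `split_folds` is called by the fold loops in this section but is not defined here. The sketch
# below only illustrates the behaviour it is assumed to have (slice X and y by the fold's
# positional indices, handling both pandas and numpy inputs); the name `_split_folds_sketch`
# is hypothetical and is not the project's implementation.
def _split_folds_sketch(train_index, test_index, X, y):
    import pandas as pd

    if isinstance(X, (pd.DataFrame, pd.Series)):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    else:
        X_train, X_test = X[train_index], X[test_index]
    if isinstance(y, (pd.DataFrame, pd.Series)):
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    else:
        y_train, y_test = y[train_index], y[test_index]
    return X_train, X_test, y_train, y_test
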
def _fit_s(self, X, y, model_no):
    # Fit a model that stacks over the CV folds: predict the out-of-fold rows of X and keep each
    # fold's fitted estimator, so the final test-set prediction is the average over all fold models.
    evals = []
    fold_fits = {}
    i = 0
    for train_index, test_index in self.folds_strategy.split(X, y):
        # Loop over the different folds.
        X_train, X_test, y_train, y_test = split_folds(
            train_index, test_index, X, y)
        # Fit on each fold for each model.
        # Try passing the fold's test split as a validation set - useful for XGBoost early stopping;
        # fall back to a plain fit if the estimator does not accept the extra arguments.
        try:
            self.base_estimators[model_no].fit(X_train, y_train, X_test,
                                               y_test)
        except TypeError:
            self.base_estimators[model_no].fit(X_train, y_train)
        # Predict on the out-of-fold set.
        if self.estimator_type == 'regression':
            predicted_y = self.base_estimators[model_no].predict(X_test)
            self.stacking_train.loc[
                test_index, self.base_estimators_names[model_no]] = predicted_y
        elif self.estimator_type == 'classification':
            predicted_y = self.base_estimators[model_no].predict_proba(X_test)
            if self.num_classes == 2:
                if 'sklearn' in str(type(self.base_estimators[model_no])):
                    predicted_y = predicted_y[:, 1]
                self.stacking_train.loc[
                    test_index,
                    self.base_estimators_names[model_no]] = predicted_y
            elif self.num_classes > 2:
                self.stacking_train.loc[test_index, [
                    self.base_estimators_names[model_no] + '_class_' + str(j)
                    for j in range(self.num_classes)
                ]] = predicted_y
        # Save the fitted base_estimator for each fold of the data set in a model-specific dictionary.
        # predict() loops through these to get an average prediction for the test set.
        fold_fits[self.base_estimators_names[model_no] + 'fold' +
                  str(i)] = self.base_estimators[model_no]
        # Evaluate the fold.
        if self.feval is not None:
            fold_score = self.feval(y_test, predicted_y)
            evals.append(fold_score)
            print('Fold {}: {}'.format(i + 1, evals[i]))
        i += 1
    print('CV Mean: ', np.mean(evals), ' Std: ', np.std(evals))
    # Finally, store the per-fold estimators for this model.
    self.fold_estimators[self.base_estimators_names[model_no]] = fold_fits
    return
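
# At predict time the 's' strategy loops over the fold models saved in `fold_fits` and averages
# their test-set predictions. A minimal sketch of that averaging step is shown below, assuming a
# regression-style `.predict()`; the function name and the column-stack averaging are
# illustrative only, not the project's predict implementation.
def _average_fold_predictions_sketch(fold_fits, X_test):
    import numpy as np

    # One prediction per fold model, averaged column-wise into a single test-set prediction.
    fold_preds = [est.predict(X_test) for est in fold_fits.values()]
    return np.column_stack(fold_preds).mean(axis=1)
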
def _fit_st(self, X, y, model_no):
    # Fit a model that stacks over the CV folds: predict the out-of-fold rows of X, then run a
    # full fit on all of the data to use for test-set predictions.
    evals = []
    i = 0
    for train_index, test_index in self.folds_strategy.split(X, y):
        # Loop over the different folds.
        X_train, X_test, y_train, y_test = split_folds(
            train_index, test_index, X, y)
        # Fit on each fold for each model.
        # Try passing the fold's test split as a validation set - useful for XGBoost early stopping;
        # fall back to a plain fit if the estimator does not accept the extra arguments.
        try:
            self.base_estimators[model_no].fit(X_train, y_train, X_test,
                                               y_test)
        except TypeError:
            self.base_estimators[model_no].fit(X_train, y_train)
        # Predict on the out-of-fold set.
        if self.estimator_type == 'regression':
            predicted_y = self.base_estimators[model_no].predict(X_test)
            self.stacking_train.loc[
                test_index, self.base_estimators_names[model_no]] = predicted_y
        elif self.estimator_type == 'classification':
            predicted_y = self.base_estimators[model_no].predict_proba(X_test)
            if self.num_classes == 2:
                if 'sklearn' in str(type(self.base_estimators[model_no])):
                    predicted_y = predicted_y[:, 1]
                self.stacking_train.loc[
                    test_index,
                    self.base_estimators_names[model_no]] = predicted_y
            elif self.num_classes > 2:
                # Use j in the comprehension so the fold counter i is not shadowed.
                self.stacking_train.loc[test_index, [
                    self.base_estimators_names[model_no] + '_class_' + str(j)
                    for j in range(self.num_classes)
                ]] = predicted_y
        # Evaluate the fold.
        if self.feval is not None:
            assert len(y_test) == len(predicted_y)
            fold_score = self.feval(y_test, predicted_y)
            evals.append(fold_score)
            print('Fold {}: {}'.format(i + 1, evals[i]))
        i += 1
    print('CV Mean: ', np.mean(evals), ' Std: ', np.std(evals))
    # Finally fit against all of the data.
    self._fit_t(X, y, model_no)
    return
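
# The fold loops call `feval(y_test, predicted_y)` and expect a single score back. A minimal
# example of a metric with that signature, assuming a regression task and scikit-learn being
# available, is an RMSE scorer; the name `rmse_feval_example` is illustrative only.
def rmse_feval_example(y_true, y_pred):
    import numpy as np
    from sklearn.metrics import mean_squared_error

    # Root mean squared error for one fold's out-of-fold predictions.
    return np.sqrt(mean_squared_error(y_true, y_pred))
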