Пример #1
0
    def _fit_cv(self, X, y, model_no):
        """Cross-validate one base estimator and print its per-fold scores.

        Nothing is stored: the estimator is refit on every fold yielded by
        ``self.folds_strategy.split(X, y)``, its predictions for the held-out
        part are scored with ``self.feval`` (when set), and the scores are
        only printed.

        Args:
            X: Feature data, handed to ``folds_strategy.split`` and
                ``split_folds``.
            y: Target data, handled the same way as ``X``.
            model_no: Index of the estimator in ``self.base_estimators``.

        Returns:
            None.
        """
        estimator = self.base_estimators[model_no]
        evals = []

        for train_index, test_index in self.folds_strategy.split(X, y):
            X_train, X_test, y_train, y_test = split_folds(
                train_index, test_index, X, y)

            # First try to pass the held-out fold as a validation set (useful
            # for XGB early stopping); estimators whose fit() rejects the
            # extra arguments fall back to the plain call. ``Exception``
            # instead of a bare ``except`` lets KeyboardInterrupt/SystemExit
            # propagate.
            try:
                estimator.fit(X_train, y_train, X_test, y_test)
            except Exception:
                estimator.fit(X_train, y_train)

            # Compare by value: ``is`` against a str/int literal only works
            # when the objects happen to be interned.
            if self.estimator_type == 'regression':
                predicted_y = estimator.predict(X_test)
            elif self.estimator_type == 'classification':
                predicted_y = estimator.predict_proba(X_test)
                # For binary problems keep only column 1 of the probability
                # matrix when the estimator's type name mentions sklearn.
                if (self.num_classes == 2
                        and 'sklearn' in str(type(estimator))):
                    predicted_y = predicted_y[:, 1]

            if self.feval is not None:
                fold_score = self.feval(y_test, predicted_y)
                evals.append(fold_score)
                # Folds are numbered from 1 in the order they are scored.
                print('Fold{}: {}'.format(len(evals), fold_score))

        # Without an evaluation function there is nothing to summarise;
        # np.mean([]) would only emit a RuntimeWarning and print nan.
        if evals:
            print('CV Mean: ', np.mean(evals), ' Std: ', np.std(evals))
Пример #2
0
    def _fit_s(self, X, y, model_no):
        """Fit one base estimator per fold and record out-of-fold predictions.

        For every fold yielded by ``self.folds_strategy.split(X, y)`` the
        estimator is fit on the training part, its predictions for the
        held-out rows are written into ``self.stacking_train``, and a snapshot
        of the fitted estimator is kept. The snapshots are stored in
        ``self.fold_estimators`` under the estimator's name so that a later
        predict step can combine the per-fold models.

        Args:
            X: Feature data, handed to ``folds_strategy.split`` and
                ``split_folds``.
            y: Target data, handled the same way as ``X``.
            model_no: Index into ``self.base_estimators`` and
                ``self.base_estimators_names``.

        Returns:
            None.
        """
        import copy

        estimator = self.base_estimators[model_no]
        name = self.base_estimators_names[model_no]
        evals = []
        fold_fits = {}

        for fold_no, (train_index, test_index) in enumerate(
                self.folds_strategy.split(X, y)):
            X_train, X_test, y_train, y_test = split_folds(
                train_index, test_index, X, y)

            # First try to pass the held-out fold as a validation set (useful
            # for XGB early stopping); estimators whose fit() rejects the
            # extra arguments fall back to the plain call. ``Exception``
            # instead of a bare ``except`` lets KeyboardInterrupt/SystemExit
            # propagate.
            try:
                estimator.fit(X_train, y_train, X_test, y_test)
            except Exception:
                estimator.fit(X_train, y_train)

            # Write the out-of-fold predictions into the stacking frame.
            # ``.loc`` replaces ``.ix``, which was removed in pandas 1.0.
            # NOTE(review): assumes stacking_train has an integer index whose
            # labels match the positions yielded by split() - confirm.
            if self.estimator_type == 'regression':
                predicted_y = estimator.predict(X_test)
                self.stacking_train.loc[test_index, name] = predicted_y
            elif self.estimator_type == 'classification':
                predicted_y = estimator.predict_proba(X_test)
                if self.num_classes == 2:
                    # Keep only column 1 of the probability matrix when the
                    # estimator's type name mentions sklearn.
                    if 'sklearn' in str(type(estimator)):
                        predicted_y = predicted_y[:, 1]
                    self.stacking_train.loc[test_index, name] = predicted_y
                elif self.num_classes > 2:
                    # One stacking column per class.
                    class_columns = [
                        name + '_class_' + str(j)
                        for j in range(self.num_classes)
                    ]
                    self.stacking_train.loc[
                        test_index, class_columns] = predicted_y

            # Store a copy of the fitted estimator. Storing the estimator
            # itself would make every fold entry point at the same object,
            # which is refit on the next fold, so all entries would end up as
            # the last fold's model.
            fold_fits[name + 'fold' + str(fold_no)] = copy.deepcopy(estimator)

            if self.feval is not None:
                fold_score = self.feval(y_test, predicted_y)
                evals.append(fold_score)
                print('Fold{}: {}'.format(fold_no + 1, fold_score))

        # Without an evaluation function there is nothing to summarise;
        # np.mean([]) would only emit a RuntimeWarning and print nan.
        if evals:
            print('CV Mean: ', np.mean(evals), ' Std: ', np.std(evals))

        self.fold_estimators[name] = fold_fits
Пример #3
0
    def _fit_st(self, X, y, model_no):
        """Record out-of-fold predictions, then refit on all of the data.

        For every fold yielded by ``self.folds_strategy.split(X, y)`` the
        estimator is fit on the training part and its predictions for the
        held-out rows are written into ``self.stacking_train``. Afterwards
        ``self._fit_t(X, y, model_no)`` is called to fit against the full
        data set.

        Args:
            X: Feature data, handed to ``folds_strategy.split``,
                ``split_folds`` and ``_fit_t``.
            y: Target data, handled the same way as ``X``.
            model_no: Index into ``self.base_estimators`` and
                ``self.base_estimators_names``.

        Returns:
            None.
        """
        estimator = self.base_estimators[model_no]
        name = self.base_estimators_names[model_no]
        evals = []

        for train_index, test_index in self.folds_strategy.split(X, y):
            X_train, X_test, y_train, y_test = split_folds(
                train_index, test_index, X, y)

            # First try to pass the held-out fold as a validation set (useful
            # for XGB early stopping); estimators whose fit() rejects the
            # extra arguments fall back to the plain call. ``Exception``
            # instead of a bare ``except`` lets KeyboardInterrupt/SystemExit
            # propagate.
            try:
                estimator.fit(X_train, y_train, X_test, y_test)
            except Exception:
                estimator.fit(X_train, y_train)

            # Write the out-of-fold predictions into the stacking frame.
            # ``.loc`` replaces ``.ix``, which was removed in pandas 1.0.
            # NOTE(review): assumes stacking_train has an integer index whose
            # labels match the positions yielded by split() - confirm.
            if self.estimator_type == 'regression':
                predicted_y = estimator.predict(X_test)
                self.stacking_train.loc[test_index, name] = predicted_y
            elif self.estimator_type == 'classification':
                predicted_y = estimator.predict_proba(X_test)
                if self.num_classes == 2:
                    # Keep only column 1 of the probability matrix when the
                    # estimator's type name mentions sklearn.
                    if 'sklearn' in str(type(estimator)):
                        predicted_y = predicted_y[:, 1]
                    self.stacking_train.loc[test_index, name] = predicted_y
                elif self.num_classes > 2:
                    # One stacking column per class. The loop variable is
                    # ``j`` so it cannot be confused with (or, on Python 2,
                    # overwrite) the fold counter.
                    class_columns = [
                        name + '_class_' + str(j)
                        for j in range(self.num_classes)
                    ]
                    self.stacking_train.loc[
                        test_index, class_columns] = predicted_y

            if self.feval is not None:
                assert len(y_test) == len(predicted_y)
                fold_score = self.feval(y_test, predicted_y)
                evals.append(fold_score)
                # Folds are numbered from 1 in the order they are scored.
                print('Fold{}: {}'.format(len(evals), fold_score))

        # Without an evaluation function there is nothing to summarise;
        # np.mean([]) would only emit a RuntimeWarning and print nan.
        if evals:
            print('CV Mean: ', np.mean(evals), ' Std: ', np.std(evals))

        # Finally fit against all the data.
        self._fit_t(X, y, model_no)