Пример #1
0
    def _fit_child(self, skf, xs_train, y_train):
        """Build stage0 models from the training set (xs_train, y_train).

        Every classifier in ``self.clfs`` is cloned and handled once per
        fold: the clone is either fitted on the fold's training part or
        restored from a previously dumped file. All clones are stored in
        ``self.all_learner`` and their predictions on the held-out part of
        each fold are gathered as training input for the stage1 model.

        Parameters
        ----------
        skf : iterable of (train_index, cv_index) pairs
            Use for cross validation blending. The folds are materialized
            once, so a one-shot iterator (e.g. a generator) is reused for
            every classifier instead of being exhausted by the first one.

        xs_train : array-like or sparse matrix of shape = [n_samples, n_features]
            The training input samples. Must support indexing with the
            index arrays yielded by ``skf``.

        y_train : array-like, shape = [n_samples]
            The target values (class labels in classification).

        Returns
        -------
        blend_train : array-like or None
            Held-out predictions of all stage0 models, for stage1 model
            training. ``None`` when ``self.clfs`` is empty.
        blend_test : None
            This method never fills it; it is returned only to keep the
            two-value return interface.
        """
        # The same folds are needed for every classifier. Iterating `skf`
        # directly inside the classifier loop would leave nothing for the
        # second and later classifiers when `skf` is a generator.
        folds = list(skf)

        blend_train = None
        blend_test = None
        for j, clf in enumerate(self.clfs):
            self._out_to_console('Training classifier [{0}]'.format(j), 0)
            # Key is unique per position, so two classifiers of the same
            # type do not share a list.
            all_learner_key = str(type(clf)) + str(j)
            self.all_learner[all_learner_key] = []
            blend_train_j = None
            for i, (train_index, cv_index) in enumerate(folds):
                now_learner = clone(clf)
                self.all_learner[all_learner_key].append(now_learner)
                xs_now_train = xs_train[train_index]
                y_now_train = y_train[train_index]
                xs_cv = xs_train[cv_index]

                if not hasattr(now_learner, 'id'):
                    now_learner.id = self.get_stage0_id(now_learner)

                dump_file = util.get_cache_file(now_learner.id,
                                                cv_index,
                                                suffix='pkl')
                # NOTE(review): the existence check is delegated to
                # self._is_saved; confirm that it implies `dump_file`
                # itself exists before joblib.load is reached.
                if self.save_stage0 and self._is_saved(now_learner, cv_index):
                    print('Prediction cache exists: skip fitting.')
                    now_learner = joblib.load(dump_file)
                    # Replace the unfitted clone registered above.
                    self.all_learner[all_learner_key][-1] = now_learner
                else:
                    self._out_to_console('Fold [{0}]'.format(i), 0)
                    now_learner.fit(xs_now_train, y_now_train)
                    if self.save_stage0:
                        joblib.dump(now_learner, dump_file, compress=True)

                # The buffer is created lazily because its initializer
                # needs a learner (fitted or loaded) as argument.
                if blend_train_j is None:
                    blend_train_j = self._get_blend_init(y_train, now_learner)
                blend_train_j[cv_index] = self._get_child_predict(
                    now_learner, xs_cv, cv_index)
            # Presumably appends this classifier's predictions as new
            # column(s) -- verify against numpy_c_concatenate.
            blend_train = numpy_c_concatenate(blend_train, blend_train_j)
        return blend_train, blend_test
Пример #2
0
    def _fit_child(self, skf, xs_train, y_train):
        """Build stage0 models from the training set (xs_train, y_train).

        For each classifier in ``self.clfs`` and each cross validation
        fold, a clone of the classifier is fitted on the fold's training
        part (or restored from a previously dumped file), stored in
        ``self.all_learner``, and used to predict the fold's held-out part.
        Those held-out predictions become the stage1 training input.

        Parameters
        ----------
        skf : iterable of (train_index, cv_index) pairs
            Use for cross validation blending. It is converted to a list
            up front so that a one-shot iterator (e.g. a generator) still
            provides its folds to every classifier, not just the first.

        xs_train : array-like or sparse matrix of shape = [n_samples, n_features]
            The training input samples. Must support indexing with the
            index arrays yielded by ``skf``.

        y_train : array-like, shape = [n_samples]
            The target values (class labels in classification).

        Returns
        -------
        blend_train : array-like or None
            Held-out predictions of all stage0 models, for stage1 model
            training. ``None`` when ``self.clfs`` is empty.
        blend_test : None
            Never populated by this method; kept so that the method still
            returns a pair.
        """
        # Materialize once: the inner loop below runs once per classifier,
        # and a generator would be empty from the second classifier on.
        folds = list(skf)

        blend_train = None
        blend_test = None
        for j, clf in enumerate(self.clfs):
            self._out_to_console('Training classifier [{0}]'.format(j), 0)
            # Position is part of the key so that classifiers of the same
            # type get separate entries.
            all_learner_key = str(type(clf)) + str(j)
            self.all_learner[all_learner_key] = []
            blend_train_j = None
            for i, (train_index, cv_index) in enumerate(folds):
                now_learner = clone(clf)
                self.all_learner[all_learner_key].append(now_learner)
                xs_now_train = xs_train[train_index]
                y_now_train = y_train[train_index]
                xs_cv = xs_train[cv_index]

                if not hasattr(now_learner, 'id'):
                    now_learner.id = self.get_stage0_id(now_learner)

                dump_file = util.get_cache_file(now_learner.id,
                                                cv_index,
                                                suffix='pkl')
                # NOTE(review): self._is_saved decides whether the cache
                # is usable; confirm it guarantees `dump_file` exists.
                if self.save_stage0 and self._is_saved(now_learner, cv_index):
                    print('Prediction cache exists: skip fitting.')
                    now_learner = joblib.load(dump_file)
                    # Swap the unfitted clone appended above for the
                    # loaded model.
                    self.all_learner[all_learner_key][-1] = now_learner
                else:
                    self._out_to_console('Fold [{0}]'.format(i), 0)
                    now_learner.fit(xs_now_train, y_now_train)
                    if self.save_stage0:
                        joblib.dump(now_learner, dump_file, compress=True)

                # Created on the first fold only, since the initializer
                # takes a learner as argument.
                if blend_train_j is None:
                    blend_train_j = self._get_blend_init(y_train, now_learner)
                blend_train_j[cv_index] = self._get_child_predict(now_learner, xs_cv, cv_index)
            # Presumably a column-wise concatenation that tolerates a None
            # left operand -- verify against numpy_c_concatenate.
            blend_train = numpy_c_concatenate(blend_train, blend_train_j)
        return blend_train, blend_test
Пример #3
0
 def fit(self, xs_train, y_train, index=None):
     """Fit the wrapped estimator, or restore it from a dumped file.

     Parameters
     ----------
     xs_train
         Training inputs, forwarded to ``self.estimator.fit``.
     y_train
         Training targets, forwarded to ``self.estimator.fit``.
     index : optional
         Passed to ``util.get_cache_file`` to locate the dump file.
         When ``None``, no dump file is read or written.

     Returns
     -------
     self
     """
     caching = index is not None
     cache_path = ""
     if caching:
         cache_path = util.get_cache_file(self.estimator.id,
                                          index,
                                          cache_dir=self.cache_dir,
                                          suffix='pkl')

     # With no index the path is "", which is never an existing file,
     # so the estimator is always fitted in that case.
     reuse = self.skip_refit and os.path.isfile(cache_path)
     if not reuse:
         self.estimator.fit(xs_train, y_train)
         if caching:
             joblib.dump(self.estimator, cache_path, compress=True)
         return self

     if caching:
         self.estimator = joblib.load(cache_path)
     return self
Пример #4
0
 def fit(self, xs_train, y_train, index=None):
     """Train ``self.estimator`` unless a dumped copy can be loaded instead.

     A dump file is only considered when ``index`` is given: its path is
     obtained from ``util.get_cache_file``. If ``self.skip_refit`` is set
     and that file exists, the estimator is loaded from it; otherwise the
     estimator is fitted on (xs_train, y_train) and, when ``index`` is
     given, dumped to that path.

     Returns
     -------
     self
     """
     has_index = index is not None
     pkl_path = (
         util.get_cache_file(self.estimator.id,
                             index,
                             cache_dir=self.cache_dir,
                             suffix='pkl')
         if has_index else ""
     )

     if self.skip_refit and os.path.isfile(pkl_path):
         # Only reachable with a real path; the guard mirrors the
         # write side below.
         if has_index:
             self.estimator = joblib.load(pkl_path)
         return self

     self.estimator.fit(xs_train, y_train)
     if has_index:
         joblib.dump(self.estimator, pkl_path, compress=True)
     return self
Пример #5
0
 def _is_saved(self, model, index):
     """Return True if the cache file for (model, index) exists on disk.

     The file path comes from ``util.get_cache_file``, called with the
     stage0 id of ``model`` and ``index`` and otherwise default arguments.
     """
     cache_path = util.get_cache_file(self.get_stage0_id(model), index)
     return os.path.isfile(cache_path)
Пример #6
0
 def _is_saved(self, model, index):
     """Tell whether a cache file already exists for ``model`` and ``index``.

     The path is resolved by ``util.get_cache_file`` from the model's
     stage0 id and ``index``; the result is ``os.path.isfile`` of it.
     """
     stage0_id = self.get_stage0_id(model)
     cache_file = util.get_cache_file(stage0_id, index)
     exists = os.path.isfile(cache_file)
     return exists