Example #1
    def set_default_params(self,
                           accuracy=10, time_tolerance=10, interpretability=1,
                           **kwargs):
        # https://catboost.ai/docs/concepts/python-reference_parameters-list.html
        # https://catboost.ai/docs/concepts/python-reference_catboostclassifier.html
        # optimized for the final model, as transcribed from the best LightGBM model
        n_estimators = self.params_base.get('n_estimators', 100)
        learning_rate = self.params_base.get('learning_rate', config.min_learning_rate)
        early_stopping_rounds_default = min(500, max(1, int(n_estimators / 4)))
        early_stopping_rounds = self.params_base.get('early_stopping_rounds', early_stopping_rounds_default)
        self.params = {'bootstrap_type': 'Bayesian',
                       'n_estimators': n_estimators,
                       'learning_rate': learning_rate,
                       'early_stopping_rounds': early_stopping_rounds,
                       'max_depth': 8
                      }

        dummy = kwargs.get('dummy', False)
        ensemble_level = kwargs.get('ensemble_level', 0)
        train_shape = kwargs.get('train_shape', (1, 1))
        valid_shape = kwargs.get('valid_shape', (1, 1))
        self.get_gbm_main_params_evolution(self.params, dummy, accuracy,
                                           self.num_classes,
                                           ensemble_level, train_shape,
                                           valid_shape)

        # self.params['has_time']  # should be set for time-series (TS) problems

        if self._can_handle_categorical:
            # less than 2 is risky, can get stuck in learning
            max_cat_to_onehot_list = [4, 10, 20, 40, config.max_int_as_cat_uniques]
            self.params['one_hot_max_size'] = MainModel.get_one(max_cat_to_onehot_list, get_best=True)
            uses_gpus, n_gpus = self.get_uses_gpus(self.params)
            if uses_gpus:
                self.params['one_hot_max_size'] = min(self.params['one_hot_max_size'], 255)
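MainModel.get_one is not defined on this page. As a rough sketch of its apparent semantics, inferred only from the call sites above (so the signature and behavior here are assumptions, not the real helper): it draws one candidate from a list, and get_best=True makes the choice deterministic instead of random.

import random

def get_one(values, get_best=False, best_type="first", **kwargs):
    # Hypothetical stand-in for MainModel.get_one, not the real helper.
    # With get_best=True, return a deterministic "best" candidate
    # (first entry, or the maximum when best_type == "max"); otherwise
    # sample uniformly, so duplicated entries act as sampling weights.
    if get_best:
        return max(values) if best_type == "max" else values[0]
    return random.choice(values)

# Under this sketch, the one_hot_max_size choice above returns 4
# deterministically when get_best=True with the default best_type.
print(get_one([4, 10, 20, 40, 10000], get_best=True))  # -> 4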
Example #2
    def mutate_params(self, **kwargs):
        fake_lgbm_model = LightGBMModel(**self.input_dict)
        fake_lgbm_model.params = self.params.copy()
        fake_lgbm_model.params_base = self.params_base.copy()
        fake_lgbm_model.params.update(fake_lgbm_model.params_base)
        kwargs['train_shape'] = kwargs.get('train_shape', (10000, 500))
        fake_lgbm_model.mutate_params(**kwargs)
        self.params.update(fake_lgbm_model.params)
        fake_lgbm_model.transcribe_params(params=self.params)
        self.params.update(fake_lgbm_model.lightgbm_params)

        # see what else can mutate; need to know which settings we don't want to preserve
        uses_gpus, n_gpus = self.get_uses_gpus(self.params)
        if not uses_gpus:
            self.params['colsample_bylevel'] = MainModel.get_one(
                [0.3, 0.5, 0.9, 1.0])

        if not (uses_gpus and self.num_classes > 2):
            self.params['boosting_type'] = MainModel.get_one(
                ['Plain', 'Ordered'])

        if self._can_handle_categorical:
            max_cat_to_onehot_list = [
                4, 10, 20, 40, config.max_int_as_cat_uniques
            ]
            self.params['one_hot_max_size'] = MainModel.get_one(
                max_cat_to_onehot_list)
            if uses_gpus:
                self.params['one_hot_max_size'] = min(
                    self.params['one_hot_max_size'], 255)
            else:
                self.params['one_hot_max_size'] = min(
                    self.params['one_hot_max_size'], 65535)

        if not uses_gpus:
            self.params['sampling_frequency'] = MainModel.get_one(
                ['PerTree', 'PerTreeLevel', 'PerTreeLevel', 'PerTreeLevel'])

        bootstrap_type_list = [
            'Bayesian', 'Bayesian', 'Bayesian', 'Bayesian', 'Bernoulli', 'MVS',
            'Poisson', 'No'
        ]
        if not uses_gpus:
            bootstrap_type_list.remove('Poisson')
        if uses_gpus:
            bootstrap_type_list.remove('MVS')  # undocumented, CPU-only
        self.params['bootstrap_type'] = MainModel.get_one(bootstrap_type_list)

        if self.params['bootstrap_type'] in ['Poisson', 'Bernoulli']:
            # will get popped later if bootstrap_type ends up as neither Poisson nor Bernoulli
            self.params['subsample'] = MainModel.get_one([0.5, 0.66, 0.66, 0.9])

        if self.params['bootstrap_type'] in ['Bayesian']:
            self.params['bagging_temperature'] = MainModel.get_one(
                [0, 0.1, 0.5, 0.9, 1.0])
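Note that the repeated entries in candidate lists such as bootstrap_type_list act as an implicit weighting scheme: assuming get_one samples uniformly over list positions, 'Bayesian' (4 of 8 entries) is drawn about half the time. A quick self-contained check of that weighting:

from collections import Counter
import random

bootstrap_type_list = ['Bayesian', 'Bayesian', 'Bayesian', 'Bayesian',
                       'Bernoulli', 'MVS', 'Poisson', 'No']
# Simulate 100k uniform draws over list positions; 'Bayesian' should land
# near 50%, and each of the other four options near 12.5%.
draws = Counter(random.choice(bootstrap_type_list) for _ in range(100_000))
print(draws['Bayesian'] / 100_000)  # ~0.5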
Example #3
    def set_default_params(self,
                           accuracy=10,
                           time_tolerance=10,
                           interpretability=1,
                           **kwargs):
        # https://catboost.ai/docs/concepts/python-reference_parameters-list.html
        # https://catboost.ai/docs/concepts/python-reference_catboostclassifier.html
        # optimized for the final model, as transcribed from the best LightGBM model
        n_estimators = self.params_base.get('n_estimators', 100)
        learning_rate = self.params_base.get('learning_rate',
                                             config.min_learning_rate)
        early_stopping_rounds_default = min(500, max(1, int(n_estimators / 4)))
        early_stopping_rounds = self.params_base.get(
            'early_stopping_rounds', early_stopping_rounds_default)
        self.params = {
            'bootstrap_type': 'Bayesian',
            'n_estimators': n_estimators,
            'learning_rate': learning_rate,
            'early_stopping_rounds': early_stopping_rounds,
            'max_depth': 8,
            'grow_policy': 'Depthwise',  # valid values: SymmetricTree, Depthwise, Lossguide
        }

        dummy = kwargs.get('dummy', False)
        ensemble_level = kwargs.get('ensemble_level', 0)
        train_shape = kwargs.get('train_shape', (1, 1))
        valid_shape = kwargs.get('valid_shape', (1, 1))
        self.get_gbm_main_params_evolution(params=self.params,
                                           dummy=dummy,
                                           accuracy=accuracy,
                                           num_classes=self.num_classes,
                                           ensemble_level=ensemble_level,
                                           train_shape=train_shape,
                                           valid_shape=valid_shape)

        for k in kwargs:
            if k in self.params:
                self.params[k] = copy.deepcopy(kwargs[k])

        # self.params['has_time']  # should be set for time-series (TS) problems

        if self._can_handle_categorical:
            # less than 2 is risky, can get stuck in learning
            max_cat_to_onehot_list = [
                4, 10, 20, 40, config.max_int_as_cat_uniques
            ]
            self.params['one_hot_max_size'] = MainModel.get_one(
                max_cat_to_onehot_list, get_best=True)
            uses_gpus, n_gpus = self.get_uses_gpus(self.params)
            if uses_gpus:
                self.params['one_hot_max_size'] = min(
                    self.params['one_hot_max_size'], 255)
            else:
                self.params['one_hot_max_size'] = min(
                    self.params['one_hot_max_size'], 65535)

        self.params['learning_rate'] = max(self._min_learning_rate_catboost,
                                           self.params['learning_rate'])

        # fill mutable params with the best values for anything the defaults didn't set
        params = copy.deepcopy(self.params)
        self.mutate_params(accuracy=accuracy,
                           time_tolerance=time_tolerance,
                           interpretability=interpretability,
                           get_best=True,
                           **kwargs)
        params_from_mutate = copy.deepcopy(self.params)
        for k in params_from_mutate:
            if k not in params:
                params[k] = params_from_mutate[k]
        self.params = copy.deepcopy(params)
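The closing block is a "defaults win, mutation fills the gaps" merge: mutate_params(get_best=True) runs once, but only keys the defaults never set are kept. A minimal standalone sketch of that pattern (the function and variable names below are illustrative, not from the original class):

import copy

def merge_defaults_with_mutation(defaults, mutated):
    # Start from the explicit defaults, then adopt only those mutated
    # keys the defaults never defined, mirroring the tail of
    # set_default_params above.
    merged = copy.deepcopy(defaults)
    for k, v in mutated.items():
        if k not in merged:
            merged[k] = copy.deepcopy(v)
    return merged

defaults = {'max_depth': 8, 'learning_rate': 0.05}
mutated = {'max_depth': 6, 'bootstrap_type': 'Bernoulli'}
print(merge_defaults_with_mutation(defaults, mutated))
# -> {'max_depth': 8, 'learning_rate': 0.05, 'bootstrap_type': 'Bernoulli'}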
Example #4
    def mutate_params(self, **kwargs):
        fake_lgbm_model = LightGBMModel(**self.input_dict)
        fake_lgbm_model.params = self.params.copy()
        fake_lgbm_model.params_base = self.params_base.copy()
        for k, v in fake_lgbm_model.params_base.items():
            if k in fake_lgbm_model.params:
                fake_lgbm_model.params[k] = fake_lgbm_model.params_base[k]
        kwargs['train_shape'] = kwargs.get('train_shape', (10000, 500))
        kwargs['from_catboost'] = True
        fake_lgbm_model.mutate_params(**kwargs)
        self.params.update(fake_lgbm_model.params)
        fake_lgbm_model.transcribe_params(params=self.params, **kwargs)
        self.params.update(fake_lgbm_model.lightgbm_params)

        get_best = kwargs.get('get_best', True)
        if get_best is None:
            get_best = True
        trial = kwargs.get('trial', False)
        if trial is None:
            trial = False

        # see what else can mutate; need to know which settings we don't want to preserve
        uses_gpus, n_gpus = self.get_uses_gpus(self.params)
        if not uses_gpus:
            colsample_bylevel_list = [0.3, 0.5, 0.9, 1.0]
            self.params['colsample_bylevel'] = MainModel.get_one(
                colsample_bylevel_list,
                get_best=get_best,
                best_type="first",
                name="colsample_bylevel",
                trial=trial)

        if not (uses_gpus and self.num_classes > 2):
            boosting_type_list = ['Plain', 'Ordered']
            self.params['boosting_type'] = MainModel.get_one(
                boosting_type_list,
                get_best=get_best,
                best_type="first",
                name="boosting_type",
                trial=trial)

        if self._can_handle_categorical:
            max_cat_to_onehot_list = [
                4, 10, 20, 40, config.max_int_as_cat_uniques
            ]
            if uses_gpus:
                max_one_hot_max_size = 255
            else:
                max_one_hot_max_size = 65535
            max_cat_to_onehot_list = sorted(
                set([
                    min(x, max_one_hot_max_size)
                    for x in max_cat_to_onehot_list
                ]))
            log = max(max_cat_to_onehot_list) > 1000
            self.params['one_hot_max_size'] = MainModel.get_one(
                max_cat_to_onehot_list,
                get_best=get_best,
                best_type="max",
                name="one_hot_max_size",
                trial=trial,
                log=log)

        if not uses_gpus:
            sampling_frequency_list = [
                'PerTree', 'PerTreeLevel', 'PerTreeLevel', 'PerTreeLevel'
            ]
            self.params['sampling_frequency'] = MainModel.get_one(
                sampling_frequency_list,
                get_best=get_best,
                best_type="first",
                name="sampling_frequency",
                trial=trial)

        bootstrap_type_list = [
            'Bayesian', 'Bayesian', 'Bayesian', 'Bayesian', 'Bernoulli', 'MVS',
            'Poisson', 'No'
        ]
        if not uses_gpus:
            bootstrap_type_list.remove('Poisson')
        if uses_gpus:
            bootstrap_type_list.remove('MVS')  # undocumented, CPU-only
        self.params['bootstrap_type'] = MainModel.get_one(
            bootstrap_type_list,
            get_best=get_best,
            best_type="first",
            name="bootstrap_type",
            trial=trial)

        # the LightGBM transcription above already sets subsample, so skip it here
        # if self.params['bootstrap_type'] in ['Poisson', 'Bernoulli']:
        #     subsample_list = [0.5, 0.66, 0.66, 0.9]
        #     # will get popped later if bootstrap_type is neither Poisson nor Bernoulli
        #     self.params['subsample'] = MainModel.get_one(subsample_list, get_best=get_best, best_type="first", name="subsample", trial=trial)

        if self.params['bootstrap_type'] in ['Bayesian']:
            bagging_temperature_list = [0.0, 0.1, 0.5, 0.9, 1.0]
            self.params['bagging_temperature'] = MainModel.get_one(
                bagging_temperature_list,
                get_best=get_best,
                best_type="first",
                name="bagging_temperature",
                trial=trial)

        # overfitting detector: can behave differently from early_stopping_rounds
        # self.params['od_type']
        # self.params['od_pval']
        # self.params['od_wait']
        self.params['learning_rate'] = max(
            config.min_learning_rate,
            max(self._min_learning_rate_catboost,
                self.params['learning_rate']))
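The final statement clamps the learning rate with two nested max() calls, which is equivalent to a single max() over all three values. A tiny demonstration with stand-in constants (the real floors come from config.min_learning_rate and self._min_learning_rate_catboost, whose values are not shown on this page):

# Stand-in values; the real ones are configuration-dependent.
min_learning_rate = 0.01            # stand-in for config.min_learning_rate
min_learning_rate_catboost = 0.005  # stand-in for self._min_learning_rate_catboost
learning_rate = 0.001

# max(a, max(b, x)) == max(a, b, x)
learning_rate = max(min_learning_rate, min_learning_rate_catboost, learning_rate)
print(learning_rate)  # -> 0.01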