Example #1
    def mutate_params(self, **kwargs):
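        # Delegate the mutation to a throwaway LightGBMModel built from the same inputs,
        # then fold its mutated and transcribed params back into self.params below.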
        fake_lgbm_model = LightGBMModel(**self.input_dict)
        fake_lgbm_model.params = self.params.copy()
        fake_lgbm_model.params_base = self.params_base.copy()
        fake_lgbm_model.params.update(fake_lgbm_model.params_base)
        kwargs['train_shape'] = kwargs.get('train_shape', (10000, 500))
        fake_lgbm_model.mutate_params(**kwargs)
        self.params.update(fake_lgbm_model.params)
        fake_lgbm_model.transcribe_params(params=self.params)
        self.params.update(fake_lgbm_model.lightgbm_params)

        # see what else can mutate; need to know which things we don't want to preserve
        uses_gpus, n_gpus = self.get_uses_gpus(self.params)
        if not uses_gpus:
            self.params['colsample_bylevel'] = MainModel.get_one(
                [0.3, 0.5, 0.9, 1.0])

        if not (uses_gpus and self.num_classes > 2):
            self.params['boosting_type'] = MainModel.get_one(
                ['Plain', 'Ordered'])

        if self._can_handle_categorical:
            max_cat_to_onehot_list = [
                4, 10, 20, 40, config.max_int_as_cat_uniques
            ]
            self.params['one_hot_max_size'] = MainModel.get_one(
                max_cat_to_onehot_list)
            if uses_gpus:
                self.params['one_hot_max_size'] = min(
                    self.params['one_hot_max_size'], 255)
            else:
                self.params['one_hot_max_size'] = min(
                    self.params['one_hot_max_size'], 65535)

        if not uses_gpus:
            self.params['sampling_frequency'] = MainModel.get_one(
                ['PerTree', 'PerTreeLevel', 'PerTreeLevel', 'PerTreeLevel'])

        bootstrap_type_list = [
            'Bayesian', 'Bayesian', 'Bayesian', 'Bayesian', 'Bernoulli', 'MVS',
            'Poisson', 'No'
        ]
        if not uses_gpus:
            bootstrap_type_list.remove('Poisson')
        if uses_gpus:
            bootstrap_type_list.remove('MVS')  # undocumented CPU only
        self.params['bootstrap_type'] = MainModel.get_one(bootstrap_type_list)

        if self.params['bootstrap_type'] in ['Poisson', 'Bernoulli']:
            # will get popped if not Poisson/Bernoulli
            self.params['subsample'] = MainModel.get_one([0.5, 0.66, 0.66, 0.9])

        if self.params['bootstrap_type'] in ['Bayesian']:
            self.params['bagging_temperature'] = MainModel.get_one(
                [0, 0.1, 0.5, 0.9, 1.0])
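Note: MainModel.get_one belongs to the surrounding framework and is not shown here. The minimal stand-in below is a hypothetical sketch, assuming get_one simply draws one value at random; it only illustrates why entries such as 'Bayesian' are repeated in the lists above (duplicates act as weights on the draw).

import random

def get_one(values):
    # hypothetical stand-in for MainModel.get_one, assuming a uniform random draw
    return random.choice(values)

bootstrap_type_list = ['Bayesian'] * 4 + ['Bernoulli', 'MVS', 'Poisson', 'No']
print(get_one(bootstrap_type_list))  # 'Bayesian' is drawn with probability 4/8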
Example #2
    def mutate_params(self, **kwargs):
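        # As in Example #1: delegate the mutation to a throwaway LightGBMModel,
        # then fold its mutated and transcribed params back into self.params below.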
        fake_lgbm_model = LightGBMModel(**self.input_dict)
        fake_lgbm_model.params = self.params.copy()
        fake_lgbm_model.params_base = self.params_base.copy()
        for k, v in fake_lgbm_model.params_base.items():
            if k in fake_lgbm_model.params:
                fake_lgbm_model.params[k] = fake_lgbm_model.params_base[k]
        kwargs['train_shape'] = kwargs.get('train_shape', (10000, 500))
        kwargs['from_catboost'] = True
        fake_lgbm_model.mutate_params(**kwargs)
        self.params.update(fake_lgbm_model.params)
        fake_lgbm_model.transcribe_params(params=self.params, **kwargs)
        self.params.update(fake_lgbm_model.lightgbm_params)

        get_best = kwargs.get('get_best', True)
        if get_best is None:
            get_best = True
        trial = kwargs.get('trial', False)
        if trial is None:
            trial = False

        # see what else can mutate; need to know which things we don't want to preserve
        uses_gpus, n_gpus = self.get_uses_gpus(self.params)
        if not uses_gpus:
            colsample_bylevel_list = [0.3, 0.5, 0.9, 1.0]
            self.params['colsample_bylevel'] = MainModel.get_one(
                colsample_bylevel_list,
                get_best=get_best,
                best_type="first",
                name="colsample_bylevel",
                trial=trial)

        if not (uses_gpus and self.num_classes > 2):
            boosting_type_list = ['Plain', 'Ordered']
            self.params['boosting_type'] = MainModel.get_one(
                boosting_type_list,
                get_best=get_best,
                best_type="first",
                name="boosting_type",
                trial=trial)

        if self._can_handle_categorical:
            max_cat_to_onehot_list = [
                4, 10, 20, 40, config.max_int_as_cat_uniques
            ]
            if uses_gpus:
                max_one_hot_max_size = 255
            else:
                max_one_hot_max_size = 65535
            max_cat_to_onehot_list = sorted(
                set([
                    min(x, max_one_hot_max_size)
                    for x in max_cat_to_onehot_list
                ]))
            log = max(max_cat_to_onehot_list) > 1000
            self.params['one_hot_max_size'] = MainModel.get_one(
                max_cat_to_onehot_list,
                get_best=get_best,
                best_type="max",
                name="one_hot_max_size",
                trial=trial,
                log=log)

        if not uses_gpus:
            sampling_frequency_list = [
                'PerTree', 'PerTreeLevel', 'PerTreeLevel', 'PerTreeLevel'
            ]
            self.params['sampling_frequency'] = MainModel.get_one(
                sampling_frequency_list,
                get_best=get_best,
                best_type="first",
                name="sampling_frequency",
                trial=trial)

        bootstrap_type_list = [
            'Bayesian', 'Bayesian', 'Bayesian', 'Bayesian', 'Bernoulli', 'MVS',
            'Poisson', 'No'
        ]
        if not uses_gpus:
            bootstrap_type_list.remove('Poisson')
        if uses_gpus:
            bootstrap_type_list.remove('MVS')  # undocumented CPU only
        self.params['bootstrap_type'] = MainModel.get_one(
            bootstrap_type_list,
            get_best=get_best,
            best_type="first",
            name="bootstrap_type",
            trial=trial)

        # lgbm usage already sets subsample
        # if self.params['bootstrap_type'] in ['Poisson', 'Bernoulli']:
        #     subsample_list = [0.5, 0.66, 0.66, 0.9]
        #     # will get popped if not Poisson/Bernoulli
        #     self.params['subsample'] = MainModel.get_one(
        #         subsample_list, get_best=get_best, best_type="first",
        #         name="subsample", trial=trial)

        if self.params['bootstrap_type'] in ['Bayesian']:
            bagging_temperature_list = [0.0, 0.1, 0.5, 0.9, 1.0]
            self.params['bagging_temperature'] = MainModel.get_one(
                bagging_temperature_list,
                get_best=get_best,
                best_type="first",
                name="bagging_temperature",
                trial=trial)

        # overfitting protection (od_*) sometimes behaves differently from early_stopping_rounds
        # self.params['od_type']
        # self.params['od_pval']
        # self.params['od_wait']
        self.params['learning_rate'] = max(config.min_learning_rate,
                                           self._min_learning_rate_catboost,
                                           self.params['learning_rate'])
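The final clamp applies two lower bounds at once. A standalone sketch of the same pattern, with hypothetical floor values standing in for config.min_learning_rate and self._min_learning_rate_catboost:

def clamp_learning_rate(lr, global_floor=0.01, catboost_floor=0.005):
    # hypothetical floors; the learning rate is never allowed to fall below either one
    return max(global_floor, catboost_floor, lr)

assert clamp_learning_rate(0.001) == 0.01  # raised to the larger floor
assert clamp_learning_rate(0.05) == 0.05   # unchanged when already above both floors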