Example #1
import numpy as np
from lightgbm import LGBMRegressor
from sklearn.model_selection import cross_val_score


def evaluate(params, X, y):
    
    # Initialize estimator instance
    est = LGBMRegressor(boosting_type='gbdt', n_jobs=-1, random_state=2018)
        
    # Set params
    est.set_params(**params)
    
    # Calc CV score
    scores = cross_val_score(estimator=est, X=X, y=y, 
                             scoring='r2', cv=4)
    score = np.mean(scores)

    return score
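For context, a minimal sketch (not from the original) of how evaluate() is typically handed to hyperopt: fmin minimizes its objective, so the R2 score is negated, which is why Example #2 below recovers scores as -loss. The search space and max_evals here are illustrative assumptions.

from hyperopt import Trials, fmin, hp, tpe

# Illustrative search space; the original hyper_space is not shown
hyper_space = {
    'n_estimators': hp.choice('n_estimators', [100, 300, 500]),
    'num_leaves': hp.choice('num_leaves', [31, 63, 127]),
}

trials = Trials()
best_vals = fmin(fn=lambda params: -evaluate(params, X_train, y_train),
                 space=hyper_space,
                 algo=tpe.suggest,
                 max_evals=20,  # assumed budget
                 trials=trials,
                 rstate=np.random.RandomState(seed=2018))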
Example #2
import numpy as np
from hyperopt import space_eval
from lightgbm import LGBMRegressor
from sklearn.metrics import r2_score

# ... tail of the fmin(...) call that produced best_vals and trials
# (the opening of the call is missing from this excerpt):
                 rstate=np.random.RandomState(seed=2018))

# Print best parameters
best_params = space_eval(hyper_space, best_vals)
print("BEST PARAMETERS: " + str(best_params))

# Print best CV score
scores = [-trial['result']['loss'] for trial in trials.trials]
print("BEST CV SCORE: " + str(np.max(scores)))

# Print execution time in minutes
tdiff = trials.trials[-1]['book_time'] - trials.trials[0]['book_time']
print("ELAPSED TIME (MIN): " + str(tdiff.total_seconds() / 60))

# Set best params (est is assumed to already exist in the original fragment;
# re-created here with the same constructor used in evaluate)
est = LGBMRegressor(boosting_type='gbdt', n_jobs=-1, random_state=2018)
est.set_params(**best_params)

# Fit
est.fit(X_train, y_train)

# Predict
y_pred = est.predict(X_test)

# Evaluate
score = r2_score(y_test, y_pred)
print("R2 SCORE ON TEST DATA: {}".format(score))

#==============================================================================
# Tree structure of hyperparameter space (Optional)
#============================================================================== 
# You must change the evaluate function to extract learning_rate and
# n_estimators from the choices. Add the following code at the start of
# the evaluate function:
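The code that comment refers to is missing from this excerpt. A hedged sketch of such an extraction, assuming the tree-structured space nests learning_rate and n_estimators inside a 'boosting' choice (the nesting below is hypothetical):

    # Hypothetical nesting: params['boosting'] is the dict picked by hp.choice
    boosting = params.pop('boosting')
    params['boosting'] = boosting['boosting']
    params['learning_rate'] = boosting['learning_rate']
    params['n_estimators'] = int(boosting['n_estimators'])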
Example #3
import numpy as np
from hyperopt import Trials, fmin, hp, space_eval, tpe
from hyperopt.mongoexp import MongoTrials
from hyperopt.pyll import scope
from lightgbm import LGBMRegressor
# BaseModel and generate_objective are assumed to come from the
# surrounding project and are not shown in this excerpt.


class LightGBM(BaseModel):
    """LightGBM regressor wrapper."""
    def __init__(self,
                 tuning_metric='mse',
                 trials='trials',
                 bottom_coding=None,
                 transform=None,
                 **kwargs):
        """Initialize hyperparameters."""
        super(LightGBM, self).__init__(bottom_coding=bottom_coding,
                                       transform=transform)
        self.model = LGBMRegressor
        self.tuning_metric = tuning_metric
        # max_evals is used in tune() but was never set in the original;
        # the default of 50 is an assumption.
        self.max_evals = kwargs.get('max_evals', 50)
        self.trials = Trials() \
            if trials == 'trials' \
            else MongoTrials('mongo://localhost:1234/foo_db/jobs',
                             exp_key='exp1')
        self.set_parameters()

    def set_parameters(self):
        """Set the model hyperparameter sweep."""
        self.space = {
            'objective': self.tuning_metric,
            'device': 'gpu',
            'min_data_in_leaf': hp.choice('min_data_in_leaf',
                                          [100, 1000, 300]),
            'boosting_type': hp.choice('boosting_type', ['gbdt']),
            'num_leaves': scope.int(hp.quniform('num_leaves', 30, 250, 1)),
            'learning_rate': hp.loguniform('learning_rate',
                                           np.log(0.01), np.log(0.2)),
            'subsample_for_bin': scope.int(
                hp.quniform('subsample_for_bin', 20000, 300000, 20000)),
            'reg_alpha': hp.uniform('reg_alpha', 0.0, 1.0),
            'reg_lambda': hp.uniform('reg_lambda', 0.0, 1.0),
            # label fixed: was 'colsample_by_tree', mismatching the dict key
            'colsample_bytree': hp.uniform('colsample_bytree', 0.6, 1.0),
        }

    def tune(self, training_set, logger=None, saver=None):
        """Run the TPE search, then refit the best model on all training data."""
        self.training_set = training_set
        objective = generate_objective(self.training_set, self.tuning_metric)
        best = space_eval(
            self.space,
            fmin(fn=objective,
                 space=self.space,
                 trials=self.trials,
                 algo=tpe.suggest,
                 max_evals=self.max_evals))

        print(f'Search space: {self.space}')
        print(f'Best hyperparams: {best}')

        self.model = LGBMRegressor()
        self.model.set_params(**best)
        self.model.fit(training_set.X, training_set.y)

    def instantiate_model(self, params):
        model = LGBMRegressor()
        model.set_params(**params)
        return model
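A hedged usage sketch (not part of the original source); it assumes a training_set object exposing X and y, as tune() requires, and the max_evals keyword added in __init__ above:

model = LightGBM(tuning_metric='mse', max_evals=50)
model.tune(training_set)
predictions = model.model.predict(training_set.X)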
Example #4
import lightgbm as lgb
import numpy as np
import pandas as pd

# ... tail of the lgbm_model = lgb.LGBMClassifier(...) constructor
# (the opening of the call is missing from this excerpt):
                               min_child_weight=0.001,
                               min_child_samples=20,
                               min_split_gain=0.1,
                               subsample=0.8,
                               colsample_bytree=0.8,
                               objective='binary',
                               random_state=7)

lgbm_param = lgbm_model.get_params()
lgbm_train = lgb.Dataset(X,Y)

'''Use cross-validation to determine the optimal number of trees.'''
cvresult = lgb.cv(lgbm_param, lgbm_train,
                  num_boost_round=lgbm_param['n_estimators'],
                  nfold=5, metrics='auc', early_stopping_rounds=100)
best_n_estimators = len(cvresult['auc-mean'])

lgbm_model.set_params(n_estimators=best_n_estimators)
lgbm_model.fit(X, Y, eval_metric='auc')
feat_imp = pd.Series(lgbm_model.feature_importances_, index=X.columns)
feat_imp = feat_imp.sort_values(ascending=False)



# Valid features are those with a positive feature_importance, i.e. features
# that actually contribute to the LightGBM trees; the rest are unused.
valid_feature_num = len(np.where(feat_imp > 0)[0])
print(valid_feature_num)


#==============================================================================
# LGB tuning: feature_num
#==============================================================================
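The feature_num tuning code after this banner is truncated. A minimal sketch of one common approach, assuming we keep the top-k features by importance and cross-validate each candidate k (the step of 50 is an arbitrary assumption):

from sklearn.model_selection import cross_val_score

best_k, best_auc = valid_feature_num, 0.0
for k in range(50, valid_feature_num + 1, 50):
    cols = feat_imp.index[:k]  # top-k features by importance
    auc = cross_val_score(lgbm_model, X[cols], Y,
                          scoring='roc_auc', cv=5).mean()
    if auc > best_auc:
        best_k, best_auc = k, auc
print(best_k, best_auc)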