def __init__(self): self.clf = sklearn.XGBRegressor(max_depth=3, learning_rate=0.1, n_estimators=300, silent=True, objective=obj, gamma=0, min_child_weight=1, max_delta_step=0, subsample=1, colsample_bytree=1, colsample_bylevel=0.25, reg_alpha=0, reg_lambda=0.5, scale_pos_weight=1, base_score=0.5, seed=0, missing=None) self.w = np.array([ np.array([ 1.7029e-02, 1.3079e-01, 6.1581e-02, -1.6783e-02, 3.3474e-02, -2.2277e-02, -2.1690e-01, 1.1374e-01, 7.1316e-02, 3.6111e-02, -1.9211e-01, 8.9843e-02, 1.0525e-02, -8.8967e-02, -1.6134e-01, -1.0343e-01, 3.8159e-02, 1.2840e-02, 1.4358e-01, -1.2254e-01, 1.4967e-01, 3.8851e-02, 8.4922e-02, 2.1995e-02, -1.7713e-01, 4.5296e-02, 5.0263e-02, 3.5791e-05, -1.4180e-01, 1.5155e-01, -7.8438e-02, -1.0855e-01, -1.0028e-01, -5.2810e-02, 7.0936e-02, 8.6607e-02, 6.8758e-02, -1.7710e-01, 3.1382e-02, 2.7970e-01, 3.8615e-01, 2.0975e-01, 1.1192e-02, -3.1998e-01, 1.9952e-01, 4.5477e-01, -6.7926e-02, -1.2770e-01, 8.1820e-02, 1.7651e-01, 3.3767e-02, 3.8274e-01, 8.7390e-03, -4.5134e-02, -5.6199e-02, -8.8637e-02, 7.9332e-02, -1.0147e-01, 1.7228e-01, -6.2791e-02, 2.2888e-03, 5.2206e-02, 1.0851e-01, 3.7676e-02, 1.0128e-01, 1.0922e-02, -1.9359e-01, 6.2475e-02, -5.5140e-02, 2.9518e-02, -2.3585e-02, -1.1021e-01, 1.2358e-01, 3.9869e-03, -3.0878e-02, -2.9022e-02, -2.5127e-02, -5.1951e-02, 6.4713e-02, 6.3186e-02, 4.3845e-02, -3.2788e-02, 8.0593e-03, 6.9834e-02, -5.3207e-02, 8.0649e-02, -7.0133e-02, -1.1874e-01, -2.0268e-01, 3.6341e-02, -2.8456e-02, 2.5505e-01, -5.9185e-02, -1.6351e-01, 2.0862e-01, 3.9112e-01, -1.7588e-02, 3.9111e-02, 2.9766e-01, 5.3394e-01, -4.8566e-03, 6.3414e-02, 2.7350e-01, -1.6731e-01, -2.6914e-02, -1.9693e-01, 1.4585e-01, 4.4899e-02, -3.2440e-02, 4.4213e-02, 1.1280e-01, 2.1263e-01, 1.1246e-01, -5.3757e-02, -1.4070e-01, 8.6012e-02, -1.2140e-01, 7.1008e-04, 1.3947e-02, -2.5169e-02, 1.7305e-01, -3.6080e-02, -6.7890e-02, 9.9060e-02, 4.4189e-02, -1.1350e-01, 1.4912e-01, 3.4591e-02, 5.1782e-02, 1.5098e-02, 8.5624e-03, -1.0366e-01, -6.0745e-02, 1.7117e-01, -5.4439e-02, -1.2122e-01, -2.8721e-01, -2.1258e-01, 3.5069e-02, 8.1284e-02, -2.1620e-01, -3.0161e-01 ]) for i in range(n_group) ]) self.x = None self.y = None self.loss = lambda w: loss(w, self.x, self.y) self.grad = lambda w: grad(w, self.x, self.y) self.select_loss = lambda i: (lambda w: loss(w, self.x[i], self.y[i])) self.select_grad = lambda i: (lambda w: grad(w, self.x[i], self.y[i]))
def __init__(self): self.clf = sklearn.XGBRegressor(max_depth=3, learning_rate=0.1, n_estimators=300, silent=True, objective=obj, gamma=0, min_child_weight=1, max_delta_step=0, subsample=1, colsample_bytree=1, colsample_bylevel=0.25, reg_alpha=1, reg_lambda=0.5, scale_pos_weight=1, base_score=0.5, seed=0, missing=None) self.w = np.array([ 3.0521e-02, 3.3850e-03, -2.7892e-02, 9.4246e-02, 1.2712e-01, 5.6794e-02, 1.9702e-01, 3.0102e-02, 8.1020e-02, 2.2443e-03, -3.6303e-02, -9.9930e-03, -7.2356e-03, -6.6374e-03, 7.2554e-02, -1.0639e-02, -8.9164e-02, -7.6698e-02, -7.3221e-02, -2.6325e-02, 1.5297e-02, -6.1099e-03, -1.6564e-02, 1.1742e-03, -7.7687e-03, -4.0734e-02, 3.5347e-02, -8.9857e-03, -1.0205e-02, -3.5139e-02, 8.7736e-03, -2.6164e-02, -7.4057e-04, 6.9800e-02, 5.1630e-02, 8.2260e-02, -4.3334e-02, 9.5439e-02, 3.8949e-02, 2.7576e-02, -2.7300e-02, -1.9236e-02, 1.3960e-02, -9.1715e-02, -8.0246e-02, 1.6001e-01, -1.4912e-01, -1.1418e-01, -1.3520e-01, 5.8030e-02, 1.8183e-01, -3.1726e-02, -7.4795e-02, -5.3430e-02, -4.1667e-02, 2.4433e-02, -1.5640e-02, -2.0981e-02, 4.8331e-03, -2.2744e-02, 2.1778e-02, -9.1474e-03, -2.7065e-02, -1.3960e-03, 3.1320e-02, 2.4609e-02, 2.7434e-02, 1.4061e-02, -3.9493e-03, 1.7370e-02, 5.4428e-03, 4.9994e-03, 1.1100e-02, 1.3571e-02, 2.6117e-03, 3.6254e-03, 1.2581e-02, 2.2057e-02, -1.5871e-02, 1.3411e-02, -1.6218e-02, -4.9300e-02, -4.8487e-02, -6.6901e-02, -1.9708e-02, -3.6207e-02, 2.7848e-02, 3.3245e-02, -2.5913e-02, 4.8864e-02, 1.7982e-02, 7.2035e-02, 9.8399e-03, -1.2854e-01, 1.2498e-01, 2.5496e-01, 4.8815e-01, 1.2856e-02, 2.7124e-02, -1.1177e-01, -6.9739e-02, -7.9357e-02, -1.3767e-01, -3.4607e-02, -9.0663e-02, 2.0239e-03, 6.8687e-02, -2.8339e-02, -2.3041e-02, 7.7071e-03, -4.1781e-02, 3.0516e-02, 3.4045e-02, 5.5087e-02, 5.4454e-02, 1.6309e-02, 1.5335e-03, 1.3867e-02, 1.8400e-02, 3.6903e-03, 2.1292e-02, 3.8298e-02, -3.4507e-02, 2.0960e-03, 3.4506e-03, 1.3975e-02, -2.4490e-02, 2.9441e-02, -2.5951e-02, 1.5139e-02, -4.7242e-02, -1.0273e-01, 8.0461e-03, -6.2661e-02, 2.7466e-02, -4.3963e-03, -4.4565e-02, 1.3144e-02, -7.3661e-02, 5.3355e-02, -3.5869e-03, -5.7825e-02, 1.8184e-01, 3.0521e-01, 4.2624e-01 ]) self.x = None self.y = None self.loss = lambda w: loss(w, self.x, self.y) self.grad = lambda w: grad(w, self.x, self.y)
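# Both constructors above pair an XGBRegressor with a hand-initialized weight
# vector and expose `loss`/`grad` closures over `self.x`/`self.y` (plus per-group
# `select_loss`/`select_grad` in the first variant). The sketch below shows how a
# generic optimizer could consume those closures; the squared-error loss/grad and
# the use of scipy.optimize.minimize are illustrative assumptions, not this
# repository's actual training code.
import numpy as np
from scipy.optimize import minimize

def loss(w, x, y):
    # hypothetical squared-error loss for a linear model with weights w
    return 0.5 * np.mean((x @ w - y) ** 2)

def grad(w, x, y):
    # gradient of the loss above
    return x.T @ (x @ w - y) / len(y)

def fit_linear_part(model, x, y):
    # refine model.w starting from the stored warm-start weights
    model.x, model.y = x, y
    res = minimize(model.loss, model.w, jac=model.grad, method="L-BFGS-B")
    model.w = res.x
    return res.fun

def fit_groups(model, xs, ys):
    # per-group variant: refine each row of model.w on its own data slice
    model.x, model.y = xs, ys
    for i in range(len(model.w)):
        res = minimize(model.select_loss(i), model.w[i],
                       jac=model.select_grad(i), method="L-BFGS-B")
        model.w[i] = res.x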
def _xgboost_gridsearch_model(
    task,
    numeric_features,
    categoric_features,
    learning_rate,
    use_dask,
    n_iter,
    scoring,
):
    param_space = {
        'clf__max_depth': randint(2, 11),
        'clf__min_child_weight': randint(1, 11),
        'clf__subsample': uniform(0.5, 0.5),
        'clf__colsample_bytree': uniform(0.5, 0.5),
        'clf__colsample_bylevel': uniform(0.5, 0.5),
        'clf__gamma': uniform(0, 1),
        'clf__reg_alpha': uniform(0, 1),
        'clf__reg_lambda': uniform(0, 10),
        'clf__base_score': uniform(0.1, 0.9),
        'clf__scale_pos_weight': uniform(0.1, 9.9),
    }
    model = (xgbsk.XGBClassifier(learning_rate=learning_rate)
             if task == 'classification'
             else xgbsk.XGBRegressor(learning_rate=learning_rate))
    pipe = Pipeline([
        (
            'preprocessing',
            simple_proc_for_tree_algoritms(numeric_features, categoric_features),
        ),
        ('clf', model),
    ])
    if use_dask:
        from dask_ml.model_selection import RandomizedSearchCV
    else:
        from sklearn.model_selection import RandomizedSearchCV
    return RandomizedSearchCV(pipe, param_space, n_iter=n_iter,
                              scoring=scoring, cv=5)
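# The helper above returns an unfitted RandomizedSearchCV. A hedged usage sketch
# follows; the feature lists, training data, and scoring string are placeholders,
# and `simple_proc_for_tree_algoritms` is assumed to be importable in this module.
search = _xgboost_gridsearch_model(
    task='regression',
    numeric_features=['age', 'income'],
    categoric_features=['region'],
    learning_rate=0.05,
    use_dask=False,
    n_iter=50,
    scoring='neg_mean_absolute_error',
)
search.fit(X_train, y_train)  # X_train / y_train are placeholder names
print(search.best_params_, search.best_score_)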
def __init__(self):
    # XGBoost regressor via the sklearn wrapper (the `sklearn` name is assumed to
    # refer to xgboost's sklearn module, as imported elsewhere in this file)
    self.clf = sklearn.XGBRegressor(
        max_depth=3, learning_rate=0.1, n_estimators=200, silent=True,
        objective='reg:linear', gamma=0, min_child_weight=1, max_delta_step=0,
        subsample=1, colsample_bytree=1, colsample_bylevel=0.25,
        reg_alpha=0, reg_lambda=0.5, scale_pos_weight=1, base_score=0.5,
        seed=0, missing=None)
def train_save(pred_period=20, is_high=True, is_clf=False):
    data = gen_dataset(is_high=is_high, is_clf=is_clf, pred_period=pred_period)

    if is_clf:
        _, y_train = data["train"]
        scale_pos_weight = sum(y_train == 0) / sum(y_train == 1)

    if not is_clf:
        models = [
            lgbm.LGBMRegressor(n_estimators=300, num_leaves=100, max_depth=8,
                               random_state=0),
            xgb.XGBRegressor(n_estimators=300, max_depth=5, random_state=0),
        ]
    else:
        models = [
            lgbm.LGBMClassifier(n_estimators=300, scale_pos_weight=0.1,
                                num_leaves=100, max_depth=8, random_state=0),
            xgb.XGBClassifier(n_estimators=300, scale_pos_weight=0.1,
                              max_depth=5, random_state=0),
        ]

    y_pred_list = train(data, models, is_clf=is_clf)

    # save model
    for model in models:
        save_model(model, pred_period, is_high)

    return y_pred_list
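# Note: train_save computes `scale_pos_weight` from the class balance but then
# builds the classifiers with a hard-coded 0.1. If the computed ratio is what is
# intended, a sketch like the following (an assumption, not the original code)
# would wire it through:
models = [
    lgbm.LGBMClassifier(n_estimators=300, scale_pos_weight=scale_pos_weight,
                        num_leaves=100, max_depth=8, random_state=0),
    xgb.XGBClassifier(n_estimators=300, scale_pos_weight=scale_pos_weight,
                      max_depth=5, random_state=0),
]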
    smape = metric(pred, test_y)
    return model, smape


# Previous parameter set, kept for reference:
# param = {"learning_rate": 0.1, "n_estimators": 1000, "max_depth": 5,
#          "min_child_weight": 1, "gamma": 0, "subsample": 1, "colsample_bytree": 1,
#          "objective": 'reg:linear', "nthread": 4, "scale_pos_weight": 1, "seed": 27}
param = {
    "learning_rate": 0.8,
    "gamma": 0,
    "subsample": 1,
    "colsample_bytree": 1,
    "max_depth": 5,
    "objective": 'reg:linear',
    "seed": 27,
}
model = sklearn.XGBRegressor(**param)

param_cv_1 = {"learning_rate": [0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08,
                                0.09, 0.1, 0.11, 0.12, 0.13, 0.14, 0.15]}
param_cv_2 = {"n_estimators": [int(x) for x in np.linspace(100, 2000, 20)]}
param_cv_3 = {"max_depth": [3, 4, 5, 6, 7, 8]}
param_cv_4 = {"min_child_weight": [0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5]}
param_cv_5 = {"scale_pos_weight": [0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5]}

from sklearn.metrics import make_scorer


def choose_best_param(model, param_cv, data_x, data_y):
    # GridSearchCV's `error_score` is the value assigned when a fit fails, not a
    # metric, so score with SMAPE directly (this assumes SMAPE(y_true, y_pred)
    # returns an error to minimize).
    clf = GridSearchCV(estimator=model, param_grid=param_cv,
                       scoring=make_scorer(SMAPE, greater_is_better=False))
    clf.fit(data_x, data_y)
    return clf.best_params_


param_all = [param_cv_1, param_cv_2, param_cv_3, param_cv_4, param_cv_5]
best_param = param.copy()
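# The loop that consumes `param_all` is not shown in this excerpt. A plausible
# stage-wise search (an illustrative sketch, not the original code; `train_x` and
# `train_y` are placeholder names) would tune one parameter group at a time and
# fold each result into `best_param`:
for param_cv in param_all:
    model = sklearn.XGBRegressor(**best_param)
    best_param.update(choose_best_param(model, param_cv, train_x, train_y))
print(best_param)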
import time
import pandas as pd
import xgboost as xgb
from sklearn.datasets import load_boston
from sklearn.ensemble import GradientBoostingRegressor, \
    AdaBoostRegressor, RandomForestRegressor
from sklearn.model_selection import train_test_split

boston = load_boston()
X = boston.data
y = boston.target

# Make a validation set
X_train, X_validation, y_train, y_validation = train_test_split(X, y, random_state=1848)

# Scikit-learn's out-of-the-box gradient tree boosting implementation
sklearn_boost = GradientBoostingRegressor(random_state=1849)
t1 = time.time()
sklearn_boost.fit(X_train, y_train.ravel())
print('Fit time: {:.3f}s'.format(time.time() - t1))
print('Training Error: {:.3f}'.format(1 - sklearn_boost.score(X_train, y_train)))
print('Validation Error: {:.3f}'.format(1 - sklearn_boost.score(X_validation, y_validation)))
# %timeit sklearn_boost.fit(X_train, y_train.ravel())  # IPython magic to time this statement

# XGBoost
xgb_boost = xgb.XGBRegressor(seed=1850)
t2 = time.time()
xgb_boost.fit(X_train, y_train.ravel())
print('Fit time: {:.3f}s'.format(time.time() - t2))
print('Training Error: {:.3f}'.format(1 - xgb_boost.score(X_train, y_train)))
print('Validation Error: {:.3f}'.format(1 - xgb_boost.score(X_validation, y_validation)))
# %timeit xgb_boost.fit(X_train, y_train.ravel())
# Inspect data
percent_missing = X_test.isnull().sum() * 100 / len(X_test)
missing_value_df = pd.DataFrame({
    'column_name': X_test.columns,
    'percent_missing': percent_missing
})
missing_value_df.sort_values('percent_missing', inplace=True)
# X_train is missing 3%-10% of the values

# Create pipeline
pipe = Pipeline([
    # the scale stage is populated by the param_grid
    ('impute', SimpleImputer()),
    ('scale', 'passthrough'),
    ('selection', SelectKBest(f_regression)),
    ('estimation', xgb.XGBRegressor())
])

# Specify parameters to be searched over
param_grid = [{
    'scale': [RobustScaler()],  # StandardScaler(), Normalizer()
    'impute__strategy': ['mean'],  # , 'median'
    'selection__k': [100],
    'estimation__max_depth': [5],
    'estimation__min_child_weight': [1],
    'estimation__gamma': [0],
    'estimation__subsample': [0.8],
    'estimation__colsample_bytree': [0.8]
}]

# Grid search
search = GridSearchCV(pipe, param_grid=param_grid, n_jobs=-1, scoring='r2')
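# With the pipeline and grid defined, fitting the search follows the usual
# scikit-learn pattern. The variable names below (X_train, y_train, X_test,
# y_test) are assumed to exist in the surrounding script.
search.fit(X_train, y_train)
print('Best CV R^2: {:.3f}'.format(search.best_score_))
print('Best params:', search.best_params_)

# The fitted best_estimator_ applies impute -> scale -> select -> XGBoost to new data.
print('Test R^2: {:.3f}'.format(search.score(X_test, y_test)))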