Python XGBRegressor.XGBRegressor示例，xgboost.sklearn.XGBRegressor.XGBRegressor Python示例

示例#1

0

显示文件

文件： model_manager.py 项目： aguillenATC/MuonCount

    def fit_xgboost_regression(self):
        """Tune an XGBoost regressor with a randomized hyper-parameter search.

        When ``self.X_val`` is not ``None`` the validation split is stacked
        under the training split before searching; otherwise only the
        training split is used.

        Returns:
            The best estimator found by the search. It is also stored in
            ``self.xgboost_reg_model``; the search object itself is kept in
            ``self.gs_xgboost``.
        """
        if self.X_val is not None:
            X_train_aux = pd.concat([pd.DataFrame(self.X_train.copy()), pd.DataFrame(self.X_val.copy())])
            # Flatten the stacked targets into a 1-D Series with a fresh index.
            y_train_aux = pd.Series(
                pd.concat([pd.DataFrame(self.y_train.copy()), pd.DataFrame(self.y_val.copy())]).values.reshape(
                    self.y_train.shape[0] + self.y_val.shape[0], ))
        else:
            X_train_aux = self.X_train
            y_train_aux = self.y_train

        # The scikit-learn wrapper's thread-count argument is `n_jobs`; the
        # previous `nthreads` spelling is not a recognised parameter.
        xgbreg = XGBRegressor(n_jobs=-1)
        params = {
            "max_depth": list(range(5, 55, 5)),
            "learning_rate": [0.001, 0.01, 0.1],
            "gamma": list(range(1, 20)),
            "n_estimators": [i * 10 for i in range(5, 55, 5)],
        }
        self.gs_xgboost = RandomizedSearchCV(xgbreg, params, n_jobs=-1, verbose=2)
        self.gs_xgboost.fit(X_train_aux, y_train_aux)

        # The search is created with its default `refit` setting, so
        # best_estimator_ is already fitted; no extra fit call is needed.
        self.xgboost_reg_model = self.gs_xgboost.best_estimator_

        return self.xgboost_reg_model

示例#2

0

显示文件

def generate_XGB_model(train_df):
    """Impute app counts, then train and return the final XGBoost model.

    ``train_df`` is modified in place (``conversionTime`` is dropped from the
    caller's frame). After the remaining drops, the first column is used as
    the target and all other columns as features.

    Side effects: the app-count model returned by
    ``train_model_for_appcounts`` is dumped to ``XGB_missing.model`` and
    progress/diagnostics are printed.

    Returns:
        The fitted ``XGBRegressor``.
    """
    train_df.drop(['conversionTime'], axis=1, inplace=True)
    print('Train And Fix Missing App Count Value...')
    train_df, xgb_appcount = train_model_for_appcounts(train_df)
    joblib.dump(xgb_appcount, 'XGB_missing.model')
    # The analogous age-imputation step (train_model_for_age ->
    # 'XGB_age.model') is currently disabled.
    train_df.drop(['marriageStatus', 'haveBaby', 'sitesetID', 'positionType'],
                  axis=1,
                  inplace=True)
    print('Done')
    # DataFrame.info() writes its report itself and returns None, so it must
    # not be wrapped in print().
    train_df.info()
    print(train_df.describe())
    print(train_df.isnull().sum())
    # `.values` replaces DataFrame.as_matrix(), which newer pandas removed.
    train_np = train_df.values
    y = train_np[:, 0]
    X = train_np[:, 1:]
    print('Train Xgboost Model...')
    start_time = datetime.datetime.now()
    xbg_clf = XGBRegressor(n_estimators=100,
                           max_depth=6,
                           objective="binary:logistic",
                           silent=False)
    xbg_clf.fit(X, y)
    elapsed = datetime.datetime.now() - start_time
    # total_seconds() also counts whole days, unlike the `.seconds` field.
    print('Training Done..., Time Cost: %d' % elapsed.total_seconds())
    model_df = pd.DataFrame({
        'columns': list(train_df.columns)[1:],
        'values': xbg_clf.feature_importances_
    })
    print(model_df)
    return xbg_clf

示例#3

0

显示文件

文件： featureimportance_chk_withRandCol.py 项目： ntdsyq/house_price_prediction

def FI_xgb_sklearn():
    """Rank XGBoost feature importances against a random reference column.

    A column of standard-normal noise named ``'random'`` is appended to the
    label-encoded training features, a regressor is fitted, and the features
    are ranked by absolute ``total_gain`` importance. The ranking down to and
    including the noise column is printed.

    Returns:
        DataFrame with columns ``Feature`` and ``Importance``, sorted by
        descending absolute importance.
    """
    X, y = load_traindata(encodetype='le')
    feature_names = list(X.columns) + ['random']

    # Noise feature: anything ranked below it is no better than chance.
    noise = np.random.randn(X.shape[0])
    X = np.column_stack((X, noise))

    settings = dict(
        learning_rate=0.01,
        n_estimators=3320,
        max_depth=3,
        min_child_weight=4,
        colsample_bytree=0.8,
        subsample=0.8,
        importance_type='total_gain',
        objective='reg:linear',
        n_jobs=-1,
        random_state=0,
        seed=27,
        silent=True,
    )
    xgb1 = XGBRegressor(**settings)
    xgb1.fit(X, y)

    ranked = sorted(zip(feature_names, xgb1.feature_importances_),
                    key=lambda pair: abs(pair[1]),
                    reverse=True)
    imp = pd.DataFrame(ranked, columns=['Feature', 'Importance'])

    is_noise = imp['Feature'] == 'random'
    rnd_idx = np.argwhere(is_noise)[0][0]
    print(imp.iloc[:rnd_idx + 1, :])
    return imp

示例#4

0

显示文件

    def skl_cv(self):
        """Grid-search ``self.rf`` and replace it with a model using the best parameters.

        ``self.model`` selects the task: ``'C'`` scores by accuracy and
        rebuilds an ``XGBClassifier``; ``'R'`` scores by negated mean
        absolute error and rebuilds an ``XGBRegressor``. Any other value only
        logs the start message and leaves ``self.rf`` untouched.

        The replacement estimator is newly constructed (not fitted) from the
        ``n_estimators``, ``max_depth``, ``min_child_weight``, ``gamma`` and
        ``learning_rate`` entries of the best parameter set, so
        ``self.cv_param`` must search all five.
        """
        logging.info("{0}:正在进行网格搜索".format(self.now_time()))
        if self.model == 'C':
            scoring, score_label = 'accuracy', 'acc'
        elif self.model == 'R':
            # The score is the negated MAE; it used to be logged as R-squared.
            scoring, score_label = 'neg_mean_absolute_error', '负MAE'
        else:
            return

        grid_search = GridSearchCV(estimator=self.rf,
                                   param_grid=self.cv_param,
                                   scoring=scoring)
        grid_search.fit(self.X_train, self.Y_train)
        best = grid_search.best_params_
        logging.info("{0}:最优参数:{1}".format(self.now_time(), best))
        logging.info("{0}:最优参数{1}结果:{2}".format(self.now_time(),
                                                score_label,
                                                grid_search.best_score_))

        model_cls = XGBClassifier if self.model == 'C' else XGBRegressor
        self.rf = model_cls(
            n_estimators=best['n_estimators'],
            max_depth=best['max_depth'],
            min_child_weight=best['min_child_weight'],
            gamma=best['gamma'],
            learning_rate=best['learning_rate'])

示例#5

0

显示文件

文件： Generation.py 项目： Misswurabbit/CCF_fund_competition

    def fit_model(self, data, target, test):
        """Append XGBoost leaf indices to min-max scaled features.

        ``data`` is converted to a float array and min-max scaled; ``test`` is
        scaled with the statistics fitted on ``data``; ``target`` is scaled
        with its own min-max fit. A regressor configured from this object's
        hyper-parameter attributes is fitted on the scaled data, and the leaf
        indices it assigns (``apply``) are merged onto the scaled features via
        ``self.mergeToOne``.

        Returns:
            ``(X_train_new, target, X_test_new)``: the augmented training
            frame, the scaled target, and the augmented test frame.
        """
        booster = XGBRegressor(learning_rate=self.learning_rate,
                               n_estimators=self.n_estimators,
                               max_depth=self.max_depth,
                               min_child_weight=self.min_child_weight,
                               gamma=self.gamma,
                               subsample=self.subsample,
                               colsample_bytree=self.colsample_bytree,
                               objective=self.objective,
                               nthread=self.nthread,
                               scale_pos_weight=self.scale_pos_weight,
                               reg_alpha=self.reg_alpha,
                               reg_lambda=self.reg_lambda,
                               seed=self.seed)

        # Features: fit the scaling on the training data only, reuse for test.
        feature_scaler = MinMaxScaler()
        data = feature_scaler.fit_transform(np.array(data).astype(float))
        test = feature_scaler.transform(test)
        # Target: scaled independently of the features.
        target = MinMaxScaler().fit_transform(target)

        booster.fit(data, target)
        train_leaves = booster.apply(data)
        test_leaves = booster.apply(test)

        X_train_new = pd.DataFrame(
            self.mergeToOne(pd.DataFrame(data), train_leaves))
        X_test_new = pd.DataFrame(
            self.mergeToOne(pd.DataFrame(test), test_leaves))
        return X_train_new, target, X_test_new

示例#6

0

显示文件

 def __init__(self,
              mean_model_params={},
              upper_quantile_params={
                  'alpha': 0.95,
                  'delta': 1.0,
                  'thresh': 1.0,
                  'variance': 1.0
              },
              lower_quantile_params={
                  'alpha': 0.05,
                  'delta': 1.0,
                  'thresh': 1.0,
                  'variance': 1.0
              }):
     """Build a mean model plus upper and lower quantile models.

     :param mean_model_params: keyword arguments for the mean
         ``XGBRegressor``; they are also the base settings of both quantile
         models, except for ``'alpha'``.
     :param upper_quantile_params: settings layered over the base for the
         upper quantile model; must contain ``'alpha'``.
     :param lower_quantile_params: settings layered over the base for the
         lower quantile model; must contain ``'alpha'``.
     :raises KeyError: if either quantile dict has no ``'alpha'`` entry.

     The argument dicts are stored as passed and are never modified here.
     """
     self.mean_model_params = mean_model_params
     self.upper_quantile_params = upper_quantile_params
     self.lower_quantile_params = lower_quantile_params
     self.gb = XGBRegressor(**mean_model_params)
     # Drop 'alpha' from a filtered copy rather than popping it in place:
     # popping would alter the caller's dict and, because the default is a
     # shared mutable object, leak into every later instance.
     shared_params = {
         key: value
         for key, value in mean_model_params.items() if key != 'alpha'
     }
     upper_quantile_params_combined = {**shared_params, **upper_quantile_params}
     lower_quantile_params_combined = {**shared_params, **lower_quantile_params}
     self.gb_quantile_upper = XGBQuantileRegressor(
         **upper_quantile_params_combined)
     self.gb_quantile_lower = XGBQuantileRegressor(
         **lower_quantile_params_combined)
     self.upper_alpha = upper_quantile_params['alpha']
     self.lower_alpha = lower_quantile_params['alpha']

示例#7

0

显示文件

文件： base.py 项目： sbkhosh/dax

def set_grid_search(regrs, X_train, y_train, reg):
    """Run the parameter search for one regressor family and rebuild the model.

    :param regrs: regressor family: ``'tree'``, ``'forest'``, ``'xgbr'`` or
        ``'nn'``.
    :param X_train: training features, forwarded to ``grid_search``.
    :param y_train: training targets, forwarded to ``grid_search``.
    :param reg: estimator to be searched, forwarded to ``grid_search``.
    :return: a new, unfitted estimator of the chosen family configured with
        the parameters returned by ``grid_search``.
    :raises ValueError: if ``regrs`` is not one of the supported families
        (this used to surface as an ``UnboundLocalError`` at the return).
    """
    if regrs not in ('tree', 'forest', 'xgbr', 'nn'):
        raise ValueError(
            "unknown regressor kind %r; expected 'tree', 'forest', 'xgbr' "
            "or 'nn'" % (regrs,))

    if regrs == 'tree':
        random_grid = build_grid_tree()
        prms = grid_search(reg, X_train, y_train, random_grid)
        reg_prms = DecisionTreeRegressor(
            max_features=prms['max_features'],
            max_depth=prms['max_depth'],
            min_samples_split=prms['min_samples_split'],
            max_leaf_nodes=prms['max_leaf_nodes'],
            min_samples_leaf=prms['min_samples_leaf'])
    elif regrs == 'forest':
        random_grid = build_grid_rf()
        prms = grid_search(reg, X_train, y_train, random_grid)
        reg_prms = RandomForestRegressor(
            n_estimators=prms['n_estimators'],
            max_features=prms['max_features'],
            max_depth=prms['max_depth'],
            min_samples_split=prms['min_samples_split'],
            min_samples_leaf=prms['min_samples_leaf'],
            n_jobs=-1)
    elif regrs == 'xgbr':
        random_grid = build_grid_xgbr()
        prms = grid_search(reg, X_train, y_train, random_grid)
        reg_prms = XGBRegressor(
            learning_rate=prms['learning_rate'],
            max_depth=prms['max_depth'],
            min_child_weight=prms['min_child_weight'],
            n_estimators=prms['n_estimators'],
            subsample=prms['subsample'],
            n_jobs=-1)
    else:  # 'nn'
        random_grid = build_grid_nn()
        prms = grid_search(reg, X_train, y_train, random_grid)
        reg_prms = MLPRegressor(
            hidden_layer_sizes=prms['hidden_layer_sizes'],
            activation=prms['activation'],
            solver=prms['solver'],
            alpha=prms['alpha'],
            learning_rate_init=prms['learning_rate_init'],
            learning_rate=prms['learning_rate'],
            max_iter=prms['max_iter'],
            tol=prms['tol'],
            momentum=prms['momentum'],
            beta_1=prms['beta_1'],
            beta_2=prms['beta_2'],
            n_iter_no_change=prms['n_iter_no_change'])
    return reg_prms

示例#8

0

显示文件

    def train(self, X_train, X_test, y_train, y_test):
        '''
        Fit the XGBoost regressor, pickle it and evaluate it on the test split.

        The fitted model is kept in ``self.model`` and pickled to
        ``generated/gxboost_model.pickle``; the file is opened directly, so
        the ``generated`` directory must already exist.

        :param X_train: training features
        :param X_test: test features (also used as an evaluation set while
            fitting)
        :param y_train: training targets
        :param y_test: test targets
        :return: whatever ``self.evaluate(y_test, X_test)`` returns
        '''
        print('Training is starting...')
        eval_set = [(X_train, y_train), (X_test, y_test)]

        self.model = XGBRegressor(max_depth=7,
                                  objective='reg:squarederror',
                                  gamma=0,
                                  learning_rate=0.03,
                                  subsample=1,
                                  colsample_bytree=0.9,
                                  min_child_weight=10)

        # NOTE(review): n_estimators is not set here, so unless the library
        # default exceeds 500 rounds the early-stopping window can never
        # elapse -- confirm this is intended.
        self.model.fit(X_train,
                       y_train,
                       eval_set=eval_set,
                       eval_metric="rmse",
                       early_stopping_rounds=500)

        # The result is unused; the call is kept in case self.predict has
        # side effects. NOTE(review): drop it if predict is pure.
        self.predict(X_test)

        with open('generated/gxboost_model.pickle', 'wb') as file:
            pickle.dump(self.model, file)

        # Hand the evaluation result back instead of discarding it.
        return self.evaluate(y_test, X_test)

示例#9

0

显示文件

    def grid_search(self, X_train, X_test, y_train, y_test):
        """Exhaustively search XGBoost hyper-parameters and print the best set.

        Uses 5-fold cross-validation scored by negated mean squared error on
        the training split. ``X_test`` and ``y_test`` are accepted but not
        used. Nothing is returned; the best parameters are only printed.
        """
        search_space = dict(
            max_depth=list(range(2, 10)),
            gamma=np.arange(0, 0.5, 0.1),
            learning_rate=[0.0001, 0.001, 0.01, 0.1],
            subsample=np.arange(0.5, 0.9, 0.1),
            colsample_bytree=np.arange(0.5, 0.9, 0.1),
            min_child_weight=[1, 3, 5, 7],
        )

        # Starting point; every value that also appears in search_space is
        # overridden candidate by candidate.
        base_estimator = XGBRegressor(
            max_depth=7,
            objective='reg:squarederror',
            gamma=0,
            learning_rate=0.03,
            subsample=1,
            colsample_bytree=0.9,
            min_child_weight=10,
        )

        searcher = GridSearchCV(
            estimator=base_estimator,
            param_grid=search_space,
            scoring='neg_mean_squared_error',
            cv=5,
            n_jobs=-1,
        )
        searcher.fit(X_train, y_train)

        print(searcher.best_params_)

示例#10

0

显示文件

def xgbregressor(xtrain, y_train, x_test):
    """Fit a grid-searched XGBoost regressor and return its training predictions.

    The grid holds a single candidate, so the search effectively performs a
    2-fold cross-validated fit of one fixed configuration. Predictions for
    ``x_test`` are printed together with their summary statistics.

    :param xtrain: training features
    :param y_train: training targets
    :param x_test: features to predict and report on
    :return: predictions of the fitted search for ``xtrain``
    """
    xgb_reg = XGBRegressor()
    parameters = {'nthread': [4],
                  'objective': ['reg:linear'],
                  'learning_rate': [.07],
                  'max_depth': [7],
                  'min_child_weight': [4],
                  'silent': [1],
                  'subsample': [0.7],
                  'colsample_bytree': [0.7],
                  'n_estimators': [12]}

    clf_xgbreg = GridSearchCV(xgb_reg,
                              parameters,
                              n_jobs=5,
                              cv=2,
                              verbose=True)

    # Use the function's own `xtrain` argument; the body used to read an
    # undefined `x_train`.
    clf_xgbreg.fit(xtrain, y_train)

    # Predict with the model fitted above; an undefined `clf_RF` was used
    # here before.
    preds = clf_xgbreg.predict(xtrain)

    y_test_pred = clf_xgbreg.predict(x_test)
    print(y_test_pred)
    print(pd.DataFrame(y_test_pred).describe())

    return preds

示例#11

0

显示文件

文件： tool.py 项目： piupiuup/competition

def over_sample(train, test, feat):
    """Predict each ``feat`` group with a model that over-samples that group.

    For every distinct value of ``train[feat]``, the training rows of that
    group are appended to the full training set a second time, a regressor is
    fitted on the result, and the test rows of the same group are predicted.
    Test rows whose ``feat`` value never occurs in ``train`` get no
    prediction.

    :param train: training frame with ``ID`` and ``y`` columns; every other
        column is used as a predictor.
    :param test: frame to predict, with ``ID`` and the predictor columns.
    :param feat: name of the grouping column.
    :return: DataFrame with columns ``ID`` and ``y`` sorted by ``ID``; empty
        when nothing could be predicted.
    """
    predictors = [x for x in train.columns if x not in ['ID', 'y']]
    parts = []
    for name in train[feat].unique():
        test_temp = test[test[feat] == name]
        if test_temp.empty:
            # Nothing to predict for this group, so skip the costly fit.
            continue
        train_temp = pd.concat([train, train[train[feat] == name]])
        model = XGBRegressor(max_depth=4,
                             learning_rate=0.0045,
                             n_estimators=1250,
                             silent=True,
                             objective='reg:linear',
                             nthread=-1,
                             min_child_weight=1,
                             max_delta_step=0,
                             subsample=0.93,
                             seed=27)
        model.fit(train_temp[predictors], train_temp['y'])
        pred = model.predict(test_temp[predictors])
        parts.append(pd.DataFrame({'ID': test_temp['ID'].values, 'y': pred}))

    if not parts:
        # Previously this path crashed calling sort_values on None.
        return pd.DataFrame(columns=['ID', 'y'])

    # Concatenate once rather than growing the frame inside the loop.
    result = pd.concat(parts)
    result.sort_values('ID', inplace=True)

    return result

示例#12

0

显示文件

def get_estimator():
    """Assemble the preprocessing + XGBoost regression pipeline.

    The listed base columns go through ``_preprocessor`` and then a
    most-frequent imputer, the identifier columns are dropped, and every
    other column is passed through unchanged to a default ``XGBRegressor``.

    Returns:
        An unfitted ``Pipeline`` with steps ``'preprocessing'`` and
        ``'Regressor'``.
    """
    dropped_columns = [
        'CODGEO', 'LIBGEO', 'REG', 'DEP', 'Code Nuance', 'Code du département'
    ]
    imputed_columns = [
        'Orientation Economique', 'SEG Croissance POP', 'Urbanité Ruralité',
        'Dynamique Démographique BV', 'Environnement Démographique',
        'Fidélité', 'SYN MEDICAL', 'Seg Dyn Entre',
        'SEG Environnement Démographique Obsolète', 'Seg Cap Fiscale',
        'DYN SetC', 'CP', 'MED14', 'Nb Femme', 'Nb Homme'
    ]

    # Custom preprocessing first, then fill the remaining gaps.
    impute_branch = make_pipeline(
        FunctionTransformer(_preprocessor, validate=False),
        SimpleImputer(strategy='most_frequent'))

    column_stage = ColumnTransformer(
        transformers=[
            ('base', impute_branch, imputed_columns),
            ('drop cols', 'drop', dropped_columns),
        ],
        remainder='passthrough')

    return Pipeline(steps=[('preprocessing', column_stage),
                           ('Regressor', XGBRegressor())])

示例#13

0

显示文件

    def model_xgb_search(self, X, Y):
        """Grid-search an XGBoost regressor and persist the fitted search.

        Searches ``max_depth``, ``learning_rate`` and ``n_estimators`` with a
        shuffled, stratified 5-fold split, prints the best score, parameters
        and estimator plus the elapsed time, and dumps the fitted search
        object to ``self.model_path``.

        NOTE(review): stratified folds need discrete target values, and the
        shuffle is unseeded so fold assignment differs between runs; confirm
        both are intended.
        """
        print('model_xgb_search start')
        base_model = XGBRegressor()

        # An earlier classification-style grid (min_child_weight, gamma,
        # num_class, objective='multi:softmax') is no longer searched.
        search_space = {
            'max_depth': list(range(2, 14)),
            'learning_rate': np.linspace(0.03, 0.1, 5),
            'n_estimators': [100, 200, 300],
        }

        started_at = time.time()
        folds = StratifiedKFold(n_splits=5, shuffle=True)
        searcher = GridSearchCV(base_model, search_space, cv=folds)
        fitted_search = searcher.fit(X, Y)

        summary = (fitted_search.best_score_,
                   fitted_search.best_params_,
                   fitted_search.best_estimator_)
        print("Best: %f using params: %s estimator: %s" % summary)
        print('GridSearchCV process use %.2f seconds' % (time.time() - started_at))
        print("Save model to " + self.model_path)
        dump(fitted_search, self.model_path)
        print('end=======')

示例#14

0

显示文件

 def xgboostmodel(self):
     """Train an XGBoost regressor on ``datafile`` and save model and results.

     The CSV is read with its first column as index; the last column is the
     target and the remaining columns are features. 70% of the rows are used
     for training. The fitted model is saved to ``self.model_file``, the
     held-out truth and predictions are stored in ``self.true`` and
     ``self.pred``, and the figure and mean summary are written through
     ``show_save_figure`` and ``save_result``.

     Hyper-parameters default to ``max_depth=128``, ``n_estimators=768`` and
     ``learning_rate=0.01``; entries of ``self.params``, when it is not
     ``None``, override them. Previously ``self.params`` was read but never
     applied.
     """
     df = pd.read_csv(datafile, encoding='utf-8', index_col=0)
     print(df.shape)
     traindata = df.values
     x = traindata[:, :-1]
     y = traindata[:, -1]
     x_train, x_test, y_train, y_test = train_test_split(
         x, y, train_size=0.7)

     model_kwargs = {
         'max_depth': 128,
         'n_estimators': 768,
         'learning_rate': 0.01,
     }
     if self.params is not None:
         model_kwargs.update(self.params)
     # The former `silence=False` argument was a misspelling that the
     # estimator does not recognise, so it has been dropped.
     raw_model = XGBRegressor(**model_kwargs)
     raw_model.fit(x_train, y_train)
     raw_model.save_model(self.model_file)
     pred = raw_model.predict(x_test)
     self.true = y_test
     self.pred = pred
     self.show_save_figure(fig_path=self.fig_path,
                           modelname=self.job_name,
                           detal_idx=500)
     t_mean = self.cal_mean(self.true)
     p_mean = self.cal_mean(self.pred)
     self.save_result(self.result_path, true_mean=t_mean, pred_mean=p_mean)

示例#15

0

显示文件

 def fit_model_split(self, X_train, y_train, X_test, y_test):
     """Build leaf-index features from a model fitted on a held-back split.

     The training data is split 40/60 with a fixed seed. The regressor is
     fitted on the 40% part; the leaf indices it assigns (``apply``) to the
     60% part and to ``X_test`` are merged onto those rows with
     ``self.mergeToOne``.

     :return: ``(X_train_new2, y_train_2, X_test_new, y_test)``: the
         augmented 60% training part with its targets, and the augmented
         test set with its unchanged targets.
     """
     # X_train_1 fits the model; X_train_2 is combined with the new features
     # to form the new training set.
     X_train_1, X_train_2, y_train_1, y_train_2 = train_test_split(
         X_train, y_train, test_size=0.6, random_state=0)
     clf = XGBRegressor(learning_rate=self.learning_rate,
                        n_estimators=self.n_estimators,
                        max_depth=self.max_depth,
                        min_child_weight=self.min_child_weight,
                        gamma=self.gamma,
                        subsample=self.subsample,
                        colsample_bytree=self.colsample_bytree,
                        objective=self.objective,
                        nthread=self.nthread,
                        scale_pos_weight=self.scale_pos_weight,
                        reg_alpha=self.reg_alpha,
                        reg_lambda=self.reg_lambda,
                        seed=self.seed)
     clf.fit(X_train_1, y_train_1)
     new_feature = clf.apply(X_train_2)
     X_train_new2 = self.mergeToOne(X_train_2, new_feature)
     new_feature_test = clf.apply(X_test)
     X_test_new = self.mergeToOne(X_test, new_feature_test)
     # `print` followed by a bare string on the next line never printed the
     # message; call print() with it.
     print("Training set of sample size 0.4 fewer than before")
     return X_train_new2, y_train_2, X_test_new, y_test

示例#16

0

显示文件

def model_intrv3(Y_train, X_train, Y_test, X_test, Targ):
    """Fit an XGBoost regressor and report its test error.

    :param Y_train: training targets
    :param X_train: training features
    :param Y_test: test targets
    :param X_test: test features
    :param Targ: values whose mean normalises the RMSE
    :return: dict with ``'Yts_pd'`` (DataFrame of true ``Yts`` and predicted
        ``Ypd`` values), ``'mse'`` and ``'nRMSE'``.

    Side effects: prints ``mse`` and ``nRMSE`` and rebinds the module-level
    globals ``metrs`` (the two metrics) and ``reslts`` (the predictions and
    the comparison frame).
    """
    global reslts
    global metrs
    import numpy as np
    import pandas as pd
    from sklearn.metrics import mean_squared_error
    from xgboost.sklearn import XGBRegressor

    # `loss='ls'` was dropped: it is not an XGBRegressor argument, and the
    # objective stays at the library default.
    model = XGBRegressor(n_estimators=200,
                         learning_rate=0.05,
                         max_depth=4,
                         random_state=0,
                         subsample=0.9,
                         colsample_bytree=1.0).fit(X_train, Y_train)

    pred_Yxgb = model.predict(X_test)
    mse = mean_squared_error(Y_test, pred_Yxgb)
    # RMSE normalised by the mean of Targ.
    nRMSE = np.sqrt(mse) / Targ.mean()
    Yts_pd = pd.DataFrame({'Yts': Y_test, 'Ypd': pred_Yxgb})
    print(mse, nRMSE)
    metrs = {'mse': mse, 'nRMSE': nRMSE}
    reslts = {'Ypred': pred_Yxgb, 'Yts_pd': Yts_pd}
    return {'Yts_pd': Yts_pd, 'mse': mse, 'nRMSE': nRMSE}

示例#17

0

显示文件

 def fit_model(self, X_train, y_train, X_test, y_test):
     """Append XGBoost leaf indices to the training and test features.

     The regressor is fitted on the full training set; the leaf indices it
     assigns (``apply``) to the training and test rows are merged onto those
     rows with ``self.mergeToOne``.

     :return: ``(X_train_new, y_train, X_test_new, y_test)``: the augmented
         feature sets with their unchanged targets.
     """
     clf = XGBRegressor(learning_rate=self.learning_rate,
                        n_estimators=self.n_estimators,
                        max_depth=self.max_depth,
                        min_child_weight=self.min_child_weight,
                        gamma=self.gamma,
                        subsample=self.subsample,
                        colsample_bytree=self.colsample_bytree,
                        objective=self.objective,
                        nthread=self.nthread,
                        scale_pos_weight=self.scale_pos_weight,
                        reg_alpha=self.reg_alpha,
                        reg_lambda=self.reg_lambda,
                        seed=self.seed)
     clf.fit(X_train, y_train)
     new_feature = clf.apply(X_train)
     X_train_new = self.mergeToOne(X_train, new_feature)
     new_feature_test = clf.apply(X_test)
     X_test_new = self.mergeToOne(X_test, new_feature_test)
     # `print` followed by a bare string on the next line never printed the
     # message; call print() with it.
     print("Training set sample number remains the same")
     return X_train_new, y_train, X_test_new, y_test

示例#18

0

显示文件

文件： train_models.py 项目： rohansurve212/Club_Mahindra_Data_Hack

def train_xgb(df_preprocessed, df_target):
    """
    Cross-validate and fit an XGBoost regressor.

    The grid contains a single candidate (1000 estimators), so the search
    serves as a 3-fold cross-validated fit scored by negated MSE. The
    resulting RMSE is printed.

    :param df_preprocessed: features
    :param df_target: target
    :return: a tuple of the best estimator and its cross-validated RMSE
    """
    base_model = XGBRegressor(
        nthread=4,
        objective='reg:linear',
        learning_rate=0.02,  # passed to the booster as `eta`
        max_depth=10,
        min_child_weight=1,
        gamma=3,
        subsample=1.0,
        colsample_bytree=0.35)

    search = GridSearchCV(base_model,
                          {'n_estimators': [1000]},
                          cv=3,
                          verbose=1,
                          n_jobs=-1,
                          scoring='neg_mean_squared_error')
    search.fit(df_preprocessed, df_target)

    # best_score_ is the negated MSE, so flip the sign before the root.
    rmse = np.sqrt(-search.best_score_)
    print(rmse)

    return search.best_estimator_, rmse

示例#19

0

显示文件

    def def_model(self, parameters: dict = None):
        """Create a fresh ``XGBRegressor`` and store it in ``self._model``.

        :param parameters: optional estimator parameters, applied through
            ``set_params``; ``None`` keeps the estimator's defaults.
        """
        estimator = XGBRegressor()
        if parameters is not None:
            estimator.set_params(**parameters)
        self._model = estimator

示例#20

0

显示文件

def XGB_reg_evaluation(individual, evaluation_method='roll_win'):
    '''
    Score one hyper-parameter candidate and return its error as a 1-tuple.

    individual : sequence read as
        [0] learning rate (eta), [1] min_child_weight, [2] max_depth,
        [3] subsample, [4] colsample_bylevel, [5] number of boosting rounds,
        [6] training-window length (read by 'roll_win' only).
    evaluation_method : can be roll_win, mse
        'roll_win' averages the MSE over N_validation rolling windows taken
        backwards from the end of the data; 'mse' averages the MSE over an
        N_splits-fold cross-validation.

    Reads the module-level trainX, trainY, trainNum, window, N_validation
    and N_splits. Raises Exception for any other evaluation_method.
    '''

    if evaluation_method == 'roll_win':
        trainNumber = individual[6]  # the train num
        param = {
            'eta': individual[0],
            'silent': True,
            'objective': "reg:linear",
            'nthread': -1,
            'min_child_weight': individual[1],
            'max_depth': individual[2],
            'subsample': individual[3],
            'colsample_bylevel': individual[4],
            'seed': 0
        }
        roll_win_mseValue = 0
        # range() works on both Python 2 and 3, unlike xrange().
        for i in range(N_validation):
            # Window i tests on [test_start, test_stop) and trains on the
            # trainNumber rows immediately before it.
            test_start = trainNum - (i + 1) * window
            test_stop = trainNum - i * window
            train_start = test_start - trainNumber

            trainingX = trainX[train_start:test_start, :]
            trainingY = trainY[train_start:test_start]
            testingX = trainX[test_start:test_stop, :]
            testingY = trainY[test_start:test_stop]

            dtrain = xgb.DMatrix(data=trainingX, label=trainingY)
            bst = xgb.train(params=param,
                            dtrain=dtrain,
                            num_boost_round=individual[5])
            testingX = xgb.DMatrix(testingX)
            roll_win_mseValue += sum(
                (testingY - bst.predict(testingX))**2) / window
        roll_win_mseValue /= N_validation
        return (roll_win_mseValue, )

    if evaluation_method == 'mse':
        ### The cross validation evaluation
        N_SPLITS = N_splits
        kf = KFold(n_splits=N_SPLITS)
        cv_mseValue = 0
        fc = XGBRegressor(learning_rate=individual[0],
                          n_estimators=individual[5],
                          silent=True,
                          objective="reg:linear",
                          nthread=-1,
                          gamma=0,
                          min_child_weight=individual[1],
                          max_depth=individual[2],
                          subsample=individual[3],
                          colsample_bylevel=individual[4],
                          seed=0)
        for train, test in kf.split(trainX):
            fc.fit(trainX[train, :], trainY[train])
            cv_mseValue += sum(
                (trainY[test] - fc.predict(trainX[test, :]))**2) / len(test)
        cv_mseValue = cv_mseValue / N_SPLITS
        return (cv_mseValue, )

    # print() call form is valid on both Python 2 and 3.
    print("There is no evaluation method for %s" % evaluation_method)
    raise Exception("evaluation_method is not valid")

示例#21

0

显示文件

文件： MLStock.py 项目： HuangNing616/stock_predict

    def xgboost_single_pred(self):
        """Predict the test set one row at a time with an expanding training set.

        For each test row, in order, the regressor is refitted on everything
        seen so far, that single row is predicted, and the row is then added
        to the training data together with its true target.

        Predictions are stored in ``self.y_pred_all_xgb``.

        Returns:
            ``(rmse, predictions, ratio)``: the RMSE over the test set, the
            predictions as a one-column DataFrame, and the element-wise
            ratio of true to predicted values as a DataFrame.
        """
        x_train = self.x_train
        y_train = list(self.y_train)

        x_test = self.x_test
        # Materialise the targets so row i is taken by position, matching the
        # positional `iloc` slice on x_test; `y_test[i]` is a label lookup
        # when y_test is a Series.
        y_test_values = list(self.y_test)

        self.y_pred_all_xgb = []
        xgboost_clf = XGBRegressor(learning_rate=0.1, n_estimators=75)

        for i in range(len(x_test)):
            xgboost_clf.fit(x_train, y_train)
            x_test_one = x_test.iloc[i:i + 1]
            y_test_one = xgboost_clf.predict(x_test_one)
            self.y_pred_all_xgb.append(list(y_test_one)[0])
            # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
            x_train = pd.concat([x_train, x_test_one])
            y_train.append(y_test_values[i])

        xgboost_mse = mean_squared_error(self.y_test, self.y_pred_all_xgb)
        xgboost_rmse = np.sqrt(xgboost_mse)
        y_pred_all_xgb = pd.DataFrame(list(self.y_pred_all_xgb))
        ratio_single_xgb = pd.DataFrame(list(self.y_test)) / y_pred_all_xgb
        return xgboost_rmse, y_pred_all_xgb, ratio_single_xgb

示例#22

0

显示文件

    def __train_model(self, features):
        """Fit a gradient-boosted price model and log its hold-out metrics.

        Each combo names two extra feature columns followed by the target
        column. All seasonal/yearly price, availability and min-nights
        averages are dropped from the features, the combo's two feature
        columns are added back, and a model is fitted on a 75/25 split.

        :param features: frame holding the base features and every
            ``*_avg`` column listed below.
        :return: the model fitted for the first combo.
        """
        # Only the yearly combo is active; the winter, spring and summer
        # combos were disabled.
        combo_list = [
            ['available_year_avg', 'min_nights_year_avg', 'price_year_avg']
        ]
        for combo in combo_list:
            X_base = features.drop([
                'price_year_avg', 'price_winter_avg', 'price_spring_avg', 'price_summer_avg', 'price_fall_avg',
                'available_year_avg', 'available_winter_avg', 'available_spring_avg', 'available_summer_avg', 'available_fall_avg',
                'min_nights_year_avg', 'min_nights_winter_avg', 'min_nights_spring_avg', 'min_nights_summer_avg', 'min_nights_fall_avg'
            ], axis=1)
            X_base[combo[0]] = features[combo[0]]
            X_base[combo[1]] = features[combo[1]]
            y = features[combo[2]]
            X_train, X_test, y_train, y_test = train_test_split(X_base, y, test_size=.25, random_state=42, shuffle=True)

            # `cv=5` was dropped: it is not an XGBRegressor argument, and no
            # cross-validation happens in this method.
            model = XGBRegressor(
                objective='reg:squarederror',
                learning_rate=0.1,
                max_depth=8,
                n_estimators=200,
                n_jobs=-1
            )
            model.fit(X_train, y_train)
            self.logger.info('Gradient boost model:')
            self.logger.info(f'Target label: {combo[2]}')
            self.logger.info(f'R^2: {model.score(X_test, y_test)}')
            self.logger.info(f'MAE: {mean_absolute_error(y_test, model.predict(X_test))}')
            # NOTE(review): returning inside the loop means later combos
            # would never be trained if they were re-enabled.
            return model

示例#23

0

显示文件

文件： XGBoost.py 项目： zzti-bsj/analytics-zoo

    def _build(self, **config):
        """
        Apply the hyper-parameters and instantiate the underlying model.

        ``config`` is first applied through ``set_params``; the model is then
        created from this object's attributes. A classifier additionally
        gets ``objective='binary:logistic'``. ``self.model_init`` is set to
        ``True`` on success.

        :param config: hyper parameters for building the model
        :return:
        :raises ValueError: if ``self.model_type`` is neither ``"regressor"``
            nor ``"classifier"``.
        """
        self.set_params(**config)

        # Choose the estimator class before reading any hyper-parameter, so
        # an invalid model_type is reported first.
        if self.model_type == "regressor":
            model_cls, task_kwargs = XGBRegressor, {}
        elif self.model_type == "classifier":
            model_cls, task_kwargs = XGBClassifier, {'objective': 'binary:logistic'}
        else:
            raise ValueError("model_type can only be \"regressor\" or \"classifier\"")

        common_kwargs = dict(
            n_estimators=self.n_estimators,
            max_depth=self.max_depth,
            n_jobs=self.n_jobs,
            tree_method=self.tree_method,
            random_state=self.random_state,
            learning_rate=self.learning_rate,
            min_child_weight=self.min_child_weight,
            seed=self.seed,
            subsample=self.subsample,
            colsample_bytree=self.colsample_bytree,
            gamma=self.gamma,
            reg_alpha=self.reg_alpha,
            reg_lambda=self.reg_lambda,
            verbosity=self.verbosity,
        )
        self.model = model_cls(**common_kwargs, **task_kwargs)

        self.model_init = True

示例#24

0

显示文件

文件： dcfe_final_clean.py 项目： michaeljoyce217/Galvanize_DSI_Capstone_Project

def cbd_model(cbd_df, cbd_finalinput):
    """Train an XGBoost regressor on the CBD data and predict crime counts.

    The selected feature columns of ``cbd_df`` are standardised, a regressor
    is fitted against the ``number_of_crimes`` column, and the same scaling
    is then applied to ``cbd_finalinput`` before predicting.

    :param cbd_df: DataFrame containing the feature columns listed in
        ``feature_columns`` plus the ``number_of_crimes`` target column.
    :param cbd_finalinput: rows to predict for. It is passed directly to the
        fitted scaler, so presumably it carries the same feature columns in
        the same order -- verify against the caller.
    :return: the regressor's predictions for ``cbd_finalinput`` (described by
        the original author as the number of crimes for the next three days).
    """
    feature_columns = [
        'year', 'month', 'day', 'tmax', 'tmin', 'consumer_price_index',
        'gdp_millions_2007', 'seasonally_adjusted_unemployment',
        'unadjusted_unemployment', 'Possession, cocaine ',
        'Heroin, possession ', 'Heroin Price Canada',
        'day_segment_1200pm-1159pm', 'day_of_week_Monday',
        'day_of_week_Saturday', 'day_of_week_Sunday', 'day_of_week_Thursday',
        'day_of_week_Tuesday', 'day_of_week_Wednesday',
    ]
    features = cbd_df[feature_columns]
    target = cbd_df['number_of_crimes']

    # The scaling statistics are learned from the training features only and
    # then reused, unchanged, for the rows we want predictions for.
    scaler = StandardScaler().fit(features)
    scaled_features = scaler.transform(features)
    scaled_input = scaler.transform(cbd_finalinput)

    regressor = XGBRegressor(
        base_score=0.5,
        booster='gbtree',
        colsample_bylevel=1,
        colsample_bytree=1,
        gamma=0,
        learning_rate=0.1,
        max_delta_step=0,
        max_depth=3,
        min_child_weight=1,
        missing=None,
        n_estimators=100,
        n_jobs=1,
        nthread=None,
        objective='reg:linear',
        random_state=0,
        reg_alpha=0,
        reg_lambda=1,
        scale_pos_weight=1,
        seed=None,
        silent=True,
        subsample=1,
    )
    regressor.fit(scaled_features, target)

    return regressor.predict(scaled_input)

示例#25

0

显示文件

文件： make_model_func1.py 项目： CCCartman/BigdataTalentsResearch

def get_ntree():
    """Collect cross-validated RMSE curves over a range of tree counts.

    For every ``n_estimators`` value in ``range(10, 500, 10)`` one
    ``XGBRegressor`` is created and re-fitted on each fold yielded by
    ``get_cv``; the training and validation RMSE are averaged over the folds.
    The data comes from the module-level ``X_train`` / ``y_train``.

    :return: ``(rmse_t_total, rmse_v_total)`` -- two lists holding, per tree
        count, the mean training RMSE and the mean validation RMSE.
    """
    def _rmse(actual, predicted):
        # Root of the mean squared error between targets and predictions.
        return np.sqrt(mean_squared_error(actual, predicted))

    train_curve = []
    valid_curve = []

    for ntree in range(10, 500, 10):
        booster = XGBRegressor(objective='reg:linear',
                               n_estimators=ntree,
                               random_state=1234,
                               silent=0,
                               booster='gbtree',
                               eval_metric='rmse')
        print('此时 ntree = %s' % ntree)

        fold_train_rmse = []
        fold_valid_rmse = []
        for fit_idx, held_idx in get_cv(y=y_train, n_splits=5, random_state=42):
            X_fit, y_fit = X_train[fit_idx], y_train[fit_idx]
            X_held, y_held = X_train[held_idx], y_train[held_idx]

            booster.fit(X_fit, y_fit)
            fit_pred = booster.predict(X_fit)
            held_pred = booster.predict(X_held)

            fold_train_rmse.append(_rmse(y_fit, fit_pred))
            fold_valid_rmse.append(_rmse(y_held, held_pred))

        train_curve.append(np.mean(fold_train_rmse))
        valid_curve.append(np.mean(fold_valid_rmse))

    return train_curve, valid_curve

示例#26

0

显示文件

文件： trainclassxgb.py 项目： Kobtul/diploma-profiling-experiments

def train_first_test(experiment_name, x_train, y_train, features):
    """Run a randomized hyper-parameter search for an XGBRegressor.

    Sets the module-level ``file_loc`` to ``'data/<experiment_name>/'``,
    searches the sampled parameter space with ``RandomizedSearchCV`` and
    dumps the best estimator to ``clf_bestmodel.pkl`` under that location.

    :param experiment_name: name used to build the output directory path.
    :param x_train: training features handed to the search.
    :param y_train: training targets handed to the search.
    :param features: not used by this function.
    :return: the best estimator found by the search.
    """
    global file_loc
    file_loc = 'data/' + experiment_name + '/'

    from xgboost.sklearn import XGBRegressor
    import scipy.stats as st
    from sklearn.model_selection import RandomizedSearchCV

    # Two frozen distributions are shared between several parameters:
    # a beta(10, 1) and an exponential with loc 0 and scale 50.
    near_one = st.beta(10, 1)
    non_negative = st.expon(0, 50)

    search_space = {
        "n_estimators": st.randint(3, 15),
        "max_depth": st.randint(3, 40),
        "learning_rate": st.uniform(0.05, 0.4),
        "colsample_bytree": near_one,
        "subsample": near_one,
        "gamma": st.uniform(0, 10),
        'reg_alpha': non_negative,
        "min_child_weight": non_negative,
    }

    search = RandomizedSearchCV(XGBRegressor(), search_space, n_jobs=1)
    search.fit(x_train, y_train)

    best_model = search.best_estimator_
    joblib.dump(best_model, file_loc + 'clf_bestmodel.pkl')
    return best_model

示例#27

0

显示文件

文件： func_predictors.py 项目： srams1986/senior-data-science

    def __init__(self, nb_classes, bags=1, param={}):
        """Create ``bags`` groups of ``nb_classes`` XGBRegressor models.

        Hyper-parameters are read from ``param`` with a fallback for each
        missing key. Every model in bag number ``bag`` is seeded with
        ``seed + bag``; all other settings are shared by all models.

        :param nb_classes: number of regressors created per bag.
        :param bags: number of bags (groups of regressors) to create.
        :param param: mapping of hyper-parameter overrides; only read here.
        """
        import xgboost as xgb
        from xgboost.sklearn import XGBRegressor

        self.nb_classes = nb_classes
        self.bags = bags
        self.train_y = None

        # (attribute name, fallback) pairs looked up in ``param``.
        fallbacks = (
            ('objective', 'reg:linear'),
            ('nthread', -1),
            ('n_estimators', 10),
            ('max_depth', 6),
            ('learning_rate', 0.3),
            ('colsample_bytree', 1.0),
            ('subsample', 1.0),
            ('missing', None),
            ('seed', 0),
        )
        for attr_name, fallback in fallbacks:
            setattr(self, attr_name, param.get(attr_name, fallback))

        # Outer tuple: one entry per bag; inner tuple: one model per class.
        self.bags_models = tuple(
            tuple(
                XGBRegressor(objective=self.objective,
                             nthread=self.nthread,
                             seed=self.seed + bag_index,
                             n_estimators=self.n_estimators,
                             missing=self.missing,
                             max_depth=self.max_depth,
                             learning_rate=self.learning_rate,
                             colsample_bytree=self.colsample_bytree,
                             subsample=self.subsample)
                for _ in range(self.nb_classes)
            )
            for bag_index in range(self.bags)
        )

示例#28

0

显示文件

def learn_model(X_train, y_train, X_valid, y_valid):
    """Fit an XGBRegressor with early stopping and print the elapsed time.

    The model is evaluated with RMSE on both the training and the validation
    pair after each round; ``early_stopping_rounds`` is set to 10.

    :param X_train: training features.
    :param y_train: training targets.
    :param X_valid: validation features used in the evaluation set.
    :param y_valid: validation targets used in the evaluation set.
    :return: the fitted ``XGBRegressor``.
    """
    started_at = time()

    regressor = XGBRegressor(max_depth=7, n_estimators=500)
    watch_list = [(X_train, y_train), (X_valid, y_valid)]
    regressor.fit(
        X_train,
        y_train,
        eval_metric="rmse",
        eval_set=watch_list,
        verbose=True,
        early_stopping_rounds=10,
    )

    finished_at = time()
    print('Total of training time: ', finished_at - started_at)
    return regressor

示例#29

0

显示文件

def tun_reg_alpha(reg_alpha_range, param_data_path, train_x, train_y):
    '''
    Tune the ``reg_alpha`` parameter of an XGBRegressor with a grid search.

    The current parameter set is loaded with ``get_param_data``, every value
    in ``reg_alpha_range`` is scored with 5-fold cross-validation on negative
    mean squared error, and the best ``reg_alpha`` is written back with
    ``save_param_data`` so later tuning steps can pick it up.

    :param reg_alpha_range: the reg_alpha values you want to test

    :param param_data_path: path of the stored parameter file,
        e.g. './../data/param_data.pkl'

    :param train_x: training features passed to the grid search

    :param train_y: training targets passed to the grid search

    :return: None; results are printed and the parameter file is updated
    '''
    # get the newest param first
    param_dict = get_param_data(param_data_path=param_data_path)

    # Single-argument print(...) behaves the same as a Python 2 print
    # statement and is also valid Python 3.
    print("正则化参数reg_alpha调优")
    param_test1 = {'reg_alpha': reg_alpha_range}
    # NOTE(review): ``iid`` and ``grid_scores_`` only exist in older
    # scikit-learn releases -- confirm the pinned version before upgrading.
    gsearch1 = GridSearchCV(estimator=XGBRegressor(**param_dict),
                            param_grid=param_test1,
                            scoring='neg_mean_squared_error',
                            iid=False,
                            cv=5)
    gsearch1.fit(X=train_x, y=train_y)

    # show the results
    for score_entry in gsearch1.grid_scores_:
        print(score_entry)
    print("best_params_ and best_score_:")
    # Formatted explicitly: print(a, b) would emit a tuple on Python 2.
    print("%s %s" % (gsearch1.best_params_, gsearch1.best_score_))

    # change some param and return
    param_dict['reg_alpha'] = gsearch1.best_params_['reg_alpha']

    save_param_data(param_dict=param_dict, param_data_path=param_data_path)

示例#30

0

显示文件

def search_best_parameters(X, y):
    """Grid-search XGBRegressor hyper-parameters and print the best set.

    :param X: training features passed to ``GridSearchCV.fit``.
    :param y: training targets passed to ``GridSearchCV.fit``.
    """

    # Exhaustive grid: ten parameters with three candidates each, one with
    # two and a fixed random_state -- 118,098 combinations in total, each
    # of which is cross-validated.
    xgb_grid = {
        'n_estimators': [80, 100, 120],
        'max_depth': [3, 4, 5],
        'learning_rate': [0.1, 0.2, 0.5],
        'booster': ['gbtree', 'gblinear', 'dart'],
        'gamma': [0, 0.2, 0.5],
        'subsample': [0.5, 0.8],
        'reg_alpha': [0.2, 0.3, 0.5],
        'reg_lambda': [0.5, 0.8, 1],
        'colsample_bytree': [1, 0.8, 0.5],
        'colsample_bylevel': [1, 0.8, 0.5],
        'colsample_bynode': [1, 0.8, 0.5],
        'random_state': [77]
    }

    # Candidates are ranked by R^2; n_jobs=-1 asks for all available cores.
    xgb_gridsearch = GridSearchCV(XGBRegressor(),
                                  xgb_grid,
                                  n_jobs=-1,
                                  verbose=True,
                                  scoring='r2')

    xgb_gridsearch.fit(X, y)
    print(f"best parameters: {xgb_gridsearch.best_params_}")