def objective_function(self,x): print("XGBRegressor优化中...") train_x, test_x, train_y, test_y = util.get_train_test_split(self.src,int(np.round(x[0])),int(np.round(x[1])),with_rain=self.with_rain) print(self.model_name) self.tune_params = ['offset','period','max_depth', # 'learning_rate', 'n_estimators', 'gasmma', 'min_child_weight','max_delta_step','subsample', 'colsample_bytree','colsample_bylevel','colsample_bynode','reg_alpha', 'reg_lambda','scale_pos_weight','base_score' ] self.model.max_depth = int(x[2]) self.model.n_estimators = int(x[3]) self.model.gamma = x[4] self.model.min_child_weight = int(x[5]) self.model.max_delta_step = int(x[6]) self.model.subsample = x[7] self.model.colsample_bytree = x[8] self.model.colsample_bylevel = x[9] self.model.colsample_bynode = x[10] self.model.reg_alpha = x[11] self.model.reg_lambda = x[12] self.model.scale_pos_weight = x[13] self.model.base_score = x[14] self.model.objective = 'reg:squarederror' self.model.learning_rate = 0.001 self.model.fit(X=train_x,y=train_y) y_hat = self.model.predict(test_x) mse = mean_squared_error(y_hat,test_y) return mse
def objective_function(self, x): print("GradientBoostingRegressor优化中...") self.tune_params = [ 'offset', 'period', 'n_estimators', 'learning_rate', 'subsample', 'min_samples_split', 'min_samples_leaf', 'min_weight_fraction_leaf', 'max_depth', 'alpha' ] print(self.model_name) train_x, test_x, train_y, test_y = util.get_train_test_split( self.src, int(np.round(x[0])), int(np.round(x[1])), with_rain=self.with_rain) self.model.n_estimators = int(x[2]) self.model.learning_rate = x[3] self.model.subsample = x[4] self.model.min_samples_split = int(x[5]) self.model.min_samples_leaf = int(x[6]) self.model.min_weight_fraction_leaf = x[7] self.model.max_depth = int(x[8]) self.model.alpha = x[9] self.model.fit(X=train_x, y=train_y) y_hat = self.model.predict(test_x) mse = mean_squared_error(y_hat, test_y) return mse
def objective_function(self, x):
    train_x, test_x, train_y, test_y = util.get_train_test_split(
        self.src, int(np.round(x[0])), int(np.round(x[1])), with_rain=self.with_rain)
    self.model.fit(X=train_x, y=train_y)
    y_hat = self.model.predict(test_x)
    mse = mean_squared_error(test_y, y_hat)
    return mse
def objective_function(self, x):
    self.tune_params = ['offset', 'period']
    print(self.model_name)
    train_x, test_x, train_y, test_y = util.get_train_test_split(
        self.src, int(np.round(x[0])), int(np.round(x[1])), with_rain=self.with_rain)
    self.model.fit(X=train_x, y=train_y)
    y_hat = self.model.predict(test_x)
    mse = mean_squared_error(test_y, y_hat)
    return mse
def objective_function(self, x): print("Ridge最优化中...") self.tune_params = ['offset', 'period', 'alpha', 'max_iter', 'tol'] train_x, test_x, train_y, test_y = util.get_train_test_split( self.src, int(np.round(x[0])), int(np.round(x[1])), with_rain=self.with_rain) self.model.alpha = x[2] self.model.max_iter = x[3] self.model.tol = x[4] self.model.fit(X=train_x, y=train_y) y_hat = self.model.predict(test_x) mse = mean_squared_error(y_hat, test_y) return mse
def objective_function(self, x): print("RidgeCV最优化中...") self.tune_params = [ 'offset', 'period', 'alpha0', 'alpha1', 'alpha2', 'alpha3', 'cv' ] print(self.model_name) train_x, test_x, train_y, test_y = util.get_train_test_split( self.src, int(np.round(x[0])), int(np.round(x[1])), with_rain=self.with_rain) self.model.alphas = [x[2], x[3], x[4], x[5]] self.cv = int(x[6]) self.model.fit(X=train_x, y=train_y) y_hat = self.model.predict(test_x) mse = mean_squared_error(y_hat, test_y) return mse
def objective_function(self, x): print("BaggingRegressor优化中...") self.tune_params = [ 'offset', 'period', 'n_estimators', 'max_samples', 'max_features' ] print(self.model_name) train_x, test_x, train_y, test_y = util.get_train_test_split( self.src, int(np.round(x[0])), int(np.round(x[1])), with_rain=self.with_rain) self.model.n_estimators = int(x[2]) self.model.max_samples = x[3] self.model.max_features = x[4] self.model.fit(X=train_x, y=train_y) y_hat = self.model.predict(test_x) mse = mean_squared_error(y_hat, test_y) return mse
def objective_function(self, x): print("AdaBoostRegressor优化中...") self.tune_params = [ 'offset', 'period', 'n_estimators', 'learning_rate' ] print(self.model_name) train_x, test_x, train_y, test_y = util.get_train_test_split( self.src, int(np.round(x[0])), int(np.round(x[1])), with_rain=self.with_rain) self.model.n_estimators = int(x[2]) self.model.learning_rate = x[3] self.model.loss = 'square' self.model.fit(X=train_x, y=train_y) y_hat = self.model.predict(test_x) mse = mean_squared_error(y_hat, test_y) return mse
def objective_function(self, x): print("LassoLarsCV最优化中...") self.tune_params = [ 'offset', 'period', 'max_iter', 'max_n_alphas', 'eps' ] print(self.model_name) train_x, test_x, train_y, test_y = util.get_train_test_split( self.src, int(np.round(x[0])), int(np.round(x[1])), with_rain=self.with_rain) self.model.max_iter = x[2] self.model.max_n_alphas = x[3] self.model.eps = x[4] self.model.fit(X=train_x, y=train_y) y_hat = self.model.predict(test_x) mse = mean_squared_error(y_hat, test_y) return mse
def objective_function(self,x): print("CatBoostRegressor优化中...") train_x, test_x, train_y, test_y = util.get_train_test_split(self.src,int(np.round(x[0])),int(np.round(x[1])),with_rain=self.with_rain) print(self.model_name) self.tune_params = ['offset','period','max_depth','subsample','learning_rate','n_estimators', 'min_child_samples','max_leaves'] self.model.max_depth = int(x[2]) self.model.subsample = x[3] self.model.learning_rate = x[4] self.model.n_estimators = int(x[5]) self.model.min_child_samples = x[6] self.model.max_leaves = x[7] self.model.task_type = 'gpu' self.model.iterations = 100 self.model.loss_function = 'MSE' train_x,val_x,train_y,val_y = train_test_split(train_x,train_y,test_size=0.1) self.model.fit(train_x,train_y,eval_set=(val_x,val_y),plot=True) y_hat = self.model.predict(test_x) mse = mean_squared_error(y_hat,test_y) return mse
def objective_function(self, x): print("BayesianRidge最优化中...") self.tune_params = [ 'offset', 'period', 'n_iter', 'tol', 'alpha_1', 'alpha_2', 'lambda_1', 'lambda_2' ] print(self.model_name) train_x, test_x, train_y, test_y = util.get_train_test_split( self.src, int(np.round(x[0])), int(np.round(x[1])), with_rain=self.with_rain) self.model.n_iter = int(x[2]) self.model.tol = x[3] self.model.alpha_1 = x[4] self.model.alpha_2 = x[5] self.model.lambda_1 = x[6] self.model.lambda_2 = x[7] self.model.fit(X=train_x, y=train_y) y_hat = self.model.predict(test_x) mse = mean_squared_error(y_hat, test_y) return mse
def objective_function(self,x): print("LightGBMRegressor优化中...") train_x, test_x, train_y, test_y = util.get_train_test_split(self.src,int(np.round(x[0])),int(np.round(x[1])),with_rain=self.with_rain) print(self.model_name) self.tune_params = ['offset','period','num_leaves','learning_rate','feature_fraction','bagging_fraction','bagging_freq'] params = { 'task': 'train', 'boosting_type': 'gbdt', # 设置提升类型 'objective': 'regression', # 目标函数 'metric': {'mse'}, # 评估函数 'num_leaves': int(x[2]), # 叶子节点数 'learning_rate': x[3], # 学习速率 'feature_fraction': x[4], # 建树的特征选择比例 'bagging_fraction': x[5], # 建树的样本采样比例 'bagging_freq': int(x[6]), # k 意味着每 k 次迭代执行bagging 'verbose': 0, # <0 显示致命的, =0 显示错误 (警告), >0 显示信息 'device':'gpu' } train_x,val_x,train_y,val_y = train_test_split(train_x,train_y,test_size=0.1) self.model = lgb.train(params,lgb.Dataset(train_x,train_y),num_boost_round=100,valid_sets=lgb.Dataset(val_x,val_y),early_stopping_rounds=5) y_hat = self.model.predict(test_x,num_iteration=self.model.best_iteration) mse = mean_squared_error(y_hat,test_y) return mse
df, unique_labels = util.load_data(file_name)
auto_df, auto_unique_labels = util.load_data(auto_file_name)
print("df after loading (%d rows):" % df.shape[0])
print(df.head(n=5))

# hash encode the data
df = util.hash_encoder(df, cols_to_hash, no_new_cols_per)
print("df after hashing (%d rows):" % df.shape[0])
print(df.head(n=5))
auto_df = util.hash_encoder(auto_df, cols_to_hash, no_new_cols_per)

# separate into train/test X/y splits
df_train_X, df_train_y, df_test_X, df_test_y = util.get_train_test_split(df)
auto_df_test_X = auto_df
auto_df_test_y = auto_df_test_X.pop('delinquent')
auto_df_test_y = auto_df_test_y.astype(np.int32)

print("df train X:")
print(df_train_X.head(n=5))
print('df train X rows:')
print(df_train_X.shape[0])
print("df train y head:")
print(df_train_y.head(n=5))
print("df test y head:")
print(df_test_y.head(n=5))
print("auto df test y head:")