Python get_train_test_split примеры, util.get_train_test_split Python примеры использования

Пример #1

0

Показать файл

Файл: XGBOptimizer.py Проект: mmmmmmiracle/water

	def objective_function(self,x):
		"""Evaluate one candidate vector ``x`` for the XGBoost model.

		``x`` is read positionally and lines up with ``self.tune_params``:
		``x[0]`` and ``x[1]`` (listed as 'offset' and 'period') are rounded to
		integers and handed to ``util.get_train_test_split``; ``x[2]`` to
		``x[14]`` are written onto ``self.model`` as the attributes named by
		the remaining entries, in order.

		Side effects: prints progress, overwrites ``self.tune_params``,
		mutates the hyper-parameter attributes of ``self.model`` and refits it.

		Returns:
			The result of ``mean_squared_error`` between the predictions on
			the test split and ``test_y``.
		"""
		print("XGBRegressor优化中...")
		train_x, test_x, train_y, test_y = util.get_train_test_split(self.src,int(np.round(x[0])),int(np.round(x[1])),with_rain=self.with_rain)
		print(self.model_name)
		# 'learning_rate' is intentionally absent: it is pinned to 0.001 below
		# rather than tuned.  The fifth tuned name is 'gamma' (it was misspelled
		# 'gasmma', which did not match the attribute set from x[4]).
		self.tune_params = ['offset','period','max_depth',
							'n_estimators',
							'gamma',
							'min_child_weight','max_delta_step','subsample',
							'colsample_bytree','colsample_bylevel','colsample_bynode','reg_alpha',
							'reg_lambda','scale_pos_weight','base_score'
							]
		# Count-like parameters are truncated to int; the rest are used as given.
		self.model.max_depth = int(x[2])
		self.model.n_estimators = int(x[3])
		self.model.gamma = x[4]
		self.model.min_child_weight = int(x[5])
		self.model.max_delta_step = int(x[6])
		self.model.subsample = x[7]
		self.model.colsample_bytree = x[8]
		self.model.colsample_bylevel = x[9]
		self.model.colsample_bynode = x[10]
		self.model.reg_alpha = x[11]
		self.model.reg_lambda = x[12]
		self.model.scale_pos_weight = x[13]
		self.model.base_score = x[14]
		# Fixed (non-tuned) settings.
		self.model.objective = 'reg:squarederror'
		self.model.learning_rate = 0.001
		self.model.fit(X=train_x,y=train_y)
		y_hat = self.model.predict(test_x)
		mse = mean_squared_error(y_hat,test_y)
		return mse

Пример #2

0

Показать файл

Файл: base_optimizer.py Проект: mmmmmmiracle/water

 def objective_function(self, x):
     """Score one candidate vector ``x`` for the gradient-boosting model.

     ``x[0]`` and ``x[1]`` (listed as 'offset' and 'period' in
     ``self.tune_params``) are rounded to integers and passed to
     ``util.get_train_test_split``.  ``x[2]`` .. ``x[9]`` are copied onto
     ``self.model`` under the remaining names of ``self.tune_params``;
     the count-like ones are truncated with ``int``.

     Prints progress, overwrites ``self.tune_params``, refits ``self.model``
     and returns ``mean_squared_error(predictions, test_y)``.
     """
     print("GradientBoostingRegressor优化中...")
     self.tune_params = [
         'offset', 'period', 'n_estimators', 'learning_rate', 'subsample',
         'min_samples_split', 'min_samples_leaf',
         'min_weight_fraction_leaf', 'max_depth', 'alpha'
     ]
     print(self.model_name)

     offset = int(np.round(x[0]))
     period = int(np.round(x[1]))
     fit_x, eval_x, fit_y, eval_y = util.get_train_test_split(
         self.src, offset, period, with_rain=self.with_rain)

     # (attribute name, index into x, truncate to int?) in assignment order.
     assignments = (
         ('n_estimators', 2, True),
         ('learning_rate', 3, False),
         ('subsample', 4, False),
         ('min_samples_split', 5, True),
         ('min_samples_leaf', 6, True),
         ('min_weight_fraction_leaf', 7, False),
         ('max_depth', 8, True),
         ('alpha', 9, False),
     )
     for attr, position, as_int in assignments:
         raw = x[position]
         setattr(self.model, attr, int(raw) if as_int else raw)

     self.model.fit(X=fit_x, y=fit_y)
     predictions = self.model.predict(eval_x)
     return mean_squared_error(predictions, eval_y)

Пример #3

0

Показать файл

Файл: optimizer.py Проект: mmmmmmiracle/water

 def objective_function(self, x):
     """Refit ``self.model`` on the split selected by ``x`` and score it.

     Only ``x[0]`` and ``x[1]`` are read: each is rounded to an integer and
     forwarded, together with ``self.src`` and ``self.with_rain``, to
     ``util.get_train_test_split``.

     Returns ``mean_squared_error(predictions, test_y)`` for the test split.
     """
     first_arg = int(np.round(x[0]))
     second_arg = int(np.round(x[1]))
     split = util.get_train_test_split(
         self.src, first_arg, second_arg, with_rain=self.with_rain)
     fit_x, eval_x, fit_y, eval_y = split

     self.model.fit(X=fit_x, y=fit_y)
     predictions = self.model.predict(eval_x)
     return mean_squared_error(predictions, eval_y)

Пример #4

0

Показать файл

Файл: base_optimizer.py Проект: mmmmmmiracle/water

 def objective_function(self, x):
     """Score a candidate that tunes only the two split arguments.

     Records ``['offset', 'period']`` in ``self.tune_params``, prints
     ``self.model_name``, then rounds ``x[0]`` and ``x[1]`` to integers and
     passes them to ``util.get_train_test_split``.  The model is refitted
     on the training part and the method returns
     ``mean_squared_error(predictions, test_y)``.
     """
     self.tune_params = ['offset', 'period']
     print(self.model_name)

     offset = int(np.round(x[0]))
     period = int(np.round(x[1]))
     fit_x, eval_x, fit_y, eval_y = util.get_train_test_split(
         self.src, offset, period, with_rain=self.with_rain)

     self.model.fit(X=fit_x, y=fit_y)
     predictions = self.model.predict(eval_x)
     return mean_squared_error(predictions, eval_y)

Пример #5

0

Показать файл

Файл: base_optimizer.py Проект: mmmmmmiracle/water

 def objective_function(self, x):
     """Evaluate one candidate vector ``x`` for the Ridge model.

     ``x`` is read positionally, matching ``self.tune_params``:
     ``x[0]`` / ``x[1]`` ('offset', 'period') are rounded to integers and
     passed to ``util.get_train_test_split``; ``x[2]`` becomes
     ``model.alpha``, ``x[3]`` becomes ``model.max_iter`` and ``x[4]``
     becomes ``model.tol``.

     Side effects: prints progress, overwrites ``self.tune_params``,
     mutates and refits ``self.model``.

     Returns:
         ``mean_squared_error(predictions, test_y)`` on the test split.
     """
     print("Ridge最优化中...")
     self.tune_params = ['offset', 'period', 'alpha', 'max_iter', 'tol']
     train_x, test_x, train_y, test_y = util.get_train_test_split(
         self.src,
         int(np.round(x[0])),
         int(np.round(x[1])),
         with_rain=self.with_rain)
     self.model.alpha = x[2]
     # max_iter is an iteration count: cast it like the other integer
     # hyper-parameters so a float candidate value is not handed to the solver.
     self.model.max_iter = int(x[3])
     self.model.tol = x[4]
     self.model.fit(X=train_x, y=train_y)
     y_hat = self.model.predict(test_x)
     mse = mean_squared_error(y_hat, test_y)
     return mse

Пример #6

0

Показать файл

Файл: base_optimizer.py Проект: mmmmmmiracle/water

 def objective_function(self, x):
     """Evaluate one candidate vector ``x`` for the RidgeCV model.

     ``x`` is read positionally, matching ``self.tune_params``:
     ``x[0]`` / ``x[1]`` ('offset', 'period') are rounded to integers and
     passed to ``util.get_train_test_split``; ``x[2]`` .. ``x[5]`` form
     the list assigned to ``model.alphas``; ``x[6]`` is truncated to an
     integer and used as ``cv``.

     Side effects: prints progress, overwrites ``self.tune_params`` and
     ``self.cv``, mutates and refits ``self.model``.

     Returns:
         ``mean_squared_error(predictions, test_y)`` on the test split.
     """
     print("RidgeCV最优化中...")
     self.tune_params = [
         'offset', 'period', 'alpha0', 'alpha1', 'alpha2', 'alpha3', 'cv'
     ]
     print(self.model_name)
     train_x, test_x, train_y, test_y = util.get_train_test_split(
         self.src,
         int(np.round(x[0])),
         int(np.round(x[1])),
         with_rain=self.with_rain)
     self.model.alphas = [x[2], x[3], x[4], x[5]]
     # The tuned 'cv' value has to be set on the estimator that is fitted;
     # previously it was stored only on the optimizer and never reached it.
     # self.cv is still updated in case other code reads it.
     self.cv = int(x[6])
     self.model.cv = self.cv
     self.model.fit(X=train_x, y=train_y)
     y_hat = self.model.predict(test_x)
     mse = mean_squared_error(y_hat, test_y)
     return mse

Пример #7

0

Показать файл

Файл: base_optimizer.py Проект: mmmmmmiracle/water

 def objective_function(self, x):
     """Score one candidate vector ``x`` for the bagging model.

     ``x[0]`` and ``x[1]`` (listed as 'offset' and 'period' in
     ``self.tune_params``) are rounded to integers and passed to
     ``util.get_train_test_split``.  ``x[2]`` is truncated to an integer
     and stored as ``model.n_estimators``; ``x[3]`` and ``x[4]`` are
     stored unchanged as ``model.max_samples`` and ``model.max_features``.

     Prints progress, overwrites ``self.tune_params``, refits ``self.model``
     and returns ``mean_squared_error(predictions, test_y)``.
     """
     print("BaggingRegressor优化中...")
     self.tune_params = [
         'offset', 'period', 'n_estimators', 'max_samples', 'max_features'
     ]
     print(self.model_name)

     offset = int(np.round(x[0]))
     period = int(np.round(x[1]))
     fit_x, eval_x, fit_y, eval_y = util.get_train_test_split(
         self.src, offset, period, with_rain=self.with_rain)

     estimator = self.model
     estimator.n_estimators = int(x[2])
     estimator.max_samples = x[3]
     estimator.max_features = x[4]

     estimator.fit(X=fit_x, y=fit_y)
     predictions = estimator.predict(eval_x)
     return mean_squared_error(predictions, eval_y)

Пример #8

0

Показать файл

Файл: base_optimizer.py Проект: mmmmmmiracle/water

 def objective_function(self, x):
     """Score one candidate vector ``x`` for the AdaBoost model.

     ``x[0]`` and ``x[1]`` (listed as 'offset' and 'period' in
     ``self.tune_params``) are rounded to integers and passed to
     ``util.get_train_test_split``.  ``x[2]`` is truncated to an integer
     for ``model.n_estimators`` and ``x[3]`` is used as
     ``model.learning_rate``; ``model.loss`` is always set to 'square'.

     Prints progress, overwrites ``self.tune_params``, refits ``self.model``
     and returns ``mean_squared_error(predictions, test_y)``.
     """
     print("AdaBoostRegressor优化中...")
     self.tune_params = [
         'offset', 'period', 'n_estimators', 'learning_rate'
     ]
     print(self.model_name)

     offset = int(np.round(x[0]))
     period = int(np.round(x[1]))
     fit_x, eval_x, fit_y, eval_y = util.get_train_test_split(
         self.src, offset, period, with_rain=self.with_rain)

     # Tuned values first, then the fixed loss, in the original order.
     for attr, value_of in (
             ('n_estimators', lambda: int(x[2])),
             ('learning_rate', lambda: x[3]),
             ('loss', lambda: 'square'),
     ):
         setattr(self.model, attr, value_of())

     self.model.fit(X=fit_x, y=fit_y)
     predictions = self.model.predict(eval_x)
     return mean_squared_error(predictions, eval_y)

Пример #9

0

Показать файл

Файл: base_optimizer.py Проект: mmmmmmiracle/water

 def objective_function(self, x):
     """Evaluate one candidate vector ``x`` for the LassoLarsCV model.

     ``x`` is read positionally, matching ``self.tune_params``:
     ``x[0]`` / ``x[1]`` ('offset', 'period') are rounded to integers and
     passed to ``util.get_train_test_split``; ``x[2]`` becomes
     ``model.max_iter``, ``x[3]`` becomes ``model.max_n_alphas`` and
     ``x[4]`` becomes ``model.eps``.

     Side effects: prints progress, overwrites ``self.tune_params``,
     mutates and refits ``self.model``.

     Returns:
         ``mean_squared_error(predictions, test_y)`` on the test split.
     """
     print("LassoLarsCV最优化中...")
     self.tune_params = [
         'offset', 'period', 'max_iter', 'max_n_alphas', 'eps'
     ]
     print(self.model_name)
     train_x, test_x, train_y, test_y = util.get_train_test_split(
         self.src,
         int(np.round(x[0])),
         int(np.round(x[1])),
         with_rain=self.with_rain)
     # Both of these are counts; cast them like the integer parameters in
     # the sibling objective functions so float candidates are not passed on.
     self.model.max_iter = int(x[2])
     self.model.max_n_alphas = int(x[3])
     self.model.eps = x[4]
     self.model.fit(X=train_x, y=train_y)
     y_hat = self.model.predict(test_x)
     mse = mean_squared_error(y_hat, test_y)
     return mse

Пример #10

0

Показать файл

Файл: XGBOptimizer.py Проект: mmmmmmiracle/water

	def objective_function(self,x):
		"""Score one candidate vector ``x`` for the CatBoost model.

		``x[0]`` and ``x[1]`` (listed as 'offset' and 'period' in
		``self.tune_params``) are rounded to integers and passed to
		``util.get_train_test_split``.  ``x[2]`` .. ``x[7]`` are written onto
		``self.model`` under the remaining names of ``self.tune_params``
		(``max_depth`` and ``n_estimators`` truncated with ``int``), followed
		by three fixed attributes.  A further 10% of the training part is held
		out via ``train_test_split`` and given to ``fit`` as ``eval_set``.

		Prints progress, overwrites ``self.tune_params``, refits ``self.model``
		and returns ``mean_squared_error(predictions, test_y)``.

		NOTE(review): whether plain attribute assignment actually reconfigures
		this estimator, and whether 'gpu' / 'MSE' are accepted values, cannot
		be told from this method - confirm against the library in use.
		"""
		print("CatBoostRegressor优化中...")
		offset = int(np.round(x[0]))
		period = int(np.round(x[1]))
		fit_x, eval_x, fit_y, eval_y = util.get_train_test_split(
			self.src, offset, period, with_rain=self.with_rain)
		print(self.model_name)
		self.tune_params = ['offset','period','max_depth','subsample','learning_rate','n_estimators',
							'min_child_samples','max_leaves']

		# Tuned attributes, assigned in tune_params order.
		truncated = ('max_depth', 'n_estimators')
		for position, attr in enumerate(self.tune_params[2:], start=2):
			candidate = x[position]
			if attr in truncated:
				candidate = int(candidate)
			setattr(self.model, attr, candidate)

		# Fixed attributes.
		self.model.task_type = 'gpu'
		self.model.iterations = 100
		self.model.loss_function = 'MSE'

		# Hold out part of the training data for monitoring during fit.
		fit_x, held_x, fit_y, held_y = train_test_split(fit_x, fit_y, test_size=0.1)
		self.model.fit(fit_x, fit_y, eval_set=(held_x, held_y), plot=True)
		predictions = self.model.predict(eval_x)
		return mean_squared_error(predictions, eval_y)

Пример #11

0

Показать файл

Файл: base_optimizer.py Проект: mmmmmmiracle/water

 def objective_function(self, x):
     """Score one candidate vector ``x`` for the BayesianRidge model.

     ``x[0]`` and ``x[1]`` (listed as 'offset' and 'period' in
     ``self.tune_params``) are rounded to integers and passed to
     ``util.get_train_test_split``.  ``x[2]`` is truncated to an integer
     for ``model.n_iter``; ``x[3]`` .. ``x[7]`` are stored unchanged as
     ``tol``, ``alpha_1``, ``alpha_2``, ``lambda_1`` and ``lambda_2``.

     Prints progress, overwrites ``self.tune_params``, refits ``self.model``
     and returns ``mean_squared_error(predictions, test_y)``.
     """
     print("BayesianRidge最优化中...")
     self.tune_params = [
         'offset', 'period', 'n_iter', 'tol', 'alpha_1', 'alpha_2',
         'lambda_1', 'lambda_2'
     ]
     print(self.model_name)

     offset = int(np.round(x[0]))
     period = int(np.round(x[1]))
     fit_x, eval_x, fit_y, eval_y = util.get_train_test_split(
         self.src, offset, period, with_rain=self.with_rain)

     self.model.n_iter = int(x[2])
     # The remaining tuned values are copied over as-is, in order.
     plain_attrs = ('tol', 'alpha_1', 'alpha_2', 'lambda_1', 'lambda_2')
     for position, attr in enumerate(plain_attrs, start=3):
         setattr(self.model, attr, x[position])

     self.model.fit(X=fit_x, y=fit_y)
     predictions = self.model.predict(eval_x)
     return mean_squared_error(predictions, eval_y)

Пример #12

0

Показать файл

Файл: XGBOptimizer.py Проект: mmmmmmiracle/water

	def objective_function(self,x):
		"""Train a LightGBM booster for candidate ``x`` and return its test MSE.

		``x[0]`` and ``x[1]`` (listed as 'offset' and 'period' in
		``self.tune_params``) are rounded to integers and passed to
		``util.get_train_test_split``.  ``x[2]`` .. ``x[6]`` fill the
		``num_leaves``, ``learning_rate``, ``feature_fraction``,
		``bagging_fraction`` and ``bagging_freq`` entries of the parameter
		dict.  A further 10% of the training part is held out via
		``train_test_split`` and used as the validation set.

		Prints progress, overwrites ``self.tune_params`` and replaces
		``self.model`` with the object returned by ``lgb.train``.  Returns
		``mean_squared_error(predictions, test_y)``.
		"""
		print("LightGBMRegressor优化中...")
		offset = int(np.round(x[0]))
		period = int(np.round(x[1]))
		fit_x, eval_x, fit_y, eval_y = util.get_train_test_split(
			self.src, offset, period, with_rain=self.with_rain)
		print(self.model_name)
		self.tune_params = ['offset','period','num_leaves','learning_rate','feature_fraction','bagging_fraction','bagging_freq']

		booster_params = {
			'task': 'train',
			'boosting_type': 'gbdt',        # boosting type
			'objective': 'regression',      # objective function
			'metric': {'mse'},              # evaluation metric
			'num_leaves': int(x[2]),        # number of leaves
			'learning_rate': x[3],          # learning rate
			'feature_fraction': x[4],       # fraction of features used when building trees
			'bagging_fraction': x[5],       # fraction of samples used when building trees
			'bagging_freq': int(x[6]),      # k means bagging is performed every k iterations
			'verbose': 0,                   # <0 fatal only, =0 errors (warnings), >0 info
			'device':'gpu'
		}

		# Hold out part of the training data for early stopping.
		fit_x, held_x, fit_y, held_y = train_test_split(fit_x, fit_y, test_size=0.1)
		fit_set = lgb.Dataset(fit_x, fit_y)
		held_set = lgb.Dataset(held_x, held_y)
		self.model = lgb.train(
			booster_params,
			fit_set,
			num_boost_round=100,
			valid_sets=held_set,
			early_stopping_rounds=5)

		predictions = self.model.predict(eval_x, num_iteration=self.model.best_iteration)
		return mean_squared_error(predictions, eval_y)

Пример #13

0

Показать файл

# Script excerpt: `util`, `np`, `file_name`, `auto_file_name`, `cols_to_hash`
# and `no_new_cols_per` are defined outside the lines shown here.

# Load the main dataset; load_data returns the frame plus a second value.
df, unique_labels = util.load_data(file_name)

# Load the second ("auto") dataset the same way.
auto_df, auto_unique_labels = util.load_data(auto_file_name)

print("df after loading (%ld rows):" % df.shape[0])
print(df.head(n=5))

# hash encode the data
df = util.hash_encoder(df, cols_to_hash, no_new_cols_per)
print("df after hashing (%ld rows):" % df.shape[0])
print(df.head(n=5))

# Apply the same hash encoding to the auto dataset.
auto_df = util.hash_encoder(auto_df, cols_to_hash, no_new_cols_per)

# separate into train/test X/y splits
df_train_X, df_train_y, df_test_X, df_test_y = util.get_train_test_split(df)

# The auto dataset is not split here. auto_df_test_X is the same object as
# auto_df, so the pop() below removes the 'delinquent' column from both names.
auto_df_test_X = auto_df
auto_df_test_y = auto_df_test_X.pop('delinquent')
auto_df_test_y = auto_df_test_y.astype(np.int32)

# Print previews of the resulting splits.
print("df train X:")
print(df_train_X.head(n=5))
print('df train X rows:')
print(df_train_X.shape[0])

print("df train y head:")
print(df_train_y.head(n=5))
print("df test y head:")
print(df_test_y.head(n=5))
# NOTE(review): only the header is printed here; the excerpt ends before any
# matching print of auto_df_test_y.
print("auto df test y head:")

Python get_train_test_split примеры использования