def xgb_tuning(train_year=None, test_season=None):
    """Grid-search one XGBoost hyper-parameter with 5-fold cross-validation.

    Training data is loaded through ``get_train_data``; an ``XGBClassifier``
    built from a fixed base configuration is then tuned with ``GridSearchCV``
    over a single parameter, scored by ``precision_50``. The best parameter
    value and the best cross-validation score are printed; nothing is
    returned.

    Args:
        train_year: Years forwarded to ``get_train_data``. Defaults to
            ``['2016', '2017', '2018', '2019']``.
        test_season: Test seasons forwarded to ``get_train_data``. Defaults
            to ``['20180930']``.
    """
    # Build the default lists per call instead of sharing one mutable list
    # object across every invocation.
    if train_year is None:
        train_year = ['2016', '2017', '2018', '2019']
    if test_season is None:
        test_season = ['20180930']

    truncate = 3
    # Only the training split is needed for cross-validated tuning.
    x_train, _x_test, y_train, _y_test, _test_name = get_train_data(
        truncate=truncate, train_year=train_year, test_season=test_season)

    # Base configuration; the parameter under search is overridden by the
    # grid during fitting.
    # NOTE(review): XGBoost documents 'eta' as an alias of 'learning_rate';
    # both are passed here with different values -- confirm which is intended.
    other_params = {
        'eta': 0.3,
        'n_estimators': 120,
        'gamma': 0,
        'max_depth': 6,
        'min_child_weight': 12,
        'learning_rate': 0.1,
        'colsample_bytree': 1,
        'colsample_bylevel': 0.5,
        'subsample': 1.0,
        'reg_lambda': 40,
        'reg_alpha': 10,
        'seed': 1302,
        'scale_pos_weight': 1,
    }
    model = xgb.XGBClassifier(**other_params)

    # Candidate grids, one per tunable parameter. Previously these were
    # successive assignments to the same variable, so only the last one
    # ('scale_pos_weight') ever took effect; the selection is now explicit.
    search_spaces = {
        'n_estimators': np.linspace(30, 150, 13, dtype=int),
        'max_depth': np.linspace(5, 10, 6, dtype=int),
        'min_child_weight': np.linspace(5, 15, 11, dtype=int),
        'gamma': np.linspace(0, 1, 5),
        'subsample': np.linspace(0, 1, 6),
        'reg_lambda': np.linspace(0, 100, 11),
        'reg_alpha': np.linspace(0, 10, 11),
        'scale_pos_weight': np.linspace(0, 10, 11),
    }
    tuned_param = 'scale_pos_weight'
    cv_params = {tuned_param: search_spaces[tuned_param]}

    # NOTE(review): newer scikit-learn releases replace ``needs_proba`` with
    # ``response_method`` -- confirm against the installed version.
    p50_score = make_scorer(precision_50,
                            greater_is_better=True,
                            needs_proba=True)
    gs = GridSearchCV(model,
                      cv_params,
                      verbose=2,
                      refit=True,
                      cv=5,
                      n_jobs=10,
                      scoring=p50_score)
    gs.fit(x_train, y_train)
    print("参数的最佳取值::", gs.best_params_)
    print("最佳模型得分:", gs.best_score_)
import sys # settings from backends import get_generator, get_discriminator_binary from data_preprocessing import get_train_data from settings import settings_binary from triplet_utils import load_model, modify_indexes, get_sim args = settings_binary() # fixed random seeds rng = np.random.RandomState(args.seed) theano_rng = MRG_RandomStreams(rng.randint(2**15)) lasagne.random.set_rng(np.random.RandomState(rng.randint(2**15))) # load CIFAR-10 trainx, trainy, txs, tys = get_train_data(args.data_dir, args.count, args.seed_data) trainx_unl = trainx.copy() nr_batches_train = int(trainx.shape[0] / args.batch_size) # specify generative model gen_layers = get_generator(args.batch_size, theano_rng) gen_dat = ll.get_output(gen_layers[-1]) # specify discriminative model disc_layers = get_discriminator_binary() disc_params = ll.get_all_params(disc_layers, trainable=True) # you can use pretrained models if args.use_pretrained: load_model(gen_layers, args.generator_pretrained) load_model(disc_layers, args.discriminator_pretrained)
def get_xgb_prediction(test_season=None, load=False, read_sql=True):
    """Train (or load) an XGBoost classifier and report its test predictions.

    Data is loaded through ``get_train_data`` for the fixed training years
    2016-2019. The model is either unpickled from
    ``<cwd>/saved_model/xgb_<test_season[0]>.dat`` or fitted from scratch and
    pickled to that path. Timing information, precision at 10/30/50/100 and
    accuracy (probability threshold 0.3) are printed.

    Args:
        test_season: Test seasons forwarded to ``get_train_data``; the first
            entry also names the saved-model file. Defaults to
            ``['20180930']``.
        load: When true, load the pickled model instead of training.
        read_sql: Forwarded unchanged to ``get_train_data``.

    Returns:
        A 6-tuple ``(res, predicted_and_real, acc, p30, count_predicted,
        count_real)`` where ``res`` is the result table restricted to the
        interface columns, renamed and rounded to two decimals, ``acc`` is
        the accuracy at threshold 0.3 and ``p30`` is
        ``precision_n(y_test, y_pred, 30)``.
    """
    # Build the default list per call instead of sharing one mutable list
    # object across every invocation.
    if test_season is None:
        test_season = ['20180930']

    time1 = time.time()
    truncate = 3
    x_train, x_test, y_train, y_test, test_name = get_train_data(
        truncate=truncate,
        train_year=['2016', '2017', '2018', '2019'],
        test_season=test_season,
        read_sql=read_sql)
    print('load data time:', time.time() - time1)

    # NOTE(review): XGBoost documents 'eta' as an alias of 'learning_rate';
    # both are passed here with different values -- confirm which is intended.
    other_params = {
        'eta': 0.3,
        'n_estimators': 120,
        'gamma': 0,
        'max_depth': 6,
        'min_child_weight': 12,
        'learning_rate': 0.1,
        'colsample_bytree': 1,
        'colsample_bylevel': 0.5,
        'subsample': 1.0,
        'reg_lambda': 40,
        'reg_alpha': 10,
        'seed': 2020,
        'scale_pos_weight': 1,
    }

    time2 = time.time()
    model_dir = os.path.join(os.getcwd(), "saved_model")
    model_path = os.path.join(model_dir,
                              "xgb_{}.dat".format(test_season[0]))
    if load:
        # SECURITY: unpickling executes arbitrary code; only load model files
        # this application wrote itself.
        with open(model_path, "rb") as model_file:
            model = pickle.load(model_file)
    else:
        model = xgb.XGBClassifier(**other_params)
        model.fit(x_train, y_train)
        # Make sure the target directory exists so a fresh checkout does not
        # lose a trained model to FileNotFoundError.
        os.makedirs(model_dir, exist_ok=True)
        with open(model_path, "wb") as model_file:
            pickle.dump(model, model_file)
    print('computed time:', time.time() - time2)

    time3 = time.time()
    # Probability of the positive class for every test sample.
    y_pred = model.predict_proba(x_test)[:, 1]
    res = output_result(y_pred, test_name, test_season)
    predicted_and_real = get_predicted_and_real(res)
    count_predicted, count_real = get_industry(res)

    # p30 is both printed and returned, so compute it once.
    p30 = precision_n(y_test, y_pred, 30)
    print('p10:{:.4f},p30:{:.4f},p50:{:.4f},p100:{:.4f}'.format(
        precision_n(y_test, y_pred, 10), p30,
        precision_n(y_test, y_pred, 50), precision_n(y_test, y_pred, 100)))
    acc = accuracy_score(y_test, y_pred > 0.3)
    print('accuracy:', acc)
    print('output time:', time.time() - time3)

    # Interface field names: keep only the exported columns and map them to
    # the names the API consumers expect.
    res = res[[
        'ts_code', 'name', 'total_mv_mean', 'float_share_to_total_share',
        'pb_mean', 'eps', 'roe', 'pe_ttm_mean', 'bps', 'industry'
    ]]
    # Non-inplace rename avoids mutating a frame derived by column selection.
    res = res.rename(columns={
        'name': 'stockName',
        'total_mv_mean': 'totalWorth',
        'float_share_to_total_share': 'floatToTotal',
        'pb_mean': 'pb',
        'pe_ttm_mean': 'pettm',
    })
    res = res.round(decimals=2)
    return res, predicted_and_real, acc, p30, count_predicted, count_real