def check_code(mode, gru_mode):
    """Build or load the feature tables, fit three models and save their mean.

    With ``mode == 'simple'`` six pre-computed CSV files are read with
    pandas. For any other value the same six tables are produced from the
    raw train/testB text files via ``dp.dataprocess`` and ``gp.data_process``.

    The tables are then fed to ``xgbm.xgbmodeltrain``, ``rf.rf_model`` and
    ``bigru.BiGRU_train``; the element-wise mean of the three predictions is
    written to ``/home/Team4/Team4/result/submit_Team4.csv``.

    Args:
        mode: ``'simple'`` to load cached CSVs; anything else recomputes
            the features from the raw files.
        gru_mode: forwarded unchanged to ``bigru.BiGRU_train``.

    Returns:
        None.
    """
    if mode != 'simple':
        raw_train = '/home/Team4/CIKM2017/train.txt'
        raw_test_b = '/home/Team4/CIKM2017/testB.txt'
        # Training-set features, old wind version.
        wind_old_train = dp.dataprocess(raw_train, data_type='train', windversion='old')
        # Test-set-B features, old wind version.
        wind_old_test = dp.dataprocess(raw_test_b, data_type='testB', windversion='old')
        # Training-set features, new wind version (1ave8extend).
        wind_new_train = dp.dataprocess(raw_train, data_type='train', windversion='new')
        # Test-set-B features, new wind version (1ave).
        wind_new_test = dp.dataprocess(raw_test_b, data_type='testB', windversion='new')
        # Training-set base table.
        base_train = gp.data_process(raw_train, data_type='train')
        # Test-set-B base table.
        base_test = gp.data_process(raw_test_b, data_type='testB')
    else:
        base_train = pd.read_csv('/home/Team4/Team4/dataset/train_percentile.csv')
        base_test = pd.read_csv('/home/Team4/Team4/dataset/testB_percentile.csv')
        wind_old_train = pd.read_csv('/home/Team4/Team4/dataset/train_old_wind_4240.csv')
        wind_old_test = pd.read_csv('/home/Team4/Team4/dataset/testB_old_wind_4240.csv')
        wind_new_train = pd.read_csv('/home/Team4/Team4/dataset/train_new_wind_1ave_8extend.csv')
        wind_new_test = pd.read_csv('/home/Team4/Team4/dataset/testB_new_wind_1ave_8extend.csv')
    print('#data process has been done')

    # Model 1: XGBoost on the new-wind tables.
    xgb_pred = xgbm.xgbmodeltrain(wind_new_train, wind_new_test)
    print('#xgb model has been done')

    # Model 2: random forest on the base tables plus the old-wind tables.
    selected = fs.pre_train(
        train_df=base_train,
        test_df=base_test,
        train_add=wind_old_train,
        test_add=wind_old_test,
    )
    holdout = rf.rf_model(
        base_train, base_test, 'train', wind_old_train, wind_old_test, ne=100
    )
    # NOTE(review): 'trai' is passed as-is; it may be a typo for 'train' or a
    # deliberate "not train" flag -- confirm against rf.rf_model.
    rf_pred = rf.rf_model(
        base_train, base_test, 'trai', wind_old_train, wind_old_test, 1100,
        index=selected,
    )
    print('#rf model has been done')

    # Model 3: BiGRU; it receives the first rf_model result and its output is
    # reshaped to a flat array of exactly 2000 elements.
    gru_raw = bigru.BiGRU_train(base_train, base_test, holdout, gru_mode)
    gru_pred = gru_raw.reshape(2000)
    print('#bigru model has been done')

    # Unweighted average of the three predictions.
    averaged = (xgb_pred + rf_pred + gru_pred) / 3.0
    np.savetxt("/home/Team4/Team4/result/submit_Team4.csv", averaged)
def check_code(mode, gru_mode):
    """Train the three models on cached features, or only run preprocessing.

    With ``mode == 'simple'`` six pre-computed CSV files are loaded, then
    ``xgbm.xgbmodeltrain``, ``rf.rf_model`` and ``bigru.BiGRU_train`` are
    run. Each model's prediction and their element-wise mean are written to
    hour-stamped CSV files under ``/data/yuyang/weather/result/shenzhen/``.

    With any other ``mode`` no model is trained. Six preprocessing stages
    are run one after another, each fanned out over a 15-process pool with
    one task per shard id ``'00'`` .. ``'14'``, and the function returns.

    Args:
        mode: ``'simple'`` to train from cached CSVs; anything else runs
            the parallel preprocessing only.
        gru_mode: forwarded unchanged to ``bigru.BiGRU_train``.

    Returns:
        ``0`` after a preprocessing-only run, ``None`` after training.
    """
    if mode != 'simple':
        trainfile = '/data/yuyang/weather/data/data_shenzhen/CIKM2017_train/train'
        testBfile = '/data/yuyang/weather/data/data_shenzhen/CIKM2017_testB/CIKM2017_testB/test'
        # Two-digit shard ids '00'..'14'; each is passed as the last
        # positional argument of the preprocessing function.
        shard_ids = ['%02d' % i for i in range(15)]
        # (progress label, worker) pairs, executed strictly in this order.
        stages = [
            # Training set, old wind version.
            ('start----dp-train-old', partial(dp.dataprocess, trainfile, 'train', 'old')),
            # Test set B, old wind version.
            ('start----dp-test-old', partial(dp.dataprocess, testBfile, 'testB', 'old')),
            # Training set, new wind version (1ave8extend).
            ('start----dp-train-new', partial(dp.dataprocess, trainfile, 'train', 'new')),
            # Test set B, new wind version (1ave).
            ('start----dp-testB-new', partial(dp.dataprocess, testBfile, 'testB', 'new')),
            # Training set base table.
            ('start----gp-train', partial(gp.data_process, trainfile, 'train')),
            # Test set B base table.
            ('start----gp-test', partial(gp.data_process, testBfile, 'testB')),
        ]
        pool = Pool(len(shard_ids))
        try:
            for label, worker in stages:
                print(label)
                # map() blocks until every shard is done. Its return value is
                # discarded -- presumably the workers persist their own
                # output; TODO confirm.
                pool.map(worker, shard_ids)
        finally:
            # Always reap the worker processes; the original never released
            # the pool. All map() calls have returned on the success path, so
            # terminate() cannot drop pending work.
            pool.terminate()
            pool.join()
        print('data process has been done')
        return 0

    processed_dir = '/data/yuyang/weather/data/data_shenzhen/data_processed/'
    train_df = pd.read_csv(processed_dir + 'train_percentile.csv')
    test_df = pd.read_csv(processed_dir + 'testB_percentile.csv')
    train_add = pd.read_csv(processed_dir + 'train_old_wind_4240.csv')
    test_add = pd.read_csv(processed_dir + 'testB_old_wind_4240.csv')
    train_1ave8extend = pd.read_csv(processed_dir + 'train_new_wind_1ave_8extend.csv')
    test_1ave = pd.read_csv(processed_dir + 'testB_new_wind_1ave_8extend.csv')
    print('#data process has been done')

    # Hour-resolution local timestamp used in every output file name.
    daytime = time.strftime('%Y-%m-%d-%H')
    print(daytime)
    result_dir = "/data/yuyang/weather/result/shenzhen/"

    # Model 1: XGBoost on the new-wind tables.
    result_xgb = xgbm.xgbmodeltrain(train_1ave8extend, test_1ave)
    np.savetxt(result_dir + "xgb_pre-{}.csv".format(daytime), result_xgb)
    print('#xgb model has been done')

    # Model 2: random forest on the percentile tables plus old-wind tables.
    index = fs.pre_train(train_df=train_df, test_df=test_df,
                         train_add=train_add, test_add=test_add)
    valid = rf.rf_model(train_df, test_df, 'train', train_add, test_add, ne=100)
    # NOTE(review): 'trai' is kept exactly as in the original; it may be a
    # typo for 'train' or a deliberate "not train" flag -- confirm against
    # rf.rf_model before changing.
    result_rf = rf.rf_model(train_df, test_df, 'trai', train_add, test_add,
                            1100, index=index)
    np.savetxt(result_dir + "rf_pre-{}.csv".format(daytime), result_rf)
    print('#rf model has been done')

    # Model 3: BiGRU; it receives the first rf_model result and its output is
    # reshaped to a flat array of exactly 2000 elements.
    result_bigru = bigru.BiGRU_train(train_df, test_df, valid, gru_mode).reshape(2000)
    np.savetxt(result_dir + "bi-gru_pre-{}.csv".format(daytime), result_bigru)
    print('#bigru model has been done')

    # Unweighted average of the three predictions.
    ensemble = (result_xgb + result_rf + result_bigru) / 3.0
    np.savetxt(result_dir + "ensemble-{}.csv".format(daytime), ensemble)