print('FINAL SHAPE') print('dbuild.shape:{} dvalid.shape:{}\n'.format((dbuild.num_row(), dbuild.num_col()), (dvalid.num_row(), dvalid.num_col()))) return dbuild, dvalid, watchlist, sub_build, sub_valid #============================================================================== # cv #============================================================================== models = [] for i in range(LOOP): dbuild, dvalid, watchlist, sub_build, sub_valid = split_build_valid() model = xgb.train(param, dbuild, nround, watchlist, early_stopping_rounds=ESR, verbose_eval=5) models.append(model) del dbuild, dvalid, watchlist, sub_build, sub_valid gc.collect() imp = ex.getImp(models) imp.to_csv('imp_None.csv', index=0) #============================================================================== utils.end(__file__)
# (continuation of a parameter dict whose opening brace is above this point)
         'eval_metric': 'auc',            # metric name handed to XGBoost
         'max_depth': 4,
         'objective': 'binary:logistic',  # binary target, probability output
         'silent': 1,                     # NOTE(review): legacy quiet flag; newer XGBoost uses `verbosity` -- confirm version
         'tree_method': 'hist',
         'nthread': 64,                   # hard-coded thread count
         'seed': SEED}

gc.collect()

# No evaluation set or early stopping is passed here: the booster is trained
# for exactly NROUND rounds on dtrain.
model = xgb.train(param, dtrain, NROUND)

# The training matrix is not used again in this section; free it right away.
del dtrain; gc.collect()

# Feature importance of the trained booster, written without the index.
# NOTE(review): `ex.getImp` is a project helper; presumably it returns a
# DataFrame (it is used with `.to_csv`) -- confirm.
imp = ex.getImp(model)
imp.to_csv('imp.csv', index=False)

# =============================================================================
# test
# =============================================================================
# Assemble the test-time feature table from the pickled feature sets.
# The call is not closed in this span; its remaining arguments (e.g. the
# concat axis) follow below.
X = pd.concat([utils.read_pickles('../data/test_old'),
               utils.read_pickles('../data/002_test'),
               utils.read_pickles('../data/003_test'),
               utils.read_pickles('../data/004_test'),
               utils.read_pickles('../data/005_test'),
               utils.read_pickles('../data/101_test'),
               # Feature sets currently disabled:
               # utils.read_pickles('../data/102_test'),
               # utils.read_pickles('../data/103-1_test'),
               # utils.read_pickles('../data/103-2_test'),
import os  # local import: needed only to derive the importance-log file name

# Train with early stopping: 9999 is just an upper bound on boosting rounds;
# training stops once the watched metric has not improved for 50 rounds.
# `evals` is passed by keyword because current XGBoost releases deprecate
# passing it positionally.
model = xgb.train(params, dtrain, num_boost_round=9999, evals=watchlist,
                  verbose_eval=10, early_stopping_rounds=50)

# Report the best score found by early stopping, locally and via the
# project's notification helper.
# NOTE(review): `utils.send_line` is a project helper; presumably it pushes
# the message to a chat/notification channel -- confirm.
result = f"CV valid-auc: {model.best_score}"
print(result)
utils.send_line(result)

# =============================================================================
# imp
# =============================================================================
# Importance table ordered by gain (descending); ties broken by feature name.
imp = ex.getImp(model).sort_values(['gain', 'feature'], ascending=[False, True])

# `__file__` may be a full path (since Python 3.9 it is absolute for the
# script being run), which would turn the target into a nested, non-existent
# directory below LOG/. Use only the script's file name, which yields the same
# output name as before when the script is started by its bare name.
imp.to_csv(f'LOG/imp_{os.path.basename(__file__)}.csv', index=False)

# Disabled: marks every feature with zero splits as unused by touching a
# marker file per feature, in parallel.
#def multi_touch(arg):
#    os.system(f'touch "../feature_unused/{arg}.f"')
#
#
#col = imp[imp['split']==0]['feature'].tolist()
#pool = Pool(cpu_count())
#pool.map(multi_touch, col)
#pool.close()