# Write the current booster to disk, one file per index `i`.
# NOTE(review): this statement presumably belongs to a training loop whose
# header is above this chunk -- confirm its indentation against the full file.
model.save_model(f'../data/lgb{i}.model')

# Alternative path (disabled): rebuild `models` from the files saved above
# instead of using the in-memory boosters.
#models = []
#for i in range(LOOP):
#    model = lgb.Booster(model_file=f'../data/lgb{i}.model')
#    models.append(model)

# -----------------------------------------------------------------------------
# feature importance
# -----------------------------------------------------------------------------
imp = ex.getImp(models)

# Divide each importance column by its own maximum, then add the two scaled
# columns into a single ranking score.
for imp_col in ('split', 'gain'):
    imp[imp_col] /= imp[imp_col].max()
imp['total'] = imp['split'] + imp['gain']

# Highest combined importance first, with a fresh 0..n-1 index.
imp.sort_values(by='total', ascending=False, inplace=True)
imp.reset_index(inplace=True, drop=True)

# The script's own __file__ value is embedded verbatim in both output names.
imp_csv_path = f'LOG/imp_{__file__}.csv'
imp_png_path = f'LOG/imp_{__file__}.png'
imp.to_csv(imp_csv_path, index=False)
utils.savefig_imp(imp, imp_png_path, x='total')

# -----------------------------------------------------------------------------
# run summary
# -----------------------------------------------------------------------------
# 'best NROUND' is the length of the 'auc-mean' history and 'train AUC' is its
# last entry (presumably `ret` is a CV result dict -- TODO confirm).
auc_history = ret['auc-mean']
RESULT_DICT.update({
    'nfold': NFOLD,
    'seed': SEED,
    'eta': param['learning_rate'],
    'best NROUND': len(auc_history),
    'train AUC': auc_history[-1],
})

# Drop the training objects and trigger a collection before the test section.
del dtrain, X_train, y_train
gc.collect()

# =============================================================================
# test
# =============================================================================
# Paths matching the test feature pattern, in sorted order.
files_te = glob('../data/test_f*.f')
files_te.sort()
# -----------------------------------------------------------------------------
# CV summary
# -----------------------------------------------------------------------------
# One-line summary: mean and standard deviation of the values in `wloss_list`.
result = f"CV wloss: {np.mean(wloss_list)} + {np.std(wloss_list)}"
print(result)

# -----------------------------------------------------------------------------
# feature importance
# -----------------------------------------------------------------------------
imp = ex.getImp(model_all)

# Divide each importance column by its own maximum, then add the two scaled
# columns into a single ranking score.
for imp_col in ('split', 'gain'):
    imp[imp_col] /= imp[imp_col].max()
imp['total'] = imp['split'] + imp['gain']

# Highest combined importance first, with a fresh 0..n-1 index.
imp.sort_values(by='total', ascending=False, inplace=True)
imp.reset_index(inplace=True, drop=True)

# The script's own __file__ value is embedded verbatim in both output names.
imp.to_csv(f'LOG/imp_{__file__}.csv', index=False)

png = f'LOG/imp_{__file__}.png'
utils.savefig_imp(imp, png, x='total', title=f'{__file__}')

# Hand the summary text and the plot path to utils.send_line
# (presumably a notification helper -- TODO confirm).
utils.send_line(result, png)

# -----------------------------------------------------------------------------
# average the prediction sets
# -----------------------------------------------------------------------------
# Each entry of `y_preds` is cast to float, passed through
# utils_metric.softmax and accumulated; the running sum is divided by the
# number of entries once, after the loop.
for i, y_pred in enumerate(y_preds):
    raw_scores = y_pred.astype(float).values
    y_pred = pd.DataFrame(utils_metric.softmax(raw_scores))
    if i:
        tmp += y_pred
    else:
        # First entry seeds the accumulator.
        tmp = y_pred
tmp /= len(y_preds)

# Replace the list of prediction sets with the averaged float array.
y_preds = tmp.copy().values.astype(float)

# Score the averaged predictions against the labels in `y`.
a_score = utils_metric.akiyama_metric(y.values, y_preds)
print('akiyama_metric:', a_score)