def Test_blending_Binary_TestFold(X, y, nfold_test, blending_fold, verbose=True):
    # model name list
    name_list = ['xgb', 'lgb', 'cat', 'rfc', 'svm', 'gpc', 'lda', 'qda']

    # models
    model_dict = OrderedDict()
    model_dict['xgb'] = mi.myXGBBinary()
    model_dict['lgb'] = mi.myLGBMBinary()
    model_dict['cat'] = mi.myCatBoostBinary()
    model_dict['rfc'] = mi.myRandomForestBinary()
    model_dict['svm'] = mi.mySVMBinary()
    model_dict['gpc'] = mi.myGPBinary()
    model_dict['lda'] = mi.myLDABinary()
    model_dict['qda'] = mi.myQDABinary()

    # model parameters
    param_list = OrderedDict()
    param_list['xgb'] = mp.param_xgb('binary', len(np.unique(y)), use_gpu=False)
    param_list['lgb'] = mp.param_lgbm('binary', len(np.unique(y)), use_gpu=False)
    param_list['cat'] = mp.param_cat('binary', use_gpu=True, is_unbalance=False)
    param_list['rfc'] = mp.param_rf('binary')
    param_list['svm'] = mp.param_svm('binary')
    param_list['gpc'] = mp.param_gpc('binary')
    param_list['lda'] = mp.param_lda()
    param_list['qda'] = mp.param_qda()

    # fitting params
    fitpm_list = OrderedDict()
    for name in name_list:
        fitpm_list[name] = {}
    fitpm_list['lgb'] = {'early_stopping_rounds': 12, 'verbose': -1}
    # fit_cat = {}
    # fit_xgb = {}

    # metric function
    metric_func = roc_auc_score

    # results: name -> [test_fold_index, fold_train_pred, fold_test_pred, mean_fold_score]
    result_list = OrderedDict()

    for name in name_list:
        print(name)
        test_fold_index, fold_train_pred, fold_test_pred, mean_fold_score = tr.training_blending_Testfold_noVal(
            'binary', model_dict[name], param_list[name], fitpm_list[name],
            metric_func, X, y, nfold_test, blending_fold, verbose)
        result_list[name] = [
            test_fold_index, fold_train_pred, fold_test_pred, mean_fold_score
        ]
        print('done')

    print('Test_blending_Binary_TestFold Complete')
    return result_list
def Test_blending_Binary(xtrain, ytrain, xtest, blending_fold=5, verbose=False):
    # model name list
    name_list = ['xgb', 'lgb', 'cat', 'rfc', 'svm', 'gpc', 'lda', 'qda']

    # models
    model_dicts = OrderedDict()
    model_dicts['xgb'] = mi.myXGBBinary()
    model_dicts['lgb'] = mi.myLGBMBinary()
    model_dicts['cat'] = mi.myCatBoostBinary()
    model_dicts['rfc'] = mi.myRandomForestBinary()
    model_dicts['svm'] = mi.mySVMBinary()
    model_dicts['gpc'] = mi.myGPBinary()
    model_dicts['lda'] = mi.myLDABinary()
    model_dicts['qda'] = mi.myQDABinary()

    # model parameters
    param_list = OrderedDict()
    param_list['xgb'] = mp.param_xgb('binary', len(np.unique(ytrain)), use_gpu=False)
    param_list['lgb'] = mp.param_lgbm('binary', len(np.unique(ytrain)), use_gpu=False)
    param_list['cat'] = mp.param_cat('binary', use_gpu=True, is_unbalance=False)
    param_list['rfc'] = mp.param_rf('binary')
    param_list['svm'] = mp.param_svm('binary')
    param_list['gpc'] = mp.param_gpc('binary')
    param_list['lda'] = mp.param_lda()
    param_list['qda'] = mp.param_qda()

    # fitting params
    fitpm_list = OrderedDict()
    for name in name_list:
        fitpm_list[name] = {}
    fitpm_list['lgb'] = {'early_stopping_rounds': 12, 'verbose': -1}
    # fit_cat = {}
    # fit_xgb = {}

    # metric function
    metric_func = roc_auc_score

    # results: name -> [train_pred, test_pred, fold_metric]
    result_list = OrderedDict()

    # training
    for name in name_list:
        print(name)
        train_pred, test_pred, fold_metric = tr.training_blending_fixedTest(
            'binary', model_dicts[name], param_list[name], fitpm_list[name],
            metric_func, xtrain, ytrain, xtest, blending_fold, verbose)
        result_list[name] = [train_pred, test_pred, fold_metric]

    return result_list
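# A minimal usage sketch for Test_blending_Binary. The helper below is illustrative
# only (it is not part of the original module) and assumes the mi/mp/tr helpers
# imported above are available; the synthetic data is purely hypothetical.
def _example_blending_binary():
    rng = np.random.RandomState(0)
    xtrain = rng.rand(400, 8)
    ytrain = (xtrain[:, 0] > 0.5).astype(int)
    xtest = rng.rand(100, 8)

    results = Test_blending_Binary(xtrain, ytrain, xtest, blending_fold=5)
    # Each value is [train_pred, test_pred, fold_metric]; averaging the per-model
    # test predictions gives a simple blend (assumes identical prediction shapes).
    blended_test = np.mean([results[name][1] for name in results], axis=0)
    return blended_test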
def Test_Classification(xtrain, ytrain, xtest, nfold=5, verbose=False):
    # model name list
    name_list = ['xgb', 'lgb', 'cat', 'rfc', 'svm', 'gpc', 'lda', 'qda']

    # models
    model_list = OrderedDict()
    model_list['xgb'] = mi.myXGBClassifier()
    model_list['lgb'] = mi.myLGBMClassifier()
    model_list['cat'] = mi.myCatBoostClassifier()
    model_list['rfc'] = mi.myRandomForestClassifier()
    model_list['svm'] = mi.mySVMClassifier()
    model_list['gpc'] = mi.myGPClassifier()
    model_list['lda'] = mi.myLDAClassifier()
    model_list['qda'] = mi.myQDAClassifier()

    # model parameters
    param_list = OrderedDict()
    param_list['xgb'] = mp.param_xgb('classification', len(np.unique(ytrain)), use_gpu=False)
    param_list['lgb'] = mp.param_lgbm('classification', len(np.unique(ytrain)), use_gpu=False)
    param_list['cat'] = mp.param_cat('classification', use_gpu=True, is_unbalance=False)
    param_list['rfc'] = mp.param_rf('classification')
    param_list['svm'] = mp.param_svm('classification')
    param_list['gpc'] = mp.param_gpc('classification')
    param_list['lda'] = mp.param_lda()
    param_list['qda'] = mp.param_qda()

    # fitting params
    fitpm_list = OrderedDict()
    for name in name_list:
        fitpm_list[name] = {}
    fitpm_list['lgb'] = {'early_stopping_rounds': 12, 'verbose': -1}
    # fit_cat = {}
    # fit_xgb = {}

    # metric function
    metric_func = cu.aucpr
    # metric_func = partial(cu.auc_multi, [0, 1, 2, 3])

    # results: name -> [fold_predict, fold_oof, fold_metric, fold_model]
    result_list = OrderedDict()

    # training
    for name in name_list:
        print(name)
        fold_predict, fold_oof, fold_metric, fold_model = tr.training_fixedTest(
            'classification', model_list[name], param_list[name], fitpm_list[name],
            metric_func, xtrain, ytrain, xtest, nfold)
        result_list[name] = [fold_predict, fold_oof, fold_metric, fold_model]

    print('Test_Classification Complete')
    return result_list
def Test_Classification_TestFold(X, y, nfold_test, nfold_val, verbose=True):
    # model name list
    name_list = ['xgb', 'lgb', 'cat', 'rfc', 'svm', 'gpc', 'lda', 'qda']

    # models
    model_dict = OrderedDict()
    model_dict['xgb'] = mi.myXGBClassifier()
    model_dict['lgb'] = mi.myLGBMClassifier()
    model_dict['cat'] = mi.myCatBoostClassifier()
    model_dict['rfc'] = mi.myRandomForestClassifier()
    model_dict['svm'] = mi.mySVMClassifier()
    model_dict['gpc'] = mi.myGPClassifier()
    model_dict['lda'] = mi.myLDAClassifier()
    model_dict['qda'] = mi.myQDAClassifier()

    # model parameters
    param_list = OrderedDict()
    param_list['xgb'] = mp.param_xgb('classification', len(np.unique(y)), use_gpu=False)
    param_list['lgb'] = mp.param_lgbm('classification', len(np.unique(y)), use_gpu=False)
    param_list['cat'] = mp.param_cat('classification', use_gpu=True, is_unbalance=False)
    param_list['rfc'] = mp.param_rf('classification')
    param_list['svm'] = mp.param_svm('classification')
    param_list['gpc'] = mp.param_gpc('classification')
    param_list['lda'] = mp.param_lda()
    param_list['qda'] = mp.param_qda()

    # fitting params
    fitpm_list = OrderedDict()
    for name in name_list:
        fitpm_list[name] = {}
    fitpm_list['lgb'] = {'early_stopping_rounds': 12, 'verbose': -1}
    # fit_cat = {}
    # fit_xgb = {}

    # metric function
    metric_func = cu.aucpr
    # metric_func = partial(cu.auc_multi, [0, 1, 2, 3])

    # results: name -> [test_fold_index, oof, model_list]
    result_list = OrderedDict()
    auc_score_list = OrderedDict()

    for name in name_list:
        print(name)
        test_fold_index, oof, model_list = tr.training_Testfold(
            'classification', model_dict[name], param_list[name], fitpm_list[name],
            metric_func, X, y, nfold_test, nfold_val)
        result_list[name] = [test_fold_index, oof, model_list]
        # auc_score_list[name] = roc_auc_score(np.where(y > 0.5, 1, 0), np.argmax(oof.mean(axis=0), axis=1))

    print('Test_Classification_TestFold Complete')
    return result_list
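# A minimal usage sketch for Test_Classification_TestFold. The helper below is
# illustrative only (not part of the original module); the synthetic multi-class
# data is hypothetical, and the shape of `oof` is whatever tr.training_Testfold
# returns, so it is only inspected here rather than post-processed.
def _example_classification_testfold():
    rng = np.random.RandomState(0)
    X = rng.rand(300, 6)
    y = rng.randint(0, 3, size=300)

    results = Test_Classification_TestFold(X, y, nfold_test=5, nfold_val=4)
    test_fold_index, oof, models = results['lgb']
    print('oof shape:', np.asarray(oof).shape)
    return results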
def Test_Regression_TestFold(X, y, nfold_test, nfold_val, verbose=True):
    # model name list
    name_list = ['xgb', 'lgb', 'cat', 'rfc', 'elt', 'svm', 'gpc']

    # models
    model_dict = OrderedDict()
    model_dict['xgb'] = mi.myXGBRegressor()
    model_dict['lgb'] = mi.myLGBMRegressor()
    model_dict['cat'] = mi.myCatBoostRegressor()
    model_dict['rfc'] = mi.myRandomForestRegressor()
    model_dict['elt'] = mi.myElasticNetRegressor()
    model_dict['svm'] = mi.mySVMRegressor()
    model_dict['gpc'] = mi.myGPRegressor()

    # model parameters
    param_list = OrderedDict()
    param_list['xgb'] = mp.param_xgb('regression', use_gpu=False)
    param_list['lgb'] = mp.param_lgbm('regression', use_gpu=False)
    param_list['cat'] = mp.param_cat('regression', use_gpu=True, is_unbalance=False)
    param_list['rfc'] = mp.param_rf('regression')
    param_list['elt'] = mp.param_elst('regression')
    param_list['svm'] = mp.param_svm('regression')
    param_list['gpc'] = mp.param_gpc('regression')

    # fitting params
    fitpm_list = OrderedDict()
    for name in name_list:
        fitpm_list[name] = {}
    fitpm_list['lgb'] = {'early_stopping_rounds': 12, 'verbose': -1}

    # metric function
    metric_func = mean_squared_error

    auc_score_list = OrderedDict()
    result_list = OrderedDict()

    for name in name_list:
        print('Model : {}'.format(name))
        test_fold_index, oof, model_list = tr.training_Testfold(
            'regression', model_dict[name], param_list[name], fitpm_list[name],
            metric_func, X, y, nfold_test, nfold_val)
        result_list[name] = [test_fold_index, oof, model_list]
        # oof holds out-of-fold predictions for every sample; this AUC is
        # computed for inspection only and is not returned.
        auc_score_list[name] = roc_auc_score(np.where(y > 25, 1, 0),
                                             oof.mean(axis=1))

    return result_list
def Test_Regression_noVal(xtrain, ytrain, xtest):
    # model name list
    name_list = ['xgb', 'lgb', 'cat', 'rfc', 'elt', 'svm', 'gpc']

    # models
    model_list = OrderedDict()
    model_list['xgb'] = mi.myXGBRegressor()
    model_list['lgb'] = mi.myLGBMRegressor()
    model_list['cat'] = mi.myCatBoostRegressor()
    model_list['rfc'] = mi.myRandomForestRegressor()
    model_list['elt'] = mi.myElasticNetRegressor()
    model_list['svm'] = mi.mySVMRegressor()
    model_list['gpc'] = mi.myGPRegressor()

    # model parameters
    param_list = OrderedDict()
    param_list['xgb'] = mp.param_xgb('regression', use_gpu=False)
    param_list['lgb'] = mp.param_lgbm('regression', use_gpu=False)
    param_list['cat'] = mp.param_cat('regression', use_gpu=True, is_unbalance=False)
    param_list['rfc'] = mp.param_rf('regression')
    param_list['elt'] = mp.param_elst('regression')
    param_list['svm'] = mp.param_svm('regression')
    param_list['gpc'] = mp.param_gpc('regression')

    # fitting params
    fitpm_list = OrderedDict()
    for name in name_list:
        fitpm_list[name] = {}
    fitpm_list['lgb'] = {'early_stopping_rounds': 12, 'verbose': -1}

    # metric function
    metric_func = mean_squared_error

    # results: name -> [res_pred, model]
    result_list = OrderedDict()

    for name in name_list:
        print(name)
        res_pred, model = tr.training_fixedTest_noVal(
            'regression', model_list[name], param_list[name], fitpm_list[name],
            metric_func, xtrain, ytrain, xtest)
        result_list[name] = [res_pred, model]

    print('Test_Regression Complete')
    return result_list
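# A minimal usage sketch for Test_Regression_noVal. The helper below is illustrative
# only (not part of the original module); each model is fit once on the full
# training data, so the returned dict maps a model name to its test prediction
# and fitted model.
def _example_regression_noval():
    rng = np.random.RandomState(0)
    xtrain = rng.rand(300, 5)
    ytrain = xtrain @ np.arange(1.0, 6.0) + 0.1 * rng.randn(300)
    xtest = rng.rand(80, 5)

    results = Test_Regression_noVal(xtrain, ytrain, xtest)
    pred_lgb, model_lgb = results['lgb']  # prediction on xtest and the fitted model
    return results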
def Test_Binary(xtrain, ytrain, xtest, nfold=5, verbose=False):
    # model name list
    name_list = [
        'xgb',
        'lgb',
        #'cat',
        'rfc',
        'svm',
        #'gpc',
        'lda',
        'qda',
        'rdg',
        'lso',
        'ann'
    ]

    # models
    model_dicts = OrderedDict()
    model_dicts['xgb'] = mi.myXGBBinary()
    model_dicts['lgb'] = mi.myLGBMBinary()
    model_dicts['cat'] = mi.myCatBoostBinary()
    model_dicts['rfc'] = mi.myRandomForestBinary()
    model_dicts['svm'] = mi.mySVMBinary()
    model_dicts['gpc'] = mi.myGPBinary()
    model_dicts['lda'] = mi.myLDABinary()
    model_dicts['qda'] = mi.myQDABinary()
    model_dicts['rdg'] = mi.myRidgeBinary()
    model_dicts['lso'] = mi.myLassoBinary()
    model_dicts['ann'] = mi.myANNBinary()

    # default model parameters
    param_list = OrderedDict()
    param_list['xgb'] = mp.param_xgb('binary', len(np.unique(ytrain)), use_gpu=False)
    param_list['lgb'] = mp.param_lgbm('binary', len(np.unique(ytrain)), use_gpu=False)
    param_list['cat'] = mp.param_cat('binary', use_gpu=True, is_unbalance=False)
    param_list['rfc'] = mp.param_rf('binary')
    param_list['svm'] = mp.param_svm('binary')
    param_list['gpc'] = mp.param_gpc('binary')
    param_list['lda'] = mp.param_lda()
    param_list['qda'] = mp.param_qda()
    param_list['rdg'] = mp.param_ridge('binary')
    param_list['lso'] = mp.param_lasso('binary')
    param_list['ann'] = mp.param_ANN()

    # tuned hyperparameters
    params_xgb = {
        'colsample_bytree': 0.018359345409703118,
        'max_delta_step': 10.0,
        'max_depth': 100,
        'min_child_weight': 0.0,
        'n_estimators': 800,
        'reg_alpha': 2.0,
        'reg_lambda': 10.0,
        'subsample': 1.0
    }
    params_lgb = {
        'bagging_fraction': 0.9146615380853989,
        'colsample_bytree': 0.7384250232683872,
        'feature_fraction': 0.2892361777710602,
        'lambda_l1': 6.11807950735429,
        'lambda_l2': 9.779990080293718,
        'learning_rate': 0.001,
        'max_depth': 9,
        'min_child_weight': 0.6385281864950193,
        'min_data_in_leaf': 1,
        'min_split_gain': 0.5944870633301388,
        'num_leaves': 969,
        'reg_alpha': 18.045166839320736,
        'reg_lambda': 8.490946187426754,
        'subsample': 0.16006631386138065
    }
    '''
    params_cat = {'bagging_temperature': 36.514154289873396,
                  'depth': 7,
                  'iterations': 1884,
                  'l2_leaf_reg': 5,
                  'learning_rate': 0.8494130280301052,
                  'random_strength': 39.83926219359324}
    '''
    params_rfc = {
        'max_depth': 3,
        'max_features': 0.26390005062522226,
        'n_estimators': 89
    }
    params_rdg = {'normalize': False}
    params_lso = {'alpha': 0.03, 'normalize': False}

    # merge tuned values over the defaults (the second dict takes precedence)
    param_list['xgb'] = {**param_list['xgb'], **params_xgb}
    param_list['lgb'] = {**param_list['lgb'], **params_lgb}
    # param_list['cat'] = {**param_list['cat'], **params_cat}
    param_list['rfc'] = {**param_list['rfc'], **params_rfc}
    param_list['rdg'] = {**param_list['rdg'], **params_rdg}
    param_list['lso'] = {**param_list['lso'], **params_lso}

    # fitting params
    fitpm_list = OrderedDict()
    for name in name_list:
        fitpm_list[name] = {}
    # fitpm_list['lgb'] = {'early_stopping_rounds': 12, 'verbose': -1}
    # fit_cat = {}
    # fit_xgb = {}

    # metric function
    metric_func = roc_auc_score

    # results: name -> [prediction, model]
    result_list = OrderedDict()

    # training
    for name in name_list:
        print(name)
        prediction, model = tr.training_fixedTest_noVal(
            'binary', model_dicts[name], param_list[name], fitpm_list[name],
            metric_func, xtrain, ytrain, xtest, verbose)
        result_list[name] = [prediction, model]

    return result_list
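# A minimal usage sketch for Test_Binary. The helper below is illustrative only
# (not part of the original module); it runs the tuned binary models on synthetic
# data and averages their test predictions into a simple ensemble, assuming every
# model returns a per-sample score array of the same shape.
def _example_binary():
    rng = np.random.RandomState(0)
    xtrain = rng.rand(500, 10)
    ytrain = (xtrain[:, 0] + 0.1 * rng.randn(500) > 0.5).astype(int)
    xtest = rng.rand(120, 10)

    results = Test_Binary(xtrain, ytrain, xtest)
    ensemble = np.mean([pred for pred, _ in results.values()], axis=0)
    return ensemble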