'subsample_freq': 0, 'colsample_bytree': 1.0, 'reg_alpha': 0.0, 'reg_lambda': 0.0, 'random_state': 42, 'n_jobs': -1, 'silent': True, 'importance_type': 'split', 'num_boost_round': 500, 'tree_learner': 'feature' } lgb = GBM(package='lightgbm', X=data['X'], y=data['y'], feature_names=data['features'], cv=5, grid_search=False, eval_metric='rmse', parameters=parameters) lgb.run_model() print(lgb.__dict__) ''' print(data['features']) plot_importance(lgb.model) plt.show() plt.clf() plot_metric(lgb.model, metric='rmse') plt.show() plt.clf() plot_tree(lgb.model)
'cat_features': None, 'grow_policy': None, 'min_data_in_leaf': None, 'min_child_samples': None, 'max_leaves': None, 'num_leaves': None, 'score_function': None, 'leaf_estimation_backtracking': None, 'ctr_history_unit': None, 'monotone_constraints': None } catboost = GBM(package='catboost', X=X[y < 200000], y=y[y < 200000], feature_names=data['features'], cv=5, grid_search=False, eval_metric='rmse', parameters=parameters) catboost.run_model() print(catboost.__dict__) np.save('catboost_res.npy', catboost.__dict__) catboost.parity_plot(data='train', quantity='CT_RT', scheme=1).savefig('parity_CT_RT_train.png') catboost.parity_plot(data='test', quantity='CT_RT', scheme=1).savefig('parity_CT_RT_test.png') plt.clf() explainer = shap.TreeExplainer(catboost.model[-1]) shap_values = explainer.shap_values(data['X'])
'num_boost_round': 5000, 'tree_learner': 'feature' } CT_RT = np.array([i[1] for i in data['y2']]) CT_Temp = np.array([i[2] for i in data['y2']]) CT_CS = np.array([i[3] for i in data['y2']]) ID = [i[0] for i in data['y2']] C = np.array([C_data[i] for i in ID]) lgb = GBM(package='lightgbm', X=data['X'], y=data['y'], model_scheme='LMP', cv=5, grid_search=False, eval_metric='rmse', parameters=parameters, CT_RT=CT_RT, CT_Temp=CT_Temp, C=C) lgb.run_model() print(lgb.__dict__) lgb.parity_plot(data='train', quantity='LMP').savefig('parity_LMP_train.png') lgb.parity_plot(data='test', quantity='LMP').savefig('parity_LMP_test.png') lgb.parity_plot(data='train', quantity='CT_RT').savefig('parity_CT_RT_train.png') lgb.parity_plot(data='test', quantity='CT_RT').savefig('parity_CT_RT_test.png') np.save('lgb_dict.npy', lgb.__dict__) plt.clf()
'leaf_estimation_backtracking': None, 'ctr_history_unit': None, 'monotone_constraints': None } CT_RT = np.array([i[1] for i in data['y2']]) CT_Temp = np.array([i[2] for i in data['y2']]) ID = [i[0] for i in data['y2']] C = np.array([25 for i in ID]) catboost = GBM(package='catboost', model_scheme='LMP', X=data['X'], y=data['y'], feature_names=data['features'], cv=5, grid_search=False, eval_metric='rmse', parameters=parameters, CT_Temp=CT_Temp, CT_RT=CT_RT, C=C) catboost.run_model() print(catboost.__dict__) catboost.parity_plot(data='train', quantity='LMP').savefig('parity_LMP_train.png') catboost.parity_plot(data='test', quantity='LMP').savefig('parity_LMP_test.png') catboost.parity_plot(data='train', quantity='CT_RT').savefig('parity_CT_RT_train.png') catboost.parity_plot(data='test',
'grow_policy': None, 'min_data_in_leaf': None, 'min_child_samples': None, 'max_leaves': None, 'num_leaves': None, 'score_function': None, 'leaf_estimation_backtracking': None, 'ctr_history_unit': None, 'monotone_constraints': None } catboost = GBM(package='catboost', X=data['X'], y=data['y'], test_size=0.2, feature_names=data['features'], cv=5, grid_search=False, eval_metric='rmse', parameters=parameters) catboost.run_model() print(catboost.__dict__) y_gen = catboost.model[-1].predict(X_generated) df = pd.DataFrame(data['y'], columns=['rupture life']) df['sample'] = 'real' df = df.append(pd.DataFrame({ 'rupture life': y_gen, 'sample': ['synthetic' for i in range(len(y_gen))] }),
'cat_features': None, 'grow_policy': None, 'min_data_in_leaf': None, 'min_child_samples': None, 'max_leaves': None, 'num_leaves': None, 'score_function': None, 'leaf_estimation_backtracking': None, 'ctr_history_unit': None, 'monotone_constraints': None } catboost = GBM(package='catboost', X=data['X'][data['y'] < 200000], y=data['y'][data['y'] < 200000], feature_names=data['features'], cv=5, grid_search=False, eval_metric='rmse', parameters=parameters) features = [ i for i in df.columns if i not in ['CT_RT', 'CT_CS', 'CT_MCR', 'ID'] ] features = [i for i in features if 'Weighted' not in i] features.append('CT_RT') X = df[features].to_numpy(np.float32) y = df['CT_RT'].to_numpy(np.float32) pdata = ProcessData(X=X, y=y, features=features) pdata.clean_data() data = pdata.get_data()
eval_metric='rmse', param_grid=parameters_grid) lgb.run_model() print(lgb.__dict__) ''' parameters = {'booster': 'gbtree', #gbtree, gblinear, dart 'eta': 0.3, #learning rate 'gamma': 0, #min split loss 'max_depth': 10, 'tree_method': 'auto'} xgboost = GBM(package='xgboost', X=data['X'], y=data['y'], feature_names=data['features'], cv=5, grid_search=False, eval_metric='rmse', parameters=parameters) xgboost.run_model() print(xgboost.__dict__) xgboost.parity_plot(data='train', quantity='LMP').savefig('parity_LMP_train.png') xgboost.parity_plot(data='test', quantity='LMP').savefig('parity_LMP_test.png') np.save('xgb_dict.npy', xgboost.__dict__) plt.clf() explainer = shap.TreeExplainer(xgboost.model[-1]) shap_values = explainer.shap_values(data['X']) XX = scale.inverse_transform(data['X'])
'monotone_constraints': None } CT_RT = np.array([i[1] for i in data['y2']]) CT_Temp = np.array([i[2] for i in data['y2']]) CT_CS = np.array([i[3] for i in data['y2']]) ID = [i[0] for i in data['y2']] C = np.array([C_data[i] for i in ID]) catboost = GBM( package='catboost', X=data['X'], y=data['y'], model_scheme='LMP', cv=5, #grid_search=True, #grid_search_scoring='r2', #param_grid=parameter_grid, eval_metric='rmse', parameters=parameters, CT_Temp=CT_Temp, CT_RT=CT_RT, C=C) catboost.run_model() print(catboost.__dict__) ''' catboost.parity_plot(data='train', quantity='LMP').savefig('parity_LMP_train.png') catboost.parity_plot(data='test', quantity='LMP').savefig('parity_LMP_test.png') catboost.parity_plot(data='train', quantity='CT_RT').savefig('parity_CT_RT_train.png') catboost.parity_plot(data='test', quantity='CT_RT').savefig('parity_CT_RT_test.png') np.save('catboost_dict.npy', catboost.__dict__)
#'reg_lambda': [None], #'objective': [None], #'eta': [None], #'early_stopping_rounds': [None], #'cat_features': [None], 'grow_policy': ['SymmetricTree', 'Depthwise', 'Lossguide'], #'min_data_in_leaf': [None], #'min_child_samples': [None], #'max_leaves': [None], #'num_leaves': [None], #'score_function': [None], #'leaf_estimation_backtracking': [None], #'ctr_history_unit': [None], #'monotone_constraints': [None]} } catboost = GBM(package='catboost', X=data['X'], y=data['y'], feature_names=data['features'], cv=10, grid_search=True, grid_search_scoring='r2', param_grid=param_grid, eval_metric='rmse') catboost.run_model() print(catboost.__dict__) np.save('catboost_grid_res.npy', catboost.__dict__)