Example #1

parameters = {  # earlier entries truncated in the source
    'subsample_freq': 0,
    'colsample_bytree': 1.0, #fraction of features sampled per tree
    'reg_alpha': 0.0, #L1 regularization
    'reg_lambda': 0.0, #L2 regularization
    'random_state': 42,
    'n_jobs': -1,
    'silent': True,
    'importance_type': 'split', #count splits rather than gain
    'num_boost_round': 500,
    'tree_learner': 'feature' #serial, feature, data, voting
}

lgb = GBM(package='lightgbm',
          X=data['X'],
          y=data['y'],
          feature_names=data['features'],
          cv=5,
          grid_search=False,
          eval_metric='rmse',
          parameters=parameters)

lgb.run_model()
print(lgb.__dict__)
'''
print(data['features'])
plot_importance(lgb.model)
plt.show()
plt.clf()
plot_metric(lgb.model, metric='rmse')
plt.show()
plt.clf()
plot_tree(lgb.model)
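# Optional follow-up, not part of the original example: plot_importance returns
# a matplotlib Axes, so the figure can also be written to disk instead of only
# shown ('lgb_importance.png' is an assumed filename).
ax = plot_importance(lgb.model, max_num_features=20)
ax.figure.savefig('lgb_importance.png', dpi=300, bbox_inches='tight')
plt.clf()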
Example #2

parameters = {  # earlier entries truncated in the source
    'cat_features': None,
    'grow_policy': None,
    'min_data_in_leaf': None,
    'min_child_samples': None,
    'max_leaves': None,
    'num_leaves': None,
    'score_function': None,
    'leaf_estimation_backtracking': None,
    'ctr_history_unit': None,
    'monotone_constraints': None
}

catboost = GBM(package='catboost',
               X=X[y < 200000],
               y=y[y < 200000],
               feature_names=data['features'],
               cv=5,
               grid_search=False,
               eval_metric='rmse',
               parameters=parameters)

catboost.run_model()
print(catboost.__dict__)
np.save('catboost_res.npy', catboost.__dict__)
catboost.parity_plot(data='train', quantity='CT_RT',
                     scheme=1).savefig('parity_CT_RT_train.png')
catboost.parity_plot(data='test', quantity='CT_RT',
                     scheme=1).savefig('parity_CT_RT_test.png')
plt.clf()
explainer = shap.TreeExplainer(catboost.model[-1])
shap_values = explainer.shap_values(data['X'])
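# A typical next step, not shown in the original example: summarize the SHAP
# values computed above ('shap_summary.png' is an assumed filename).
shap.summary_plot(shap_values, data['X'], feature_names=data['features'],
                  show=False)
plt.savefig('shap_summary.png', dpi=300, bbox_inches='tight')
plt.clf()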
Example #3

parameters = {  # earlier entries truncated in the source
    'num_boost_round': 5000,
    'tree_learner': 'feature'
}

CT_RT = np.array([i[1] for i in data['y2']])
CT_Temp = np.array([i[2] for i in data['y2']])
CT_CS = np.array([i[3] for i in data['y2']])
ID = [i[0] for i in data['y2']]
C = np.array([C_data[i] for i in ID])

lgb = GBM(package='lightgbm',
          X=data['X'],
          y=data['y'],
          model_scheme='LMP',
          cv=5,
          grid_search=False,
          eval_metric='rmse',
          parameters=parameters,
          CT_RT=CT_RT,
          CT_Temp=CT_Temp,
          C=C)

lgb.run_model()
print(lgb.__dict__)
lgb.parity_plot(data='train', quantity='LMP').savefig('parity_LMP_train.png')
lgb.parity_plot(data='test', quantity='LMP').savefig('parity_LMP_test.png')
lgb.parity_plot(data='train',
                quantity='CT_RT').savefig('parity_CT_RT_train.png')
lgb.parity_plot(data='test', quantity='CT_RT').savefig('parity_CT_RT_test.png')
np.save('lgb_dict.npy', lgb.__dict__)
plt.clf()
Example #4

parameters = {  # earlier entries truncated in the source
    'leaf_estimation_backtracking': None,
    'ctr_history_unit': None,
    'monotone_constraints': None
}

CT_RT = np.array([i[1] for i in data['y2']])
CT_Temp = np.array([i[2] for i in data['y2']])
ID = [i[0] for i in data['y2']]
C = np.array([25 for i in ID])

catboost = GBM(package='catboost',
               model_scheme='LMP',
               X=data['X'],
               y=data['y'],
               feature_names=data['features'],
               cv=5,
               grid_search=False,
               eval_metric='rmse',
               parameters=parameters,
               CT_Temp=CT_Temp,
               CT_RT=CT_RT,
               C=C)

catboost.run_model()
print(catboost.__dict__)
catboost.parity_plot(data='train',
                     quantity='LMP').savefig('parity_LMP_train.png')
catboost.parity_plot(data='test',
                     quantity='LMP').savefig('parity_LMP_test.png')
catboost.parity_plot(data='train',
                     quantity='CT_RT').savefig('parity_CT_RT_train.png')
catboost.parity_plot(data='test',
                     quantity='CT_RT').savefig('parity_CT_RT_test.png')
Example #5

parameters = {  # earlier entries truncated in the source
    'grow_policy': None,
    'min_data_in_leaf': None,
    'min_child_samples': None,
    'max_leaves': None,
    'num_leaves': None,
    'score_function': None,
    'leaf_estimation_backtracking': None,
    'ctr_history_unit': None,
    'monotone_constraints': None
}

catboost = GBM(package='catboost',
               X=data['X'],
               y=data['y'],
               test_size=0.2,
               feature_names=data['features'],
               cv=5,
               grid_search=False,
               eval_metric='rmse',
               parameters=parameters)

catboost.run_model()
print(catboost.__dict__)
y_gen = catboost.model[-1].predict(X_generated)

df = pd.DataFrame(data['y'], columns=['rupture life'])
df['sample'] = 'real'
# Tail of this call is truncated in the source; pd.concat replaces the removed
# DataFrame.append, with ignore_index=True assumed.
df = pd.concat([df,
                pd.DataFrame({
                    'rupture life': y_gen,
                    'sample': ['synthetic' for i in range(len(y_gen))]
                })], ignore_index=True)
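# Hypothetical continuation (the original example is cut off here): overlay the
# real and synthetic rupture-life distributions; seaborn and the output filename
# are assumptions.
import seaborn as sns
sns.histplot(data=df, x='rupture life', hue='sample', stat='density',
             common_norm=False, element='step')
plt.savefig('rupture_life_real_vs_synthetic.png', dpi=300, bbox_inches='tight')
plt.clf()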
Example #6

parameters = {  # earlier entries truncated in the source
    'cat_features': None,
    'grow_policy': None,
    'min_data_in_leaf': None,
    'min_child_samples': None,
    'max_leaves': None,
    'num_leaves': None,
    'score_function': None,
    'leaf_estimation_backtracking': None,
    'ctr_history_unit': None,
    'monotone_constraints': None
}

catboost = GBM(package='catboost',
               X=data['X'][data['y'] < 200000],
               y=data['y'][data['y'] < 200000],
               feature_names=data['features'],
               cv=5,
               grid_search=False,
               eval_metric='rmse',
               parameters=parameters)

features = [
    i for i in df.columns if i not in ['CT_RT', 'CT_CS', 'CT_MCR', 'ID']
]
features = [i for i in features if 'Weighted' not in i]
features.append('CT_RT')
X = df[features].to_numpy(np.float32)
y = df['CT_RT'].to_numpy(np.float32)

pdata = ProcessData(X=X, y=y, features=features)
pdata.clean_data()
data = pdata.get_data()
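# Quick sanity check on the cleaned arrays before re-fitting; ProcessData's
# internals are not shown in this excerpt, so only shapes are inspected.
print(data['X'].shape, data['y'].shape, len(data['features']))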
Example #7

lgb = GBM(  # earlier arguments of this call truncated in the source
           eval_metric='rmse',
           param_grid=parameters_grid)

lgb.run_model()
print(lgb.__dict__)
'''
parameters = {'booster': 'gbtree', #gbtree, gblinear, dart
              'eta': 0.3, #learning rate
              'gamma': 0, #min split loss
              'max_depth': 10,
              'tree_method': 'auto'}

xgboost = GBM(package='xgboost',
              X=data['X'],
              y=data['y'],
              feature_names=data['features'],
              cv=5,
              grid_search=False,
              eval_metric='rmse',
              parameters=parameters)

xgboost.run_model()
print(xgboost.__dict__)
xgboost.parity_plot(data='train', quantity='LMP').savefig('parity_LMP_train.png')
xgboost.parity_plot(data='test', quantity='LMP').savefig('parity_LMP_test.png')
np.save('xgb_dict.npy', xgboost.__dict__)
plt.clf()
explainer = shap.TreeExplainer(xgboost.model[-1])
shap_values = explainer.shap_values(data['X'])

XX = scale.inverse_transform(data['X'])
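# With the features back on their original scale, a SHAP dependence plot can be
# drawn for a single feature (sketch; feature index 0 and the filename are
# arbitrary choices).
shap.dependence_plot(0, shap_values, XX, feature_names=data['features'],
                     show=False)
plt.savefig('shap_dependence_feature0.png', dpi=300, bbox_inches='tight')
plt.clf()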
Example #8

parameters = {  # earlier entries truncated in the source
    'monotone_constraints': None
}

CT_RT = np.array([i[1] for i in data['y2']])
CT_Temp = np.array([i[2] for i in data['y2']])
CT_CS = np.array([i[3] for i in data['y2']])
ID = [i[0] for i in data['y2']]
C = np.array([C_data[i] for i in ID])

catboost = GBM(
    package='catboost',
    X=data['X'],
    y=data['y'],
    model_scheme='LMP',
    cv=5,
    #grid_search=True,
    #grid_search_scoring='r2',
    #param_grid=parameter_grid,
    eval_metric='rmse',
    parameters=parameters,
    CT_Temp=CT_Temp,
    CT_RT=CT_RT,
    C=C)

catboost.run_model()
print(catboost.__dict__)
'''
catboost.parity_plot(data='train', quantity='LMP').savefig('parity_LMP_train.png')
catboost.parity_plot(data='test', quantity='LMP').savefig('parity_LMP_test.png')
catboost.parity_plot(data='train', quantity='CT_RT').savefig('parity_CT_RT_train.png')
catboost.parity_plot(data='test', quantity='CT_RT').savefig('parity_CT_RT_test.png')
np.save('catboost_dict.npy', catboost.__dict__)
Example #9

param_grid = {  # earlier entries of this grid truncated in the source
              #'reg_lambda': [None],
              #'objective': [None],
              #'eta': [None],
              #'early_stopping_rounds': [None],
              #'cat_features': [None],
              'grow_policy': ['SymmetricTree', 'Depthwise', 'Lossguide'],
              #'min_data_in_leaf': [None],
              #'min_child_samples': [None],
              #'max_leaves': [None],
              #'num_leaves': [None],
              #'score_function': [None],
              #'leaf_estimation_backtracking': [None],
              #'ctr_history_unit': [None],
              #'monotone_constraints': [None]}
              }

catboost = GBM(package='catboost',
               X=data['X'],
               y=data['y'],
               feature_names=data['features'],
               cv=10,
               grid_search=True,
               grid_search_scoring='r2',
               param_grid=param_grid,
               eval_metric='rmse')

catboost.run_model()
print(catboost.__dict__)
np.save('catboost_grid_res.npy', catboost.__dict__)
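# Because np.save pickles the dict, the grid-search results are read back with
# allow_pickle=True and unwrapped with .item(); the keys inside depend on the
# GBM wrapper, which is not shown here.
grid_res = np.load('catboost_grid_res.npy', allow_pickle=True).item()
print(sorted(grid_res.keys()))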