Exemplo n.º 1
0
from sklearn.externals import joblib

# Load the train/validation splits and preprocess the training set.
manager = ModelManager()

# read_data(...)[1] is consumed through .values(), so it is presumably a
# mapping of loaded frames (one per HDF file) -- TODO confirm in ModelManager.
train_parts = manager.read_data(
    global_dirs.splitted_data_path,
    formats=["hdf"],
    type="train",
    verbose=False,
)[1]
train = pd.concat(train_parts.values())

validation_parts = manager.read_data(
    global_dirs.splitted_data_path,
    formats=["hdf"],
    type="validation",
    verbose=False,
)[1]
validation = pd.concat(validation_parts.values())

# Only the training set is registered with the manager at this point.
manager.assign_sets(train=train)

# The mask is built from every column except the last one, so the entries of
# global_dirs.variable_selection[0] do not need to account for the y-column
# index.
feature_columns = train.iloc[:, :-1]
tup = manager.create_mask(
    feature_columns,
    global_dirs.variable_selection[0],
    select=global_dirs.variable_selection[1],
)
scalers = manager.preprocess_train(tup, scale_Y=True)

#max_depth_tuning=np.arange(1,50)
#best_depth=1
#best_r2=0
#history_r2=[]
#for md in max_depth_tuning:
#    manager.fit_rf_regression(max_depth=md)
#    r2=manager.predict_rf_regression(validation, tup)["r2"]
#    if abs(r2) > abs(best_r2):
from sklearn.externals import joblib

# Load both data splits, preprocess the training set and fit an XGBoost
# regression model through the ModelManager helper.
manager = ModelManager()

# Read the splits in the same order as before (train first, then validation).
# read_data(...)[1] is consumed through .values(), so it is presumably a
# mapping of loaded frames (one per HDF file) -- TODO confirm in ModelManager.
splits = {
    split_name: pd.concat(
        manager.read_data(
            global_dirs.splitted_data_path,
            formats=["hdf"],
            type=split_name,
            verbose=False,
        )[1].values()
    )
    for split_name in ("train", "validation")
}
train = splits["train"]
validation = splits["validation"]

manager.assign_sets(train=train, val=validation)

# Alternative mask kept from the original script for reference:
# tup = manager.create_mask(train.iloc[:,:-1], [0, 1, 2], select=False)
#
# The mask is built from every column except the last one, so the entries of
# global_dirs.variable_selection[0] do not need to account for the y-column
# index.
selection_columns = global_dirs.variable_selection[0]
selection_mode = global_dirs.variable_selection[1]
tup = manager.create_mask(
    train.iloc[:, :-1],
    selection_columns,
    select=selection_mode,
)
scalers = manager.preprocess_train(tup, scale_Y=False)

xgb_model = manager.fit_xgboost_regression()

# Ensure the output directories exist before anything is written to them.
# os.makedirs(..., exist_ok=True) replaces the isdir()/mkdir() pair: it is
# free of the check-then-create race and also creates missing parent
# directories, where os.mkdir would raise FileNotFoundError.
for output_dir in (global_dirs.results_path, global_dirs.xgboost_path):
    os.makedirs(output_dir, exist_ok=True)
if not os.path.isdir(global_dirs.xgboost_path + "scalers/"):