def settings(model, epochs, data_dir, sims=True):
    """Load the best grid-search configuration and the Hyytiälä data splits.

    Args:
        model (str): model name used in the grid-search result filenames.
        epochs (int): number of training epochs stored in the hparams.
        data_dir (str): project root directory.
        sims (bool): if True, use the simulation grid search (adaptive
            pooling results); otherwise the observation (MLP) grid search.

    Returns:
        tuple: (hparams, model_design, X, Y, X_test, Y_test)
    """
    # Raw strings keep the Windows-style backslash paths byte-identical
    # while avoiding accidental escape sequences in future edits.
    if sims:
        gridsearch_results = pd.read_csv(
            os.path.join(
                data_dir,
                rf"python\outputs\grid_search\simulations\grid_search_results_{model}2_adaptPool.csv"
            ))
    else:
        gridsearch_results = pd.read_csv(
            os.path.join(
                data_dir,
                rf"python\outputs\grid_search\observations\mlp\grid_search_results_{model}2.csv"
            ))

    # Best configuration = row with the lowest validation MAE.
    setup = gridsearch_results.iloc[
        gridsearch_results['mae_val'].idxmin()].to_dict()

    # FIX: parse the hidden-layer sizes once and reuse the parsed list
    # (previously literal_eval ran twice on the same string).
    hiddensize = literal_eval(setup["hiddensize"])
    dimensions = hiddensize + [1]  # adds the output dimension!

    # featuresize only exists in the simulation (adaptive pooling) results.
    featuresize = setup["featuresize"] if sims else None

    hparams = {
        "batchsize": int(setup["batchsize"]),
        "epochs": epochs,
        "history": int(setup["history"]),
        "hiddensize": hiddensize,
        "learningrate": setup["learningrate"]
    }
    model_design = {
        "dimensions": dimensions,
        "activation": nn.ReLU,
        "featuresize": featuresize
    }

    # Hyytiälä: 2001-2007 for training, 2008 held out for testing.
    X, Y = preprocessing.get_splits(
        sites=['hyytiala'],
        years=[2001, 2002, 2003, 2004, 2005, 2006, 2007],
        datadir=os.path.join(data_dir, "data"),
        dataset="profound",
        simulations=None)
    X_test, Y_test = preprocessing.get_splits(
        sites=['hyytiala'],
        years=[2008],
        datadir=os.path.join(data_dir, "data"),
        dataset="profound",
        simulations=None)

    return hparams, model_design, X, Y, X_test, Y_test
def preles_errors(
        site,
        data_dir=r"OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt"):
    """Compute PRELES error metrics against the 2008 test year of a site.

    Args:
        site (str): profound site name (e.g. "hyytiala").
        data_dir (str): project root directory. FIX: previously only used
            for get_splits while the PRELES output paths repeated the
            default path literal; both are now derived from ``data_dir``
            (identical behavior for the default value).

    Returns:
        list: [rmse_def, rmse_calib, mae_def, mae_calib] — "train"/"val"
        variable names kept for consistency with the other error tables
        (default parameters ~ train, calibrated parameters ~ val).
    """
    X_test, Y_test = preprocessing.get_splits(
        sites=[site],
        years=[2008],
        datadir=os.path.join(data_dir, "data"),
        dataset="profound",
        simulations=None)

    prelesGPP_def = pd.read_csv(
        os.path.join(data_dir, "data", "profound", f"output{site}2008def"),
        sep=";")
    prelesGPP_calib = pd.read_csv(
        os.path.join(data_dir, "data", "profound", f"output{site}2008calib"),
        sep=";")

    rmse_train = utils.rmse(Y_test, prelesGPP_def)[0]
    rmse_val = utils.rmse(Y_test, prelesGPP_calib)[0]
    mae_train = metrics.mean_absolute_error(Y_test, prelesGPP_def)
    mae_val = metrics.mean_absolute_error(Y_test, prelesGPP_calib)

    return [rmse_train, rmse_val, mae_train, mae_val]
def plot3d(sparse=False):
    """Plot sparse-MLP (mlp8) predictions against the Hyytiälä 2008 truth.

    Note:
        ``sparse`` is unused in this body — kept for interface
        compatibility with the sibling plot helpers.
        TODO(review): wire it up or drop it.
    """
    data_dir = r"OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt"
    # Only the targets are needed for the ground-truth curve.
    _, Y = preprocessing.get_splits(sites=['hyytiala'],
                                    years=[2008],
                                    datadir=os.path.join(data_dir, "data"),
                                    dataset="profound",
                                    simulations=None)
    Y_preds = np.load(
        r"OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt\python\outputs\sparse\models\mlp8\sparse1\setting1\y_preds.npy",
        allow_pickle=True)
    visualizations.plot_prediction(Y, Y_preds, "Hyytiälä (2008)")
    plt.legend(loc="upper right")
def plot3e(sparse=False):
    """Plot LSTM (lstm0) predictions against the Hyytiälä 2008 ground
    truth, annotated with the MAE of the ensemble-mean prediction.

    Note:
        ``sparse`` is unused in this body — kept for interface
        compatibility with the sibling plot helpers.
        TODO(review): wire it up or drop it.
    """
    data_dir = r"OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt"
    # Only the targets are needed for the ground-truth curve.
    _, Y = preprocessing.get_splits(sites=['hyytiala'],
                                    years=[2008],
                                    datadir=os.path.join(data_dir, "data"),
                                    dataset="profound",
                                    simulations=None)
    Y_preds = np.load(
        r"OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt\python\outputs\models\lstm0\y_preds.npy",
        allow_pickle=True)
    visualizations.plot_prediction(Y, Y_preds, "Hyytiälä (2008)")
    plt.legend(loc="upper right")
    # The predictions cover fewer days than the targets (Y is truncated to
    # the prediction length before scoring); predictions are averaged over
    # axis 0 — presumably the CV-split axis, TODO confirm.
    mae = metrics.mean_absolute_error(Y[:Y_preds.shape[1]],
                                      np.mean(Y_preds, 0))
    plt.text(10, 10, f"MAE = {np.round(mae, 4)}")
def plot3f(years=(2001, 2002, 2003, 2004, 2005, 2006, 2007)):
    """Plot finetuned feature-extractor (mlp10) predictions vs. 2008 truth.

    Args:
        years: training years forwarded to the feature extractor.
            FIX: immutable default instead of a shared mutable list.
    """
    years = list(years)  # keep a list for the downstream call
    data_dir = r"OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt"
    # Only the targets are needed for the ground-truth curve.
    _, Y = preprocessing.get_splits(sites=['hyytiala'],
                                    years=[2008],
                                    datadir=os.path.join(data_dir, "data"),
                                    dataset="profound",
                                    simulations=None)
    # The returned errors are not used here.
    predictions_test, _ = finetuning.featureExtractorC("mlp", 10, None, 50,
                                                       years=years)
    Y_preds = np.array(predictions_test)
    visualizations.plot_prediction(Y, Y_preds, "Hyytiälä (2008)")
    plt.legend(loc="upper right")
    mae = metrics.mean_absolute_error(Y, np.mean(Y_preds, 0))
    plt.text(10, 10, f"MAE = {np.round(mae, 4)}")
def plot3a():
    """Plot the Hyytiälä 2008 GPP ground truth on its own."""
    data_dir = r"OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt"
    # Only the targets are needed (features are not plotted).
    _, Y = preprocessing.get_splits(sites=['hyytiala'],
                                    years=[2008],
                                    datadir=os.path.join(data_dir, "data"),
                                    dataset="profound",
                                    simulations=None)
    # (Dead commented-out PRELES overlays removed — see plot4/plot5 for
    # model comparisons.)
    fig, ax = plt.subplots(figsize=(7, 7))
    fig.suptitle("Hyytiälä (2008)")
    ax.plot(Y, color="green", label="Ground Truth", marker="o",
            linewidth=0.8, alpha=0.9, markerfacecolor='green', markersize=4)
    ax.set(xlabel="Day of Year", ylabel="GPP [g C m$^{-2}$ day$^{-1}$]")
    plt.legend()
def plot4(w, model, years=[2001,2002,2003, 2004, 2005, 2006, 2007]):
    """Plot a w-day moving average of 2008 GPP: ground truth vs. one model.

    Args:
        w (int): moving-average window width in days.
        model (str): which predictions to overlay: "preles", "mlp0"
            (pretrained MLP) or "mlp10" (finetuned feature extractor).
        years (list): training years forwarded to the feature extractor.
            NOTE(review): mutable default argument — harmless here since it
            is never mutated, but a tuple would be safer.
    """

    def moving_average(x, w):
        # 'valid' mode keeps only fully covered windows, so the result has
        # len(x) - w + 1 points.
        return np.convolve(x, np.ones(w), 'valid') / w

    data_dir = "OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt"

    # Ground truth for the 2008 test year (X is loaded but unused here).
    X, Y = preprocessing.get_splits(sites = ['hyytiala'],
                                    years = [2008],
                                    datadir = os.path.join(data_dir, "data"),
                                    dataset = "profound",
                                    simulations = None)

    # Calibrated PRELES predictions for the same year.
    Y_preles = pd.read_csv(os.path.join(data_dir ,r"data\profound\outputhyytiala2008calib"),
                           sep=";")
    # Pretrained MLP predictions: squeeze(2) implies a trailing size-1 axis;
    # after the transpose the mean below runs over axis 1 — presumably the
    # CV-split axis, TODO confirm the stored layout.
    Y_nn = np.transpose(np.load(r"OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt\python\outputs\models\mlp0\noPool\sigmoid\y_preds.npy",
                                allow_pickle=True).squeeze(2))
    # Finetuned feature-extractor predictions (errors are unused here).
    predictions_test, errors = finetuning.featureExtractorC("mlp", 10, None, 50,
                                                            years = years)
    Y_nn_f = np.transpose(np.array(predictions_test).squeeze(2))

    # Smooth all curves with the same window; the model ensembles are
    # averaged over their splits first.
    mt = moving_average(Y.squeeze(1), w)
    mp = moving_average(Y_preles.squeeze(1), w)
    mn = moving_average(np.mean(Y_nn, axis=1), w)
    mnf = moving_average(np.mean(Y_nn_f, axis=1), w)

    plt.figure(num=None, figsize=(7, 7), facecolor='w', edgecolor='k')
    plt.plot(mt, label="Groundtruth", color="lightgrey")

    # Overlay exactly one model's smoothed predictions with its MAE
    # (computed on the smoothed curves, not the daily values).
    if model=="preles":
        plt.plot(mp, label="PRELES \npredictions", color="green")
        maep = metrics.mean_absolute_error(mt, mp)
        plt.text(10,9, f"MAE = {np.round(maep, 4)}")
    elif model=="mlp0":
        plt.plot(mn, label="MLP \npredictions", color="green")
        maen = metrics.mean_absolute_error(mt, mn)
        plt.text(10,9, f"MAE = {np.round(maen, 4)}")
    elif model=="mlp10":
        plt.plot(mnf, label="Finetuned MLP \npredictions", color="green")
        maen = metrics.mean_absolute_error(mt, mnf)
        plt.text(10,9, f"MAE = {np.round(maen, 4)}")

    plt.xlabel("Day of Year")
    plt.ylabel("Average GPP over 7 days [g C m$^{-2}$ day$^{-1}$]")
    plt.legend()
import setup.preprocessing as preprocessing
from setup.dev_mlp import _selection_parallel
import pandas as pd
import time
import os.path
import multiprocessing as mp
import itertools
import torch.nn as nn

#%% Load Data
# Cluster home directory; the profound data lives under scripts/data.
data_dir = r"/home/fr/fr_fr/fr_mw263"

# Multi-site training split: three profound sites, years 2001-2007.
X, Y = preprocessing.get_splits(sites = ['bily_kriz', 'soro','collelongo'],
                                years = [2001,2002,2003,2004,2005,2006, 2007],
                                datadir = os.path.join(data_dir, "scripts/data"),
                                dataset = "profound",
                                simulations = None)

# Held-out test split: same sites, year 2008.
X_test, Y_test = preprocessing.get_splits(sites = ['bily_kriz', 'soro','collelongo'],
                                          years = [2008],
                                          datadir = os.path.join(data_dir, "scripts/data"),
                                          dataset = "profound",
                                          simulations = None)

#%% Grid search of hparams
# Candidate grids for the MLP hyperparameter search. Presumably consumed by
# _selection_parallel in a later, not-shown part of this script — verify.
hiddensize = [16, 64, 128, 256]
batchsize = [16, 64, 128, 256]
learningrate = [1e-5, 1e-4, 1e-3, 5e-3, 1e-2]
history = [0,1,2]
activation = [nn.ReLU]
import sys

# Make the project's python/ directory importable before the local imports
# below (finetuning, setup.*, visualizations).
sys.path.append(
    'OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt\python')
import finetuning
import setup.preprocessing as preprocessing
import visualizations
import os.path
import numpy as np

#%% Load Data: Profound in and out.
datadir = "OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt"

# Hyytiälä observations, training years 2001-2006.
X, Y = preprocessing.get_splits(sites=['hyytiala'],
                                years=[2001, 2002, 2003, 2004, 2005, 2006],
                                datadir=os.path.join(datadir, "data"),
                                dataset="profound",
                                simulations=None)

#%%
# Inspect the losses of pretrained model type 7, then finetune that
# pretrained MLP on the observation data for 100 epochs.
pretrained_model = visualizations.losses("mlp", 7, "")

running_losses, performance, y_tests, y_preds = finetuning.finetune(
    X, Y, epochs=100, model="mlp", pretrained_type=7)

#%%
visualizations.plot_running_losses(running_losses["mae_train"],
                                   running_losses["mae_val"], "", "mlp")
# Mean of the per-split performance metrics.
print(np.mean(np.array(performance), axis=0))

res_mlp = visualizations.losses("mlp", 0, "")
@author: marie
"""
import setup.preprocessing as preprocessing
import os.path
import multiprocessing as mp
import pandas as pd
import numpy as np

#%% Load Data: Profound in and out.
# Cluster home directory; the profound data lives under scripts/data.
data_dir = r"/home/fr/fr_fr/fr_mw263"

# Training split: Hyytiälä, years 2001-2007.
X, Y = preprocessing.get_splits(
    sites=['hyytiala'],
    years=[2001, 2002, 2003, 2004, 2005, 2006, 2007],
    datadir=os.path.join(data_dir, "scripts/data"),
    dataset="profound",
    simulations=None)

# Held-out test split: year 2008.
X_test, Y_test = preprocessing.get_splits(sites=['hyytiala'],
                                          years=[2008],
                                          datadir=os.path.join(
                                              data_dir, "scripts/data"),
                                          dataset="profound",
                                          simulations=None)


def subset_data(data, perc):
    """Take the first ``perc`` percent of rows of 2-D ``data``.

    NOTE(review): no ``return`` statement is visible in this chunk — the
    definition appears truncated here; confirm the full version returns
    ``subset``.
    """
    n_subset = int(np.floor(data.shape[0] / 100 * perc))
    subset = data[:n_subset, :]
#%% Set working directory import setup.preprocessing as preprocessing from setup.dev_mlp import _selection_parallel import pandas as pd import time import os.path import multiprocessing as mp import itertools import torch.nn as nn #%% Load Data data_dir = r"/home/fr/fr_fr/fr_mw263" X, Y = preprocessing.get_splits(sites = ["bily_kriz"], years = [2005, 2006], datadir = os.path.join(data_dir, "scripts/data"), dataset = "profound", simulations = None) X_test, Y_test = preprocessing.get_splits(sites = ['bily_kriz'], years = [2008], datadir = os.path.join(data_dir, "scripts/data"), dataset = "profound", simulations = None) #%% Grid search of hparams hiddensize = [8, 16, 32, 64, 128, 256] batchsize = [8, 16, 32, 64, 128, 256] learningrate = [1e-4, 1e-3, 5e-3, 1e-2] history = [0,1,2] n_layers = [1,2,3]
import setup.models as models
from ast import literal_eval
import torch.nn as nn
import torch
import setup.preprocessing as preprocessing
import setup.dev_mlp as dev_mlp
import setup.utils as utils
import collect_results
import finetuning
from sklearn import metrics

# NOTE(review): os.path is used below, but no `import os.path` is visible in
# this chunk — verify it is imported in a part of the file not shown here.
data_dir = "OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt"

# Training split: Hyytiälä, years 2001-2007.
X_train, Y_train = preprocessing.get_splits(
    sites=['hyytiala'],
    years=[2001, 2002, 2003, 2004, 2005, 2006, 2007],
    datadir=os.path.join(data_dir, "data"),
    dataset="profound",
    simulations=None)

# Held-out test split: year 2008.
X_test, Y_test = preprocessing.get_splits(sites=['hyytiala'],
                                          years=[2008],
                                          datadir=os.path.join(
                                              data_dir, "data"),
                                          dataset="profound",
                                          simulations=None)

#%% Number of Network Parameters
#mods = [5,7,10,12,13,14]
mods = [0, 4, 5]  # model/pretraining types to inspect
dummies = False

# NOTE(review): the loop body is not visible in this chunk — the statement
# below is truncated here.
for mod in mods:
@author: marie
"""
import setup.preprocessing as preprocessing
import setup.models as models
import finetuning
import os.path
import numpy as np
import pandas as pd
import torch

data_dir = "OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt"

#%%
# Load data as a DataFrame (to_numpy=False) so columns can be addressed by
# name below. Note the unusual year set: 2001-2006 plus 2008, skipping 2007.
X_test, Y_test = preprocessing.get_splits(sites = ['hyytiala'],
                                          years = [2001, 2002, 2003, 2004, 2005, 2006, 2008],
                                          datadir = os.path.join(data_dir, "data"),
                                          dataset = "profound",
                                          simulations = None,
                                          to_numpy=False)

# Synthetic sensitivity grid: sweep TAir over its observed range in 0.01
# steps while holding every other feature at its mean value.
df_new = pd.DataFrame({"PAR": X_test["PAR"].mean(),
                       "TAir": np.arange(X_test["TAir"].min(), X_test["TAir"].max(), step=0.01),
                       "VPD": X_test["VPD"].mean(),
                       "Precip": X_test["Precip"].mean(),
                       "fAPAR": X_test["fAPAR"].mean(),
                       "DOY_sin": X_test["DOY_sin"].mean(),
                       "DOY_cos": X_test["DOY_cos"].mean()})

# Persist the grid and keep a numpy copy for the model calls below.
df_new.to_csv(os.path.join(data_dir, "data\post_analysis\df1.csv"), sep=",")
df = df_new.to_numpy()
#%%
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import setup.models as models
import numpy as np
import setup.utils as utils
import setup.preprocessing as preprocessing
import matplotlib.pyplot as plt
import random

#%%%
# NOTE(review): `os.path` and the bare name `torch` are used below, but
# neither `import os.path` nor `import torch` is visible in this chunk
# (`import torch.nn as nn` binds only `nn`) — verify they are imported in a
# not-shown part of the file.
datadir = "OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt"

# One site, one year, explicit feature columns, returned as numpy arrays.
X, Y = preprocessing.get_splits(
    sites=['le_bray'],
    years=[2001],
    datadir=os.path.join(datadir, "data"),
    dataset="profound",
    simulations=None,
    colnames=["PAR", "TAir", "VPD", "Precip", "fAPAR", "DOY_sin", "DOY_cos"],
    to_numpy=True)

#%% Train
# Scale features, then move features and targets into float tensors.
X = utils.minmax_scaler(X)
X = torch.tensor(X).type(dtype=torch.float)
Y = torch.tensor(Y).type(dtype=torch.float)

#model = models.MLP([X.shape[1],12,1], nn.ReLU)
#model = models.LSTM(X.shape[1], 12, 1, 10, F.relu)

# Batch the data: batchsize 128, history 0 (no lagged features).
x, target = utils.create_batches(X, Y, 128, 0)
#x_test, target_test = utils.create_batches(X, Y, 128, 0)
def plot5(model, w=None, years=(2001, 2002, 2003, 2004, 2005, 2006, 2007)):
    """Scatter 2008 predictions against ground truth with a linear fit.

    Args:
        model (str): which predictions to plot: "preles", "mlp0"
            (pretrained MLP) or "mlp10" (finetuned feature extractor).
        w (int, optional): if given, compare w-day moving averages instead
            of raw daily values.
        years: training years forwarded to the feature extractor.
            FIX: immutable default instead of a shared mutable list.
    """

    def moving_average(x, w):
        # 'valid' mode: only fully covered windows, len(x) - w + 1 points.
        return np.convolve(x, np.ones(w), 'valid') / w

    def scatter_with_fit(y_hat, y_true):
        # Shared branch body (previously triplicated): scatter, least-
        # squares line, MAE and R^2 annotations.
        plt.scatter(y_hat, y_true, color="darkblue")
        b, m = polyfit(y_hat, y_true, 1)
        r2 = rsquared(y_hat, y_true, 1)["determination"]
        plt.plot(y_hat, b + m * y_hat, '-', color="darkred",
                 label="y = a + b $\hat{y}$ ")
        mae = metrics.mean_absolute_error(y_true, y_hat)
        plt.text(0, 10, f"MAE = {np.round(mae, 4)}")
        plt.text(0, 9, f"R$^2$ = {np.round(r2, 4)}")

    data_dir = r"OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt"
    # Only the targets are needed (features were loaded but unused before).
    _, Y = preprocessing.get_splits(sites=['hyytiala'],
                                    years=[2008],
                                    datadir=os.path.join(data_dir, "data"),
                                    dataset="profound",
                                    simulations=None)
    Y_preles = pd.read_csv(
        os.path.join(data_dir, r"data\profound\outputhyytiala2008calib"),
        sep=";")
    # squeeze(2) implies a trailing size-1 axis; after the transpose the
    # mean below runs over axis 1 — presumably the CV-split axis, TODO
    # confirm the stored layout.
    Y_nn = np.transpose(
        np.load(
            r"OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt\python\outputs\models\mlp0\noPool\sigmoid\y_preds.npy",
            allow_pickle=True).squeeze(2))
    # The returned errors are not used here.
    predictions_test, _ = finetuning.featureExtractorC("mlp", 10, None, 50,
                                                       years=list(years))
    Y_nn_f = np.transpose(np.array(predictions_test).squeeze(2))

    if w is not None:  # idiomatic form of the previous `not w is None`
        Y = moving_average(Y.squeeze(1), w)
        Y_preles = moving_average(Y_preles.squeeze(1), w)
        Y_nn = moving_average(np.mean(Y_nn, axis=1), w)
        Y_nn_f = moving_average(np.mean(Y_nn_f, axis=1), w)
    else:
        Y = Y.squeeze(1)
        Y_preles = Y_preles.squeeze(1)
        Y_nn = np.mean(Y_nn, axis=1)
        Y_nn_f = np.mean(Y_nn_f, axis=1)

    plt.figure(num=None, figsize=(7, 7), facecolor='w', edgecolor='k')
    if model == "preles":
        scatter_with_fit(Y_preles, Y)
    elif model == "mlp0":
        scatter_with_fit(Y_nn, Y)
    elif model == "mlp10":
        scatter_with_fit(Y_nn_f, Y)

    # 1:1 reference line and shared axis cosmetics.
    plt.plot(np.arange(11), 0 + 1 * np.arange(11), '--', color="gray",
             label="y = $\hat{y}$")
    plt.xlim((-1, 11))
    plt.ylim((-1, 11))
    plt.ylabel("True GPP Test [g C m$^{-2}$ day$^{-1}$]")
    plt.xlabel("Estimated GPP Test [g C m$^{-2}$ day$^{-1}$]")
    plt.legend(loc="lower right")
import os.path
import setup.dev_cnn as dev_cnn
import setup.dev_mlp as dev_mlp
import setup.dev_lstm as dev_lstm
import torch
import torch.nn as nn
import setup.preprocessing as preprocessing
import visualizations
import numpy as np

#%%
data_dir = "OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt"

#%%
# Observation data: Le Bray site, two (non-consecutive) years.
X, Y = preprocessing.get_splits(sites=['le_bray'],
                                years=[2001, 2003],
                                datadir=os.path.join(data_dir, "data"),
                                dataset="profound",
                                simulations=None)
#%%
# PRELES simulations generated with uniformly sampled parameters; the
# parameter columns themselves are dropped.
X_sims, Y_sims = preprocessing.get_simulations(data_dir=os.path.join(
    data_dir, r"data\simulations\uniform_params"),
    drop_parameters=True)

#%%
# NOTE(review): this dict literal is not closed in the visible chunk — the
# script appears truncated here.
hparams = {
    "batchsize": 256,
    "epochs": 1000,
    "history": 7,
    "hiddensize": 128,
    "learningrate": 0.01
def train_network(model,
                  typ,
                  site,
                  epochs,
                  q,
                  adaptive_pooling,
                  dropout_prob,
                  dropout,
                  sparse=None,
                  traindata_perc=None,
                  save=True,
                  data_dir=r"/home/fr/fr_fr/fr_mw263"):
    """Train the selected architecture on one site and persist all results.

    Loads the 2001-2007 training and 2008 test splits for ``site``, looks
    up the best model parameters via ``set_model_parameters`` and runs the
    architecture-specific cross-validated training routine.

    Args:
        model (str): architecture name; selects the ``setup.dev_<model>``
            module (e.g. "mlp", "cnn", "lstm").
        typ (int): pretraining/setup type; part of the output directory.
        site (str): profound site to train on.
        epochs (int): training epochs, forwarded to set_model_parameters.
        q: unused in this body — presumably a multiprocessing queue from
            the caller, TODO confirm.
        adaptive_pooling: forwarded to set_model_parameters.
        dropout_prob, dropout: forwarded to train_model_CV.
        sparse (number, optional): if given, randomly keep only this
            percentage of training days.
        traindata_perc: unused in this body — TODO confirm.
        save (bool): forwarded to train_model_CV.
        data_dir (str): cluster root directory for data and outputs.

    Side effects:
        Writes selected_results.csv, running_losses.npy, y_tests.npy,
        y_preds.npy and (for sparse runs) ind.npy to the output directory.
    """
    X, Y = preprocessing.get_splits(
        sites=[site],
        years=[2001, 2002, 2003, 2004, 2005, 2006, 2007],
        datadir=os.path.join(data_dir, "scripts/data"),
        dataset="profound",
        simulations=None)

    if sparse is not None:
        # Subsample `sparse` percent of training days without replacement.
        ind = np.random.choice(X.shape[0],
                               int(np.floor(X.shape[0] / 100 * sparse)),
                               replace=False)
        X, Y = X[ind], Y[ind]

    X_test, Y_test = preprocessing.get_splits(sites=[site],
                                              years=[2008],
                                              datadir=os.path.join(
                                                  data_dir, "scripts/data"),
                                              dataset="profound",
                                              simulations=None)
    eval_set = {"X_test": X_test, "Y_test": Y_test}

    hparams, model_design = set_model_parameters(model, typ, epochs,
                                                 adaptive_pooling, X, Y)

    start = time.time()

    # Output directory: <root>/output/models/<model><typ>/relu[/sparse//<p>]
    data_dir = os.path.join(data_dir, f"output/models/{model}{typ}")
    data_dir = os.path.join(data_dir, "relu")
    if sparse is not None:
        # NOTE(review): "sparse//{sparse}" yields a doubled separator; kept
        # byte-identical because other scripts may expect this exact layout.
        data_dir = os.path.join(data_dir, f"sparse//{sparse}")

    # Dispatch to the architecture-specific training module.
    dev = __import__(f"setup.dev_{model}", fromlist=["selected"])
    running_losses, performance, y_tests, y_preds = dev.train_model_CV(
        hparams, model_design, X, Y, eval_set, dropout_prob, dropout,
        data_dir, save)
    end = time.time()

    # performance columns: rmse_train, rmse_test, mae_train, mae_test —
    # averaged over the CV splits.
    performance = np.mean(np.array(performance), axis=0)

    rets = [(end - start), hparams["hiddensize"], hparams["batchsize"],
            hparams["learningrate"], hparams["history"],
            model_design["activation"], performance[0], performance[1],
            performance[2], performance[3]]
    results = pd.DataFrame([rets],
                           columns=[
                               "execution_time", "hiddensize", "batchsize",
                               "learningrate", "history", "activation",
                               "rmse_train", "rmse_val", "mae_train",
                               "mae_val"
                           ])
    results.to_csv(os.path.join(data_dir, "selected_results.csv"),
                   index=False)

    # Save: Running losses, ytests and ypreds.
    np.save(os.path.join(data_dir, "running_losses.npy"), running_losses)
    np.save(os.path.join(data_dir, "y_tests.npy"), y_tests)
    np.save(os.path.join(data_dir, "y_preds.npy"), y_preds)
    if sparse is not None:
        # Persist the subsample indices so the sparse run is reproducible.
        np.save(os.path.join(data_dir, "ind.npy"), ind)
def settings(typ,
             epochs,
             data_dir,
             dummies,
             sparse=None,
             years=(2001, 2002, 2003, 2004, 2005, 2006, 2007),
             random_days=None):
    """Assemble data splits, hparams and model design for type ``typ``.

    Args:
        typ (int): training/pretraining type; selects the grid-search
            result table and whether a featuresize (adaptive pooling) is
            taken from it.
        epochs (int): stored in the returned hparams.
        data_dir (str): project root directory.
        dummies (bool): if True, zero-pad the 7 features to 12 columns.
        sparse (optional): if given, reload the day indices saved by the
            matching sparse training run and subset X/Y with them.
        years: training years. FIX: immutable default instead of a shared
            mutable list.
        random_days (int, optional): if given, randomly subsample this many
            training days.

    Returns:
        tuple: (hparams, model_design, X, Y, X_test, Y_test)

    Raises:
        ValueError: if ``typ`` has no grid-search table mapped to it.
            FIX: previously an unhandled typ fell through to a NameError.
    """
    X, Y = preprocessing.get_splits(sites=['hyytiala'],
                                    years=list(years),
                                    datadir=os.path.join(data_dir, "data"),
                                    dataset="profound",
                                    simulations=None)
    X_test, Y_test = preprocessing.get_splits(sites=['hyytiala'],
                                              years=[2008],
                                              datadir=os.path.join(
                                                  data_dir, "data"),
                                              dataset="profound",
                                              simulations=None)

    if dummies:
        # Zero-pad the 7 real features up to 12 input columns.
        Xf = np.zeros((X.shape[0], 12))
        Xf_test = np.zeros((X_test.shape[0], 12))
        Xf[:, :7] = X
        X = Xf
        Xf_test[:, :7] = X_test
        X_test = Xf_test

    if random_days is not None:
        # Random subsample of training days (np.random.choice default is
        # sampling WITH replacement — TODO confirm that is intended).
        ind = np.random.choice(X.shape[0], random_days)
        X, Y = X[ind], Y[ind]

    if sparse is not None:
        # Reuse the exact indices persisted by the sparse training run so
        # this run sees the same day subset.
        ind = np.load(
            os.path.join(
                data_dir,
                f"python\outputs\models\mlp{typ}\\relu\sparse\\{sparse}\ind.npy"
            ))
        X, Y = X[ind], Y[ind]

    # Pick the grid-search result table that matches the training type.
    if typ in (6, 8):
        gridsearch_results = pd.read_csv(
            os.path.join(
                data_dir,
                "python\outputs\grid_search\simulations\\7features\grid_search_results_mlp2_np.csv"
            ))
    elif typ in (0, 9, 10, 13):
        gridsearch_results = pd.read_csv(
            os.path.join(
                data_dir,
                "python\outputs\grid_search\observations\mlp\grid_search_results_mlp2.csv"
            ))
    elif typ in (4, 11, 12, 14):
        gridsearch_results = pd.read_csv(
            os.path.join(
                data_dir,
                "python\outputs\grid_search\observations\mlp\grid_search_results_mlp2.csv"
            ))
        # These types are restricted to three-layer architectures.
        gridsearch_results = gridsearch_results[(
            gridsearch_results.nlayers == 3)].reset_index()
    elif typ in (5, 7):
        gridsearch_results = pd.read_csv(
            os.path.join(
                data_dir,
                "python\outputs\grid_search\observations\mlp\AdaptPool\\7features\grid_search_results_mlp2.csv"
            ))
    else:
        raise ValueError(f"no grid-search results mapped to typ={typ}")

    # Best configuration = row with minimal validation MAE.
    setup = gridsearch_results.iloc[
        gridsearch_results['mae_val'].idxmin()].to_dict()

    # FIX: parse the hidden-layer sizes once (previously literal_eval ran
    # twice); input/output widths come from the actual data shapes.
    hiddensize = literal_eval(setup["hiddensize"])
    dimensions = [X.shape[1]] + hiddensize + [Y.shape[1]]

    # featuresize only exists for the adaptive-pooling searches.
    featuresize = setup["featuresize"] if typ in (5, 6, 7, 8) else None

    hparams = {
        "batchsize": int(setup["batchsize"]),
        "epochs": epochs,
        "history": int(setup["history"]),
        "hiddensize": hiddensize,
        "learningrate": setup["learningrate"]
    }
    model_design = {
        "dimensions": dimensions,
        "activation": nn.ReLU,
        "featuresize": featuresize
    }
    return hparams, model_design, X, Y, X_test, Y_test
import setup.preprocessing as preprocessing
import pandas as pd
import time
from setup.dev_rf import rf_selection_parallel
import multiprocessing as mp
import itertools
import setup.utils as utils

#%% Load Data
# NOTE(review): os.path is used below, but no `import os.path` is visible in
# this chunk — verify it is imported in a not-shown part of the file.
data_dir = r"/home/fr/fr_fr/fr_mw263/scripts"

# Le Bray observations, all years 2001-2008.
X, Y = preprocessing.get_splits(
    sites=["le_bray"],
    years=[2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008],
    datadir=os.path.join(data_dir, "data"),
    dataset="profound",
    simulations=None)

#%%
# Random-forest grid-search settings.
cv_splits = [6]
shuffled = [False]
n_trees = [200, 300, 400, 500]
depth = [4, 5, 6, 7]
eval_set = None

# Full cartesian grid of the settings above; searchsize is the number of
# combinations in the first expanded column.
p_list = utils.expandgrid(cv_splits, shuffled, n_trees, depth)
searchsize = len(p_list[0])

# NOTE(review): the guarded main block is truncated in this chunk — its
# body is not visible here.
if __name__ == '__main__':
def settings(
        typ,
        years=(2001, 2002, 2003, 2004, 2005, 2006, 2007),
        data_dir=r"OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt"):
    """Load data splits and the best grid-search configuration for ``typ``.

    Args:
        typ (int): training/pretraining type; selects the grid-search table,
            whether dummy feature columns are padded in (types 5/13/14) and
            whether a featuresize (adaptive pooling) is used.
        years: training years. FIX: immutable default instead of a shared
            mutable list.
        data_dir (str): project root directory.

    Returns:
        tuple: (hparams, model_design, X, Y, X_test, Y_test). Note that
        hparams["epochs"] is deliberately None here — callers are expected
        to fill it in before training.

    Raises:
        ValueError: if ``typ`` has no grid-search table mapped to it.
            FIX: previously an unhandled typ fell through to a NameError.
    """
    X, Y = preprocessing.get_splits(sites=['hyytiala'],
                                    years=list(years),
                                    datadir=os.path.join(data_dir, "data"),
                                    dataset="profound",
                                    simulations=None)
    X_test, Y_test = preprocessing.get_splits(sites=['hyytiala'],
                                              years=[2008],
                                              datadir=os.path.join(
                                                  data_dir, "data"),
                                              dataset="profound",
                                              simulations=None)

    if typ in (5, 13, 14):
        # Zero-pad the 7 real features up to 12 input columns.
        Xf = np.zeros((X.shape[0], 12))
        Xf_test = np.zeros((X_test.shape[0], 12))
        Xf[:, :7] = X
        X = Xf
        Xf_test[:, :7] = X_test
        X_test = Xf_test

    # Pick the grid-search result table that matches the training type.
    if typ in (6, 8):
        gridsearch_results = pd.read_csv(
            os.path.join(
                data_dir,
                "python\outputs\grid_search\simulations\\7features\grid_search_results_mlp2_np.csv"
            ))
    elif typ in (0, 9, 10, 13):
        gridsearch_results = pd.read_csv(
            os.path.join(
                data_dir,
                "python\outputs\grid_search\observations\mlp\grid_search_results_mlp2.csv"
            ))
    elif typ in (4, 11, 12, 14):
        gridsearch_results = pd.read_csv(
            os.path.join(
                data_dir,
                "python\outputs\grid_search\observations\mlp\grid_search_results_mlp2.csv"
            ))
        # These types are restricted to three-layer architectures.
        gridsearch_results = gridsearch_results[(
            gridsearch_results.nlayers == 3)].reset_index()
    elif typ in (5, 7):
        gridsearch_results = pd.read_csv(
            os.path.join(
                data_dir,
                "python\outputs\grid_search\observations\mlp\AdaptPool\\7features\grid_search_results_mlp2.csv"
            ))
    else:
        raise ValueError(f"no grid-search results mapped to typ={typ}")

    # Best configuration = row with minimal validation MAE.
    setup = gridsearch_results.iloc[
        gridsearch_results['mae_val'].idxmin()].to_dict()

    # FIX: parse the hidden-layer sizes once (previously literal_eval ran
    # twice); input/output widths come from the actual data shapes.
    hiddensize = literal_eval(setup["hiddensize"])
    dimensions = [X.shape[1]] + hiddensize + [Y.shape[1]]

    # featuresize only exists for the adaptive-pooling searches.
    featuresize = setup["featuresize"] if typ in (5, 6, 7, 8) else None

    hparams = {
        "batchsize": int(setup["batchsize"]),
        "epochs": None,  # intentionally unset — the caller fills this in
        "history": int(setup["history"]),
        "hiddensize": hiddensize,
        "learningrate": setup["learningrate"]
    }
    model_design = {
        "dimensions": dimensions,
        "activation": nn.ReLU,
        "featuresize": featuresize
    }
    return hparams, model_design, X, Y, X_test, Y_test