def train_selected_simulations(model,
                               typ,
                               epochs,
                               dropout_prob,
                               simtype,
                               q,
                               featuresize,
                               index=5000,
                               splits=5,
                               change_architecture=None,
                               save=True,
                               eval_set=None,
                               data_dir=r"/home/fr/fr_fr/fr_mw263"):
    """Fit the grid-search winner of ``model`` on a subset of simulations.

    The simulations of kind ``simtype`` are loaded, the configuration with
    the lowest ``mae_val`` is read from the grid-search results of ``model``,
    optionally patched with ``change_architecture``, and handed to
    ``setup.dev_<model>.selected`` together with the first ``index`` samples.

    Args:
        model: Model name; selects both the grid-search results file
            (``grid_search_results_<model>1.csv``) and the ``setup.dev_<model>``
            module whose ``selected`` function is called.
        typ: Forwarded to ``selected`` and shown in the log message.
        epochs: Forwarded to ``selected``.
        dropout_prob: Forwarded to ``selected`` and shown in the log message.
        simtype: Name of the sub-folder of ``scripts/data/simulations`` to
            load; also forwarded to ``selected``.
        q: Not used by this function.
        featuresize: Forwarded to ``selected``.
        index: Number of leading rows of X and Y that are kept.
        splits: Forwarded to ``selected``.
        change_architecture: Optional mapping whose entries override the
            corresponding entries of the best grid-search configuration.
        save: Forwarded to ``selected``.
        eval_set: Forwarded to ``selected``.
        data_dir: Base directory containing ``scripts/`` and ``output/``.

    Returns:
        None. Whatever ``selected`` returns is discarded.
    """
    # Local import keeps this block self-contained.
    import importlib

    simulations_dir = os.path.join(
        data_dir, f"scripts/data/simulations/{simtype}")
    X, Y = preprocessing.get_simulations(data_dir=simulations_dir,
                                         drop_parameters=True)

    results = pd.read_csv(
        os.path.join(data_dir,
                     f"output/grid_search/grid_search_results_{model}1.csv"))

    # idxmin() yields an index *label*, so it has to be resolved with .loc;
    # .iloc would only pick the same row while the index is the default
    # 0..n-1 range.
    best_model = results.loc[results['mae_val'].idxmin()].to_dict()

    if change_architecture is not None:
        best_model.update(change_architecture)

    # The training routine lives in a model-specific module.
    dev = importlib.import_module(f"setup.dev_{model}")

    # Restrict training to the first `index` samples.
    X, Y = X[:index, :], Y[:index, :]

    print(f"Fitting {model} {typ} with dropout prob {dropout_prob}")
    print(f"Simulation type {simtype}")

    dev.selected(X, Y, model, typ, best_model, epochs, splits,
                 simtype, featuresize, dropout_prob,
                 os.path.join(data_dir, "output"), save, eval_set)
Exemplo n.º 2
0
import setup.preprocessing as preprocessing

import pandas as pd
import numpy as np
import time

from setup.dev_cnn import _selection_parallel
import multiprocessing as mp
import itertools
import torch.nn as nn

#%% Load Data
# Base directory below which the simulation files are looked up.
data_dir = r"/home/fr/fr_fr/fr_mw263"
# Load the "uniform_params" simulations with drop_parameters=False
# (presumably the parameter columns stay part of X -- confirm in
# preprocessing.get_simulations).
X, Y = preprocessing.get_simulations(data_dir=os.path.join(
    data_dir, r"scripts/data/simulations/uniform_params"),
                                     drop_parameters=False)
# Use only first 10 percent of data.
ind = int(np.floor(X.shape[0] / 100 * 10))
X, Y = X[:ind], Y[:ind]

#%% Grid search of hparams
# Candidate values per hyperparameter; how they are combined is decided by
# the code that consumes these lists (not visible in this section).
hiddensize = [8, 16, 32, 64, 128, 256]
batchsize = [8, 16, 32, 64, 128, 256]
learningrate = [1e-4, 1e-3, 5e-3, 1e-2]
history = [10, 14]
# Each candidate is a list with one or two channel sizes.
channels = [[10], [14], [28], [10, 20], [14, 28], [28, 52]]
kernelsize = [2, 3, 4]
# Only ReLU is searched; the class itself (not an instance) is stored.
activation = [nn.ReLU]
Exemplo n.º 3
0
def pretraining(model, typ, epochs, dropout_prob, sims_fraction, q, simtype=None, featuresize=7,
                save=True, eval_set=None, data_dir=r"/home/fr/fr_fr/fr_mw263"):
    """Pre-train a network on simulations and store its performance.

    The simulation set is chosen by ``typ``, optionally truncated to the
    first ``sims_fraction`` percent, and passed to
    ``setup.dev_<model>.train_model_CV``. Timing, hyperparameters and the
    fold-averaged performance are written to ``selected_results.csv``; the
    running losses, targets and predictions are saved as ``.npy`` files.

    Args:
        model: Model name; selects the ``setup.dev_<model>`` module and is
            part of the output path.
        typ: Selects the simulations to load and is part of the output path.
            5, 6, 9 -> ``paramsFix`` (parameters dropped);
            7 -> ``normal_params`` (parameters kept);
            8 -> ``uniform_params`` (parameters kept);
            10 -> ``uniform_params`` (parameters dropped).
        epochs: Forwarded to ``set_model_parameters``.
        dropout_prob: Forwarded to ``train_model_CV``; exactly ``0.0``
            selects the ``nodropout`` output folder, anything else ``dropout``.
        sims_fraction: Percentage of the simulations to use, or None to use
            all of them.
        q: Not used by this function.
        simtype: One out of None (for models 7 and 8) or ParamsFix,
            normal_params, uniform_params. If given, the result files are
            written to a sub-folder of this name. It does not influence which
            simulations are loaded.
        featuresize: Forwarded to ``set_model_parameters``.
        save: Forwarded to ``train_model_CV``.
        eval_set: Forwarded to ``train_model_CV``.
        data_dir: Base directory containing ``scripts/`` and ``output/``.

    Returns:
        None.

    Raises:
        ValueError: If ``typ`` is not one of the supported values.
    """
    # Local import keeps this block self-contained.
    import importlib

    # typ -> (simulation sub-folder, drop_parameters flag)
    simulation_sources = {
        5: ("paramsFix", True),
        6: ("paramsFix", True),
        7: ("normal_params", False),
        8: ("uniform_params", False),
        9: ("paramsFix", True),
        10: ("uniform_params", True),
    }
    try:
        sim_folder, drop_parameters = simulation_sources[typ]
    except (KeyError, TypeError):
        # Fail early and clearly instead of hitting an unbound X further down.
        raise ValueError(
            f"Unsupported typ {typ!r}; expected one of {sorted(simulation_sources)}"
        ) from None

    # Load the simulations
    X, Y = preprocessing.get_simulations(
        data_dir=os.path.join(data_dir, f"scripts/data/simulations/{sim_folder}"),
        drop_parameters=drop_parameters)

    # Use full simulations or only parts of it?
    if sims_fraction is not None:
        print("Using only", sims_fraction, "% of the Simulations")
        ind = int(np.floor(X.shape[0]/100*sims_fraction))
        X, Y = X[:ind], Y[:ind]

    # Set the hyperparameters and architecture of the network from preselection.
    hparams, model_design = set_model_parameters(model, typ, epochs, featuresize, X, Y)

    # Directory where fitted network is saved to
    dropout_folder = "nodropout" if dropout_prob == 0.0 else "dropout"
    data_dir = os.path.join(data_dir, f"output/models/{model}{typ}/{dropout_folder}")
    fraction_label = 100 if sims_fraction is None else sims_fraction
    data_dir = os.path.join(data_dir, f"sims_frac{fraction_label}")

    # Load script from which to use train_model_CV
    dev = importlib.import_module(f"setup.dev_{model}")

    # Train model
    start = time.time()
    running_losses, performance, y_tests, y_preds = dev.train_model_CV(
        hparams, model_design, X, Y, eval_set, dropout_prob, data_dir, save)
    end = time.time()

    # Save: Results
    if simtype is not None:
        data_dir = os.path.join(data_dir, f"{simtype}")
    # The simtype sub-folder is only composed after training, so make sure the
    # target exists rather than losing the results of a finished run.
    os.makedirs(data_dir, exist_ok=True)

    # performance returns: rmse_train, rmse_test, mae_train, mae_test in this order.
    performance = np.mean(np.array(performance), axis=0)
    rets = [(end-start),
            hparams["hiddensize"], hparams["batchsize"], hparams["learningrate"],
            hparams["history"], model_design["activation"],
            performance[0], performance[1], performance[2], performance[3]]
    results = pd.DataFrame(
        [rets],
        columns=["execution_time", "hiddensize", "batchsize", "learningrate",
                 "history", "activation",
                 "rmse_train", "rmse_val", "mae_train", "mae_val"])
    results.to_csv(os.path.join(data_dir, r"selected_results.csv"), index=False)

    # Save: Running losses, ytests and ypreds.
    np.save(os.path.join(data_dir, "running_losses.npy"), running_losses)
    np.save(os.path.join(data_dir, "y_tests.npy"), y_tests)
    np.save(os.path.join(data_dir, "y_preds.npy"), y_preds)
    
Exemplo n.º 4
0
import torch.nn as nn
import setup.preprocessing as preprocessing
import visualizations
import numpy as np
#%%
# Project folder, given relative to the current working directory. Written as
# a raw string so the Windows backslashes are not parsed as (invalid) escape
# sequences such as "\D" or "\S"; the resulting value is unchanged.
data_dir = r"OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt"
#%%
# Splits of the "profound" dataset for site le_bray, years 2001 and 2003,
# requested without simulations.
X, Y = preprocessing.get_splits(sites=['le_bray'],
                                years=[2001, 2003],
                                datadir=os.path.join(data_dir, "data"),
                                dataset="profound",
                                simulations=None)

#%%
# Simulations from the "uniform_params" folder, loaded with
# drop_parameters=True.
X_sims, Y_sims = preprocessing.get_simulations(data_dir=os.path.join(
    data_dir, r"data\simulations\uniform_params"),
                                               drop_parameters=True)

#%%
# Training hyperparameters passed on to the model code.
hparams = {
    "batchsize": 256,
    "epochs": 1000,
    "history": 7,
    "hiddensize": 128,
    "learningrate": 0.01
}
model_design = {
    "dimensions": [X.shape[1], 128, Y.shape[1]],
    "activation": nn.ReLU,
    "channels": [14, 28],
    "kernelsize": 3
import setup.preprocessing as preprocessing
import visualizations
#%%
# Project folder, given relative to the current working directory. Written as
# a raw string so the Windows backslashes are not parsed as (invalid) escape
# sequences such as "\D" or "\S"; the resulting value is unchanged.
data_dir = r"OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt"
# Splits of the "profound" dataset for site bily_kriz, years 2001 and
# 2003-2006.
X, Y = preprocessing.get_splits(sites = ['bily_kriz'],
                                years = [2001,2003, 2004, 2005, 2006],
                                datadir = os.path.join(data_dir, "data"), 
                                dataset = "profound",
                                simulations = None)
# Same site, year 2008 only, loaded separately from the years above.
X_test, Y_test = preprocessing.get_splits(sites = ['bily_kriz'],
                               years = [2008],
                                datadir = os.path.join(data_dir, "data"), 
                                dataset = "profound",
                                simulations = None)
# Simulations from the "uniform_params" folder.
X_sims, Y_sims = preprocessing.get_simulations(os.path.join(data_dir, r"data\simulations\uniform_params"),
                                     to_numpy = True,
                                     DOY=False,
                                     standardized=True)
# Two non-overlapping slices of the simulations: rows 0-1999 and rows
# 3000-3299. The second slice is used as the evaluation set below.
X_sims_ss, Y_sims_ss = X_sims[:2000,], Y_sims[:2000,]
X_sims_ss_test, Y_sims_ss_test = X_sims[3000:3300,], Y_sims[3000:3300,]
#%%
# Training hyperparameters passed on to the model code.
hparams = {"batchsize": 256, 
           "epochs":3000, 
           "history":2, 
           "hiddensize":[32],
           "learningrate":0.01}
# Layer sizes run from the number of X columns over 32 to the number of Y
# columns.
model_design = {"dimensions": [X.shape[1], 32, Y.shape[1]],
                "activation": nn.ReLU,
                "featuresize": 7}

# Alternative evaluation set: the bily_kriz 2008 split loaded above.
#eval_set = {"X_test":X_test, "Y_test":Y_test}
eval_set = {"X_test":X_sims_ss_test, "Y_test":Y_sims_ss_test}