def featureExtractorA(model, typ, epochs, simsfrac, splits=5,
                      data_dir=r"OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt"):
    """Evaluate pretrained networks directly on the target data (setting A)."""
    hparams, model_design, X, Y, X_test, Y_test = settings(model, epochs, data_dir)

    X = torch.tensor(X).type(dtype=torch.float)
    X_test = torch.tensor(X_test).type(dtype=torch.float)

    predictions = []
    mae_train = []
    rmse_train = []
    mae_val = []
    rmse_val = []

    for i in range(splits):
        model = models.MLPmod(model_design["featuresize"],
                              model_design["dimensions"],
                              model_design["activation"])
        model.load_state_dict(torch.load(os.path.join(
            data_dir,
            rf"python\outputs\models\mlp{typ}\nodropout\sims_frac{simsfrac}\model{i}.pth")))
        model.eval()  # inference mode: disables dropout layers if present

        preds_test = model(X_test).detach().numpy()
        preds_train = model(X).detach().numpy()

        mae_val.append(metrics.mean_absolute_error(Y_test, preds_test))
        rmse_val.append(utils.rmse(Y_test, preds_test))
        mae_train.append(metrics.mean_absolute_error(Y, preds_train))
        rmse_train.append(utils.rmse(Y, preds_train))

        predictions.append(preds_test)

    errors = [rmse_train, rmse_val, mae_train, mae_val]

    return predictions, errors, Y_test
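# Usage sketch for featureExtractorA, not part of the pipeline; the typ and
# simsfrac values below are assumptions for illustration only:
# predictions, errors, Y_test = featureExtractorA("mlp", typ=7, epochs=None,
#                                                 simsfrac=30)
# errors stacks [rmse_train, rmse_val, mae_train, mae_val], one value per split.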
def train_model_CV(hparams, model_design, X, Y, eval_set, dropout_prob,
                   dropout, data_dir, save, splits=5):
    """Train the network with k-fold cross-validation, tracking per-epoch errors."""
    epochs = hparams["epochs"]
    featuresize = model_design["featuresize"]

    kf = KFold(n_splits=splits, shuffle=False)
    kf.get_n_splits(X)

    rmse_train = np.zeros((splits, epochs))
    rmse_val = np.zeros((splits, epochs))
    mae_train = np.zeros((splits, epochs))
    mae_val = np.zeros((splits, epochs))

    # z-score data
    #X_mean, X_std = np.mean(X), np.std(X)
    #X = utils.minmax_scaler(X)

    if eval_set is not None:
        print("Test set used for model evaluation")
        Xt_test = eval_set["X_test"]
        yt_test = eval_set["Y_test"]
        #Xt_test = utils.minmax_scaler(Xt_test, scaling=[X_mean, X_std])
        yt_test = torch.tensor(yt_test).type(dtype=torch.float)
        Xt_test = torch.tensor(Xt_test).type(dtype=torch.float)

    i = 0
    performance = []
    y_tests = []
    y_preds = []

    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = Y[train_index], Y[test_index]

        X_test = torch.tensor(X_test).type(dtype=torch.float)
        y_test = torch.tensor(y_test).type(dtype=torch.float)
        X_train = torch.tensor(X_train).type(dtype=torch.float)
        y_train = torch.tensor(y_train).type(dtype=torch.float)

        if featuresize is None:
            model = models.MLP(model_design["dimensions"],
                               model_design["activation"])
        else:
            model = models.MLPmod(featuresize, model_design["dimensions"],
                                  model_design["activation"], dropout_prob,
                                  dropout)

        optimizer = optim.Adam(model.parameters(), lr=hparams["learningrate"])
        criterion = nn.MSELoss()

        for epoch in range(epochs):
            # Training
            model.train()
            x, y = utils.create_batches(X_train, y_train, hparams["batchsize"],
                                        hparams["history"])
            x = torch.tensor(x).type(dtype=torch.float)
            y = torch.tensor(y).type(dtype=torch.float)

            output = model(x)

            # Compute training loss
            loss = criterion(output, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Evaluate the current model on the validation/test set
            model.eval()
            with torch.no_grad():
                pred_train = model(X_train)
                if eval_set is None:
                    pred_test = model(X_test)
                    rmse_train[i, epoch] = utils.rmse(y_train, pred_train)
                    rmse_val[i, epoch] = utils.rmse(y_test, pred_test)
                    mae_train[i, epoch] = metrics.mean_absolute_error(y_train, pred_train)
                    mae_val[i, epoch] = metrics.mean_absolute_error(y_test, pred_test)
                else:
                    pred_test = model(Xt_test)
                    rmse_train[i, epoch] = utils.rmse(y_train, pred_train)
                    rmse_val[i, epoch] = utils.rmse(yt_test, pred_test)
                    mae_train[i, epoch] = metrics.mean_absolute_error(y_train, pred_train)
                    mae_val[i, epoch] = metrics.mean_absolute_error(yt_test, pred_test)

            if save and epoch % 1000 == 0:
                print("Epoch", epoch, ": Saving model to path.")
                torch.save(model.state_dict(),
                           os.path.join(data_dir, f"model{i}.pth"))

        # Predict with the fitted model
        with torch.no_grad():
            preds_train = model(X_train)
            if eval_set is None:
                preds_test = model(X_test)
                performance.append([
                    utils.rmse(y_train, preds_train),
                    utils.rmse(y_test, preds_test),
                    metrics.mean_absolute_error(y_train, preds_train.numpy()),
                    metrics.mean_absolute_error(y_test, preds_test.numpy())
                ])
            else:
                preds_test = model(Xt_test)
                performance.append([
                    utils.rmse(y_train, preds_train),
                    utils.rmse(yt_test, preds_test),
                    metrics.mean_absolute_error(y_train, preds_train.numpy()),
                    metrics.mean_absolute_error(yt_test, preds_test.numpy())
                ])

        if eval_set is None:
            y_tests.append(y_test.numpy())
        else:
            y_tests.append(yt_test.numpy())
        y_preds.append(preds_test.numpy())

        i += 1

    running_losses = {"rmse_train": rmse_train,
                      "mae_train": mae_train,
                      "rmse_val": rmse_val,
                      "mae_val": mae_val}

    return running_losses, performance, y_tests, y_preds
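# Usage sketch for train_model_CV; the hyperparameter values and layer sizes
# below are hypothetical stand-ins for what settings() would return:
# hparams = {"epochs": 1000, "learningrate": 1e-3, "batchsize": 64, "history": 0}
# model_design = {"featuresize": None,  # None selects the plain MLP branch
#                 "dimensions": [12, 64, 16, 1],
#                 "activation": nn.ReLU}
# running_losses, performance, y_tests, y_preds = train_model_CV(
#     hparams, model_design, X, Y, eval_set=None, dropout_prob=0.0,
#     dropout=False, data_dir="models", save=False)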
def training_CV(hparams, model_design, X, Y, feature_extraction, eval_set,
                featuresize, data_dir, save, splits=5):
    """Fine-tune a pretrained network with k-fold cross-validation.

    If feature_extraction is given, all parameters not listed there are frozen.
    """
    epochs = hparams["epochs"]

    kf = KFold(n_splits=splits, shuffle=False)
    kf.get_n_splits(X)

    rmse_train = np.zeros((splits, epochs))
    rmse_val = np.zeros((splits, epochs))
    mae_train = np.zeros((splits, epochs))
    mae_val = np.zeros((splits, epochs))

    # z-score data
    #X_mean, X_std = np.mean(X), np.std(X)
    #X = utils.minmax_scaler(X)

    if eval_set is not None:
        print("Test set used for model evaluation")
        Xt_test = eval_set["X_test"]
        #Xt_test = utils.minmax_scaler(Xt_test, scaling=[X_mean, X_std])
        yt_test = eval_set["Y_test"]
        yt_test = torch.tensor(yt_test).type(dtype=torch.float)
        Xt_test = torch.tensor(Xt_test).type(dtype=torch.float)

    yt_tests = []
    i = 0
    performance = []
    y_tests = []
    y_preds = []

    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = Y[train_index], Y[test_index]

        X_test = torch.tensor(X_test).type(dtype=torch.float)
        y_test = torch.tensor(y_test).type(dtype=torch.float)
        X_train = torch.tensor(X_train).type(dtype=torch.float)
        y_train = torch.tensor(y_train).type(dtype=torch.float)

        if isinstance(model_design, dict):
            print("Loading pretrained Model.")
            model = models.MLPmod(featuresize, model_design["dimensions"],
                                  model_design["activation"])
            model.load_state_dict(
                torch.load(os.path.join(data_dir, f"model{i}.pth")))
        else:
            model = model_design
        model.eval()

        if feature_extraction is not None:
            print("Freezing all weights.")
            for child in model.children():
                for name, parameter in child.named_parameters():
                    if name not in feature_extraction:
                        parameter.requires_grad = False

        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=hparams["learningrate"])

        for epoch in range(epochs):
            # Training
            model.train()
            x, y = utils.create_batches(X_train, y_train, hparams["batchsize"],
                                        hparams["history"])
            x = torch.tensor(x).type(dtype=torch.float)
            y = torch.tensor(y).type(dtype=torch.float)

            output = model(x)

            # Compute training loss
            loss = criterion(output, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Evaluate the current model on the validation/test set
            model.eval()
            with torch.no_grad():
                pred_train = model(X_train)
                if eval_set is None:
                    pred_test = model(X_test)
                    rmse_train[i, epoch] = utils.rmse(y_train, pred_train)
                    rmse_val[i, epoch] = utils.rmse(y_test, pred_test)
                    mae_train[i, epoch] = metrics.mean_absolute_error(y_train, pred_train)
                    mae_val[i, epoch] = metrics.mean_absolute_error(y_test, pred_test)
                else:
                    pred_test = model(Xt_test)
                    rmse_train[i, epoch] = utils.rmse(y_train, pred_train)
                    rmse_val[i, epoch] = utils.rmse(yt_test, pred_test)
                    mae_train[i, epoch] = metrics.mean_absolute_error(y_train, pred_train)
                    mae_val[i, epoch] = metrics.mean_absolute_error(yt_test, pred_test)

        # Predict with the fitted model
        with torch.no_grad():
            preds_train = model(X_train)
            if eval_set is None:
                preds_test = model(X_test)
                performance.append([
                    utils.rmse(y_train, preds_train),
                    utils.rmse(y_test, preds_test),
                    metrics.mean_absolute_error(y_train, preds_train.numpy()),
                    metrics.mean_absolute_error(y_test, preds_test.numpy())
                ])
            else:
                preds_test = model(Xt_test)
                performance.append([
                    utils.rmse(y_train, preds_train),
                    utils.rmse(yt_test, preds_test),
                    metrics.mean_absolute_error(y_train, preds_train.numpy()),
                    metrics.mean_absolute_error(yt_test, preds_test.numpy())
                ])

        if save:
            if feature_extraction is not None:
                torch.save(model.state_dict(),
                           os.path.join(data_dir, rf"tuned\setting1\model{i}.pth"))
            else:
                torch.save(model.state_dict(),
                           os.path.join(data_dir, rf"tuned\setting0\model{i}.pth"))

        y_tests.append(y_test.numpy())
        if eval_set is not None:
            yt_tests.append(yt_test.numpy())  # collect held-out targets per split
        y_preds.append(preds_test.numpy())

        i += 1

    running_losses = {"rmse_train": rmse_train,
                      "mae_train": mae_train,
                      "rmse_val": rmse_val,
                      "mae_val": mae_val}

    if eval_set is None:
        return running_losses, performance, y_tests, y_preds
    else:
        return running_losses, performance, yt_tests, y_preds
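# Usage sketch for training_CV; the parameter names in feature_extraction are
# hypothetical and depend on the actual MLPmod layer names:
# running_losses, performance, y_tests, y_preds = training_CV(
#     hparams, model_design, X, Y,
#     feature_extraction=["hidden2.weight", "hidden2.bias"],  # assumed names
#     eval_set=None, featuresize=7, data_dir="models", save=False)
# With feature_extraction set, only the listed parameters keep
# requires_grad=True; everything else stays frozen during fine-tuning.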
def featureExtractorD(model, typ, epochs, simsfrac, splits=5,
                      data_dir=r"OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt"):
    """Extract features with a pretrained network and train a new MLP head on them."""
    hparams, model_design, X, Y, X_test, Y_test = settings("mlp", None, data_dir)
    hparams_add, model_design_add, X, Y, X_test, Y_test = settings(
        "mlp", epochs, data_dir, sims=False)

    X = torch.tensor(X).type(dtype=torch.float)
    X_test = torch.tensor(X_test).type(dtype=torch.float)

    errors = []
    preds_tests = []

    for i in range(splits):
        # Load pretrained model
        model = models.MLPmod(model_design["featuresize"],
                              model_design["dimensions"],
                              model_design["activation"])
        model.load_state_dict(torch.load(os.path.join(
            data_dir,
            rf"python\outputs\models\mlp{typ}\nodropout\sims_frac{simsfrac}\model{i}.pth")))

        # Remove the final layer of the classifier.
        model.classifier = nn.Sequential(*list(model.classifier.children())[:-1])

        # Extract features.
        out_train = model(X).detach().numpy()
        out_test = model(X_test).detach().numpy()

        # Specify the input dimension of the model to train (architecture 2).
        # Insert only once, or the dimensions list grows with every split.
        if i == 0:
            model_design_add["dimensions"].insert(0, out_train.shape[1])

        # Train an MLP with the extracted features as input, predicting Y.
        running_losses, pred_test = train_model(hparams_add, model_design_add,
                                                out_train, Y, out_test, Y_test, i)

        # Evaluate the model (reload it).
        model = models.MLP(model_design_add["dimensions"],
                           model_design_add["activation"])
        model.load_state_dict(torch.load(os.path.join(
            data_dir,
            rf"python\outputs\models\mlp7\nodropout\sims_frac30\tuned\setting2\model{i}.pth")))

        preds_test = model(torch.tensor(out_test).type(dtype=torch.float)).detach().numpy()
        preds_train = model(torch.tensor(out_train).type(dtype=torch.float)).detach().numpy()

        errors.append([utils.rmse(Y, preds_train),
                       utils.rmse(Y_test, preds_test),
                       metrics.mean_absolute_error(Y, preds_train),
                       metrics.mean_absolute_error(Y_test, preds_test)])
        preds_tests.append(preds_test)

    return running_losses, errors, preds_tests
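# Usage sketch for featureExtractorD; typ, epochs and simsfrac are assumed
# values. The pretrained trunk maps inputs to features, and train_model fits a
# fresh MLP head on those features:
# running_losses, errors, preds_tests = featureExtractorD("mlp", typ=7,
#                                                         epochs=500, simsfrac=30)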
def featureExtractorC(model, typ, epochs, simsfrac, classifier="ols", splits=5,
                      data_dir=r"OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt"):
    """Fit a simple readout (OLS, GLM or NNLS) on top of frozen network features."""
    hparams, model_design, X, Y, X_test, Y_test = settings(model, epochs, data_dir)

    X = torch.tensor(X).type(dtype=torch.float)
    X_test = torch.tensor(X_test).type(dtype=torch.float)

    predictions_train = []
    predictions_test = []

    for i in range(splits):
        model = models.MLPmod(model_design["featuresize"],
                              model_design["dimensions"],
                              model_design["activation"])
        model.load_state_dict(torch.load(os.path.join(
            data_dir,
            rf"python\outputs\models\mlp{typ}\nodropout\sims_frac{simsfrac}\model{i}.pth")))

        # Remove the final layer and activation.
        model.classifier = nn.Sequential(*list(model.classifier.children())[:-1])

        out_train = model(X).detach().numpy()
        out_train = sm.add_constant(out_train)  # Add intercept.
        out_test = model(X_test).detach().numpy()
        out_test = sm.add_constant(out_test)  # Add intercept.

        if classifier == "ols":
            extractor = sm.OLS(Y, out_train)
            results = extractor.fit()
            predictions_train.append(np.expand_dims(results.predict(), axis=1))
            predictions_test.append(np.expand_dims(results.predict(out_test), axis=1))
        elif classifier == "glm":
            print("Fitting glm with Inverse Gaussian family and log-link.")
            extractor = sm.GLM(Y, out_train,
                               family=sm.families.InverseGaussian(sm.families.links.log()))
            results = extractor.fit()
            predictions_train.append(np.expand_dims(results.predict(), axis=1))
            predictions_test.append(np.expand_dims(results.predict(out_test), axis=1))
        elif classifier == "nnls":
            theta = np.expand_dims(nnls(out_train, Y[:, 0])[0], axis=1)
            predictions_train.append(np.dot(out_train, theta))
            predictions_test.append(np.dot(out_test, theta))
        else:
            print("Unknown classifier.")

    mae_train = [metrics.mean_absolute_error(Y, sublist)
                 for sublist in predictions_train]
    mae_val = [metrics.mean_absolute_error(Y_test, sublist)
               for sublist in predictions_test]
    rmse_train = [utils.rmse(Y, sublist) for sublist in predictions_train]
    rmse_val = [utils.rmse(Y_test, sublist) for sublist in predictions_test]

    errors = [rmse_train, rmse_val, mae_train, mae_val]

    return predictions_test, errors
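# Usage sketch for featureExtractorC; the argument values are assumptions. The
# frozen network acts as a fixed feature map and the chosen readout (here NNLS,
# i.e. least squares with non-negative coefficients) is fit on its outputs:
# predictions_test, errors = featureExtractorC("mlp", typ=7, epochs=None,
#                                              simsfrac=30, classifier="nnls")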
import os.path
import numpy as np

#%% Load data: Profound in and out.
datadir = r"OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt"
X, Y = preprocessing.get_splits(sites=['hyytiala'],
                                years=[2001, 2002, 2003, 2004, 2005, 2006],
                                datadir=os.path.join(datadir, "data"),
                                dataset="profound",
                                simulations=None)

#%%
pretrained_model = visualizations.losses("mlp", 7, "")
running_losses, performance, y_tests, y_preds = finetuning.finetune(
    X, Y, epochs=100, model="mlp", pretrained_type=7)

#%%
visualizations.plot_running_losses(running_losses["mae_train"],
                                   running_losses["mae_val"], "", "mlp")
print(np.mean(np.array(performance), axis=0))

res_mlp = visualizations.losses("mlp", 0, "")

#%%
import setup.models as models
import torch
import torch.nn as nn

model = models.MLPmod(7, [64, 64, 16, 1], nn.ReLU)
model.load_state_dict(
    torch.load(os.path.join(datadir, r"python\outputs\models\mlp6\model0.pth")))
def featureExtractorA(typ, epochs, simsfrac, dummies, sparse=None,
                      years=[2001, 2002, 2003, 2004, 2005, 2006, 2007],
                      random_days=None, splits=5,
                      data_dir=r"OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt"):
    """Evaluate pretrained networks of the given type directly on the target data."""
    hparams, model_design, X, Y, X_test, Y_test = settings(
        typ, epochs, data_dir, dummies, sparse, years=years,
        random_days=random_days)

    if typ in (4, 9, 10, 12, 13, 14):
        model_design["featuresize"] = None

    X = torch.tensor(X).type(dtype=torch.float)
    X_test = torch.tensor(X_test).type(dtype=torch.float)

    predictions = []
    mae_train = []
    rmse_train = []
    mae_val = []
    rmse_val = []

    for i in range(splits):
        if typ in (4, 9, 10, 11, 12, 13, 14):
            model = models.MLP(model_design["dimensions"],
                               model_design["activation"])
            model.load_state_dict(torch.load(os.path.join(
                data_dir,
                rf"python\outputs\models\mlp{typ}\nodropout\sims_frac{simsfrac}\model{i}.pth")))
        elif typ == 7:
            print("load for model", typ)
            model = models.MLPmod(model_design["featuresize"],
                                  model_design["dimensions"],
                                  model_design["activation"])
            model.load_state_dict(torch.load(os.path.join(
                data_dir,
                rf"python\outputs\models\mlp{typ}\nodropout\sims_frac{simsfrac}\model{i}.pth")))
        elif typ == 5:
            print("load for model", typ)
            model = models.MLPmod(model_design["featuresize"],
                                  model_design["dimensions"],
                                  model_design["activation"])
            model.load_state_dict(torch.load(os.path.join(
                data_dir,
                rf"python\outputs\models\mlp{typ}\nodropout\dummies\sims_frac{simsfrac}\model{i}.pth")))

        preds_test = model(X_test).detach().numpy()
        preds_train = model(X).detach().numpy()

        mae_val.append(metrics.mean_absolute_error(Y_test, preds_test))
        rmse_val.append(utils.rmse(Y_test, preds_test))
        mae_train.append(metrics.mean_absolute_error(Y, preds_train))
        rmse_train.append(utils.rmse(Y, preds_train))

        predictions.append(preds_test)

    errors = [rmse_train, rmse_val, mae_train, mae_val]

    return predictions, errors
for mod in mods:
    hparams, model_design, X, Y, X_test, Y_test = finetuning.settings(
        "mlp", mod, None, data_dir, dummies)

    X_test = torch.tensor(X_test).type(dtype=torch.float)
    y_test = torch.tensor(Y_test).type(dtype=torch.float)
    X_train = torch.tensor(X).type(dtype=torch.float)
    y_train = torch.tensor(Y).type(dtype=torch.float)

    if (mod > 7) or (mod == 0) or (mod == 4):
        model = models.MLP(model_design["dimensions"],
                           model_design["activation"])
    else:
        model = models.MLPmod(model_design["featuresize"],
                              model_design["dimensions"],
                              model_design["activation"])

    model.load_state_dict(torch.load(os.path.join(
        data_dir, rf"python\outputs\models\mlp{mod}\relu\model0.pth")))

    pytorch_total_params = sum(p.numel() for p in model.parameters())

    rmse_train = []
    rmse_val = []
    mae_train = []
    mae_val = []

    for i in range(5):
        model.load_state_dict(