def random_cv(cv_index, cv_year, roothpath, param_grid, num_random, model_name,
              device, one_day):
    """Tune the hyperparameters of one model through random search.

    In each of ``num_random`` rounds one value per hyperparameter is drawn
    from ``param_grid``, a model is fitted on the training set and its
    prediction on the validation set is scored with the RMSE and the mean of
    ``compute_cosine`` over the validation rows. The scores, the sampled
    hyperparameters and the training histories are written with
    ``save_results`` to
    ``<roothpath>cv_results_test/cv_results_<model_name>_<cv_year>_<cv_index>.pkl``.

    Args:
        cv_index: the month of the validation set
        cv_year: the year of the validation set
        roothpath: the path where the training-validation sets are saved. It
            is used as a plain string prefix. The misspelled name is kept so
            that existing keyword callers keep working.
        param_grid: a dictionary holding the grid of hyperparameters. Every
            entry is an indexable collection of candidate values, except
            ``'ci_dim'`` (EncoderFNN_AllSeq_AR_CI) which is used as is.
            CNN_LSTM reads the keys ``'module__stride'``,
            ``'module__kernel_size'``, ``'module__hidden_dim'``,
            ``'module__num_lstm_layers'``, ``'module__num_epochs'`` and
            ``'lr'``.
        num_random: the number of sets of hyperparameters to evaluate (tune)
        model_name: a string representing the name of a model
        device: indicates if the model should be run on cpu or gpu
        one_day: True or False, indicating if only the most recent available
            day is used for training a model (XGBoost, Lasso or FNN)

    Raises:
        ValueError: if ``model_name`` is not one of the supported models.
    """
    sequence_models = ('EncoderDecoder', 'EncoderFNN', 'EncoderFNN_AllSeq')
    ar_models = ('EncoderFNN_AllSeq_AR', 'EncoderFNN_AllSeq_AR_CI')
    flat_models = ('XGBoost', 'Lasso', 'FNN')
    cnn_models = ('CNN_FNN', 'CNN_LSTM')
    # Fail before any data is loaded instead of crashing later on an
    # undefined prediction.
    if model_name not in (sequence_models + ar_models + flat_models
                          + cnn_models):
        raise ValueError(
            'the model name is not in the list: {!r}'.format(model_name))

    # The parameter is spelled ``roothpath`` in the signature; bind the
    # intended name so the argument is actually the one being used.
    rootpath = roothpath

    def data_file(stem):
        """Return the path of the data file ``stem`` of this CV split."""
        return rootpath + '{}_{}_forecast{}.pkl'.format(
            stem, cv_year, cv_index)

    def pick(candidates):
        """Draw one entry of ``candidates`` using the file's ``randint``."""
        return candidates[randint(0, len(candidates) - 1)]

    # load data
    if model_name in cnn_models:
        train_X = joblib.load(data_file('train_X_map'))
        valid_X = joblib.load(data_file('val_X_map'))
    else:
        train_X = load_results(data_file('train_X_pca'))
        valid_X = load_results(data_file('val_X_pca'))
        # The CNN models derive their input dim per draw inside the loop.
        input_dim = train_X.shape[-1]
    train_y = load_results(data_file('train_y_pca'))
    valid_y = load_results(data_file('val_y_pca'))
    output_dim = train_y.shape[-1]

    # model specific data preparation and hyperparameter grids
    if model_name in ar_models:
        hidden_dim = param_grid['hidden_dim']
        num_layers = param_grid['num_layers']
        lr = param_grid['learning_rate']
        threshold = param_grid['threshold']
        num_epochs = param_grid['num_epochs']
        seq_len = param_grid['seq_len']
        linear_dim = param_grid['linear_dim']
        drop_out = param_grid['drop_out']
        if model_name == 'EncoderFNN_AllSeq_AR_CI':
            ci_dim = param_grid['ci_dim']
        train_y_ar = load_results(data_file('train_y_pca_ar'))
        valid_y_ar = load_results(data_file('val_y_pca_ar'))
        train_dataset = model.MapDataset_ar(train_X, train_y_ar, train_y)
        train_loader = DataLoader(dataset=train_dataset,
                                  batch_size=512,
                                  shuffle=False)
    elif model_name in sequence_models:
        train_dataset = model.MapDataset(train_X, train_y)
        train_loader = DataLoader(dataset=train_dataset,
                                  batch_size=512,
                                  shuffle=False)
        hidden_dim = param_grid['hidden_dim']
        num_layers = param_grid['num_layers']
        lr = param_grid['learning_rate']
        threshold = param_grid['threshold']
        num_epochs = param_grid['num_epochs']
        if model_name == 'EncoderDecoder':
            decoder_len = param_grid['decoder_len']
        elif model_name == 'EncoderFNN':
            last_layer = param_grid['last_layer']
            seq_len = param_grid['seq_len']
        else:  # EncoderFNN_AllSeq
            seq_len = param_grid['seq_len']
            linear_dim = param_grid['linear_dim']
            drop_out = param_grid['drop_out']
    elif model_name in flat_models:
        if one_day is True:
            train_X = train_X[:, -1, :]  # one day
            valid_X = valid_X[:, -1, :]  # one day
        train_X = np.reshape(train_X, (train_X.shape[0], -1))
        valid_X = np.reshape(valid_X, (valid_X.shape[0], -1))
        if model_name == 'XGBoost':
            max_depth = param_grid['max_depth']
            colsample_bytree = param_grid['colsample_bytree']
            gamma = param_grid['gamma']
            n_estimators = param_grid['n_estimators']
            lr = param_grid['learning_rate']
        elif model_name == 'Lasso':
            alphas = param_grid['alpha']
        else:  # FNN
            # The network consumes the flattened features, so its input dim
            # has to be read after the reshape (as forecast_dl does).
            input_dim = train_X.shape[-1]
            train_dataset = model.MapDataset(train_X, train_y)
            train_loader = DataLoader(dataset=train_dataset,
                                      batch_size=512,
                                      shuffle=False)
            hidden_dim = param_grid['hidden_dim']
            num_layers = param_grid['num_layers']
    elif model_name == 'CNN_FNN':
        train_dataset = model.MapDataset_CNN(train_X, train_y)
        train_loader = DataLoader(dataset=train_dataset,
                                  batch_size=50,
                                  shuffle=False)
        stride = param_grid['stride']
        kernel_size = param_grid['kernel_size']
        hidden_dim = param_grid['hidden_dim']
        num_layers = param_grid['num_layers']
    else:  # CNN_LSTM
        train_dataset = model.MapDataset_CNN(train_X, train_y)
        train_loader = DataLoader(dataset=train_dataset,
                                  batch_size=50,
                                  shuffle=False)
        stride = param_grid['module__stride']
        kernel_size = param_grid['module__kernel_size']
        hidden_dim = param_grid['module__hidden_dim']
        num_lstm_layers = param_grid['module__num_lstm_layers']
        lr = param_grid['lr']
        num_epochs = param_grid['module__num_epochs']

    history_all = []
    score = []
    parameter_all = []
    for _ in range(num_random):
        # Sample one configuration and build the model. The draws of every
        # branch are made in a fixed order so seeded runs stay reproducible.
        if model_name == 'EncoderDecoder':
            curr_hidden_dim = pick(hidden_dim)
            curr_num_layer = pick(num_layers)
            curr_decoder_len = pick(decoder_len)
            curr_threshold = pick(threshold)
            curr_lr = pick(lr)
            curr_num_epochs = pick(num_epochs)
            parameters = {
                'hidden_dim': curr_hidden_dim,
                'num_layers': curr_num_layer,
                'decoder_len': curr_decoder_len,
                'threshold': curr_threshold,
                'learning_rate': curr_lr,
                'num_epochs': curr_num_epochs
            }
            mdl = model.EncoderDecoder(input_dim=input_dim,
                                       output_dim=output_dim,
                                       hidden_dim=curr_hidden_dim,
                                       num_layers=curr_num_layer,
                                       learning_rate=curr_lr,
                                       decoder_len=curr_decoder_len,
                                       threshold=curr_threshold,
                                       num_epochs=curr_num_epochs)
        elif model_name == 'EncoderFNN':
            curr_hidden_dim = pick(hidden_dim)
            curr_num_layer = pick(num_layers)
            curr_seq_len = pick(seq_len)
            curr_threshold = pick(threshold)
            curr_lr = pick(lr)
            curr_num_epochs = pick(num_epochs)
            curr_last_layer = pick(last_layer)
            parameters = {
                'hidden_dim': curr_hidden_dim,
                'num_layers': curr_num_layer,
                'last_layer': curr_last_layer,
                'threshold': curr_threshold,
                'learning_rate': curr_lr,
                'num_epochs': curr_num_epochs,
                'seq_len': curr_seq_len
            }
            mdl = model.EncoderFNN(input_dim=input_dim,
                                   output_dim=output_dim,
                                   hidden_dim=curr_hidden_dim,
                                   num_layers=curr_num_layer,
                                   last_layer=curr_last_layer,
                                   seq_len=curr_seq_len,
                                   learning_rate=curr_lr,
                                   threshold=curr_threshold,
                                   num_epochs=curr_num_epochs)
        elif model_name == 'EncoderFNN_AllSeq' or model_name in ar_models:
            curr_hidden_dim = pick(hidden_dim)
            curr_num_layer = pick(num_layers)
            curr_seq_len = pick(seq_len)
            curr_threshold = pick(threshold)
            curr_lr = pick(lr)
            curr_num_epochs = pick(num_epochs)
            curr_linear_dim = pick(linear_dim)
            curr_drop_out = pick(drop_out)
            parameters = {
                'hidden_dim': curr_hidden_dim,
                'num_layers': curr_num_layer,
                'linear_dim': curr_linear_dim,
                'threshold': curr_threshold,
                'learning_rate': curr_lr,
                'num_epochs': curr_num_epochs,
                'seq_len': curr_seq_len,
                'drop_out': curr_drop_out
            }
            if model_name == 'EncoderFNN_AllSeq_AR_CI':
                parameters['ci_dim'] = ci_dim
                # ci_dim is handed to the model separately and is therefore
                # subtracted from the feature dimension.
                mdl = model.EncoderFNN_AllSeq_AR_CI(
                    input_dim=input_dim - ci_dim,
                    output_dim=output_dim,
                    hidden_dim=curr_hidden_dim,
                    num_layers=curr_num_layer,
                    seq_len=curr_seq_len,
                    linear_dim=curr_linear_dim,
                    ci_dim=ci_dim,
                    learning_rate=curr_lr,
                    dropout=curr_drop_out,
                    threshold=curr_threshold,
                    num_epochs=curr_num_epochs)
            else:
                if model_name == 'EncoderFNN_AllSeq_AR':
                    net_class = model.EncoderFNN_AllSeq_AR
                else:
                    net_class = model.EncoderFNN_AllSeq
                mdl = net_class(input_dim=input_dim,
                                output_dim=output_dim,
                                hidden_dim=curr_hidden_dim,
                                num_layers=curr_num_layer,
                                seq_len=curr_seq_len,
                                linear_dim=curr_linear_dim,
                                learning_rate=curr_lr,
                                dropout=curr_drop_out,
                                threshold=curr_threshold,
                                num_epochs=curr_num_epochs)
        elif model_name == 'XGBoost':
            curr_max_depth = pick(max_depth)
            curr_colsample_bytree = pick(colsample_bytree)
            curr_gamma = pick(gamma)
            curr_n_estimators = pick(n_estimators)
            curr_lr = pick(lr)
            parameters = {
                'max_depth': curr_max_depth,
                'colsample_bytree': curr_colsample_bytree,
                'gamma': curr_gamma,
                'n_estimators': curr_n_estimators,
                'learning_rate': curr_lr
            }
            mdl = model.XGBMultitask(num_models=output_dim,
                                     colsample_bytree=curr_colsample_bytree,
                                     gamma=curr_gamma,
                                     learning_rate=curr_lr,
                                     max_depth=curr_max_depth,
                                     n_estimators=curr_n_estimators,
                                     objective='reg:squarederror')
        elif model_name == 'Lasso':
            curr_alpha = pick(alphas)
            parameters = {'alpha': curr_alpha}
            mdl = model.LassoMultitask(alpha=curr_alpha, fit_intercept=False)
        elif model_name == 'FNN':
            curr_hidden_dim = pick(hidden_dim)
            curr_num_layers = pick(num_layers)
            parameters = {
                'hidden_dim': curr_hidden_dim,
                'num_layers': curr_num_layers
            }
            mdl = model.ReluNet(input_dim=input_dim,
                                output_dim=output_dim,
                                hidden_dim=curr_hidden_dim,
                                num_layers=curr_num_layers,
                                threshold=0.1,
                                num_epochs=1000)
        elif model_name == 'CNN_FNN':
            curr_stride = pick(stride)
            curr_kernel_size = pick(kernel_size)
            curr_hidden_dim = pick(hidden_dim)
            curr_num_layers = pick(num_layers)
            parameters = {
                'stride': curr_stride,
                'kernel_size': curr_kernel_size,
                'hidden_dim': curr_hidden_dim,
                'num_layers': curr_num_layers
            }
            num_var = len(train_X)
            input_dim = model.get_input_dim(train_X, num_var, curr_stride,
                                            curr_kernel_size)
            mdl = model.CnnFnn(num_var,
                               input_dim,
                               output_dim,
                               kernel_size=curr_kernel_size,
                               stride=curr_stride,
                               hidden_dim=curr_hidden_dim,
                               num_layers=curr_num_layers,
                               num_epochs=100)
        else:  # CNN_LSTM
            curr_stride = pick(stride)
            curr_kernel_size = pick(kernel_size)
            curr_hidden_dim = pick(hidden_dim)
            curr_num_layers = pick(num_lstm_layers)
            curr_lr = pick(lr)
            curr_num_epochs = pick(num_epochs)
            parameters = {
                'stride': curr_stride,
                'kernel_size': curr_kernel_size,
                'hidden_dim': curr_hidden_dim,
                'num_layers': curr_num_layers,
                'learning_rate': curr_lr,
                'num_epochs': curr_num_epochs
            }
            num_var = len(train_X)
            input_dim = model.get_input_dim(train_X, num_var, curr_stride,
                                            curr_kernel_size)
            mdl = model.CnnLSTM(num_var,
                                input_dim,
                                output_dim,
                                kernel_size=curr_kernel_size,
                                stride=curr_stride,
                                hidden_dim=curr_hidden_dim,
                                num_lstm_layers=curr_num_layers,
                                num_epochs=curr_num_epochs,
                                learning_rate=curr_lr)
        parameter_all.append(parameters)

        # fit the model and compute the prediction of the validation set
        if model_name in ('XGBoost', 'Lasso'):
            mdl.fit(train_X, train_y)
            pred_y = mdl.predict(valid_X)
            history = None
        else:
            if model_name not in cnn_models:
                # initialize the model (the CNN models are used as built)
                model.init_weight(mdl)
            # send model to gpu
            mdl.to(device)
            if model_name in ar_models:
                history = mdl.fit_cv(train_loader, valid_X, valid_y_ar,
                                     valid_y, device)
                pred_y = mdl.predict(valid_X, valid_y_ar, device)
            else:
                history = mdl.fit_cv(train_loader, valid_X, valid_y, device)
                pred_y = mdl.predict(valid_X, device)

        history_all.append(history)
        test_rmse = np.sqrt(((valid_y - pred_y)**2).mean())
        test_cos = np.asarray([
            compute_cosine(valid_y[row, :], pred_y[row, :])
            for row in range(len(valid_y))
        ]).mean()
        score.append([test_rmse, test_cos])

    cv_results = {
        'score': score,
        'parameter_all': parameter_all,
        'history_all': history_all
    }
    save_results(
        rootpath + 'cv_results_test/cv_results_' + model_name +
        '_{}_{}.pkl'.format(cv_year, cv_index), cv_results)
def forecast_dl(month_id, year, rootpath, param_path, device, model_name):
    """Run deep learning models (FNN, CNN_FNN, CNN_LSTM).

    The model is built from the best hyperparameters stored at
    ``<param_path><model_name>_forecast<month_id>.pkl``, fitted on the
    training set and used to predict both the training and the test set. The
    model weights are saved to
    ``<rootpath>models/<model_name>_<year>_<month_id>.t7`` and the targets
    and predictions to
    ``<rootpath>forecast_results/results_<model_name>_<year>_<month_id>.pkl``.

    Args:
        month_id: an int indicating the month which is being forecasted
        year: an int indicating the year which is being forecasted
        rootpath: the path where the training and test sets are saved; it is
            used as a plain string prefix
        param_path: the path where the best hyperparameters are saved
        device: an indication if the model is running on GPU or CPU
        model_name: a string indicating the name of a model; one of 'FNN',
            'CNN_FNN' or 'CNN_LSTM'

    Raises:
        ValueError: if ``model_name`` is not one of the supported models.
    """
    cnn_models = ('CNN_FNN', 'CNN_LSTM')
    # Fail before any data is loaded instead of crashing later on an
    # undefined model.
    if model_name != 'FNN' and model_name not in cnn_models:
        raise ValueError(
            'the model name is not in the list: {!r}'.format(model_name))

    def data_file(stem):
        """Return the path of the data file ``stem`` of this forecast."""
        return rootpath + '{}_{}_forecast{}.pkl'.format(stem, year, month_id)

    # load data
    if model_name in cnn_models:
        train_X = joblib.load(data_file('train_X_map'))
        test_X = joblib.load(data_file('test_X_map'))
    else:
        train_X = load_results(data_file('train_X_pca'))
        test_X = load_results(data_file('test_X_pca'))
    train_y = load_results(data_file('train_y_pca'))
    test_y = load_results(data_file('test_y_pca'))
    output_dim = train_y.shape[-1]

    # The hyperparameter file is keyed by model and month only, not by year.
    best_parameter = load_results(
        param_path + '{}_forecast{}.pkl'.format(model_name, month_id))

    if model_name == 'FNN':
        train_X = np.reshape(train_X, (train_X.shape[0], -1))
        test_X = np.reshape(test_X, (test_X.shape[0], -1))
        # The network consumes the flattened features, so the input dim is
        # read after the reshape.
        input_dim = train_X.shape[-1]
        train_dataset = model.MapDataset(train_X, train_y)
        train_loader = DataLoader(dataset=train_dataset,
                                  batch_size=512,
                                  shuffle=False)
        mdl = model.ReluNet(input_dim=input_dim,
                            output_dim=output_dim,
                            hidden_dim=best_parameter['hidden_dim'],
                            num_layers=best_parameter['num_layers'],
                            threshold=0.1,
                            num_epochs=1000)
    else:
        train_dataset = model.MapDataset_CNN(train_X, train_y)
        train_loader = DataLoader(dataset=train_dataset,
                                  batch_size=50,
                                  shuffle=False)
        curr_stride = best_parameter['stride']
        curr_kernel_size = best_parameter['kernel_size']
        num_var = len(train_X)
        input_dim = model.get_input_dim(train_X, num_var, curr_stride,
                                        curr_kernel_size)
        if model_name == 'CNN_FNN':
            mdl = model.CnnFnn(num_var,
                               input_dim,
                               output_dim,
                               kernel_size=curr_kernel_size,
                               stride=curr_stride,
                               hidden_dim=best_parameter['hidden_dim'],
                               num_layers=best_parameter['num_layers'],
                               num_epochs=100)
        else:  # CNN_LSTM
            mdl = model.CnnLSTM(num_var,
                                input_dim,
                                output_dim,
                                kernel_size=curr_kernel_size,
                                stride=curr_stride,
                                hidden_dim=best_parameter['hidden_dim'],
                                num_lstm_layers=best_parameter['num_layers'],
                                num_epochs=best_parameter['num_epochs'],
                                learning_rate=best_parameter['learning_rate'])

    # initialize the model
    model.init_weight(mdl)
    # send model to gpu
    mdl.to(device)
    mdl.fit(train_loader, device)
    torch.save(
        {'state_dict': mdl.state_dict()},
        rootpath + 'models/{}_{}_{}.t7'.format(model_name, year, month_id))

    # Train predictions are computed before test predictions, and the keys
    # keep the order in which the saved dictionary has always listed them.
    results = {
        'prediction_train': mdl.predict(train_X, device),
        'prediction_test': mdl.predict(test_X, device),
        'target_train': train_y,
        'target_test': test_y,
    }
    save_results(
        rootpath + 'forecast_results/results_{}_{}_{}.pkl'.format(
            model_name, year, month_id), results)