# Imports assumed by the functions in this excerpt (module-level in the
# original project; the exact module layout is an assumption):
import copy
import glob
import importlib
import logging
import os

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import yaml
from box import Box
from torch.optim.lr_scheduler import (CosineAnnealingLR,
                                      CosineAnnealingWarmRestarts,
                                      MultiplicativeLR, ReduceLROnPlateau)
from torch.optim.swa_utils import SWALR, AveragedModel
from torch.utils.data import ConcatDataset, Subset, random_split
from torch.utils.tensorboard import SummaryWriter

# Project-internal helpers; the import path below is an assumption:
# from GPTime.utils.scoring import predict_M4, score_M4
logger = logging.getLogger(__name__)


def evaluate_ensemble(cfg_path):
    with open(cfg_path, "r") as ymlfile:
        ensemble_cfg = Box(yaml.safe_load(ymlfile))
    experiment_folder = os.path.join(ensemble_cfg.storage_folder,
                                     ensemble_cfg.ensemble_name)
    print(experiment_folder)
    ensemble_members = glob.glob(
        os.path.join(experiment_folder, "**", "forecast.csv"))
    print(ensemble_members)
    # Seasonal-baseline forecasts are not ensemble members.
    ensemble_members = [
        memb for memb in ensemble_members if "seasonal" not in memb
    ]
    print(ensemble_members)
    df_list = []
    for member_fname in ensemble_members:
        print(member_fname)
        df = pd.read_csv(member_fname)
        df_list.append(df)
    # The i-th row of every member refers to the same series, so grouping on
    # the index yields the per-series ensemble median and mean.
    df_concat = pd.concat(df_list)
    by_row = df_concat.groupby(df_concat.index)
    df_median = by_row.median()
    df_mean = by_row.mean()
    preds_median = df_median.values
    preds_mean = df_mean.values
    print("scoring median")
    print(f"val_set: {ensemble_cfg.val_set}")
    res_median = score_M4(predictions=preds_median,
                          df_results_name=os.path.join(
                              experiment_folder, "result_median_val.csv"),
                          val=ensemble_cfg.val_set)
    print("scoring mean")
    res_mean = score_M4(predictions=preds_mean,
                        df_results_name=os.path.join(experiment_folder,
                                                     "result_mean_val.csv"),
                        val=ensemble_cfg.val_set)
    df_median.to_csv(os.path.join(experiment_folder, "forecast_median_val.csv"))
    df_mean.to_csv(os.path.join(experiment_folder, "forecast_mean_val.csv"))
    print(res_median)
    print(res_mean)
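# A minimal sketch of the YAML this function expects. The field names are the
# ones read above (storage_folder, ensemble_name, val_set); the example values
# are purely illustrative:
#
#     storage_folder: storage/experiments
#     ensemble_name: mlp_ensemble
#     val_set: false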
def evaluate_ensemble(folder_path):
    # Variant of the above for finetuning runs: aggregates per-stage forecasts
    # (step1 / step2 / initialization) found under folder_path.
    csvs = [
        f for f in glob.glob(os.path.join(folder_path, "**/*csv"))
        if "forecast" in f
    ]
    # NOTE: "test_name" looks like a per-experiment placeholder filter.
    csvs = [f for f in csvs if "test_name" in f]
    step_1_csvs = [f for f in csvs if "step1" in f]
    step_2_csvs = [f for f in csvs if "step2" in f]
    init_csvs = [f for f in csvs if "initialization" in f]
    csv_dict = {"step1": step_1_csvs, "step2": step_2_csvs, "init": init_csvs}
    for name, csv_list in csv_dict.items():
        df_list = []
        for csv_fname in csv_list:
            print(csv_fname)
            df = pd.read_csv(csv_fname)
            df_list.append(df)
        df_concat = pd.concat(df_list)
        by_row = df_concat.groupby(df_concat.index)
        df_median = by_row.median()
        df_mean = by_row.mean()
        preds_median = df_median.values
        preds_mean = df_mean.values
        print("scoring median")
        res_median = score_M4(predictions=preds_median,
                              df_results_name=os.path.join(
                                  folder_path, f"{name}_result_median.csv"),
                              val=False)
        print("scoring mean")
        res_mean = score_M4(predictions=preds_mean,
                            df_results_name=os.path.join(
                                folder_path, f"{name}_result_mean.csv"),
                            val=False)
        #df_median.to_csv(os.path.join(experiment_folder, "forecast_median.csv"))
        #df_mean.to_csv(os.path.join(experiment_folder, "forecast_mean.csv"))
        print(res_median)
        print(res_mean)
def test_scoreM4(self):
    rand_int = np.random.randint(999)
    naive_predictions = predict_M4(model=self.naive)
    mlp_predictions = predict_M4(model=self.mlp)
    naive_scores = score_M4(
        naive_predictions, f"GPTime/tests/results/M4/naive_test{rand_int}.csv")
    mlp_scores = score_M4(
        mlp_predictions, f"GPTime/tests/results/M4/mlp_test{rand_int}.csv")
    # scores were saved
    df_naive = pd.read_csv(
        f"GPTime/tests/results/M4/naive_test{rand_int}.csv", index_col=0)
    df_mlp = pd.read_csv(f"GPTime/tests/results/M4/mlp_test{rand_int}.csv",
                         index_col=0)
    self.assertFalse(df_naive.empty, msg="Should be False")
    self.assertFalse(df_mlp.empty, msg="Should be False")
    # scores have right format
    self.assertEqual(df_naive.shape, (7, 3), "Shape should be (7,3)")
    self.assertEqual(df_mlp.shape, (7, 3), "Shape should be (7,3)")
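# The test above is a method of a unittest.TestCase (note the `self.naive` /
# `self.mlp` fixtures, presumably built in setUp). Assuming a conventional
# test layout, it could be run with something like:
#
#     python -m unittest GPTime.tests.test_scoring -v
#
# where the module path is an assumption, not confirmed by this excerpt.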
def evaluate(evaluate_cfg):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    Model = getattr(importlib.import_module(evaluate_cfg.model_module),
                    evaluate_cfg.model_name)
    # load model
    if Model.__name__ == "MLP":
        model_params = evaluate_cfg.model_params_mlp
    elif Model.__name__ == "AR":
        model_params = evaluate_cfg.model_params_ar
    elif Model.__name__ == "TCN":
        model_params = evaluate_cfg.model_params_tcn
    else:
        logger.warning("Unknown model name.")
    if evaluate_cfg.global_model:
        logger.info("Evaluating global model")
        model = Model(**model_params).double()
        model_path = os.path.join(evaluate_cfg.model_save_path,
                                  evaluate_cfg.name + ".pt")
        model.load_state_dict(torch.load(model_path))
        model.to(device)
        model.eval()
        preds, df_preds = predict_M4(
            model=model,
            scale=evaluate_cfg.scale,
            seasonal_init=evaluate_cfg.seasonal_init,
            val_set=evaluate_cfg.val_set,
            encode_frequencies=evaluate_cfg.model_params_mlp.encode_frequencies)
        result_file = os.path.join(evaluate_cfg.result_path, "result.csv")
        logger.info(f"results file: {result_file}")
        d = score_M4(preds, df_results_name=result_file,
                     val=evaluate_cfg.val_set)
        logger.info(d)
        csv_path = os.path.join(evaluate_cfg.predictions_path, "forecast.csv")
        df_preds.to_csv(csv_path)
    else:
        # M4 forecast horizons per sampling frequency.
        horizons = {
            "Y": 6,
            "Q": 8,
            "M": 18,
            "W": 13,
            "D": 14,
            "H": 48,
        }
        # find all per-frequency models
        all_model_paths = glob.glob(
            os.path.join(evaluate_cfg.model_save_path, "*.pt"))
        all_model_paths.sort()
        logger.info(all_model_paths)
        all_dfs = []
        for model_path in all_model_paths:
            logger.info(f"predicting model: {model_path}")
            logger.info(model_params)
            # The character just before ".pt" encodes the frequency,
            # e.g. "model_Y.pt" -> "Y".
            model_params["in_features"] = (evaluate_cfg.lookback *
                                           horizons[model_path[-4]])
            model = Model(**model_params).double()
            model.load_state_dict(torch.load(model_path))
            model.to(device)
            model.eval()
            logger.info(model_path)
            preds, df_preds = predict_M4(
                model=model,
                scale=evaluate_cfg.scale,
                seasonal_init=evaluate_cfg.seasonal_init,
                val_set=evaluate_cfg.val_set,
                freq=model_path[-4])
            all_dfs.append(df_preds)
            logger.info(df_preds.head())
        # concat dataframes
        df_all = pd.concat(all_dfs, sort=False)
        preds = df_all.values
        # save results and forecasts
        result_file = os.path.join(evaluate_cfg.model_save_path, "result.csv")
        d = score_M4(preds, df_results_name=result_file,
                     val=evaluate_cfg.val_set)
        logger.info(d)
        csv_path = os.path.join(evaluate_cfg.model_save_path, "forecast.csv")
        df_all.to_csv(csv_path)
def train(train_cfg):
    #np.random.seed(1729)
    #torch.manual_seed(1729)
    Model = getattr(importlib.import_module(train_cfg.model_module),
                    train_cfg.model_name)
    Criterion = getattr(importlib.import_module(train_cfg.criterion_module),
                        train_cfg.criterion_name)
    Optimizer = getattr(importlib.import_module(train_cfg.optimizer_module),
                        train_cfg.optimizer_name)
    Dataset = getattr(importlib.import_module(train_cfg.dataset_module),
                      train_cfg.dataset_name)
    DataLoader = getattr(importlib.import_module(train_cfg.dataloader_module),
                         train_cfg.dataloader_name)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    logger.info(f"Device: {device}")
    if Model.__name__ == "MLP":
        model_params = train_cfg.model_params_mlp
    elif Model.__name__ == "AR":
        model_params = train_cfg.model_params_ar
    elif Model.__name__ == "TCN":
        model_params = train_cfg.model_params_tcn
    else:
        logger.warning("Unknown model name.")
    model = Model(**model_params).double()
    model.to(device)
    logger.info(
        f"Number of learnable parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)}"
    )
    #criterion = Criterion(**train_cfg.criterion_params)
    criterion = Criterion
    optimizer = Optimizer(model.parameters(), **train_cfg.optimizer_params)
    writer = SummaryWriter(log_dir=train_cfg.tensorboard_log_dir)

    # Learning rate
    num_lr_steps = 10
    logger.info(f"{num_lr_steps} steps in the manual learning rate schedule "
                "if no scheduler is set")
    lr_decay_step = int(train_cfg.max_epochs) // num_lr_steps
    if lr_decay_step == 0:
        lr_decay_step = 1
    if train_cfg.lr_scheduler == "multiplicative":
        lmbda = lambda epoch: 0.95
        scheduler = MultiplicativeLR(optimizer, lr_lambda=lmbda, verbose=True)
    elif train_cfg.lr_scheduler == "plateau":
        scheduler = ReduceLROnPlateau(optimizer,
                                      "min",
                                      verbose=True,
                                      patience=train_cfg.patience)
    elif train_cfg.lr_scheduler == "cosine":
        scheduler = CosineAnnealingLR(optimizer,
                                      T_max=train_cfg.max_epochs,
                                      eta_min=0.00000001)
    elif train_cfg.lr_scheduler == "cosine_warm":
        scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=30, verbose=True)
    logger.info(f"Using learning rate scheduler: {train_cfg.lr_scheduler}")
    if train_cfg.swa:
        # TODO: Fix SWA to work with early stopping. Not really a problem
        # without the validation set.
        swa_model = AveragedModel(model)
        swa_start = int(0.9 * train_cfg.max_epochs)
        swa_scheduler = SWALR(optimizer, swa_lr=0.000001)

    # Dataset
    # TODO: log lookback and loss function
    if not train_cfg.more_data:
        if train_cfg.val:
            logger.info("Using a validation split.")
            assert train_cfg.h_val + train_cfg.v_val < 2, \
                "Horizontal and vertical validation split both selected!"
            # Make a train and a val set
            if train_cfg.v_val:
                # make a horizontal train/val split
                ds_train = Dataset(
                    memory=model.memory,
                    convolutions=True if Model.__name__ == "TCN" else False,
                    ds_type="train",
                    **train_cfg.dataset_params)
                ds_val = Dataset(
                    memory=model.memory,
                    convolutions=True if Model.__name__ == "TCN" else False,
                    ds_type="val",
                    **train_cfg.dataset_params)
            elif train_cfg.h_val:
                # make a vertical train/val split
                # Proportion of the dataset to use
                assert 0 < train_cfg.proportion <= 1, \
                    "Proportion of dataset not between 0 and 1."
                # NOTE: the excerpt never constructs `ds` before splitting it;
                # building the full dataset here is an assumption.
                ds = Dataset(
                    memory=model.memory,
                    convolutions=True if Model.__name__ == "TCN" else False,
                    ds_type="full",
                    **train_cfg.dataset_params)
                proportion_length = int(ds.__len__() * train_cfg.proportion)
                ds_use, _ = random_split(dataset=ds,
                                         lengths=[
                                             proportion_length,
                                             ds.__len__() - proportion_length
                                         ])
                train_length = int(ds_use.__len__() * train_cfg.train_set_size)
                val_length = ds_use.__len__() - train_length
                train_ds, val_ds = random_split(
                    dataset=ds_use,
                    lengths=[train_length, val_length],
                    #generator=torch.torch.Generator()
                )
                logger.info(
                    f"Using {train_cfg.proportion * 100}% of the available dataset."
                )
                logger.info(
                    f"Using frequencies: {[freq for freq, true_false in train_cfg.dataset_params.frequencies.items() if true_false]}"
                )
                logger.info(
                    f"Train size: {train_ds.__len__()}, Val size: {val_ds.__len__()}"
                )
                # Dataloaders
                train_loader = DataLoader(train_ds,
                                          **train_cfg.dataloader_params)
                val_loader = DataLoader(val_ds, **train_cfg.dataloader_params)
                test_loader = DataLoader(train_ds,
                                         **train_cfg.dataloader_params)
            else:
                # Not specified
                logger.warning("Type of train/val split not specified!")
                raise ValueError("Type of train/val split not specified.")
        else:
            logger.info(
                "Not using a validation set. Training on the full dataset.")
            ds_train = Dataset(
                memory=model.memory,
                convolutions=True if Model.__name__ == "TCN" else False,
                ds_type="full",
                **train_cfg.dataset_params)
            logger.info(f"seasonal init: {train_cfg.seasonal_init}")
        # The h_val branch builds its loaders above; only build them here for
        # the v_val and no-validation paths.
        if not (train_cfg.val and train_cfg.h_val):
            train_loader = DataLoader(dataset=ds_train,
                                      **train_cfg.dataloader_params)
            if train_cfg.val:
                val_loader = DataLoader(dataset=ds_val,
                                        **train_cfg.dataloader_params)
        logger.info("Training model.")
        logger.info(f"Length of dataset: {len(train_loader.dataset)}")
    else:
        # Make one dataset for each frequency
        logger.debug(f"proportion used: {train_cfg.proportion}")
        datasets = []
        dataset_paths = {}
        m4_path_dict = {}
        for ds_name in train_cfg.dataset_params.dataset_paths.keys():
            if ds_name != "M4":
                dataset_paths[ds_name] = \
                    train_cfg.dataset_params.dataset_paths[ds_name]
            else:
                m4_path_dict[ds_name] = \
                    train_cfg.dataset_params.dataset_paths[ds_name]
        logger.debug(f"dataset_paths: {dataset_paths}")
        for freq in train_cfg.dataset_params.frequencies.keys():
            if train_cfg.dataset_params.frequencies[freq]:
                use_frequencies = {}
                for freq_set in train_cfg.dataset_params.frequencies.keys():
                    if freq_set == freq:
                        use_frequencies[freq_set] = True
                    else:
                        use_frequencies[freq_set] = False
                tmp_params = copy.copy(train_cfg.dataset_params)
                tmp_params.frequencies = use_frequencies
                tmp_params.dataset_paths = dataset_paths
                ds_freq = Dataset(
                    memory=model.memory,
                    convolutions=False,
                    **tmp_params,
                )
                datasets.append(ds_freq)
        # Subsample each frequency dataset to the configured proportion.
        prop_datasets = []
        for ds in datasets:
            split = int(len(ds) * train_cfg.proportion)
            indices = list(range(len(ds)))
            np.random.seed(1729)
            np.random.shuffle(indices)
            keep_indices = indices[:split]
            prop_ds = Subset(dataset=ds, indices=keep_indices)
            prop_datasets.append(prop_ds)
        # M4 itself is always used in full.
        if len(m4_path_dict):
            tmp_params = copy.copy(train_cfg.dataset_params)
            tmp_params.dataset_paths = m4_path_dict
            m4_ds = Dataset(
                memory=model.memory,
                convolutions=False,
                **tmp_params,
            )
            prop_datasets.append(m4_ds)
        concat_ds = ConcatDataset(prop_datasets)
        logger.debug(f"len(concat_ds): {len(concat_ds)}")
        train_loader = DataLoader(concat_ds, **train_cfg.dataloader_params)

    running_loss = 0.0
    val_running_loss = 0.0
    low_loss = np.inf
    early_stop_count = 0
    for ep in range(1, train_cfg.max_epochs + 1):
        epoch_loss = 0.0
        batches_non_inf = 0
        for i, data in enumerate(train_loader):
            model.train()
            sample = data[0].to(device)
            label = data[1].to(device)
            sample_mask = data[2].to(device)
            label_mask = data[3].to(device)
            #last_period = data[4].to(device)
            freq_int = data[4].to(device)
            freq_str_arr = np.expand_dims(np.array(data[5]), axis=1)
            if train_cfg.seasonal_init:
                # Initialize the forecast from the last full seasonal period.
                last_period = sample.shape[1] - freq_int
            else:
                last_period = torch.tensor(sample.shape[1] - 1).repeat(
                    sample.shape[0]).to(device)
            optimizer.zero_grad()
            if train_cfg.scale:
                # Scale each series by its max; all-zero series get scale 1.0
                # to avoid division by zero.
                max_scale = torch.max(sample, 1).values.unsqueeze(1)
                if len((max_scale == 0).nonzero()) > 0:
                    zero_idx = (max_scale == 0).nonzero()
                    max_scale[zero_idx[:, 0], zero_idx[:, 1]] = 1.0
                sample = torch.div(sample, max_scale)
                sample[torch.isnan(sample)] = 0.0
            forecast = model(sample, sample_mask, last_period, freq_str_arr)
            if train_cfg.scale:
                forecast = torch.mul(forecast, max_scale)
                sample = torch.mul(sample, max_scale)
            training_loss = criterion(forecast, label, sample, sample_mask,
                                      freq_int)
            #training_loss = smape_2_loss(forecast, label, sample, sample_mask, freq_int)
            if np.isnan(float(training_loss)):
                logger.warning("Training loss is NaN")
                logger.debug(f"batch {i}: {data}")
                break
            training_loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            batches_non_inf += 1
            epoch_loss += training_loss.item()
        running_loss += epoch_loss / batches_non_inf
        if train_cfg.val:
            val_epoch_loss = 0.0
            val_batches_non_inf = 0
            for i, data in enumerate(val_loader):
                model.eval()
                sample = data[0].to(device)
                label = data[1].to(device)
                sample_mask = data[2].to(device)
                label_mask = data[3].to(device)
                freq_int = data[4].to(device)
                freq_str_arr = np.expand_dims(np.array(data[5]), axis=1)
                if train_cfg.seasonal_init:
                    last_period = sample.shape[1] - freq_int
                else:
                    last_period = torch.tensor(sample.shape[1] - 1).repeat(
                        sample.shape[0]).to(device)
                if train_cfg.scale:
                    max_scale = torch.max(sample, 1).values.unsqueeze(1)
                    sample = torch.div(sample, max_scale)
                    sample[torch.isnan(sample)] = 0.0
                forecast = model(sample, sample_mask, last_period,
                                 freq_str_arr)
                if train_cfg.scale:
                    forecast = torch.mul(forecast, max_scale)
                    sample = torch.mul(sample, max_scale)
                # TODO: Fix loss
                val_loss = criterion(forecast, label, sample, sample_mask,
                                     freq_int)
                val_batches_non_inf += 1
                val_epoch_loss += val_loss.item()
            val_running_loss += val_epoch_loss / val_batches_non_inf
        if train_cfg.val:
            # Early stopping on the validation loss.
            if val_epoch_loss < low_loss:
                low_loss = val_epoch_loss
                early_stop_count = 0
            else:
                early_stop_count += 1
                if early_stop_count > train_cfg.early_stop_tenacity:
                    logger.info(f"Early stop after epoch {ep}.")
                    break
        if train_cfg.swa and ep > swa_start:
            swa_model.update_parameters(model)
            swa_scheduler.step()
        else:
            if train_cfg.lr_scheduler == "plateau":
                if train_cfg.val:
                    scheduler.step(val_epoch_loss)
                else:
                    scheduler.step(epoch_loss)
            elif train_cfg.lr_scheduler is not None:
                scheduler.step()
        if train_cfg.lr_scheduler is None:
            # Manual step decay: halve the learning rate every lr_decay_step
            # epochs.
            for param_group in optimizer.param_groups:
                old_lr = param_group["lr"]
                param_group["lr"] = train_cfg.optimizer_params.lr * 0.5**(
                    ep // lr_decay_step)
                new_lr = param_group["lr"]
                if old_lr != new_lr:
                    logger.info(
                        f"Changed learning rate. Current lr = {param_group['lr']}"
                    )
        if ep % train_cfg.log_freq == 0:
            if train_cfg.val:
                logger.info(
                    f"Epoch {ep:<5d} [Avg. Loss, Loss], [Avg. ValLoss, ValLoss]: "
                    f"[{running_loss / train_cfg.log_freq:.4f}, "
                    f"{epoch_loss / batches_non_inf:.4f}] "
                    f"[{val_running_loss / train_cfg.log_freq:.4f}, "
                    f"{val_epoch_loss / val_batches_non_inf:.4f}], "
                    f"{early_stop_count}")
                running_loss = 0.0
                val_running_loss = 0.0
            else:
                logger.info(
                    f"Epoch {ep:<5d} [Avg. Loss, Loss]: "
                    f"[{running_loss / train_cfg.log_freq:.4f}, "
                    f"{epoch_loss / batches_non_inf:.4f}], "
                    f"{early_stop_count}")
                running_loss = 0.0
    preds, df_preds = predict_M4(
        model=model,
        scale=train_cfg.scale,
        seasonal_init=train_cfg.seasonal_init,
        encode_frequencies=train_cfg.model_params_mlp.encode_frequencies)
    res = score_M4(predictions=preds)
    logger.info(res)
    # save model
    filename = os.path.join(train_cfg.model_save_path, train_cfg.name + ".pt")
    os.makedirs(os.path.join(train_cfg.model_save_path, train_cfg.name),
                exist_ok=True)
    torch.save(model.state_dict(), filename)
    #filename = os.path.join(train_cfg.model_save_path, train_cfg.name + ".yml")
    #train_cfg.to_yaml(filename)
    logger.info("Finished training!")
    if train_cfg.swa:
        # save the SWA-averaged weights
        filename = os.path.join(train_cfg.model_save_path,
                                train_cfg.name + "_swa.pt")
        torch.save(swa_model.state_dict(), filename)
        preds, df_preds = predict_M4(model=swa_model,
                                     scale=train_cfg.scale,
                                     seasonal_init=train_cfg.seasonal_init)
        res = score_M4(predictions=preds)
        logger.info(res)
        logger.info("Finished SWA!")
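# A minimal, self-contained sketch of the max-scaling used in the training
# loops above: each series is divided by its own maximum before the forward
# pass, and the forecast is multiplied back afterwards. `toy_model` and the
# toy batch below are illustrative only, not part of the project.
def max_scale_forward(model, sample):
    # Per-series scale; all-zero series get scale 1.0 to avoid division by
    # zero (NaNs would otherwise propagate through the network).
    max_scale = torch.max(sample, 1).values.unsqueeze(1)
    max_scale[max_scale == 0] = 1.0
    forecast = model(sample / max_scale)
    return forecast * max_scale


# Example (illustrative):
#   toy_model = torch.nn.Linear(4, 2).double()
#   batch = torch.tensor([[1.0, 2.0, 4.0, 8.0],
#                         [0.0, 0.0, 0.0, 0.0]]).double()
#   max_scale_forward(toy_model, batch)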
def finetune(finetune_cfg):
    logger.debug("FINETUNE!")
    Model = getattr(importlib.import_module(finetune_cfg.model_module),
                    finetune_cfg.model_name)
    Criterion = getattr(importlib.import_module(finetune_cfg.criterion_module),
                        finetune_cfg.criterion_name)
    Optimizer = getattr(importlib.import_module(finetune_cfg.optimizer_module),
                        finetune_cfg.optimizer_name)
    Dataset = getattr(importlib.import_module(finetune_cfg.dataset_module),
                      finetune_cfg.dataset_name)
    DataLoader = getattr(
        importlib.import_module(finetune_cfg.dataloader_module),
        finetune_cfg.dataloader_name)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    logger.info(f"Device: {device}")

    # Load the pretrained model.
    model = Model(**finetune_cfg.model_params_mlp)
    model.load_state_dict(torch.load(finetune_cfg.model_path))
    logger.info(
        f"Number of learnable parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)}"
    )
    # Freeze all parameters.
    for param in model.parameters():
        param.requires_grad = False
    logger.info(
        f"Number of learnable parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)}"
    )
    model.double()
    model.to(device)

    # Baseline scores of the pretrained model before any finetuning.
    preds, df_preds = predict_M4(
        model=model,
        scale=finetune_cfg.scale,
        seasonal_init=finetune_cfg.seasonal_init,
        val_set=finetune_cfg.val,
        encode_frequencies=finetune_cfg.model_params_mlp.encode_frequencies)
    res = score_M4(predictions=preds, val=finetune_cfg.val)
    logger.info(res)
    preds, df_preds = predict_M4(
        model=model,
        scale=finetune_cfg.scale,
        seasonal_init=finetune_cfg.seasonal_init,
        encode_frequencies=finetune_cfg.model_params_mlp.encode_frequencies)
    res = score_M4(predictions=preds)
    logger.info(res)

    # Replace the last layer(s) with freshly initialized ones; new modules
    # have requires_grad=True by default, so only they will be trained.
    model.layers[-1] = nn.Linear(in_features=1024, out_features=1024)
    #model.out_layer = nn.Linear(in_features=1024, out_features=1)
    model.out = nn.Linear(in_features=1024, out_features=1)
    model.double()
    model.to(device)
    print(model)
    logger.info(
        f"Number of learnable parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)}"
    )
    criterion = Criterion
    params_to_update = []
    for param in model.parameters():
        if param.requires_grad:
            params_to_update.append(param)
    print("len(params_to_update): ", len(params_to_update))
    optimizer = Optimizer(params_to_update, **finetune_cfg.optimizer_params)

    # Learning rate
    num_lr_steps = 10
    logger.info(f"{num_lr_steps} steps in the manual learning rate schedule "
                "if no scheduler is set")
    lr_decay_step = int(finetune_cfg.max_epochs_1) // num_lr_steps
    if lr_decay_step == 0:
        lr_decay_step = 1
    if finetune_cfg.lr_scheduler == "plateau":
        scheduler = ReduceLROnPlateau(optimizer,
                                      "min",
                                      verbose=True,
                                      patience=finetune_cfg.patience)
    logger.info(f"Using learning rate scheduler: {finetune_cfg.lr_scheduler}")

    # Load the dataset, with a validation set if configured.
    if finetune_cfg.val:
        logger.info("Using a validation split.")
        # Make a train and a val set
        ds_train = Dataset(
            memory=model.memory,
            convolutions=True if Model.__name__ == "TCN" else False,
            ds_type="train",
            **finetune_cfg.dataset_params)
        ds_val = Dataset(
            memory=model.memory,
            convolutions=True if Model.__name__ == "TCN" else False,
            ds_type="val",
            **finetune_cfg.dataset_params)
    else:
        logger.info(
            "Not using a validation set. Training on the full dataset.")
        ds_train = Dataset(
            memory=model.memory,
            convolutions=True if Model.__name__ == "TCN" else False,
            ds_type="full",
            **finetune_cfg.dataset_params)
    train_loader = DataLoader(ds_train, **finetune_cfg.dataloader_params)
    if finetune_cfg.val:
        val_loader = DataLoader(ds_val, **finetune_cfg.dataloader_params)

    # Step 1: train only the new final layer(s) until validation stops
    # improving, using a lower learning rate.
    running_loss = 0.0
    val_running_loss = 0.0
    low_loss = np.inf
    early_stop_count = 0
    for ep in range(1, finetune_cfg.max_epochs_1 + 1):
        epoch_loss = 0.0
        batches_non_inf = 0
        for i, data in enumerate(train_loader):
            model.train()
            sample = data[0].to(device)
            label = data[1].to(device)
            sample_mask = data[2].to(device)
            label_mask = data[3].to(device)
            #last_period = data[4].to(device)
            freq_int = data[4].to(device)
            freq_str_arr = np.expand_dims(np.array(data[5]), axis=1)
            if finetune_cfg.seasonal_init:
                last_period = sample.shape[1] - freq_int
            else:
                last_period = torch.tensor(sample.shape[1] - 1).repeat(
                    sample.shape[0]).to(device)
            optimizer.zero_grad()
            if finetune_cfg.scale:
                max_scale = torch.max(sample, 1).values.unsqueeze(1)
                if len(torch.nonzero(max_scale == 0)) > 0:
                    zero_idx = (max_scale == 0).nonzero()
                    max_scale[zero_idx[:, 0], zero_idx[:, 1]] = 1.0
                sample = torch.div(sample, max_scale)
                sample[torch.isnan(sample)] = 0.0
            forecast = model(sample, sample_mask, last_period, freq_str_arr)
            if finetune_cfg.scale:
                forecast = torch.mul(forecast, max_scale)
                sample = torch.mul(sample, max_scale)
            training_loss = criterion(forecast, label, sample, sample_mask,
                                      freq_int)
            if np.isnan(float(training_loss)):
                logger.warning("Training loss is NaN")
                logger.debug(f"batch {i}: {data}")
                break
            training_loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            batches_non_inf += 1
            epoch_loss += training_loss.item()
        running_loss += epoch_loss / batches_non_inf
        if finetune_cfg.val:
            val_epoch_loss = 0.0
            val_batches_non_inf = 0
            for i, data in enumerate(val_loader):
                model.eval()
                sample = data[0].to(device)
                label = data[1].to(device)
                sample_mask = data[2].to(device)
                label_mask = data[3].to(device)
                freq_int = data[4].to(device)
                freq_str_arr = np.expand_dims(np.array(data[5]), axis=1)
                if finetune_cfg.seasonal_init:
                    last_period = sample.shape[1] - freq_int
                else:
                    last_period = torch.tensor(sample.shape[1] - 1).repeat(
                        sample.shape[0]).to(device)
                if finetune_cfg.scale:
                    max_scale = torch.max(sample, 1).values.unsqueeze(1)
                    sample = torch.div(sample, max_scale)
                    sample[torch.isnan(sample)] = 0.0
                forecast = model(sample, sample_mask, last_period,
                                 freq_str_arr)
                if finetune_cfg.scale:
                    forecast = torch.mul(forecast, max_scale)
                    sample = torch.mul(sample, max_scale)
                val_loss = criterion(forecast, label, sample, sample_mask,
                                     freq_int)
                val_batches_non_inf += 1
                val_epoch_loss += val_loss.item()
            val_running_loss += val_epoch_loss / val_batches_non_inf
        if finetune_cfg.val:
            if val_epoch_loss < low_loss:
                low_loss = val_epoch_loss
                early_stop_count = 0
            else:
                early_stop_count += 1
                if early_stop_count > finetune_cfg.early_stop_tenacity:
                    logger.info(f"Early stop after epoch {ep}.")
                    break
        if finetune_cfg.lr_scheduler == "plateau":
            if finetune_cfg.val:
                scheduler.step(val_epoch_loss)
            else:
                scheduler.step(epoch_loss)
        elif finetune_cfg.lr_scheduler is not None:
            scheduler.step()
        if finetune_cfg.lr_scheduler is None:
            for param_group in optimizer.param_groups:
                old_lr = param_group["lr"]
                param_group["lr"] = finetune_cfg.optimizer_params.lr * 0.5**(
                    ep // lr_decay_step)
                new_lr = param_group["lr"]
                if old_lr != new_lr:
                    logger.info(
                        f"Changed learning rate. Current lr = {param_group['lr']}"
                    )
        if ep % finetune_cfg.log_freq == 0:
            if finetune_cfg.val:
                logger.info(
                    f"Epoch {ep:<5d} [Avg. Loss, Loss], [Avg. ValLoss, ValLoss]: "
                    f"[{running_loss / finetune_cfg.log_freq:.4f}, "
                    f"{epoch_loss / batches_non_inf:.4f}] "
                    f"[{val_running_loss / finetune_cfg.log_freq:.4f}, "
                    f"{val_epoch_loss / val_batches_non_inf:.4f}], "
                    f"{early_stop_count}")
                running_loss = 0.0
                val_running_loss = 0.0
            else:
                logger.info(
                    f"Epoch {ep:<5d} [Avg. Loss, Loss]: "
                    f"[{running_loss / finetune_cfg.log_freq:.4f}, "
                    f"{epoch_loss / batches_non_inf:.4f}], "
                    f"{early_stop_count}")
                running_loss = 0.0

    # Score and save the step-1 forecasts.
    forecast_path = os.path.join(finetune_cfg.model_save_path,
                                 f"{finetune_cfg.name}_step1_forecast.csv")
    result_path = os.path.join(finetune_cfg.model_save_path,
                               f"{finetune_cfg.name}_step1_result.csv")
    preds, df_preds = predict_M4(
        model=model,
        scale=finetune_cfg.scale,
        seasonal_init=finetune_cfg.seasonal_init,
        val_set=finetune_cfg.val,
        encode_frequencies=finetune_cfg.model_params_mlp.encode_frequencies)
    res = score_M4(predictions=preds,
                   df_results_name=result_path,
                   val=finetune_cfg.val)
    logger.info(res)
    df_preds.to_csv(forecast_path)

    # Step 2: unfreeze all parameters and finetune the whole network.
    logger.info(
        f"Number of learnable parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)}"
    )
    for param in model.parameters():
        param.requires_grad = True
    logger.info(
        f"Number of learnable parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)}"
    )
    optimizer = Optimizer(model.parameters(), **finetune_cfg.optimizer_params)
    criterion = Criterion
    # Learning rate
    num_lr_steps = 10
    logger.info(f"{num_lr_steps} steps in the manual learning rate schedule "
                "if no scheduler is set")
    lr_decay_step = int(finetune_cfg.max_epochs_2) // num_lr_steps
    if lr_decay_step == 0:
        lr_decay_step = 1
    if finetune_cfg.lr_scheduler == "plateau":
        scheduler = ReduceLROnPlateau(optimizer,
                                      "min",
                                      verbose=True,
                                      patience=finetune_cfg.patience)
    logger.info(f"Using learning rate scheduler: {finetune_cfg.lr_scheduler}")
    running_loss = 0.0
    val_running_loss = 0.0
    low_loss = np.inf
    early_stop_count = 0
    for ep in range(1, finetune_cfg.max_epochs_2 + 1):
        epoch_loss = 0.0
        batches_non_inf = 0
        for i, data in enumerate(train_loader):
            model.train()
            sample = data[0].to(device)
            label = data[1].to(device)
            sample_mask = data[2].to(device)
            label_mask = data[3].to(device)
            #last_period = data[4].to(device)
            freq_int = data[4].to(device)
            freq_str_arr = np.expand_dims(np.array(data[5]), axis=1)
            if finetune_cfg.seasonal_init:
                last_period = sample.shape[1] - freq_int
            else:
                last_period = torch.tensor(sample.shape[1] - 1).repeat(
                    sample.shape[0]).to(device)
            optimizer.zero_grad()
            if finetune_cfg.scale:
                max_scale = torch.max(sample, 1).values.unsqueeze(1)
                if len((max_scale == 0).nonzero()) > 0:
                    zero_idx = (max_scale == 0).nonzero()
                    max_scale[zero_idx[:, 0], zero_idx[:, 1]] = 1.0
                sample = torch.div(sample, max_scale)
                sample[torch.isnan(sample)] = 0.0
            forecast = model(sample, sample_mask, last_period, freq_str_arr)
            if finetune_cfg.scale:
                forecast = torch.mul(forecast, max_scale)
                sample = torch.mul(sample, max_scale)
            training_loss = criterion(forecast, label, sample, sample_mask,
                                      freq_int)
            if np.isnan(float(training_loss)):
                logger.warning("Training loss is NaN")
                logger.debug(f"batch {i}: {data}")
                break
            training_loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            batches_non_inf += 1
            epoch_loss += training_loss.item()
        running_loss += epoch_loss / batches_non_inf
        if finetune_cfg.val:
            val_epoch_loss = 0.0
            val_batches_non_inf = 0
            for i, data in enumerate(val_loader):
                model.eval()
                sample = data[0].to(device)
                label = data[1].to(device)
                sample_mask = data[2].to(device)
                label_mask = data[3].to(device)
                freq_int = data[4].to(device)
                freq_str_arr = np.expand_dims(np.array(data[5]), axis=1)
                if finetune_cfg.seasonal_init:
                    last_period = sample.shape[1] - freq_int
                else:
                    last_period = torch.tensor(sample.shape[1] - 1).repeat(
                        sample.shape[0]).to(device)
                if finetune_cfg.scale:
                    max_scale = torch.max(sample, 1).values.unsqueeze(1)
                    sample = torch.div(sample, max_scale)
                    sample[torch.isnan(sample)] = 0.0
                forecast = model(sample, sample_mask, last_period,
                                 freq_str_arr)
                if finetune_cfg.scale:
                    forecast = torch.mul(forecast, max_scale)
                    sample = torch.mul(sample, max_scale)
                val_loss = criterion(forecast, label, sample, sample_mask,
                                     freq_int)
                val_batches_non_inf += 1
                val_epoch_loss += val_loss.item()
            val_running_loss += val_epoch_loss / val_batches_non_inf
        if finetune_cfg.val:
            if val_epoch_loss < low_loss:
                low_loss = val_epoch_loss
                early_stop_count = 0
            else:
                early_stop_count += 1
                if early_stop_count > finetune_cfg.early_stop_tenacity:
                    logger.info(f"Early stop after epoch {ep}.")
                    break
        if finetune_cfg.lr_scheduler == "plateau":
            if finetune_cfg.val:
                scheduler.step(val_epoch_loss)
            else:
                scheduler.step(epoch_loss)
        elif finetune_cfg.lr_scheduler is not None:
            scheduler.step()
        if finetune_cfg.lr_scheduler is None:
            for param_group in optimizer.param_groups:
                old_lr = param_group["lr"]
                param_group["lr"] = finetune_cfg.optimizer_params.lr * 0.5**(
                    ep // lr_decay_step)
                new_lr = param_group["lr"]
                if old_lr != new_lr:
                    logger.info(
                        f"Changed learning rate. Current lr = {param_group['lr']}"
                    )
        if ep % finetune_cfg.log_freq == 0:
            if finetune_cfg.val:
                logger.info(
                    f"Epoch {ep:<5d} [Avg. Loss, Loss], [Avg. ValLoss, ValLoss]: "
                    f"[{running_loss / finetune_cfg.log_freq:.4f}, "
                    f"{epoch_loss / batches_non_inf:.4f}] "
                    f"[{val_running_loss / finetune_cfg.log_freq:.4f}, "
                    f"{val_epoch_loss / val_batches_non_inf:.4f}], "
                    f"{early_stop_count}")
                running_loss = 0.0
                val_running_loss = 0.0
            else:
                logger.info(
                    f"Epoch {ep:<5d} [Avg. Loss, Loss]: "
                    f"[{running_loss / finetune_cfg.log_freq:.4f}, "
                    f"{epoch_loss / batches_non_inf:.4f}], "
                    f"{early_stop_count}")
                running_loss = 0.0

    # Score and save the step-2 forecasts.
    forecast_path = os.path.join(finetune_cfg.model_save_path,
                                 f"{finetune_cfg.name}_step2_forecast.csv")
    result_path = os.path.join(finetune_cfg.model_save_path,
                               f"{finetune_cfg.name}_step2_result.csv")
    preds, df_preds = predict_M4(
        model=model,
        scale=finetune_cfg.scale,
        seasonal_init=finetune_cfg.seasonal_init,
        val_set=finetune_cfg.val,
        encode_frequencies=finetune_cfg.model_params_mlp.encode_frequencies)
    res = score_M4(predictions=preds,
                   df_results_name=result_path,
                   val=finetune_cfg.val)
    logger.info(res)
    df_preds.to_csv(forecast_path)

    # Step 3 ("initialization"): reload the pretrained weights and train the
    # full network end to end.
    model = Model(**finetune_cfg.model_params_mlp)
    model.load_state_dict(torch.load(finetune_cfg.model_path))
    model.double()
    model.to(device)
    logger.info(
        f"Number of learnable parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)}"
    )
    criterion = Criterion
    optimizer = Optimizer(model.parameters(), **finetune_cfg.optimizer_params)
    # Learning rate
    num_lr_steps = 10
    logger.info(f"{num_lr_steps} steps in the manual learning rate schedule "
                "if no scheduler is set")
    lr_decay_step = int(finetune_cfg.max_epochs_3) // num_lr_steps
    if lr_decay_step == 0:
        lr_decay_step = 1
    if finetune_cfg.lr_scheduler == "plateau":
        scheduler = ReduceLROnPlateau(optimizer,
                                      "min",
                                      verbose=True,
                                      patience=finetune_cfg.patience)
    logger.info(f"Using learning rate scheduler: {finetune_cfg.lr_scheduler}")
    running_loss = 0.0
    val_running_loss = 0.0
    low_loss = np.inf
    early_stop_count = 0
    for ep in range(1, finetune_cfg.max_epochs_3 + 1):
        epoch_loss = 0.0
        batches_non_inf = 0
        for i, data in enumerate(train_loader):
            model.train()
            sample = data[0].to(device)
            label = data[1].to(device)
            sample_mask = data[2].to(device)
            label_mask = data[3].to(device)
            #last_period = data[4].to(device)
            freq_int = data[4].to(device)
            freq_str_arr = np.expand_dims(np.array(data[5]), axis=1)
            if finetune_cfg.seasonal_init:
                last_period = sample.shape[1] - freq_int
            else:
                last_period = torch.tensor(sample.shape[1] - 1).repeat(
                    sample.shape[0]).to(device)
            optimizer.zero_grad()
            if finetune_cfg.scale:
                max_scale = torch.max(sample, 1).values.unsqueeze(1)
                if len((max_scale == 0).nonzero()) > 0:
                    zero_idx = (max_scale == 0).nonzero()
                    max_scale[zero_idx[:, 0], zero_idx[:, 1]] = 1.0
                sample = torch.div(sample, max_scale)
                sample[torch.isnan(sample)] = 0.0
            forecast = model(sample, sample_mask, last_period, freq_str_arr)
            if finetune_cfg.scale:
                forecast = torch.mul(forecast, max_scale)
                sample = torch.mul(sample, max_scale)
            training_loss = criterion(forecast, label, sample, sample_mask,
                                      freq_int)
            if np.isnan(float(training_loss)):
                logger.warning("Training loss is NaN")
                logger.debug(f"batch {i}: {data}")
                break
            training_loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            batches_non_inf += 1
            epoch_loss += training_loss.item()
        running_loss += epoch_loss / batches_non_inf
        if finetune_cfg.val:
            val_epoch_loss = 0.0
            val_batches_non_inf = 0
            for i, data in enumerate(val_loader):
                model.eval()
                sample = data[0].to(device)
                label = data[1].to(device)
                sample_mask = data[2].to(device)
                label_mask = data[3].to(device)
                freq_int = data[4].to(device)
                freq_str_arr = np.expand_dims(np.array(data[5]), axis=1)
                if finetune_cfg.seasonal_init:
                    last_period = sample.shape[1] - freq_int
                else:
                    last_period = torch.tensor(sample.shape[1] - 1).repeat(
                        sample.shape[0]).to(device)
                if finetune_cfg.scale:
                    max_scale = torch.max(sample, 1).values.unsqueeze(1)
                    sample = torch.div(sample, max_scale)
                    sample[torch.isnan(sample)] = 0.0
                forecast = model(sample, sample_mask, last_period,
                                 freq_str_arr)
                if finetune_cfg.scale:
                    forecast = torch.mul(forecast, max_scale)
                    sample = torch.mul(sample, max_scale)
                val_loss = criterion(forecast, label, sample, sample_mask,
                                     freq_int)
                val_batches_non_inf += 1
                val_epoch_loss += val_loss.item()
            val_running_loss += val_epoch_loss / val_batches_non_inf
        if finetune_cfg.val:
            if val_epoch_loss < low_loss:
                low_loss = val_epoch_loss
                early_stop_count = 0
            else:
                early_stop_count += 1
                if early_stop_count > finetune_cfg.early_stop_tenacity:
                    logger.info(f"Early stop after epoch {ep}.")
                    break
        if finetune_cfg.lr_scheduler == "plateau":
            if finetune_cfg.val:
                scheduler.step(val_epoch_loss)
            else:
                scheduler.step(epoch_loss)
        elif finetune_cfg.lr_scheduler is not None:
            scheduler.step()
        if finetune_cfg.lr_scheduler is None:
            for param_group in optimizer.param_groups:
                old_lr = param_group["lr"]
                param_group["lr"] = finetune_cfg.optimizer_params.lr * 0.5**(
                    ep // lr_decay_step)
                new_lr = param_group["lr"]
                if old_lr != new_lr:
                    logger.info(
                        f"Changed learning rate. Current lr = {param_group['lr']}"
                    )
        if ep % finetune_cfg.log_freq == 0:
            if finetune_cfg.val:
                logger.info(
                    f"Epoch {ep:<5d} [Avg. Loss, Loss], [Avg. ValLoss, ValLoss]: "
                    f"[{running_loss / finetune_cfg.log_freq:.4f}, "
                    f"{epoch_loss / batches_non_inf:.4f}] "
                    f"[{val_running_loss / finetune_cfg.log_freq:.4f}, "
                    f"{val_epoch_loss / val_batches_non_inf:.4f}], "
                    f"{early_stop_count}")
                running_loss = 0.0
                val_running_loss = 0.0
            else:
                logger.info(
                    f"Epoch {ep:<5d} [Avg. Loss, Loss]: "
                    f"[{running_loss / finetune_cfg.log_freq:.4f}, "
                    f"{epoch_loss / batches_non_inf:.4f}], "
                    f"{early_stop_count}")
                running_loss = 0.0

    # Score and save the initialization-run forecasts.
    forecast_path = os.path.join(
        finetune_cfg.model_save_path,
        f"{finetune_cfg.name}_initialization_forecast.csv")
    result_path = os.path.join(
        finetune_cfg.model_save_path,
        f"{finetune_cfg.name}_initialization_result.csv")
    preds, df_preds = predict_M4(
        model=model,
        scale=finetune_cfg.scale,
        seasonal_init=finetune_cfg.seasonal_init,
        val_set=finetune_cfg.val,
        encode_frequencies=finetune_cfg.model_params_mlp.encode_frequencies)
    res = score_M4(predictions=preds,
                   df_results_name=result_path,
                   val=finetune_cfg.val)
    logger.info(res)
    df_preds.to_csv(forecast_path)
    logger.info("Finished training!")
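# The three loops above produce three forecast files per run, which the
# ensemble evaluation below groups by filename tag. The labels are the
# file-name suffixes used in the code; the interpretation is a best-effort
# summary, not an authoritative description:
#   step1          - frozen backbone, freshly initialized last layers trained
#   step2          - all parameters unfrozen and finetuned further
#   initialization - pretrained weights reloaded and trained end to end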
def evaluate_ensemble(folder_path):
    step1_dfs = []
    step2_dfs = []
    init_dfs = []
    dirs = glob.glob(os.path.join(folder_path, "**/"))
    print(dirs)
    for d in dirs:
        print(d)
        fnames = [
            f for f in glob.glob(os.path.join(d, "*")) if "forecast" in f
        ]
        dc = {}
        dc["step1"] = [f for f in fnames if "_step1" in f]
        dc["step2"] = [f for f in fnames if "_step2" in f]
        dc["init"] = [f for f in fnames if "_init" in f]
        for name, l in dc.items():
            l.sort()
            dfs = []
            for fname in l:
                print(fname)
                # The first character of the filename encodes the frequency,
                # e.g. "Y..." -> yearly; keep only the rows of that frequency.
                freq = fname.split("/")[-1][0]
                print(f"freq: {freq}")
                df = pd.read_csv(fname, index_col=0)
                df = df[df.index.str.contains(freq, na=False)]
                dfs.append(df)
            df = pd.concat(dfs, sort=False)
            result_fname = os.path.join(d, f"{name}_forecast.csv")
            df.to_csv(result_fname)
            if name == "step1":
                step1_dfs.append(df)
            elif name == "step2":
                step2_dfs.append(df)
            elif name == "init":
                init_dfs.append(df)
            else:
                print("WARNING: Couldn't find name of df")
            res = score_M4(predictions=df.values,
                           df_results_name=os.path.join(
                               d, f"{name}_result_median.csv"),
                           val=False)
            print(res)
    step1_fnames = glob.glob(os.path.join(folder_path,
                                          "**/step1_forecast.csv"))
    step2_fnames = glob.glob(os.path.join(folder_path,
                                          "**/step2_forecast.csv"))
    init_fnames = glob.glob(os.path.join(folder_path,
                                         "**/init_forecast.csv"))
    print(step1_fnames)
    print(step2_fnames)
    print(init_fnames)
    csv_dict = {
        "step1": step1_fnames,
        "step2": step2_fnames,
        "init": init_fnames,
    }
    for name, csv_list in csv_dict.items():
        df_list = []
        for csv_fname in csv_list:
            print(csv_fname)
            df = pd.read_csv(csv_fname)
            df_list.append(df)
        df_concat = pd.concat(df_list)
        by_row = df_concat.groupby(df_concat.index)
        df_median = by_row.median()
        df_mean = by_row.mean()
        preds_median = df_median.values
        preds_mean = df_mean.values
        print("scoring median")
        res_median = score_M4(predictions=preds_median,
                              df_results_name=os.path.join(
                                  folder_path, f"{name}_result_median.csv"),
                              val=False)
        print("scoring mean")
        res_mean = score_M4(predictions=preds_mean,
                            df_results_name=os.path.join(
                                folder_path, f"{name}_result_mean.csv"),
                            val=False)
        #df_median.to_csv(os.path.join(experiment_folder, "forecast_median.csv"))
        #df_mean.to_csv(os.path.join(experiment_folder, "forecast_mean.csv"))
        print(res_median)
        print(res_mean)
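# A hypothetical invocation of the ensemble evaluation above. The folder
# layout (one subdirectory per finetuning run, each holding the per-frequency
# *_step1/_step2/_initialization forecast CSVs) and the path itself are
# assumptions for illustration:
#
#     evaluate_ensemble("storage/experiments/finetune_ensemble")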