def main(args):
    data_path = os.path.join(args.dataset_path, args.dataset)
    train_data = TSDataset(data_path + '-train.csv', args.windows, args.horizon)
    torch.save(train_data.scaler, 'scaler.pt')
    val_data = TSDataset(data_path + '-val.csv', args.windows, args.horizon, train_data.scaler)
    # test_data = TSDataset(data_path + '-test.csv', args.windows, args.horizon)
    train_loader = DataLoader(train_data, args.batch_size, shuffle=True)
    val_loader = DataLoader(val_data, args.batch_size, shuffle=True)
    D = train_data[0][0].shape[-1]
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    net = DSANet(D, args.windows, args.horizon,
                 args.n_global, args.n_local, args.n_local_filter,
                 args.n_global_head, args.n_global_hidden, args.n_global_stack,
                 args.n_local_head, args.n_local_hidden, args.n_local_stack,
                 args.dropout)
    net = net.to(device)
    loss_fn = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=args.lr)

    # run args.epochs epochs, numbered from 1
    for e in range(1, args.epochs + 1):
        # train one epoch
        train_loss = 0.0
        for index, (X, y) in enumerate(train_loader):
            optimizer.zero_grad()
            yhat = net(X.type(torch.float32).to(device))
            loss = loss_fn(yhat, y.type(torch.float32).to(device))
            train_loss += loss.item()
            loss.backward()
            optimizer.step()

        # validate
        val_loss = 0.0
        with torch.no_grad():
            for (X, y) in val_loader:
                yhat = net(X.type(torch.float32).to(device))
                loss = loss_fn(yhat, y.type(torch.float32).to(device))
                val_loss += loss.item()

        train_loss /= len(train_loader)
        val_loss /= len(val_loader)
        print('Epoch %d: train loss is %.2f, val loss is %.2f' % (e, train_loss, val_loss))
        torch.save(net.state_dict(), 'net-%d-%.2f.pt' % (e, val_loss))
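# --- Illustrative sketch, not part of the original script ---
# A minimal argparse driver for the `main(args)` above. Every flag name mirrors an
# attribute that the function reads; the default values are assumptions for
# illustration only, not the values used in the project.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Train DSANet on a CSV time-series dataset')
    parser.add_argument('--dataset_path', type=str, default='data')
    parser.add_argument('--dataset', type=str, default='electricity')
    parser.add_argument('--windows', type=int, default=64)
    parser.add_argument('--horizon', type=int, default=3)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--epochs', type=int, default=100)
    parser.add_argument('--n_global', type=int, default=32)
    parser.add_argument('--n_local', type=int, default=3)
    parser.add_argument('--n_local_filter', type=int, default=32)
    parser.add_argument('--n_global_head', type=int, default=8)
    parser.add_argument('--n_global_hidden', type=int, default=64)
    parser.add_argument('--n_global_stack', type=int, default=2)
    parser.add_argument('--n_local_head', type=int, default=8)
    parser.add_argument('--n_local_hidden', type=int, default=64)
    parser.add_argument('--n_local_stack', type=int, default=2)
    parser.add_argument('--dropout', type=float, default=0.1)
    main(parser.parse_args())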
def main(hparams):
    """
    Main training routine specific for this project
    """
    # ------------------------
    # 1 INIT LIGHTNING MODEL
    # ------------------------
    print('loading model...')
    model = DSANet(hparams)
    print('model built')

    # ------------------------
    # 2 INIT TEST TUBE EXP
    # ------------------------
    # init experiment
    exp = Experiment(
        name='dsanet_exp_{}_window={}_horizon={}'.format(hparams.data_name, hparams.window, hparams.horizon),
        save_dir=hparams.test_tube_save_path,
        autosave=False,
        description='test demo'
    )
    exp.argparse(hparams)
    exp.save()

    # ------------------------
    # 3 DEFINE CALLBACKS
    # ------------------------
    model_save_path = '{}/{}/{}'.format(hparams.model_save_path, exp.name, exp.version)
    early_stop = EarlyStopping(
        monitor='val_loss',
        patience=5,
        verbose=True,
        mode='min'
    )

    # ------------------------
    # 4 INIT TRAINER
    # ------------------------
    trainer = Trainer(
        gpus=[0],
        # auto_scale_batch_size=True,
        max_epochs=10,
        # num_processes=2,
        # num_nodes=2
    )

    # ------------------------
    # 5 START TRAINING
    # ------------------------
    trainer.fit(model)

    print('View tensorboard logs by running\ntensorboard --logdir %s' % os.getcwd())
    print('and going to http://localhost:6006 on your browser')
def main(args):
    data_path = os.path.join(args.dataset_path, args.dataset)
    scaler = torch.load(args.scaler)
    test_data = TSDataset(data_path + '-test.csv', args.windows, args.horizon, scaler)
    test_loader = DataLoader(test_data, args.batch_size)
    D = test_data[0][0].shape[-1]
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    net = DSANet(D, args.windows, args.horizon,
                 args.n_global, args.n_local, args.n_local_filter,
                 args.n_global_head, args.n_global_hidden, args.n_global_stack,
                 args.n_local_head, args.n_local_hidden, args.n_local_stack,
                 args.dropout)

    # map each requested metric name to its function
    loss_fns = []
    for metric in args.metrics:
        if metric == 'RMSE':
            loss_fns.append(RMSE)
        elif metric == 'MSE':
            loss_fns.append(MSE)
        elif metric == 'MAE':
            loss_fns.append(MAE)
        elif metric == 'RRSE':
            loss_fns.append(RRSE)
        elif metric == 'MAPE':
            loss_fns.append(MAPE)
        else:
            loss_fns.append(lambda yhat, y: np.nan)

    net.load_state_dict(torch.load(args.model))
    net = net.to(device)

    test_losses = [0.0 for i in range(len(loss_fns))]
    with torch.no_grad():
        for (X, y) in test_loader:
            yhat = net(X.type(torch.float32).to(device)).to('cpu').numpy()
            y = y.to('cpu').numpy()
            for i, loss_fn in enumerate(loss_fns):
                loss = loss_fn(yhat, y)
                test_losses[i] += loss

    # report the average of the accumulated per-batch losses
    for metric, loss in zip(args.metrics, test_losses):
        print('%s: %.2f' % (metric, loss / len(test_loader)))
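# --- Illustrative sketch, not part of the original script ---
# Assumed NumPy implementations of the metric helpers (MSE, RMSE, MAE, RRSE, MAPE)
# referenced by the test script above. These are the standard formulas only; the
# project's own definitions may differ.
import numpy as np

def MSE(yhat, y):
    return np.mean((yhat - y) ** 2)

def RMSE(yhat, y):
    return np.sqrt(MSE(yhat, y))

def MAE(yhat, y):
    return np.mean(np.abs(yhat - y))

def RRSE(yhat, y):
    # root relative squared error: squared error normalised by the deviation of y from its mean
    return np.sqrt(np.sum((yhat - y) ** 2) / np.sum((y - np.mean(y)) ** 2))

def MAPE(yhat, y, eps=1e-8):
    # mean absolute percentage error; eps guards against division by zero
    return np.mean(np.abs((y - yhat) / (np.abs(y) + eps)))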
def main(hparams):
    """
    Main training routine specific for this project
    """
    # ------------------------
    # 1 INIT LIGHTNING MODEL
    # ------------------------
    print('loading model...')
    model = DSANet(hparams)
    print('model built')

    # ------------------------
    # 2 INIT TEST TUBE EXP
    # ------------------------
    logger = TestTubeLogger("tb_logs_v2", name="my_dsanet_pow")

    # ------------------------
    # 3 DEFINE CALLBACKS
    # ------------------------
    early_stop_callback = EarlyStopping(monitor='val_loss', patience=25, verbose=False, mode='min')

    # ------------------------
    # 4 INIT TRAINER
    # ------------------------
    trainer = pl.Trainer(
        gpus=2,
        distributed_backend='dp',
        logger=logger,
        early_stop_callback=early_stop_callback,
        show_progress_bar=False,
        profiler=True,
    )

    # ------------------------
    # 5 START TRAINING
    # ------------------------
    st_time = datetime.now()
    result = trainer.fit(model)
    print(result)
    eval_time = str(datetime.now() - st_time)
    print(f"Train time: {eval_time}")

    st_time = datetime.now()
    result = trainer.test()
    eval_time = str(datetime.now() - st_time)
    print(f"Test time: {eval_time}")
    print(result)
# although we use HyperOptArgumentParser, we are using it only as argparse right now
parent_parser = HyperOptArgumentParser(strategy='grid_search', add_help=False)

# gpu args
parent_parser.add_argument('--test_tube_save_path', type=str,
                           default=test_tube_dir, help='where to save logs')
parent_parser.add_argument('--model_save_path', type=str,
                           default=checkpoint_dir, help='where to save model')

# allow model to overwrite or extend args
parser = DSANet.add_model_specific_args(parent_parser, root_dir)
hyperparams = parser.parse_args()
print(hyperparams)

# ---------------------
# RUN TRAINING
# ---------------------
# run on HPC cluster
print('RUNNING ON CPU')

# * change the following code to comments for grid search
main(hyperparams)

# * recover the following code for grid search
# hyperparams.optimize_parallel_cpu(
#     main,
#     nb_trials=24,  # this number needs to be adjusted according to the actual situation
def optimize(optimizer_params):
    """
    Main training routine specific for this project
    """
    global out_file, ITERATION

    ITERATION += 1

    # dirs
    root_dir = os.path.dirname(os.path.realpath(__file__))
    demo_log_dir = os.path.join(root_dir, 'dsanet_logs')
    checkpoint_dir = os.path.join(demo_log_dir, 'model_weights')
    test_tube_dir = os.path.join(demo_log_dir, 'test_tube_data')

    # a plain ArgumentParser is enough here; the tuned values come from the optimizer
    parent_parser = ArgumentParser(add_help=False)

    # gpu args
    parent_parser.add_argument('--test_tube_save_path', type=str,
                               default=test_tube_dir, help='where to save logs')
    parent_parser.add_argument('--model_save_path', type=str,
                               default=checkpoint_dir, help='where to save model')

    # allow model to overwrite or extend args
    parser = DSANet.add_model_specific_args(parent_parser, root_dir)
    hyperparams = parser.parse_args()

    # overwrite the parsed defaults with the values proposed by the optimizer
    setattr(hyperparams, 'batch_size', int(optimizer_params['batch_size']))
    setattr(hyperparams, 'drop_prob', optimizer_params['dropout'])
    setattr(hyperparams, 'learning_rate', optimizer_params['learning_rate'])
    setattr(hyperparams, 'd_model', int(optimizer_params['units']))
    setattr(hyperparams, 'local', int(optimizer_params['local']))
    setattr(hyperparams, 'n_kernels', int(optimizer_params['n_kernels']))
    setattr(hyperparams, 'window', int(optimizer_params['window']))
    hparams = hyperparams
    print(f"TESTING hparams: mv:{hparams.n_multiv}, bs:{hparams.batch_size}, drop:{hparams.drop_prob}, lr:{hparams.learning_rate}, d_model:{hparams.d_model}, local:{hparams.local}, n_kernels:{hparams.n_kernels}, window:{hparams.window}")

    # ------------------------
    # 1 INIT LIGHTNING MODEL
    # ------------------------
    print('loading model...')
    model = DSANet(hparams)
    print('model built')

    # ------------------------
    # 2 INIT TEST TUBE EXP
    # ------------------------
    logger = TestTubeLogger("tb_logs", name="my_dsanet_power_v2")

    # ------------------------
    # 3 DEFINE CALLBACKS
    # ------------------------
    early_stop_callback = EarlyStopping(
        monitor='val_loss',
        patience=25,
        verbose=False,
        mode='min'
    )

    # ------------------------
    # 4 INIT TRAINER
    # ------------------------
    trainer = pl.Trainer(
        gpus=4,
        distributed_backend='dp',
        logger=logger,
        early_stop_callback=early_stop_callback,
        show_progress_bar=False,
        log_save_interval=10,
    )

    # ------------------------
    # 5 START TRAINING
    # ------------------------
    st_time = datetime.now()
    result = trainer.fit(model)
    print(result)
    eval_time = str(datetime.now() - st_time)
    print(eval_time)

    print(f"Iteration {ITERATION}: Getting results...")
    csv_load_path = os.path.join(root_dir, logger.experiment.save_dir)
    csv_load_path = '{}/{}/{}{}'.format(csv_load_path, logger.experiment.name,
                                        'version_', logger.experiment.version)
    df = pd.read_csv('{}/{}'.format(csv_load_path, 'metrics.csv'))  # change to experiment save dir
    min_idx = df['val_nd'].idxmin()

    # append the trial's best-epoch metrics to the result CSV
    of_connection = open(out_file, 'a')
    writer = csv.writer(of_connection)
    writer.writerow([optimizer_params, hparams,
                     df['val_loss'].iloc[min_idx], df['val_loss'].iloc[min_idx],
                     df['val_nd'].iloc[min_idx], df['NRMSE'].iloc[min_idx],
                     df['val_rho10'].iloc[min_idx], df['val_rho50'].iloc[min_idx],
                     df['val_rho90'].iloc[min_idx], eval_time, STATUS_OK])
    of_connection.close()
    # torch.cuda.empty_cache()

    return {'loss': df['val_nd'].iloc[min_idx],
            'ND': df['val_nd'].iloc[min_idx],
            'NRMSE': df['NRMSE'].iloc[min_idx],
            'val_loss': df['val_loss'].iloc[min_idx],
            'params': optimizer_params,
            'rho_metric': {'rho10': df['val_rho10'].iloc[min_idx],
                           'rho50': df['val_rho50'].iloc[min_idx],
                           'rho90': df['val_rho90'].iloc[min_idx]},
            'iteration': ITERATION,
            'eval_time': eval_time,
            'status': STATUS_OK}
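# --- Illustrative sketch, not part of the original script ---
# One way this objective could be driven with hyperopt (the STATUS_OK and ITERATION
# globals above suggest that library). The search-space bounds, max_evals and the
# out_file path are illustrative assumptions, not the values used in the original runs.
import numpy as np
from hyperopt import fmin, tpe, hp, Trials

if __name__ == '__main__':
    ITERATION = 0
    out_file = 'hyperopt_trials.csv'  # assumed log path
    space = {
        'batch_size': hp.quniform('batch_size', 16, 128, 16),
        'dropout': hp.uniform('dropout', 0.1, 0.5),
        'learning_rate': hp.loguniform('learning_rate', np.log(1e-4), np.log(1e-2)),
        'units': hp.quniform('units', 32, 512, 32),
        'local': hp.quniform('local', 3, 7, 2),
        'n_kernels': hp.quniform('n_kernels', 16, 64, 16),
        'window': hp.quniform('window', 32, 168, 8),
    }
    trials = Trials()
    # minimise the 'loss' entry returned by optimize() over the search space
    best = fmin(fn=optimize, space=space, algo=tpe.suggest, max_evals=24, trials=trials)
    print(best)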
import csv
import os

import pytorch_lightning as pl
import torch

from model import DSANet
from datetime import datetime

out_file = '/scratch/project_2002244/DSANet/save/test_runs_electricity_final_v2.csv'
ckpt_load_path = '/scratch/project_2002244/DSANet/tb_logs_v2'

# collect every checkpoint file under the log directory
path_list = [
    os.path.join(dirpath, filename)
    for dirpath, _, filenames in os.walk(ckpt_load_path)
    for filename in filenames if filename.endswith('.ckpt')
]

for filename in path_list:
    model = DSANet.load_from_checkpoint(filename)
    trainer = pl.Trainer(resume_from_checkpoint=filename)

    if model.hparams.n_multiv == 321 or model.hparams.n_multiv == 327:
        print('we have electricity data')
    else:
        continue

    if hasattr(model.hparams, 'mcdropout'):
        print("we have mcdropout")
    else:
        print("we set mcdropout to False")
        setattr(model.hparams, 'mcdropout', 'False')

    if hasattr(model.hparams, 'powerset'):
        print("we have powerset")
def main(hparams):
    """
    Main training routine specific for this project
    """
    # ------------------------
    # 1 INIT LIGHTNING MODEL
    # ------------------------
    print('loading model...')
    model = DSANet(hparams)
    print('model built')

    # ------------------------
    # 2 INIT TEST TUBE EXP
    # ------------------------
    # init experiment
    exp = Experiment(name='dsanet_exp_{}_window={}_horizon={}'.format(
                         hparams.data_name, hparams.window, hparams.horizon),
                     save_dir=hparams.test_tube_save_path,
                     autosave=False,
                     description='test demo')
    exp.argparse(hparams)
    exp.save()

    # ------------------------
    # 3 DEFINE CALLBACKS
    # ------------------------
    model_save_path = '{}/{}/{}'.format(hparams.model_save_path, exp.name, exp.version)
    checkpoint_callback = ModelCheckpoint(filepath=model_save_path,
                                          save_best_only=True,
                                          verbose=True,
                                          monitor='val_loss',
                                          mode='auto')
    early_stop = EarlyStopping(monitor='val_loss',
                               patience=25,
                               verbose=True,
                               mode='min')

    # ------------------------
    # 4 INIT TRAINER
    # ------------------------
    trainer = Trainer(
        gpus="0",
        distributed_backend='dp',
        experiment=exp,
        early_stop_callback=early_stop,
        checkpoint_callback=checkpoint_callback,
    )

    # ------------------------
    # 5 START TRAINING
    # ------------------------
    if hparams.test_only:
        model_load_path = '{}/{}'.format(hparams.model_save_path, exp.name)
        # metrics_load_path = '{}/{}'.format(hparams.test_tube_save_path, exp.name)

        path_list = [
            os.path.join(dirpath, filename)
            for dirpath, _, filenames in os.walk(model_load_path)
            for filename in filenames if filename.endswith('.ckpt')
        ]
        # for dirpath, dirnames, filenames in os.walk(model_load_path):
        #     if filename in [f for f in filenames if f.endswith(".ckpt")]:
        for filename in path_list:
            print(filename)
            data = filename.split("/")
            version_number = data[len(data) - 2]
            metrics_load_path = '{}/{}'.format(hparams.test_tube_save_path, exp.name)
            metrics_load_path = '{}/{}{}/{}'.format(metrics_load_path, 'version_',
                                                    version_number, 'meta_tags.csv')
            print(metrics_load_path)
            hparams.metrics_load_path = metrics_load_path
            model = DSANet(hparams)
            model = DSANet.load_from_metrics(weights_path=filename,
                                             tags_csv=metrics_load_path,
                                             on_gpu=True)
            # model = LightningModule.load_from_checkpoint(filename)

            # test (pass in the model)
            hparams.metrics_load_path = metrics_load_path
            result = trainer.test(model)
            print(result)
    else:
        result = trainer.fit(model)

    print('View tensorboard logs by running\ntensorboard --logdir %s' % os.getcwd())
    print('and going to http://localhost:6006 on your browser')
def optimize(optimizer_params):
    """
    Main training routine specific for this project
    """
    global val_results, test_results
    global val_out_file, test_out_file, ITERATION, epochs

    ITERATION += 1

    root_dir = os.path.dirname(os.path.realpath(__file__))

    # a plain ArgumentParser is enough here; the tuned values come from the optimizer
    parent_parser = ArgumentParser(add_help=False)

    # allow model to overwrite or extend args
    parser = DSANet.add_model_specific_args(parent_parser, root_dir)
    hyperparams = parser.parse_args()

    dataset = DataUtil(hyperparams, 2)
    if hasattr(dataset, 'scale'):
        # print('we have scale')
        setattr(hyperparams, 'scale', dataset.scale)
        # print(dataset.scale)
    if hasattr(dataset, 'scaler'):
        # print('we have scaler')
        setattr(hyperparams, 'scaler', dataset.scaler)
        # print(dataset.scaler)
    setattr(hyperparams, 'n_multiv', dataset.m)

    # overwrite the parsed defaults with the values proposed by the optimizer
    setattr(hyperparams, 'batch_size', int(optimizer_params['batch_size']))
    setattr(hyperparams, 'drop_prob', optimizer_params['dropout'])
    setattr(hyperparams, 'learning_rate', optimizer_params['learning_rate'])
    setattr(hyperparams, 'd_model', int(optimizer_params['units']))
    setattr(hyperparams, 'local', int(optimizer_params['local']))
    setattr(hyperparams, 'n_kernels', int(optimizer_params['n_kernels']))
    setattr(hyperparams, 'window', int(optimizer_params['window']))
    hparams = hyperparams
    print(
        f"\n#######\nTESTING hparams: mv:{hparams.n_multiv}, bs:{hparams.batch_size}, drop:{hparams.drop_prob}, lr:{hparams.learning_rate}, d_model:{hparams.d_model}, local:{hparams.local}, n_kernels:{hparams.n_kernels}, window:{hparams.window}\n#######"
    )

    # ------------------------
    # 1 INIT LIGHTNING MODEL
    # ------------------------
    print('loading model...')
    model = DSANet(hparams)
    print('model built')

    # ------------------------
    # 2 INIT TEST TUBE EXP
    # ------------------------
    filename = '{}{}{}{}{}{}'.format('my_dsanet_', hparams.data_name, '_',
                                     hparams.powerset, '_', str(hparams.calendar))
    logger = TestTubeLogger("tb_logs_v2", filename)

    # ------------------------
    # 3 DEFINE CALLBACKS
    # ------------------------
    early_stop_callback = EarlyStopping(monitor='val_loss',
                                        patience=5,
                                        verbose=False,
                                        mode='min')

    # ------------------------
    # 4 INIT TRAINER
    # ------------------------
    trainer = pl.Trainer(gpus=4,
                         distributed_backend='dp',
                         logger=logger,
                         early_stop_callback=early_stop_callback,
                         show_progress_bar=False,
                         profiler=True,
                         fast_dev_run=False,
                         max_epochs=100)

    # ------------------------
    # 5 START TRAINING
    # ------------------------
    st_time = datetime.now()
    result = trainer.fit(model)
    eval_result = model.val_results
    df1 = pd.DataFrame(eval_result, [ITERATION])
    print(result)
    eval_time = str(datetime.now() - st_time)
    print(f"Train time: {eval_time}, Results: {eval_result}")

    st_time = datetime.now()
    model.hparams.mcdropout = 'True'
    trainer.test(model)
    eval_time = str(datetime.now() - st_time)
    test_result = model.test_results
    df2 = pd.DataFrame(test_result, [ITERATION])
    print(f"Test time: {eval_time}, Results: {test_result}")

    # collect the per-trial validation and test metrics together with the hyperparameters
    df1 = pd.concat([df1, pd.DataFrame(vars(hparams), [ITERATION])], axis=1, sort=False)
    df2 = pd.concat([df2, pd.DataFrame(vars(hparams), [ITERATION])], axis=1, sort=False)
    val_results = pd.concat([val_results, df1], axis=0, sort=False)
    test_results = pd.concat([test_results, df2], axis=0, sort=False)

    return eval_result['val_nd_all']
def main(hparams):
    """
    Main training routine specific for this project
    """
    global val_results, test_results

    # ------------------------
    # 1 INIT LIGHTNING MODEL
    # ------------------------
    print('loading model...')
    model = DSANet(hparams)
    print('model built')
    print(
        f"\n#######\nTESTING hparams: mv:{hparams.n_multiv}, bs:{hparams.batch_size}, drop:{hparams.drop_prob}, lr:{hparams.learning_rate}, d_model:{hparams.d_model}, local:{hparams.local}, n_kernels:{hparams.n_kernels}, window:{hparams.window}\n#######"
    )

    # ------------------------
    # 2 INIT TEST TUBE EXP
    # ------------------------
    filename = '{}{}{}{}{}{}'.format('my_dsanet_', hparams.data_name, '_',
                                     hparams.powerset, '_', str(hparams.calendar))
    logger = TestTubeLogger("tb_logs_v2", filename)

    # ------------------------
    # 3 DEFINE CALLBACKS
    # ------------------------
    early_stop_callback = EarlyStopping(monitor='val_loss',
                                        patience=35,
                                        verbose=False,
                                        mode='min')

    # ------------------------
    # 4 INIT TRAINER
    # ------------------------
    trainer = pl.Trainer(gpus=2,
                         distributed_backend='dp',
                         logger=logger,
                         early_stop_callback=early_stop_callback,
                         show_progress_bar=False,
                         profiler=True,
                         fast_dev_run=False)

    # ------------------------
    # 5 START TRAINING
    # ------------------------
    if not hparams.test_only:
        st_time = datetime.now()
        result = trainer.fit(model)
        eval_result = model.val_results
        df1 = pd.DataFrame(eval_result, [0])
        # print(result)
        eval_time = str(datetime.now() - st_time)
        print(f"Train time: {eval_time}, Results: {eval_result}")

        st_time = datetime.now()
        model.hparams.mcdropout = 'True'
        trainer.test(model)
        eval_time = str(datetime.now() - st_time)
        test_result = model.test_results
        df2 = pd.DataFrame(test_result, [0])
        print(f"Test time: {eval_time}, Results: {test_result}")

        df1 = pd.concat([df1, pd.DataFrame(vars(hparams), [0])], axis=1, sort=False)
        df2 = pd.concat([df2, pd.DataFrame(vars(hparams), [0])], axis=1, sort=False)
        val_results = pd.concat([val_results, df1], axis=0, sort=False)
        test_results = pd.concat([test_results, df2], axis=0, sort=False)

        val_filename = '{}{}{}{}{}{}'.format(filename, '_', str(hparams.window), '_',
                                             str(hparams.horizon), '_val.csv')
        test_filename = '{}{}{}{}{}{}'.format(filename, '_', str(hparams.window), '_',
                                              str(hparams.horizon), '_test.csv')
        val_results.to_csv(val_filename, mode='a')
        test_results.to_csv(test_filename, mode='a')
    else:
        st_time = datetime.now()
        model.hparams.mcdropout = 'True'
        trainer.test(model)
        eval_time = str(datetime.now() - st_time)
        test_result = model.test_results
        df2 = pd.DataFrame(test_result, [0])
        print(f"Test time: {eval_time}, Results: {test_result}")

        df2 = pd.concat([df2, pd.DataFrame(vars(hparams), [0])], axis=1, sort=False)
        test_results = pd.concat([test_results, df2], axis=0, sort=False)

        test_filename = '{}{}{}{}{}{}'.format(filename, '_', str(hparams.window), '_',
                                              str(hparams.horizon), '_test.csv')
        test_results.to_csv(test_filename, mode='a')
def optimize(optimizer_params):
    """
    Main training routine specific for this project
    """
    logging.basicConfig(level=logging.INFO)

    # dirs
    root_dir = os.path.dirname(os.path.realpath(__file__))
    demo_log_dir = os.path.join(root_dir, 'dsanet_logs')
    checkpoint_dir = os.path.join(demo_log_dir, 'model_weights')
    test_tube_dir = os.path.join(demo_log_dir, 'test_tube_data')

    # although we use HyperOptArgumentParser, we are using it only as argparse right now
    parent_parser = HyperOptArgumentParser(strategy='grid_search', add_help=False)

    # gpu args
    parent_parser.add_argument('--test_tube_save_path', type=str,
                               default=test_tube_dir, help='where to save logs')
    parent_parser.add_argument('--model_save_path', type=str,
                               default=checkpoint_dir, help='where to save model')

    # allow model to overwrite or extend args
    parser = DSANet.add_model_specific_args(parent_parser, root_dir)
    hyperparams = parser.parse_args()
    print(hyperparams)

    # overwrite the parsed defaults with the values proposed by the optimizer
    setattr(hyperparams, 'batch_size', int(optimizer_params['batch_size']))
    setattr(hyperparams, 'drop_prob', optimizer_params['dropout'])
    setattr(hyperparams, 'learning_rate', optimizer_params['learning_rate'])
    setattr(hyperparams, 'd_model', int(optimizer_params['units']))
    # hyperparams['batch_size'] = optimizer_params['batch_size']
    # hyperparams['drop_prob'] = optimizer_params['dropout']
    # hyperparams['learning_rate'] = optimizer_params['learning_rate']
    # hyperparams['d_model'] = optimizer_params['units']
    print(hyperparams)
    hparams = hyperparams

    # ------------------------
    # 1 INIT LIGHTNING MODEL
    # ------------------------
    print('loading model...')
    model = DSANet(hparams)
    print('model built')

    # ------------------------
    # 2 INIT TEST TUBE EXP
    # ------------------------
    # init experiment
    exp = Experiment(name='dsanet_exp_{}_window={}_horizon={}'.format(
                         hparams.data_name, hparams.window, hparams.horizon),
                     save_dir=hparams.test_tube_save_path,
                     autosave=False,
                     description='test demo')
    exp.argparse(hparams)
    exp.save()

    # ------------------------
    # 3 DEFINE CALLBACKS
    # ------------------------
    model_save_path = '{}/{}/{}'.format(hparams.model_save_path, exp.name, exp.version)
    checkpoint_callback = ModelCheckpoint(filepath=model_save_path,
                                          save_best_only=True,
                                          verbose=True,
                                          monitor='val_loss',
                                          mode='auto')
    early_stop = EarlyStopping(monitor='val_loss',
                               patience=25,
                               verbose=True,
                               mode='min')

    # ------------------------
    # 4 INIT TRAINER
    # ------------------------
    trainer = Trainer(
        gpus="0,1",
        distributed_backend='ddp',
        experiment=exp,
        early_stop_callback=early_stop,
        checkpoint_callback=checkpoint_callback,
    )

    # ------------------------
    # 5 START TRAINING
    # ------------------------
    st_time = datetime.now()
    trainer.fit(model)
    eval_time = str(datetime.now() - st_time)

    print("Iteration %d: Getting results ..." % ITERATION)
    csv_load_path = '{}/{}/{}{}'.format(hparams.test_tube_save_path, exp.name,
                                        'version_', exp.version)
    df = pd.read_csv('{}/{}'.format(csv_load_path, 'metrics.csv'))  # change to experiment save dir
    min_idx = df['val_nd'].idxmin()

    # append the trial's best-epoch metrics to the result CSV
    of_connection = open(out_file, 'a')
    writer = csv.writer(of_connection)
    writer.writerow([
        optimizer_params, hparams, df['tng_loss'].iloc[min_idx],
        df['val_loss'].iloc[min_idx], df['val_nd'].iloc[min_idx],
        df['NRMSE'].iloc[min_idx], df['val_rho10'].iloc[min_idx],
        df['val_rho50'].iloc[min_idx], df['val_rho90'].iloc[min_idx],
        eval_time, STATUS_OK
    ])
    of_connection.close()

    return {
        'loss': df['val_nd'].iloc[min_idx],
        'ND': df['val_nd'].iloc[min_idx],
        'NRMSE': df['NRMSE'].iloc[min_idx],
        'val_loss': df['val_loss'].iloc[min_idx],
        'params': optimizer_params,
        'rho_metric': {
            'rho10': df['val_rho10'].iloc[min_idx],
            'rho50': df['val_rho50'].iloc[min_idx],
            'rho90': df['val_rho90'].iloc[min_idx]
        },
        'iteration': ITERATION,
        'eval_time': eval_time,
        'status': STATUS_OK
    }