def main():
    """Train a single-step forecasting model and report final metrics.

    Relies on module-level ``args``, ``device``, ``DataLoaderS``, ``gtnet``,
    ``Optim``, ``train`` and ``evaluate``.

    Returns:
        (vtest_acc, vtest_rae, vtest_corr, test_acc, test_rae, test_corr):
        rse/rae/corr of the best saved model on the validation and test sets.
    """
    # Load the dataset: 60% train / 20% validation, remainder is test.
    Data = DataLoaderS(args.data, 0.6, 0.2, device, args.horizon,
                       args.seq_in_len, args.normalize)

    # Build the graph temporal network model.
    model = gtnet(args.gcn_true, args.buildA_true, args.gcn_depth,
                  args.num_nodes, device,
                  dropout=args.dropout,
                  subgraph_size=args.subgraph_size,
                  node_dim=args.node_dim,
                  dilation_exponential=args.dilation_exponential,
                  conv_channels=args.conv_channels,
                  residual_channels=args.residual_channels,
                  skip_channels=args.skip_channels,
                  end_channels=args.end_channels,
                  seq_length=args.seq_in_len,
                  in_dim=args.in_dim,
                  out_dim=args.seq_out_len,
                  layers=args.layers,
                  propalpha=args.propalpha,
                  tanhalpha=args.tanhalpha,
                  layer_norm_affline=False)
    # Move the model to the target device.
    model = model.to(device)

    # Dump the run configuration and model size.
    print(args)
    print('The recpetive field size is', model.receptive_field)
    nParams = sum([p.nelement() for p in model.parameters()])
    print('Number of model parameters is', nParams, flush=True)

    # Training criterion with summed (not averaged) reduction.
    # NOTE: the deprecated `size_average=False` was replaced by its exact
    # modern equivalent `reduction='sum'`.
    if args.L1Loss:
        criterion = nn.L1Loss(reduction='sum').to(device)
    else:
        criterion = nn.MSELoss(reduction='sum').to(device)
    # Summed squared error between predictions and targets.
    evaluateL2 = nn.MSELoss(reduction='sum').to(device)
    # Summed absolute error between predictions and targets.
    evaluateL1 = nn.L1Loss(reduction='sum').to(device)

    best_val = 10000000
    optim = Optim(model.parameters(), args.optim, args.lr, args.clip,
                  lr_decay=args.weight_decay)

    # At any point you can hit Ctrl + C to break out of training early.
    try:
        print('begin training')
        # One iteration of the outer loop is a full training epoch.
        for epoch in range(1, args.epochs + 1):
            epoch_start_time = time.time()
            # Training loss over the training split.
            train_loss = train(Data, Data.train[0], Data.train[1], model,
                               criterion, optim, args.batch_size)
            # Evaluation metrics over the validation split.
            val_loss, val_rae, val_corr = evaluate(Data, Data.valid[0],
                                                   Data.valid[1], model,
                                                   evaluateL2, evaluateL1,
                                                   args.batch_size)
            print(
                '| end of epoch {:3d} | time: {:5.2f}s | train_loss {:5.4f} | valid rse {:5.4f} | valid rae {:5.4f} | valid corr {:5.4f}'
                .format(epoch, (time.time() - epoch_start_time), train_loss,
                        val_loss, val_rae, val_corr), flush=True)
            # Save the model whenever the validation loss improves.
            if val_loss < best_val:
                with open(args.save, 'wb') as f:
                    torch.save(model, f)
                best_val = val_loss
            # Report test-set metrics every 5 epochs.
            if epoch % 5 == 0:
                test_acc, test_rae, test_corr = evaluate(
                    Data, Data.test[0], Data.test[1], model, evaluateL2,
                    evaluateL1, args.batch_size)
                print(
                    "test rse {:5.4f} | test rae {:5.4f} | test corr {:5.4f}".
                    format(test_acc, test_rae, test_corr), flush=True)
    except KeyboardInterrupt:
        # Ctrl + C: separator line, then fall through to final evaluation.
        print('-' * 89)
        print('Exiting from training early')

    # Load the best saved model.
    with open(args.save, 'rb') as f:
        model = torch.load(f)

    # Final evaluation of the best model on validation and test splits.
    vtest_acc, vtest_rae, vtest_corr = evaluate(Data, Data.valid[0],
                                                Data.valid[1], model,
                                                evaluateL2, evaluateL1,
                                                args.batch_size)
    test_acc, test_rae, test_corr = evaluate(Data, Data.test[0], Data.test[1],
                                             model, evaluateL2, evaluateL1,
                                             args.batch_size)
    print(
        "final test rse {:5.4f} | test rae {:5.4f} | test corr {:5.4f}".format(
            test_acc, test_rae, test_corr))
    return vtest_acc, vtest_rae, vtest_corr, test_acc, test_rae, test_corr
def main(runid):
    """Train a multi-step (seq2seq) forecasting model for one run.

    Relies on module-level ``args``, ``load_dataset``, ``load_adj``,
    ``gtnet``, ``Trainer`` and ``metric``. ``runid`` only distinguishes
    the checkpoint filename between repeated runs.

    Returns:
        (vmae, vmape, vrmse, mae, mape, rmse): scalar validation metrics
        plus per-horizon lists of test metrics for the best checkpoint.
    """
    # Pick GPU when available. NOTE(review): this deliberately keeps the
    # original behavior of ignoring args.device (the original also had a
    # dead `device = torch.device(args.device)` that was always overwritten).
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Train/val/test loaders all use the same batch size.
    dataloader = load_dataset(args.data, args.batch_size, args.batch_size,
                              args.batch_size)
    scaler = dataloader['scaler']

    # Predefined adjacency matrix with self-loops removed.
    predefined_A = load_adj(args.adj_data)
    predefined_A = torch.tensor(predefined_A) - torch.eye(args.num_nodes)
    predefined_A = predefined_A.to(device)

    model = gtnet(args.gcn_true, args.buildA_true, args.gcn_depth,
                  args.num_nodes, device,
                  predefined_A=predefined_A,
                  dropout=args.dropout,
                  subgraph_size=args.subgraph_size,
                  node_dim=args.node_dim,
                  dilation_exponential=args.dilation_exponential,
                  conv_channels=args.conv_channels,
                  residual_channels=args.residual_channels,
                  skip_channels=args.skip_channels,
                  end_channels=args.end_channels,
                  seq_length=args.seq_in_len,
                  in_dim=args.in_dim,
                  out_dim=args.seq_out_len,
                  layers=args.layers,
                  propalpha=args.propalpha,
                  tanhalpha=args.tanhalpha,
                  layer_norm_affline=True)

    print(args)
    print('The recpetive field size is', model.receptive_field)
    nParams = sum([p.nelement() for p in model.parameters()])
    print('Number of model parameters is', nParams)

    engine = Trainer(model, args.learning_rate, args.weight_decay, args.clip,
                     args.step_size1, args.seq_out_len, scaler, device,
                     args.cl)

    print("start training...", flush=True)
    his_loss = []
    val_time = []
    train_time = []
    minl = 1e5
    for i in range(1, args.epochs + 1):
        train_loss = []
        train_mape = []
        train_rmse = []
        t1 = time.time()
        dataloader['train_loader'].shuffle()
        for batch_idx, (x, y) in enumerate(
                dataloader['train_loader'].get_iterator()):
            # Inputs/targets arrive as (batch, time, node, feature);
            # transpose to (batch, feature, node, time) for the model.
            trainx = torch.Tensor(x).to(device)
            trainx = trainx.transpose(1, 3)
            trainy = torch.Tensor(y).to(device)
            trainy = trainy.transpose(1, 3)
            # Re-draw a random node permutation every step_size2 batches,
            # then train on num_split node subsets in turn.
            if batch_idx % args.step_size2 == 0:
                perm = np.random.permutation(range(args.num_nodes))
            num_sub = int(args.num_nodes / args.num_split)
            for j in range(args.num_split):
                # The last split absorbs any remainder nodes.
                if j != args.num_split - 1:
                    idx = perm[j * num_sub:(j + 1) * num_sub]
                else:
                    idx = perm[j * num_sub:]
                idx = torch.tensor(idx).to(device)
                tx = trainx[:, :, idx, :]
                ty = trainy[:, :, idx, :]
                metrics = engine.train(tx, ty[:, 0, :, :], idx)
                train_loss.append(metrics[0])
                train_mape.append(metrics[1])
                train_rmse.append(metrics[2])
            if batch_idx % args.print_every == 0:
                log = 'Iter: {:03d}, Train Loss: {:.4f}, Train MAPE: {:.4f}, Train RMSE: {:.4f}'
                print(log.format(batch_idx, train_loss[-1], train_mape[-1],
                                 train_rmse[-1]), flush=True)
        t2 = time.time()
        train_time.append(t2 - t1)

        # Validation pass for this epoch.
        valid_loss = []
        valid_mape = []
        valid_rmse = []
        s1 = time.time()
        for batch_idx, (x, y) in enumerate(
                dataloader['val_loader'].get_iterator()):
            testx = torch.Tensor(x).to(device)
            testx = testx.transpose(1, 3)
            testy = torch.Tensor(y).to(device)
            testy = testy.transpose(1, 3)
            metrics = engine.eval(testx, testy[:, 0, :, :])
            valid_loss.append(metrics[0])
            valid_mape.append(metrics[1])
            valid_rmse.append(metrics[2])
        s2 = time.time()
        log = 'Epoch: {:03d}, Inference Time: {:.4f} secs'
        print(log.format(i, (s2 - s1)))
        val_time.append(s2 - s1)

        mtrain_loss = np.mean(train_loss)
        mtrain_mape = np.mean(train_mape)
        mtrain_rmse = np.mean(train_rmse)
        mvalid_loss = np.mean(valid_loss)
        mvalid_mape = np.mean(valid_mape)
        mvalid_rmse = np.mean(valid_rmse)
        his_loss.append(mvalid_loss)

        log = 'Epoch: {:03d}, Train Loss: {:.4f}, Train MAPE: {:.4f}, Train RMSE: {:.4f}, Valid Loss: {:.4f}, Valid MAPE: {:.4f}, Valid RMSE: {:.4f}, Training Time: {:.4f}/epoch'
        print(log.format(i, mtrain_loss, mtrain_mape, mtrain_rmse,
                         mvalid_loss, mvalid_mape, mvalid_rmse, (t2 - t1)),
              flush=True)

        # Checkpoint whenever the mean validation loss improves.
        if mvalid_loss < minl:
            torch.save(
                engine.model.state_dict(),
                args.save + "exp" + str(args.expid) + "_" + str(runid) +
                ".pth")
            minl = mvalid_loss

    print("Average Training Time: {:.4f} secs/epoch".format(
        np.mean(train_time)))
    print("Average Inference Time: {:.4f} secs".format(np.mean(val_time)))

    # Restore the best checkpoint before final evaluation.
    bestid = np.argmin(his_loss)
    engine.model.load_state_dict(
        torch.load(args.save + "exp" + str(args.expid) + "_" + str(runid) +
                   ".pth"))
    print("Training finished")
    print("The valid loss on best model is", str(round(his_loss[bestid], 4)))

    # Validation data: collect predictions batch by batch.
    outputs = []
    realy = torch.Tensor(dataloader['y_val']).to(device)
    realy = realy.transpose(1, 3)[:, 0, :, :]
    for batch_idx, (x, y) in enumerate(
            dataloader['val_loader'].get_iterator()):
        testx = torch.Tensor(x).to(device)
        testx = testx.transpose(1, 3)
        with torch.no_grad():
            preds = engine.model(testx)
            preds = preds.transpose(1, 3)
        outputs.append(preds.squeeze())
    yhat = torch.cat(outputs, dim=0)
    # Drop any padding rows the loader appended to fill the last batch.
    yhat = yhat[:realy.size(0), ...]
    pred = scaler.inverse_transform(yhat)
    vmae, vmape, vrmse = metric(pred, realy)

    # Test data: same batching scheme as above.
    outputs = []
    realy = torch.Tensor(dataloader['y_test']).to(device)
    realy = realy.transpose(1, 3)[:, 0, :, :]
    for batch_idx, (x, y) in enumerate(
            dataloader['test_loader'].get_iterator()):
        testx = torch.Tensor(x).to(device)
        testx = testx.transpose(1, 3)
        with torch.no_grad():
            preds = engine.model(testx)
            preds = preds.transpose(1, 3)
        outputs.append(preds.squeeze())
    yhat = torch.cat(outputs, dim=0)
    yhat = yhat[:realy.size(0), ...]

    # Per-horizon test metrics.
    mae = []
    mape = []
    rmse = []
    for i in range(args.seq_out_len):
        pred = scaler.inverse_transform(yhat[:, :, i])
        real = realy[:, :, i]
        metrics = metric(pred, real)
        log = 'Evaluate best model on test data for horizon {:d}, Test MAE: {:.4f}, Test MAPE: {:.4f}, Test RMSE: {:.4f}'
        print(log.format(i + 1, metrics[0], metrics[1], metrics[2]))
        mae.append(metrics[0])
        mape.append(metrics[1])
        rmse.append(metrics[2])
    return vmae, vmape, vrmse, mae, mape, rmse
def main():
    """Train a single-step forecasting model and report final metrics.

    Relies on module-level ``args``, ``device``, ``DataLoaderS``, ``gtnet``,
    ``Optim``, ``train`` and ``evaluate``.

    Returns:
        (vtest_acc, vtest_rae, vtest_corr, test_acc, test_rae, test_corr):
        rse/rae/corr of the best saved model on the validation and test sets.
    """
    # Load the dataset: 60% train / 20% validation, remainder is test.
    Data = DataLoaderS(args.data, 0.6, 0.2, device, args.horizon,
                       args.seq_in_len, args.normalize)

    # Build the graph temporal network model.
    model = gtnet(args.gcn_true, args.buildA_true, args.gcn_depth,
                  args.num_nodes, device,
                  dropout=args.dropout,
                  subgraph_size=args.subgraph_size,
                  node_dim=args.node_dim,
                  dilation_exponential=args.dilation_exponential,
                  conv_channels=args.conv_channels,
                  residual_channels=args.residual_channels,
                  skip_channels=args.skip_channels,
                  end_channels=args.end_channels,
                  seq_length=args.seq_in_len,
                  in_dim=args.in_dim,
                  out_dim=args.seq_out_len,
                  layers=args.layers,
                  propalpha=args.propalpha,
                  tanhalpha=args.tanhalpha,
                  layer_norm_affline=False)
    model = model.to(device)

    print(args)
    print('The recpetive field size is', model.receptive_field)
    nParams = sum([p.nelement() for p in model.parameters()])
    print('Number of model parameters is', nParams, flush=True)

    # Training criterion with summed (not averaged) reduction.
    # NOTE: the deprecated `size_average=False` was replaced by its exact
    # modern equivalent `reduction='sum'`.
    if args.L1Loss:
        criterion = nn.L1Loss(reduction='sum').to(device)
    else:
        criterion = nn.MSELoss(reduction='sum').to(device)
    evaluateL2 = nn.MSELoss(reduction='sum').to(device)
    evaluateL1 = nn.L1Loss(reduction='sum').to(device)

    best_val = 10000000
    optim = Optim(model.parameters(), args.optim, args.lr, args.clip,
                  lr_decay=args.weight_decay)

    # At any point you can hit Ctrl + C to break out of training early.
    try:
        print('begin training')
        for epoch in range(1, args.epochs + 1):
            epoch_start_time = time.time()
            train_loss = train(Data, Data.train[0], Data.train[1], model,
                               criterion, optim, args.batch_size)
            val_loss, val_rae, val_corr = evaluate(Data, Data.valid[0],
                                                   Data.valid[1], model,
                                                   evaluateL2, evaluateL1,
                                                   args.batch_size)
            print(
                '| end of epoch {:3d} | time: {:5.2f}s | train_loss {:5.4f} | valid rse {:5.4f} | valid rae {:5.4f} | valid corr {:5.4f}'.format(
                    epoch, (time.time() - epoch_start_time), train_loss,
                    val_loss, val_rae, val_corr), flush=True)
            # Save the model if the validation loss is the best we've seen so far.
            if val_loss < best_val:
                with open(args.save, 'wb') as f:
                    torch.save(model, f)
                best_val = val_loss
            # Report test-set metrics every 5 epochs.
            if epoch % 5 == 0:
                test_acc, test_rae, test_corr = evaluate(
                    Data, Data.test[0], Data.test[1], model, evaluateL2,
                    evaluateL1, args.batch_size)
                print("test rse {:5.4f} | test rae {:5.4f} | test corr {:5.4f}".format(
                    test_acc, test_rae, test_corr), flush=True)
    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early')

    # Load the best saved model.
    with open(args.save, 'rb') as f:
        model = torch.load(f)

    # Final evaluation of the best model on validation and test splits.
    vtest_acc, vtest_rae, vtest_corr = evaluate(Data, Data.valid[0],
                                                Data.valid[1], model,
                                                evaluateL2, evaluateL1,
                                                args.batch_size)
    test_acc, test_rae, test_corr = evaluate(Data, Data.test[0], Data.test[1],
                                             model, evaluateL2, evaluateL1,
                                             args.batch_size)
    print("final test rse {:5.4f} | test rae {:5.4f} | test corr {:5.4f}".format(
        test_acc, test_rae, test_corr))
    return vtest_acc, vtest_rae, vtest_corr, test_acc, test_rae, test_corr