def main(args):
    """Train and evaluate the STGCN model with PGL/PaddlePaddle."""
    # PeMS = data_gen_mydata(args.input_file, args.label_file, args.n_route,
    #                        args.n_his, args.n_pred, (args.n_val, args.n_test))
    PeMS = data_gen_custom(args.input_file, args.label_file, args.city_file,
                           args.n_route, args.n_his, args.n_pred,
                           (args.n_val, args.n_test))

    log.info(PeMS.get_stats())
    log.info(PeMS.get_len('train'))

    gf = GraphFactory(args)

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    train_program = fluid.Program()
    startup_program = fluid.Program()

    with fluid.program_guard(train_program, startup_program):
        gw = pgl.graph_wrapper.GraphWrapper(
            "gw",
            place,
            node_feat=[('norm', [None, 1], "float32")],
            edge_feat=[('weights', [None, 1], "float32")])
        model = STGCNModel(args, gw)
        train_loss, y_pred = model.forward()

    infer_program = train_program.clone(for_test=True)

    with fluid.program_guard(train_program, startup_program):
        epoch_step = int(PeMS.get_len('train') / args.batch_size) + 1
        # Learning rate decays by 0.7 every 5 epochs.
        lr = fl.exponential_decay(learning_rate=args.lr,
                                  decay_steps=5 * epoch_step,
                                  decay_rate=0.7,
                                  staircase=True)
        if args.opt == 'RMSProp':
            train_op = fluid.optimizer.RMSPropOptimizer(lr).minimize(
                train_loss)
        elif args.opt == 'ADAM':
            train_op = fluid.optimizer.Adam(lr).minimize(train_loss)

    exe = fluid.Executor(place)
    exe.run(startup_program)

    if args.inf_mode == 'sep':
        # For inference mode 'sep', the step index is an int.
        step_idx = args.n_pred - 1
        tmp_idx = [step_idx]
        min_val = min_va_val = np.array([4e1, 1e5, 1e5])
    elif args.inf_mode == 'merge':
        # For inference mode 'merge', the step index is an np.ndarray.
        step_idx = tmp_idx = np.arange(3, args.n_pred + 1, 3) - 1
        # One [MAPE, MAE, RMSE] triple per evaluated step: tile the list,
        # not the array, so the vector has 3 * len(step_idx) entries.
        min_val = min_va_val = np.array([4e1, 1e5, 1e5] * len(step_idx))
    else:
        raise ValueError(f'ERROR: test mode "{args.inf_mode}" is not defined.')

    for epoch in range(1, args.epochs + 1):
        for idx, x_batch in enumerate(
                gen_batch(PeMS.get_data('train'),
                          args.batch_size,
                          dynamic_batch=True,
                          shuffle=True)):
            x = np.array(x_batch[:, 0:args.n_his, :, :], dtype=np.float32)
            graph = gf.build_graph(x)
            feed = gw.to_feed(graph)
            feed['input'] = np.array(x_batch[:, 0:args.n_his + 1, :, :],
                                     dtype=np.float32)
            b_loss, b_lr = exe.run(train_program,
                                   feed=feed,
                                   fetch_list=[train_loss, lr])

            if idx % 5 == 0:
                log.info("epoch %d | step %d | lr %.6f | loss %.6f" %
                         (epoch, idx, b_lr[0], b_loss[0]))

        min_va_val, min_val = \
            model_inference(exe, gw, gf, infer_program, y_pred, PeMS, args,
                            step_idx, min_va_val, min_val)

        for ix in tmp_idx:
            va, te = min_va_val[ix - 2:ix + 1], min_val[ix - 2:ix + 1]
            print(f'Time Step {ix + 1}: '
                  f'MAPE {va[0]:7.3%}, {te[0]:7.3%}; '
                  f'MAE {va[1]:4.3f}, {te[1]:4.3f}; '
                  f'RMSE {va[2]:6.3f}, {te[2]:6.3f}.')

        if epoch % 5 == 0:
            model_test(exe, gw, gf, infer_program, y_pred, PeMS, args)
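# A minimal, self-contained entry point for the PGL variant above; this is a
# sketch, not the repo's own launcher. The flag names mirror the attributes
# main() reads from `args`, while the defaults are illustrative assumptions
# (the model may read further fields, e.g. ks/kt inside STGCNModel).
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='STGCN training (PGL/Paddle)')
    parser.add_argument('--input_file', type=str, required=True)
    parser.add_argument('--label_file', type=str, required=True)
    parser.add_argument('--city_file', type=str, required=True)
    parser.add_argument('--n_route', type=int, default=228)
    parser.add_argument('--n_his', type=int, default=12)
    parser.add_argument('--n_pred', type=int, default=9)
    parser.add_argument('--n_val', type=int, default=1)
    parser.add_argument('--n_test', type=int, default=1)
    parser.add_argument('--batch_size', type=int, default=50)
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--opt', type=str, default='RMSProp')
    parser.add_argument('--inf_mode', type=str, default='merge')
    parser.add_argument('--use_cuda', action='store_true')
    main(parser.parse_args())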
def model_train(inputs, blocks, args, sum_path='./output/tensorboard',
                output_dim=1):
    '''
    Train the base model.
    :param inputs: instance of class Dataset, data source for training.
    :param blocks: list, channel configs of st_conv blocks.
    :param args: instance of class argparse, args for training.
    :param output_dim: int, number of output channels of the prediction.
    '''
    n, n_his, n_pred = args.n_route, args.seq_len, args.horizon
    Ks, Kt = args.ks, args.kt
    batch_size, epoch, inf_mode, opt = \
        args.batch_size, args.epoch, args.inf_mode, args.opt

    # Placeholders for model training.
    x = tf.compat.v1.placeholder(tf.float32, [None, n_his + 1, n, 3],
                                 name='data_input')
    keep_prob = tf.compat.v1.placeholder(tf.float32, name='keep_prob')

    # Define the model loss for one-step forecasting.
    train_loss, pred = build_model(x, n_his, Ks, Kt, blocks, keep_prob,
                                   output_dim=output_dim)
    tf.summary.scalar('train_loss', train_loss)
    # Copy loss: the baseline that uses the previous step as the
    # current-step prediction.
    copy_loss = tf.add_n(tf.get_collection('copy_loss'))
    tf.summary.scalar('copy_loss', copy_loss)

    # Learning rate settings (integer division keeps the summary step an int).
    global_steps = tf.Variable(0, trainable=False)
    len_train = inputs.get_len('train')
    if len_train % batch_size == 0:
        epoch_step = len_train // batch_size
    else:
        epoch_step = len_train // batch_size + 1
    # Learning rate decays by 0.7 every 5 epochs.
    lr = tf.train.exponential_decay(args.lr, global_steps,
                                    decay_steps=5 * epoch_step,
                                    decay_rate=0.7, staircase=True)
    tf.summary.scalar('learning_rate', lr)
    step_op = tf.assign_add(global_steps, 1)
    with tf.control_dependencies([step_op]):
        if opt == 'RMSProp':
            train_op = tf.train.RMSPropOptimizer(lr).minimize(train_loss)
        elif opt == 'ADAM':
            train_op = tf.train.AdamOptimizer(lr).minimize(train_loss)
        else:
            raise ValueError(f'ERROR: optimizer "{opt}" is not defined.')

    merged = tf.summary.merge_all()

    with tf.Session() as sess:
        writer = tf.summary.FileWriter(pjoin(sum_path, 'train'), sess.graph)
        sess.run(tf.global_variables_initializer())

        if inf_mode == 'sep':
            # For inference mode 'sep', the step index is an int.
            step_idx = n_pred - 1
            tmp_idx = [step_idx]
            min_val = min_va_val = np.array([4e1, 1e5, 1e5])
        elif inf_mode == 'merge':
            # For inference mode 'merge', the step index is an np.ndarray.
            # Evaluate every horizon step (the commented line evaluates
            # every third step instead).
            # step_idx = tmp_idx = np.arange(3, n_pred + 1, 3) - 1
            step_idx = tmp_idx = np.arange(n_pred)
            min_val = min_va_val = np.array([4e1, 1e5, 1e5] * len(step_idx))
        else:
            raise ValueError(f'ERROR: test mode "{inf_mode}" is not defined.')

        for i in range(epoch):
            start_time = time.time()
            for j, x_batch in enumerate(
                    gen_batch(inputs.get_data('train'), batch_size,
                              dynamic_batch=True, shuffle=True)):
                summary, _ = sess.run(
                    [merged, train_op],
                    feed_dict={x: x_batch[:, 0:n_his + 1, :, :],
                               keep_prob: 1.0})
                writer.add_summary(summary, i * epoch_step + j)
                if j % 50 == 0:
                    loss_value = sess.run(
                        [train_loss, copy_loss],
                        feed_dict={x: x_batch[:, 0:n_his + 1, :, :],
                                   keep_prob: 1.0})
                    print(f'Epoch {i:2d}, Step {j:3d}: '
                          f'[model_loss: {loss_value[0]:.3f}, '
                          f'copy_loss: {loss_value[1]:.3f}]', flush=True)
            print(f'Epoch {i:2d} Training Time {time.time() - start_time:.3f}s')

            start_time = time.time()
            min_va_val, min_val = \
                model_inference(sess, pred, inputs, batch_size, n_his,
                                n_pred, step_idx, min_va_val, min_val)

            for ix in tmp_idx:
                va = min_va_val[ix * 3:(ix + 1) * 3]
                te = min_val[ix * 3:(ix + 1) * 3]
                print(f'Time Step {ix + 1}: '
                      f'MAPE {va[0]:7.3%}, {te[0]:7.3%}; '
                      f'MAE {va[1]:4.3f}, {te[1]:4.3f}; '
                      f'RMSE {va[2]:6.3f}, {te[2]:6.3f}.')
            print(f'Epoch {i:2d} Inference Time {time.time() - start_time:.3f}s')

            if (i + 1) % args.save == 0:
                model_save(sess, global_steps, 'STGCN')
        writer.close()
    print('Training model finished!')
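# The inference routine packs the validation/test metrics as one flat vector
# of [MAPE, MAE, RMSE] triples, one triple per evaluated horizon step; the
# `ix * 3:(ix + 1) * 3` slices above index into that layout. A minimal sketch
# of the layout (the helper name `unpack_metrics` is ours, not the repo's):
def unpack_metrics(flat, step_idx):
    """Map a flat [MAPE, MAE, RMSE, ...] vector to {horizon step: triple}."""
    return {int(ix): flat[i * 3:(i + 1) * 3] for i, ix in enumerate(step_idx)}

# e.g. unpack_metrics(min_va_val, step_idx)[0] is the [MAPE, MAE, RMSE]
# triple for the first evaluated forecast step.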
def model_train(inputs, blocks, args, sum_path='./output/tensorboard',
                load_path='./output/models/', load=False):
    '''
    Train the base model, optionally resuming from a saved checkpoint.
    :param inputs: instance of class Dataset, data source for training.
    :param blocks: list, channel configs of st_conv blocks.
    :param args: instance of class argparse, args for training.
    :param load_path: str, directory of checkpoints to restore from.
    :param load: bool, whether to restore the latest checkpoint first.
    '''
    n, n_his, n_pred = args.n_route, args.n_his, args.n_pred
    Ks, Kt = args.ks, args.kt
    batch_size, epoch, inf_mode, opt = \
        args.batch_size, args.epoch, args.inf_mode, args.opt

    # Placeholders for model training.
    x = tf.placeholder(tf.float32, [None, n_his + n_pred, n, 1],
                       name='data_input')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')

    # Define the model loss.
    train_loss, pred = build_model(x, n_his, Ks, Kt, blocks, keep_prob)
    tf.summary.scalar('train_loss', train_loss)
    copy_loss = tf.add_n(tf.get_collection('copy_loss'))
    tf.summary.scalar('copy_loss', copy_loss)

    # Learning rate settings (integer division keeps the summary step an int).
    global_steps = tf.Variable(0, trainable=False)
    len_train = inputs.get_len('train')
    if len_train % batch_size == 0:
        epoch_step = len_train // batch_size
    else:
        epoch_step = len_train // batch_size + 1
    # Learning rate decays by 0.7 every 5 epochs.
    lr = tf.train.exponential_decay(args.lr, global_steps,
                                    decay_steps=5 * epoch_step,
                                    decay_rate=0.7, staircase=True)
    tf.summary.scalar('learning_rate', lr)
    step_op = tf.assign_add(global_steps, 1)
    with tf.control_dependencies([step_op]):
        if opt == 'RMSProp':
            train_op = tf.train.RMSPropOptimizer(lr).minimize(train_loss)
        elif opt == 'ADAM':
            train_op = tf.train.AdamOptimizer(lr).minimize(train_loss)
        else:
            raise ValueError(f'ERROR: optimizer "{opt}" is not defined.')

    merged = tf.summary.merge_all()

    saver = tf.train.Saver(max_to_keep=3)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        writer = tf.summary.FileWriter(pjoin(sum_path, 'train'), sess.graph)
        if load:
            # Resume from the latest checkpoint instead of reinitializing.
            ckpt = tf.train.get_checkpoint_state(load_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
            print(f'>> Loading saved model from {load_path} ...')
        else:
            sess.run(tf.global_variables_initializer())

        if inf_mode == 'sep':
            # For inference mode 'sep', the step index is an int.
            step_idx = n_pred - 1
            tmp_idx = [step_idx]
            min_val = min_va_val = np.array([4e1, 1e5, 1e5])
        elif inf_mode == 'merge':
            # For inference mode 'merge', the step index is an np.ndarray.
            # Evaluate all 8 horizon steps (the commented line evaluates
            # every third step instead).
            # step_idx = tmp_idx = np.arange(3, n_pred + 1, 3) - 1
            step_idx = tmp_idx = np.arange(8)
            min_val = min_va_val = np.array([4e1, 1e5, 1e5] * len(step_idx))
        else:
            raise ValueError(f'ERROR: test mode "{inf_mode}" is not defined.')

        for i in range(epoch):
            start_time = time.time()
            for j, x_batch in enumerate(
                    gen_batch(inputs.get_data('train'), batch_size,
                              dynamic_batch=True, shuffle=True)):
                summary, _ = sess.run(
                    [merged, train_op],
                    feed_dict={x: x_batch[:, 0:n_his + n_pred, :, :],
                               keep_prob: 1.0})
                writer.add_summary(summary, i * epoch_step + j)
                if j % 50 == 0:
                    loss_value = sess.run(
                        [train_loss, copy_loss],
                        feed_dict={x: x_batch[:, 0:n_his + n_pred, :, :],
                                   keep_prob: 1.0})
                    print(f'Epoch {i:2d}, Step {j:3d}: '
                          f'[{loss_value[0]:.3f}, {loss_value[1]:.3f}]')
            print(f'Epoch {i:2d} Training Time {time.time() - start_time:.3f}s')

            if (i + 1) % args.save == 0:
                model_save(sess, global_steps, 'STGCN')

            # Run inference only every 5 epochs.
            if (i + 1) % 5 != 0:
                continue

            start_time = time.time()
            min_va_val, min_val = \
                model_inference(sess, pred, inputs, batch_size, n_his,
                                n_pred, step_idx, min_va_val, min_val)

            # Metrics come back as one [MAPE, MAE, RMSE] triple per step.
            cnt = 0
            for ix in tmp_idx:
                va, te = min_va_val[cnt:cnt + 3], min_val[cnt:cnt + 3]
                cnt += 3
                print(f'Time Step {ix + 1}: '
                      f'MAPE {va[0]:7.3%}; '
                      f'MAE {va[1]:4.3f}; '
                      f'RMSE {va[2]:6.3f}.')
            print(f'Epoch {i:2d} Inference Time {time.time() - start_time:.3f}s')
        writer.close()
    print('Training model finished!')
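# Illustrative resume-from-checkpoint usage for the variant above; `PeMS` and
# `blocks` are assumed stand-ins (the block config shown is the common STGCN
# two-block channel setup, not necessarily what this repo uses):
#
#     blocks = [[1, 32, 64], [64, 32, 128]]
#     model_train(PeMS, blocks, args)             # train from scratch
#     model_train(PeMS, blocks, args, load=True)  # restore the latest
#                                                 # checkpoint from
#                                                 # ./output/models/ first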