def main(args): # set seed torch_seed = np.random.randint(low=0, high=1000) np_seed = np.random.randint(low=0, high=1000) py_seed = np.random.randint(low=0, high=1000) np.random.seed(np_seed) random.seed(py_seed) # Build the models # setup evaluation function and load function if args.env_type == 'pendulum': obs_file = None obc_file = None obs_f = False #system = standard_cpp_systems.PSOPTPendulum() #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0) elif args.env_type == 'cartpole_obs': obs_file = None obc_file = None obs_f = True obs_width = 4.0 step_sz = 0.002 psopt_system = _sst_module.PSOPTCartPole() cpp_propagator = _sst_module.SystemPropagator() #system = standard_cpp_systems.RectangleObs(obs, 4., 'cartpole') dynamics = lambda x, u, t: cpp_propagator.propagate( psopt_system, x, u, t) normalize = cart_pole_obs.normalize unnormalize = cart_pole_obs.unnormalize system = _sst_module.PSOPTCartPole() mlp = mlp_cartpole.MLP cae = CAE_cartpole_voxel_2d dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = cart_pole_obs.enforce_bounds step_sz = 0.002 num_steps = 100 elif args.env_type == 'cartpole_obs_2': obs_file = None obc_file = None obs_f = True obs_width = 4.0 step_sz = 0.002 psopt_system = _sst_module.PSOPTCartPole() cpp_propagator = _sst_module.SystemPropagator() #system = standard_cpp_systems.RectangleObs(obs, 4., 'cartpole') dynamics = lambda x, u, t: cpp_propagator.propagate( psopt_system, x, u, t) normalize = cart_pole_obs.normalize unnormalize = cart_pole_obs.unnormalize system = _sst_module.PSOPTCartPole() mlp = mlp_cartpole.MLP2 cae = CAE_cartpole_voxel_2d dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = cart_pole_obs.enforce_bounds step_sz = 0.002 num_steps = 100 elif args.env_type == 'cartpole_obs_3': obs_file = None obc_file = None obs_f = True obs_width = 4.0 step_sz = 0.002 psopt_system = _sst_module.PSOPTCartPole() cpp_propagator = _sst_module.SystemPropagator() #system = standard_cpp_systems.RectangleObs(obs, 4., 'cartpole') dynamics = lambda x, u, t: cpp_propagator.propagate( psopt_system, x, u, t) normalize = cart_pole_obs.normalize unnormalize = cart_pole_obs.unnormalize system = _sst_module.PSOPTCartPole() mlp = mlp_cartpole.MLP4 cae = CAE_cartpole_voxel_2d dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = cart_pole_obs.enforce_bounds step_sz = 0.002 num_steps = 200 elif args.env_type == 'cartpole_obs_4': obs_file = None obc_file = None obs_f = True obs_width = 4.0 step_sz = 0.002 psopt_system = _sst_module.PSOPTCartPole() cpp_propagator = _sst_module.SystemPropagator() #system = standard_cpp_systems.RectangleObs(obs, 4., 'cartpole') dynamics = lambda x, u, t: cpp_propagator.propagate( psopt_system, x, u, t) normalize = cart_pole_obs.normalize unnormalize = cart_pole_obs.unnormalize system = _sst_module.PSOPTCartPole() mlp = mlp_cartpole.MLP3 cae = CAE_cartpole_voxel_2d dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = cart_pole_obs.enforce_bounds step_sz = 0.002 num_steps = 200 elif args.env_type == 'acrobot_obs': obs_file = None obc_file = None obs_f = True obs_width = 6.0 #system = standard_cpp_systems.RectangleObs(obs_list, args.obs_width, 'acrobot') #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) mpnet = KMPNet(args.total_input_size, args.AE_input_size, args.mlp_input_size, args.output_size, cae, mlp, None) # load net # load previously trained model if start epoch > 0 model_dir = args.model_dir if args.loss == 'mse': if args.multigoal == 0: model_dir = model_dir + args.env_type + "_lr%f_%s_step_%d/" % ( args.learning_rate, args.opt, args.num_steps) else: model_dir = model_dir + args.env_type + "_lr%f_%s_step_%d_multigoal/" % ( args.learning_rate, args.opt, args.num_steps) else: if args.multigoal == 0: model_dir = model_dir + args.env_type + "_lr%f_%s_loss_%s_step_%d/" % ( args.learning_rate, args.opt, args.loss, args.num_steps) else: model_dir = model_dir + args.env_type + "_lr%f_%s_loss_%s_step_%d_multigoal/" % ( args.learning_rate, args.opt, args.loss, args.num_steps) print(model_dir) if not os.path.exists(model_dir): os.makedirs(model_dir) model_path = 'kmpnet_epoch_%d_direction_%d_step_%d.pkl' % ( args.start_epoch, args.direction, args.num_steps) torch_seed, np_seed, py_seed = 0, 0, 0 if args.start_epoch > 0: #load_net_state(mpnet, os.path.join(args.model_path, model_path)) load_net_state(mpnet, os.path.join(model_dir, model_path)) #torch_seed, np_seed, py_seed = load_seed(os.path.join(args.model_path, model_path)) torch_seed, np_seed, py_seed = load_seed( os.path.join(model_dir, model_path)) # set seed after loading torch.manual_seed(torch_seed) np.random.seed(np_seed) random.seed(py_seed) if torch.cuda.is_available(): mpnet.cuda() mpnet.mlp.cuda() mpnet.encoder.cuda() if args.opt == 'Adagrad': mpnet.set_opt(torch.optim.Adagrad, lr=args.learning_rate) elif args.opt == 'Adam': mpnet.set_opt(torch.optim.Adam, lr=args.learning_rate) elif args.opt == 'SGD': mpnet.set_opt(torch.optim.SGD, lr=args.learning_rate, momentum=0.9) elif args.opt == 'ASGD': mpnet.set_opt(torch.optim.ASGD, lr=args.learning_rate) if args.start_epoch > 0: #load_opt_state(mpnet, os.path.join(args.model_path, model_path)) load_opt_state(mpnet, os.path.join(model_dir, model_path)) mpnet.eval() # load data print('loading...') if args.seen_N > 0: seen_test_data = data_loader.load_test_dataset(args.seen_N, args.seen_NP, args.data_folder, obs_f, args.seen_s, args.seen_sp) if args.unseen_N > 0: unseen_test_data = data_loader.load_test_dataset( args.unseen_N, args.unseen_NP, args.data_folder, obs_f, args.unseen_s, args.unseen_sp) # test # testing print('testing...') seen_test_suc_rate = 0. unseen_test_suc_rate = 0. # find path plt.ion() fig = plt.figure() ax = fig.add_subplot(111) ax.set_autoscale_on(True) hl, = ax.plot([], [], 'b') #hl_real, = ax.plot([], [], 'r') def update_line(h, ax, new_data): h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1])) #h.set_xdata(np.append(h.get_xdata(), new_data[0])) #h.set_ydata(np.append(h.get_ydata(), new_data[1])) def draw_update_line(ax): ax.relim() ax.autoscale_view() fig.canvas.draw() fig.canvas.flush_events() # randomly pick up a point in the data, and find similar data in the dataset # plot the next point obc, obs, paths, sgs, path_lengths, controls, costs = seen_test_data for envi in range(2): for pathi in range(10): obs_i = obs[envi] new_obs_i = [] obs_i = obs[envi] plan_res_path = [] plan_time_path = [] plan_cost_path = [] data_cost_path = [] for k in range(len(obs_i)): obs_pt = [] obs_pt.append(obs_i[k][0] - obs_width / 2) obs_pt.append(obs_i[k][1] - obs_width / 2) obs_pt.append(obs_i[k][0] - obs_width / 2) obs_pt.append(obs_i[k][1] + obs_width / 2) obs_pt.append(obs_i[k][0] + obs_width / 2) obs_pt.append(obs_i[k][1] + obs_width / 2) obs_pt.append(obs_i[k][0] + obs_width / 2) obs_pt.append(obs_i[k][1] - obs_width / 2) new_obs_i.append(obs_pt) obs_i = new_obs_i # visualization plt.ion() fig = plt.figure() ax = fig.add_subplot(121) ax_vel = fig.add_subplot(122) #ax.set_autoscale_on(True) ax.set_xlim(-30, 30) ax.set_ylim(-np.pi, np.pi) ax_vel.set_xlim(-40, 40) ax_vel.set_ylim(-2, 2) hl, = ax.plot([], [], 'b') #hl_real, = ax.plot([], [], 'r') hl_for, = ax.plot([], [], 'g') hl_back, = ax.plot([], [], 'r') hl_for_mpnet, = ax.plot([], [], 'lightgreen') hl_back_mpnet, = ax.plot([], [], 'salmon') #print(obs) def update_line(h, ax, new_data): new_data = wrap_angle(new_data, propagate_system) h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1])) #h.set_xdata(np.append(h.get_xdata(), new_data[0])) #h.set_ydata(np.append(h.get_ydata(), new_data[1])) def remove_last_k(h, ax, k): h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k]) def draw_update_line(ax): #ax.relim() #ax.autoscale_view() fig.canvas.draw() fig.canvas.flush_events() #plt.show() def wrap_angle(x, system): circular = system.is_circular_topology() res = np.array(x) for i in range(len(x)): if circular[i]: # use our previously saved version res[i] = x[i] - np.floor(x[i] / (2 * np.pi)) * (2 * np.pi) if res[i] > np.pi: res[i] = res[i] - 2 * np.pi return res dx = 1 dtheta = 0.1 feasible_points = [] infeasible_points = [] imin = 0 imax = int(2 * 30. / dx) jmin = 0 jmax = int(2 * np.pi / dtheta) for i in range(imin, imax): for j in range(jmin, jmax): x = np.array([dx * i - 30, 0., dtheta * j - np.pi, 0.]) if IsInCollision(x, obs_i): infeasible_points.append(x) else: feasible_points.append(x) feasible_points = np.array(feasible_points) infeasible_points = np.array(infeasible_points) print('feasible points') print(feasible_points) print('infeasible points') print(infeasible_points) ax.scatter(feasible_points[:, 0], feasible_points[:, 2], c='yellow') ax.scatter(infeasible_points[:, 0], infeasible_points[:, 2], c='pink') #for i in range(len(data)): # update_line(hl, ax, data[i]) draw_update_line(ax) #state_t = start_state xs = paths[envi][pathi] us = controls[envi][pathi] ts = costs[envi][pathi] # propagate data p_start = xs[0] detail_paths = [p_start] detail_controls = [] detail_costs = [] state = [p_start] control = [] cost = [] for k in range(len(us)): #state_i.append(len(detail_paths)-1) max_steps = int(ts[k] / step_sz) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][k]) # modify it because of small difference between data and actual propagation p_start = xs[k] state[-1] = xs[k] for step in range(1, max_steps + 1): p_start = dynamics(p_start, us[k], step_sz) p_start = enforce_bounds(p_start) detail_paths.append(p_start) accum_cost += step_sz if (step % 1 == 0) or (step == max_steps): state.append(p_start) #print('control') #print(controls[i][j]) cost.append(accum_cost) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][-1]) state[-1] = xs[-1] #print(len(state)) xs_to_plot = np.array(state) for i in range(len(xs_to_plot)): xs_to_plot[i] = wrap_angle(xs_to_plot[i], psopt_system) ax.scatter(xs_to_plot[:, 0], xs_to_plot[:, 2], c='green') # draw start and goal #ax.scatter(start_state[0], goal_state[0], marker='X') draw_update_line(ax) ax_vel.scatter(xs_to_plot[:, 1], xs_to_plot[:, 3], c='green', s=0.1) draw_update_line(ax_vel) plt.waitforbuttonpress() # visualize mPNet path mpnet_paths = [] state = xs[0] #for k in range(int(len(xs_to_plot)/args.num_steps)): for k in range(50): mpnet_paths.append(state) bi = np.concatenate([state, xs[-1]]) bi = np.array([bi]) bi = torch.from_numpy(bi).type(torch.FloatTensor) print(bi) bi = normalize(bi, args.world_size) bi = to_var(bi) if obc is None: bobs = None else: bobs = np.array([obc[envi]]).astype(np.float32) print(bobs.shape) bobs = torch.FloatTensor(bobs) bobs = to_var(bobs) bt = mpnet(bi, bobs).cpu() bt = unnormalize(bt, args.world_size) bt = bt.detach().numpy() print(bt.shape) state = bt[0] print(mpnet_paths) xs_to_plot = np.array(mpnet_paths) print(len(xs_to_plot)) for i in range(len(xs_to_plot)): xs_to_plot[i] = wrap_angle(xs_to_plot[i], psopt_system) ax.scatter(xs_to_plot[:, 0], xs_to_plot[:, 2], c='lightgreen') # draw start and goal #ax.scatter(start_state[0], goal_state[0], marker='X') draw_update_line(ax) ax_vel.scatter(xs_to_plot[:, 1], xs_to_plot[:, 3], c='lightgreen') draw_update_line(ax_vel) plt.waitforbuttonpress()
def main(args): #global hl if torch.cuda.is_available(): torch.cuda.set_device(args.device) # environment setting cae = cae_identity mlp = MLP cpp_propagator = _sst_module.SystemPropagator() if args.env_type == 'pendulum': normalize = pendulum.normalize unnormalize = pendulum.unnormalize system = standard_cpp_systems.PSOPTPendulum() dynamics = None enforce_bounds = None step_sz = 0.002 num_steps = 20 elif args.env_type == 'cartpole': normalize = cart_pole.normalize unnormalize = cart_pole.unnormalize dynamics = cartpole.dynamics system = _sst_module.CartPole() enforce_bounds = cartpole.enforce_bounds step_sz = 0.002 num_steps = 20 elif args.env_type == 'cartpole_obs': normalize = cart_pole_obs.normalize unnormalize = cart_pole_obs.unnormalize system = _sst_module.CartPole() dynamics = cartpole.dynamics enforce_bounds = cartpole.enforce_bounds step_sz = 0.002 num_steps = 20 elif args.env_type == 'acrobot_obs': normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize system = _sst_module.PSOPTAcrobot() mlp = mlp_acrobot.MLP cae = CAE_acrobot_voxel_2d #dynamics = acrobot_obs.dynamics dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = acrobot_obs.enforce_bounds step_sz = 0.02 num_steps = 20 elif args.env_type == 'acrobot_obs_2': normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize system = _sst_module.PSOPTAcrobot() mlp = mlp_acrobot.MLP2 cae = CAE_acrobot_voxel_2d_2 #dynamics = acrobot_obs.dynamics dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = acrobot_obs.enforce_bounds step_sz = 0.02 num_steps = 20 elif args.env_type == 'acrobot_obs_3': normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize system = _sst_module.PSOPTAcrobot() mlp = mlp_acrobot.MLP3 cae = CAE_acrobot_voxel_2d_2 #dynamics = acrobot_obs.dynamics dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = acrobot_obs.enforce_bounds step_sz = 0.02 num_steps = 20 elif args.env_type == 'acrobot_obs_4': normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize system = _sst_module.PSOPTAcrobot() mlp = mlp_acrobot.MLP3 cae = CAE_acrobot_voxel_2d_3 #dynamics = acrobot_obs.dynamics dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = acrobot_obs.enforce_bounds step_sz = 0.02 num_steps = 20 elif args.env_type == 'acrobot_obs_5': normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize system = _sst_module.PSOPTAcrobot() mlp = mlp_acrobot.MLP cae = CAE_acrobot_voxel_2d_3 #dynamics = acrobot_obs.dynamics dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = acrobot_obs.enforce_bounds step_sz = 0.02 num_steps = 20 elif args.env_type == 'acrobot_obs_6': normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize system = _sst_module.PSOPTAcrobot() mlp = mlp_acrobot.MLP4 cae = CAE_acrobot_voxel_2d_3 #dynamics = acrobot_obs.dynamics dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = acrobot_obs.enforce_bounds step_sz = 0.02 num_steps = 20 elif args.env_type == 'acrobot_obs_7': normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize system = _sst_module.PSOPTAcrobot() mlp = mlp_acrobot.MLP5 cae = CAE_acrobot_voxel_2d_3 #dynamics = acrobot_obs.dynamics dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = acrobot_obs.enforce_bounds step_sz = 0.02 num_steps = 20 elif args.env_type == 'acrobot_obs_8': normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize system = _sst_module.PSOPTAcrobot() mlp = mlp_acrobot.MLP6 cae = CAE_acrobot_voxel_2d_3 #dynamics = acrobot_obs.dynamics dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = acrobot_obs.enforce_bounds step_sz = 0.02 num_steps = 20 mpnet = KMPNet(args.total_input_size, args.AE_input_size, args.mlp_input_size, args.output_size, cae, mlp) # load net # load previously trained model if start epoch > 0 model_dir = args.model_dir model_dir = model_dir + 'cost_' + args.env_type + "_lr%f_%s_step_%d/" % ( args.learning_rate, args.opt, args.num_steps) if not os.path.exists(model_dir): os.makedirs(model_dir) model_path = 'cost_kmpnet_epoch_%d_direction_%d_step_%d.pkl' % ( args.start_epoch, args.direction, args.num_steps) torch_seed, np_seed, py_seed = 0, 0, 0 if args.start_epoch > 0: #load_net_state(mpnet, os.path.join(args.model_path, model_path)) load_net_state(mpnet, os.path.join(model_dir, model_path)) #torch_seed, np_seed, py_seed = load_seed(os.path.join(args.model_path, model_path)) torch_seed, np_seed, py_seed = load_seed( os.path.join(model_dir, model_path)) # set seed after loading torch.manual_seed(torch_seed) np.random.seed(np_seed) random.seed(py_seed) if torch.cuda.is_available(): mpnet.cuda() mpnet.mlp.cuda() mpnet.encoder.cuda() if args.opt == 'Adagrad': mpnet.set_opt(torch.optim.Adagrad, lr=args.learning_rate) elif args.opt == 'Adam': mpnet.set_opt(torch.optim.Adam, lr=args.learning_rate) elif args.opt == 'SGD': mpnet.set_opt(torch.optim.SGD, lr=args.learning_rate, momentum=0.9) elif args.opt == 'ASGD': mpnet.set_opt(torch.optim.ASGD, lr=args.learning_rate) if args.start_epoch > 0: #load_opt_state(mpnet, os.path.join(args.model_path, model_path)) load_opt_state(mpnet, os.path.join(model_dir, model_path)) mpnet.eval() # load train and test data print('loading...') obs, cost_dataset, cost_targets, env_indices, \ _, _, _, _ = data_loader.load_train_dataset_cost(N=args.no_env, NP=args.no_motion_paths, data_folder=args.path_folder, obs_f=True, direction=args.direction, dynamics=dynamics, enforce_bounds=enforce_bounds, system=system, step_sz=step_sz, num_steps=args.num_steps) # randomize the dataset before training data = list(zip(cost_dataset, cost_targets, env_indices)) random.shuffle(data) dataset, targets, env_indices = list(zip(*data)) dataset = list(dataset) targets = list(targets) env_indices = list(env_indices) dataset = np.array(dataset) targets = np.array(targets) env_indices = np.array(env_indices) val_i = 0 for i in range(0, len(dataset), args.batch_size): # validation # calculate the corresponding batch in val_dataset dataset_i = dataset[i:i + args.batch_size] targets_i = targets[i:i + args.batch_size] env_indices_i = env_indices[i:i + args.batch_size] # record bi = dataset_i.astype(np.float32) print('bi shape:') print(bi.shape) bt = targets_i bi = torch.FloatTensor(bi) bt = torch.FloatTensor(bt) bi = normalize(bi, args.world_size) bi = to_var(bi) bt = to_var(bt) if obs is None: bobs = None else: bobs = obs[env_indices_i].astype(np.float32) bobs = torch.FloatTensor(bobs) bobs = to_var(bobs) print('cost network output: ') print(mpnet(bi, bobs).cpu().data) print('target: ') print(bt.cpu().data)
def main(args): # set seed print(args.model_path) torch_seed = np.random.randint(low=0, high=1000) np_seed = np.random.randint(low=0, high=1000) py_seed = np.random.randint(low=0, high=1000) torch.manual_seed(torch_seed) np.random.seed(np_seed) random.seed(py_seed) # Build the models if torch.cuda.is_available(): torch.cuda.set_device(args.device) # setup evaluation function and load function if args.env_type == 'pendulum': IsInCollision = pendulum.IsInCollision normalize = pendulum.normalize unnormalize = pendulum.unnormalize obs_file = None obc_file = None dynamics = pendulum.dynamics jax_dynamics = pendulum.jax_dynamics enforce_bounds = pendulum.enforce_bounds cae = cae_identity mlp = MLP obs_f = False #system = standard_cpp_systems.PSOPTPendulum() #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0) elif args.env_type == 'cartpole_obs': IsInCollision = cartpole.IsInCollision normalize = cartpole.normalize unnormalize = cartpole.unnormalize obs_file = None obc_file = None dynamics = cartpole.dynamics jax_dynamics = cartpole.jax_dynamics enforce_bounds = cartpole.enforce_bounds cae = CAE_acrobot_voxel_2d mlp = mlp_acrobot.MLP obs_f = True #system = standard_cpp_systems.RectangleObs(obs_list, args.obs_width, 'cartpole') #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) elif args.env_type == 'acrobot_obs': IsInCollision = acrobot_obs.IsInCollision #IsInCollision = lambda x, obs: False normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize obs_file = None obc_file = None system = _sst_module.PSOPTAcrobot() cpp_propagator = _sst_module.SystemPropagator() dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) xdot = acrobot_obs.dynamics jax_dynamics = acrobot_obs.jax_dynamics enforce_bounds = acrobot_obs.enforce_bounds cae = CAE_acrobot_voxel_2d mlp = mlp_acrobot.MLP obs_f = True bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.02 num_steps = 21 traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve( x0, x1, 200, num_steps, step_sz * 1, step_sz * (num_steps - 1), x_init, u_init, t_init) goal_S0 = np.diag([1., 1., 0, 0]) #goal_S0 = np.identity(4) goal_rho0 = 1.0 elif args.env_type == 'acrobot_obs_2': IsInCollision = acrobot_obs.IsInCollision normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize obs_file = None obc_file = None system = _sst_module.PSOPTAcrobot() cpp_propagator = _sst_module.SystemPropagator() dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) xdot = acrobot_obs.dynamics jax_dynamics = acrobot_obs.jax_dynamics enforce_bounds = acrobot_obs.enforce_bounds cae = CAE_acrobot_voxel_2d_2 mlp = mlp_acrobot.MLP2 obs_f = True bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.02 num_steps = 21 traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve( x0, x1, 400, num_steps, step_sz * 1, step_sz * (num_steps - 1), x_init, u_init, t_init) goal_S0 = np.diag([1., 1., 0, 0]) #goal_S0 = np.identity(4) goal_rho0 = 1.0 elif args.env_type == 'acrobot_obs_3': IsInCollision = acrobot_obs.IsInCollision normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize obs_file = None obc_file = None system = _sst_module.PSOPTAcrobot() cpp_propagator = _sst_module.SystemPropagator() dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) xdot = acrobot_obs.dynamics jax_dynamics = acrobot_obs.jax_dynamics enforce_bounds = acrobot_obs.enforce_bounds mlp = mlp_acrobot.MLP3 cae = CAE_acrobot_voxel_2d_2 obs_f = True bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.02 num_steps = 21 traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve( x0, x1, 400, num_steps, step_sz * 1, step_sz * (num_steps - 1), x_init, u_init, t_init) goal_S0 = np.diag([1., 1., 0, 0]) #goal_S0 = np.identity(4) goal_rho0 = 1.0 elif args.env_type == 'acrobot_obs_5': IsInCollision = acrobot_obs.IsInCollision normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize obs_file = None obc_file = None system = _sst_module.PSOPTAcrobot() cpp_propagator = _sst_module.SystemPropagator() dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) xdot = acrobot_obs.dynamics jax_dynamics = acrobot_obs.jax_dynamics enforce_bounds = acrobot_obs.enforce_bounds cae = CAE_acrobot_voxel_2d_3 mlp = mlp_acrobot.MLP obs_f = True bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.02 num_steps = 21 traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve( x0, x1, 400, num_steps, step_sz * 1, step_sz * (num_steps - 1), x_init, u_init, t_init) goal_S0 = np.diag([1., 1., 0, 0]) #goal_S0 = np.identity(4) goal_rho0 = 1.0 elif args.env_type == 'acrobot_obs_6': IsInCollision = acrobot_obs.IsInCollision normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize obs_file = None obc_file = None xdot = acrobot_obs.dynamics system = _sst_module.PSOPTAcrobot() cpp_propagator = _sst_module.SystemPropagator() dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) jax_dynamics = acrobot_obs.jax_dynamics enforce_bounds = acrobot_obs.enforce_bounds cae = CAE_acrobot_voxel_2d_3 mlp = mlp_acrobot.MLP4 obs_f = True bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.02 num_steps = 21 traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve( x0, x1, 400, num_steps, step_sz * 1, step_sz * (num_steps - 1), x_init, u_init, t_init) goal_S0 = np.diag([1., 1., 0, 0]) #goal_S0 = np.identity(4) goal_rho0 = 1.0 elif args.env_type == 'acrobot_obs_6': IsInCollision = acrobot_obs.IsInCollision normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize obs_file = None obc_file = None xdot = acrobot_obs.dynamics system = _sst_module.PSOPTAcrobot() cpp_propagator = _sst_module.SystemPropagator() dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) jax_dynamics = acrobot_obs.jax_dynamics enforce_bounds = acrobot_obs.enforce_bounds mlp = mlp_acrobot.MLP5 cae = CAE_acrobot_voxel_2d_3 obs_f = True bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.02 num_steps = 21 traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve( x0, x1, 400, num_steps, step_sz * 1, step_sz * (num_steps - 1), x_init, u_init, t_init) goal_S0 = np.diag([1., 1., 0, 0]) #goal_S0 = np.identity(4) goal_rho0 = 1.0 elif args.env_type == 'acrobot_obs_8': IsInCollision = acrobot_obs.IsInCollision #IsInCollision = lambda x, obs: False normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize obs_file = None obc_file = None system = _sst_module.PSOPTAcrobot() cpp_propagator = _sst_module.SystemPropagator() dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) xdot = acrobot_obs.dynamics jax_dynamics = acrobot_obs.jax_dynamics enforce_bounds = acrobot_obs.enforce_bounds cae = CAE_acrobot_voxel_2d_3 mlp = mlp_acrobot.MLP6 obs_f = True bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.02 #num_steps = 21 num_steps = 21 #args.num_steps*2 traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve( x0, x1, 400, num_steps, step_sz * 1, step_sz * (num_steps - 1), x_init, u_init, t_init) #traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: #def cem_trajopt(x0, x1, step_sz, num_steps, x_init, u_init, t_init): # u, t = acrobot_obs.trajopt(x0, x1, 500, num_steps, step_sz*1, step_sz*(num_steps-1), x_init, u_init, t_init) # xs, us, dts, valid = propagate(x0, u, t, dynamics=dynamics, enforce_bounds=enforce_bounds, IsInCollision=lambda x: False, system=system, step_sz=step_sz) # return xs, us, dts #traj_opt = cem_trajopt goal_S0 = np.diag([1., 1., 0, 0]) goal_rho0 = 1.0 mpNet0 = KMPNet(args.total_input_size, args.AE_input_size, args.mlp_input_size, args.output_size, cae, mlp) mpNet1 = KMPNet(args.total_input_size, args.AE_input_size, args.mlp_input_size, args.output_size, cae, mlp) # load previously trained model if start epoch > 0 #model_path='kmpnet_epoch_%d_direction_0_step_%d.pkl' %(args.start_epoch, args.num_steps) model_path = 'kmpnet_epoch_%d_direction_0.pkl' % (args.start_epoch) if args.start_epoch > 0: load_net_state(mpNet0, os.path.join(args.model_path, model_path)) torch_seed, np_seed, py_seed = load_seed( os.path.join(args.model_path, model_path)) # set seed after loading torch.manual_seed(torch_seed) np.random.seed(np_seed) random.seed(py_seed) if torch.cuda.is_available(): mpNet0.cuda() mpNet0.mlp.cuda() mpNet0.encoder.cuda() if args.opt == 'Adagrad': mpNet0.set_opt(torch.optim.Adagrad, lr=args.learning_rate) elif args.opt == 'Adam': mpNet0.set_opt(torch.optim.Adam, lr=args.learning_rate) elif args.opt == 'SGD': mpNet0.set_opt(torch.optim.SGD, lr=args.learning_rate, momentum=0.9) if args.start_epoch > 0: load_opt_state(mpNet0, os.path.join(args.model_path, model_path)) # load previously trained model if start epoch > 0 #model_path='kmpnet_epoch_%d_direction_1_step_%d.pkl' %(args.start_epoch, args.num_steps) model_path = 'kmpnet_epoch_%d_direction_1.pkl' % (args.start_epoch) if args.start_epoch > 0: load_net_state(mpNet1, os.path.join(args.model_path, model_path)) torch_seed, np_seed, py_seed = load_seed( os.path.join(args.model_path, model_path)) # set seed after loading torch.manual_seed(torch_seed) np.random.seed(np_seed) random.seed(py_seed) if torch.cuda.is_available(): mpNet1.cuda() mpNet1.mlp.cuda() mpNet1.encoder.cuda() if args.opt == 'Adagrad': mpNet1.set_opt(torch.optim.Adagrad, lr=args.learning_rate) elif args.opt == 'Adam': mpNet1.set_opt(torch.optim.Adam, lr=args.learning_rate) elif args.opt == 'SGD': mpNet1.set_opt(torch.optim.SGD, lr=args.learning_rate, momentum=0.9) if args.start_epoch > 0: load_opt_state(mpNet1, os.path.join(args.model_path, model_path)) cae = CAE_acrobot_voxel_2d_3 mlp = mlp_acrobot.MLP6 costNet = KMPNet(args.total_input_size, args.AE_input_size, args.mlp_input_size, 1, cae, mlp) model_path = 'cost_kmpnet_epoch_300_direction_0_step_20.pkl' model_folder = '/media/arclabdl1/HD1/YLmiao/results/KMPnet_res/cost_acrobot_obs_8_lr0.010000_SGD_step_20/' if args.start_epoch > 0: load_net_state(costNet, os.path.join(model_folder, model_path)) torch_seed, np_seed, py_seed = load_seed( os.path.join(model_folder, model_path)) # set seed after loading torch.manual_seed(torch_seed) np.random.seed(np_seed) random.seed(py_seed) if torch.cuda.is_available(): costNet.cuda() costNet.mlp.cuda() costNet.encoder.cuda() if args.opt == 'Adagrad': costNet.set_opt(torch.optim.Adagrad, lr=args.learning_rate) elif args.opt == 'Adam': costNet.set_opt(torch.optim.Adam, lr=args.learning_rate) elif args.opt == 'SGD': costNet.set_opt(torch.optim.SGD, lr=args.learning_rate, momentum=0.9) if args.start_epoch > 0: load_opt_state(costNet, os.path.join(model_folder, model_path)) costNet.eval() # define informer circular = system.is_circular_topology() def critics(env, x0, xG): x0_x = torch.from_numpy(x0.x).type(torch.FloatTensor) xG_x = torch.from_numpy(xG.x).type(torch.FloatTensor) x0_x = normalize_func(x0_x) xG_x = normalize_func(xG_x) if torch.cuda.is_available(): x0_x = x0_x.cuda() xG_x = xG_x.cuda() x = torch.cat([x0_x, xG_x], dim=0) if torch.cuda.is_available(): x = x.cuda() cost = costNet(x.unsqueeze(0), env.unsqueeze(0)).cpu().data cost = cost.numpy()[0][0] return cost def informer(env, x0, xG, direction): x0_x = torch.from_numpy(x0.x).type(torch.FloatTensor) xG_x = torch.from_numpy(xG.x).type(torch.FloatTensor) x0_x = normalize_func(x0_x) xG_x = normalize_func(xG_x) if torch.cuda.is_available(): x0_x = x0_x.cuda() xG_x = xG_x.cuda() if direction == 0: x = torch.cat([x0_x, xG_x], dim=0) mpNet = mpNet0 if torch.cuda.is_available(): x = x.cuda() next_state = mpNet(x.unsqueeze(0), env.unsqueeze(0)).cpu().data next_state = unnormalize_func(next_state).numpy()[0] delta_x = next_state - x0.x # can be either clockwise or counterclockwise, take shorter one for i in range(len(delta_x)): if circular[i]: delta_x[i] = delta_x[i] - np.floor( delta_x[i] / (2 * np.pi)) * (2 * np.pi) if delta_x[i] > np.pi: delta_x[i] = delta_x[i] - 2 * np.pi # randomly pick either direction rand_d = np.random.randint(2) if rand_d < 1 and np.abs(delta_x[i]) >= np.pi * 0.5: if delta_x[i] > 0.: delta_x[i] = delta_x[i] - 2 * np.pi if delta_x[i] <= 0.: delta_x[i] = delta_x[i] + 2 * np.pi res = Node(x0.x + delta_x) cov = np.diag([0.02, 0.02, 0.02, 0.02]) #mean = next_state #next_state = np.random.multivariate_normal(mean=next_state,cov=cov) mean = np.zeros(next_state.shape) rand_x_init = np.random.multivariate_normal(mean=mean, cov=cov, size=num_steps) rand_x_init[0] = rand_x_init[0] * 0. rand_x_init[-1] = rand_x_init[-1] * 0. x_init = np.linspace(x0.x, x0.x + delta_x, num_steps) + rand_x_init ## TODO: : change this to general case u_init_i = np.random.uniform(low=[-4.], high=[4], size=(num_steps, 1)) u_init = u_init_i #u_init_i = control[max_d_i] cost_i = (num_steps - 1) * step_sz #TOEDIT #u_init = np.repeat(u_init_i, num_steps, axis=0).reshape(-1,len(u_init_i)) #u_init = u_init + np.random.normal(scale=1., size=u_init.shape) t_init = np.linspace(0, cost_i, num_steps) """ print('init:') print('x_init:') print(x_init) print('u_init:') print(u_init) print('t_init:') print(t_init) print('xw:') print(next_state) """ else: x = torch.cat([x0_x, xG_x], dim=0) mpNet = mpNet1 next_state = mpNet(x.unsqueeze(0), env.unsqueeze(0)).cpu().data next_state = unnormalize_func(next_state).numpy()[0] delta_x = next_state - x0.x # can be either clockwise or counterclockwise, take shorter one for i in range(len(delta_x)): if circular[i]: delta_x[i] = delta_x[i] - np.floor( delta_x[i] / (2 * np.pi)) * (2 * np.pi) if delta_x[i] > np.pi: delta_x[i] = delta_x[i] - 2 * np.pi # randomly pick either direction rand_d = np.random.randint(2) if rand_d < 1 and np.abs(delta_x[i]) >= np.pi * 0.5: if delta_x[i] > 0.: delta_x[i] = delta_x[i] - 2 * np.pi elif delta_x[i] <= 0.: delta_x[i] = delta_x[i] + 2 * np.pi #next_state = state[max_d_i] + delta_x next_state = x0.x + delta_x res = Node(next_state) # initial: from max_d_i to max_d_i+1 x_init = np.linspace(next_state, x0.x, num_steps) + rand_x_init # action: copy over to number of steps u_init_i = np.random.uniform(low=[-4.], high=[4], size=(num_steps, 1)) u_init = u_init_i cost_i = (num_steps - 1) * step_sz #u_init = np.repeat(u_init_i, num_steps, axis=0).reshape(-1,len(u_init_i)) #u_init = u_init + np.random.normal(scale=1., size=u_init.shape) t_init = np.linspace(0, cost_i, num_steps) return res, x_init, u_init, t_init def init_informer(env, x0, xG, direction): if direction == 0: next_state = xG.x delta_x = next_state - x0.x # can be either clockwise or counterclockwise, take shorter one for i in range(len(delta_x)): if circular[i]: delta_x[i] = delta_x[i] - np.floor( delta_x[i] / (2 * np.pi)) * (2 * np.pi) if delta_x[i] > np.pi: delta_x[i] = delta_x[i] - 2 * np.pi # randomly pick either direction rand_d = np.random.randint(2) #print('inside init_informer') #print('delta_x[%d]: %f' % (i, delta_x[i])) if rand_d < 1 and np.abs(delta_x[i]) >= np.pi * 0.9: if delta_x[i] > 0.: delta_x[i] = delta_x[i] - 2 * np.pi if delta_x[i] <= 0.: delta_x[i] = delta_x[i] + 2 * np.pi res = Node(next_state) cov = np.diag([0.02, 0.02, 0.02, 0.02]) #mean = next_state #next_state = np.random.multivariate_normal(mean=next_state,cov=cov) mean = np.zeros(next_state.shape) rand_x_init = np.random.multivariate_normal(mean=mean, cov=cov, size=num_steps) rand_x_init[0] = rand_x_init[0] * 0. rand_x_init[-1] = rand_x_init[-1] * 0. x_init = np.linspace(x0.x, x0.x + delta_x, num_steps) + rand_x_init ## TODO: : change this to general case u_init_i = np.random.uniform(low=[-4.], high=[4], size=(num_steps, 1)) u_init = u_init_i #u_init_i = control[max_d_i] #cost_i = 10*step_sz cost_i = (num_steps - 1) * step_sz #u_init = np.repeat(u_init_i, num_steps, axis=0).reshape(-1,len(u_init_i)) #u_init = u_init + np.random.normal(scale=1., size=u_init.shape) t_init = np.linspace(0, cost_i, num_steps) else: next_state = xG.x delta_x = x0.x - next_state # can be either clockwise or counterclockwise, take shorter one for i in range(len(delta_x)): if circular[i]: delta_x[i] = delta_x[i] - np.floor( delta_x[i] / (2 * np.pi)) * (2 * np.pi) if delta_x[i] > np.pi: delta_x[i] = delta_x[i] - 2 * np.pi # randomly pick either direction rand_d = np.random.randint(2) if rand_d < 1 and np.abs(delta_x[i]) >= np.pi * 0.5: if delta_x[i] > 0.: delta_x[i] = delta_x[i] - 2 * np.pi elif delta_x[i] <= 0.: delta_x[i] = delta_x[i] + 2 * np.pi #next_state = state[max_d_i] + delta_x res = Node(next_state) # initial: from max_d_i to max_d_i+1 x_init = np.linspace(next_state, next_state + delta_x, num_steps) + rand_x_init # action: copy over to number of steps u_init_i = np.random.uniform(low=[-4.], high=[4], size=(num_steps, 1)) u_init = u_init_i cost_i = (num_steps - 1) * step_sz #u_init = np.repeat(u_init_i, num_steps, axis=0).reshape(-1,len(u_init_i)) #u_init = u_init + np.random.normal(scale=1., size=u_init.shape) t_init = np.linspace(0, cost_i, num_steps) return x_init, u_init, t_init # load data print('loading...') if args.seen_N > 0: seen_test_data = data_loader.load_test_dataset(args.seen_N, args.seen_NP, args.data_folder, obs_f, args.seen_s, args.seen_sp) if args.unseen_N > 0: unseen_test_data = data_loader.load_test_dataset( args.unseen_N, args.unseen_NP, args.data_folder, obs_f, args.unseen_s, args.unseen_sp) # test # testing print('testing...') seen_test_suc_rate = 0. unseen_test_suc_rate = 0. T = 1 for _ in range(T): # unnormalize function normalize_func = lambda x: normalize(x, args.world_size) unnormalize_func = lambda x: unnormalize(x, args.world_size) # seen if args.seen_N > 0: time_file = os.path.join( args.model_path, 'time_seen_epoch_%d_mlp.p' % (args.start_epoch)) fes_path_, valid_path_ = eval_tasks( mpNet0, mpNet1, seen_test_data, args.model_path, time_file, IsInCollision, normalize_func, unnormalize_func, critics, informer, init_informer, system, dynamics, xdot, jax_dynamics, enforce_bounds, traj_opt, step_sz, num_steps) valid_path = valid_path_.flatten() fes_path = fes_path_.flatten( ) # notice different environments are involved seen_test_suc_rate += fes_path.sum() / valid_path.sum() # unseen if args.unseen_N > 0: time_file = os.path.join( args.model_path, 'time_unseen_epoch_%d_mlp.p' % (args.start_epoch)) fes_path_, valid_path_ = eval_tasks( mpNet0, mpNet1, unseen_test_data, args.model_path, time_file, IsInCollision, normalize_func, unnormalize_func, critics, informer, init_informer, system, dynamics, xdot, jax_dynamics, enforce_bounds, traj_opt, step_sz, num_steps) valid_path = valid_path_.flatten() fes_path = fes_path_.flatten( ) # notice different environments are involved unseen_test_suc_rate += fes_path.sum() / valid_path.sum() if args.seen_N > 0: seen_test_suc_rate = seen_test_suc_rate / T f = open( os.path.join(args.model_path, 'seen_accuracy_epoch_%d.txt' % (args.start_epoch)), 'w') f.write(str(seen_test_suc_rate)) f.close() if args.unseen_N > 0: unseen_test_suc_rate = unseen_test_suc_rate / T # Save the models f = open( os.path.join(args.model_path, 'unseen_accuracy_epoch_%d.txt' % (args.start_epoch)), 'w') f.write(str(unseen_test_suc_rate)) f.close()
def main(args): # load MPNet #global hl if torch.cuda.is_available(): torch.cuda.set_device(args.device) if args.debug: from sparse_rrt import _sst_module from plan_utility import cart_pole, cart_pole_obs, pendulum, acrobot_obs from tools import data_loader cpp_propagator = _sst_module.SystemPropagator() if args.env_type == 'pendulum': if args.debug: normalize = pendulum.normalize unnormalize = pendulum.unnormalize system = standard_cpp_systems.PSOPTPendulum() dynamics = None enforce_bounds = None step_sz = 0.002 num_steps = 20 elif args.env_type == 'cartpole': if args.debug: normalize = cart_pole.normalize unnormalize = cart_pole.unnormalize dynamics = cartpole.dynamics system = _sst_module.CartPole() enforce_bounds = cartpole.enforce_bounds step_sz = 0.002 num_steps = 20 elif args.env_type == 'cartpole_obs': if args.debug: normalize = cart_pole_obs.normalize unnormalize = cart_pole_obs.unnormalize system = _sst_module.CartPole() dynamics = cartpole.dynamics enforce_bounds = cartpole.enforce_bounds step_sz = 0.002 num_steps = 20 elif args.env_type == 'acrobot_obs': if args.debug: normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize system = _sst_module.PSOPTAcrobot() #dynamics = acrobot_obs.dynamics dynamics = lambda x, u, t: cpp_propagator.propagate( system, x, u, t) enforce_bounds = acrobot_obs.enforce_bounds step_sz = 0.02 num_steps = 20 mlp = mlp_acrobot.MLP cae = CAE_acrobot_voxel_2d elif args.env_type == 'acrobot_obs_8': if args.debug: normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize system = _sst_module.PSOPTAcrobot() #dynamics = acrobot_obs.dynamics dynamics = lambda x, u, t: cpp_propagator.propagate( system, x, u, t) enforce_bounds = acrobot_obs.enforce_bounds step_sz = 0.02 num_steps = 20 mlp = mlp_acrobot.MLP6 cae = CAE_acrobot_voxel_2d_3 mpnet = KMPNet(args.total_input_size, args.AE_input_size, args.mlp_input_size, args.output_size, cae, mlp) # load net # load previously trained model if start epoch > 0 model_dir = args.model_dir model_dir = model_dir + 'cost_' + args.env_type + "_lr%f_%s_step_%d/" % ( args.learning_rate, args.opt, args.num_steps) if not os.path.exists(model_dir): os.makedirs(model_dir) model_path = 'cost_kmpnet_epoch_%d_direction_%d_step_%d.pkl' % ( args.start_epoch, args.direction, args.num_steps) torch_seed, np_seed, py_seed = 0, 0, 0 if args.start_epoch > 0: #load_net_state(mpnet, os.path.join(args.model_path, model_path)) load_net_state(mpnet, os.path.join(model_dir, model_path)) #torch_seed, np_seed, py_seed = load_seed(os.path.join(args.model_path, model_path)) torch_seed, np_seed, py_seed = load_seed( os.path.join(model_dir, model_path)) # set seed after loading torch.manual_seed(torch_seed) np.random.seed(np_seed) random.seed(py_seed) if torch.cuda.is_available(): mpnet.cuda() mpnet.mlp.cuda() mpnet.encoder.cuda() if args.opt == 'Adagrad': mpnet.set_opt(torch.optim.Adagrad, lr=args.learning_rate) elif args.opt == 'Adam': mpnet.set_opt(torch.optim.Adam, lr=args.learning_rate) elif args.opt == 'SGD': mpnet.set_opt(torch.optim.SGD, lr=args.learning_rate, momentum=0.9) elif args.opt == 'ASGD': mpnet.set_opt(torch.optim.ASGD, lr=args.learning_rate) if args.start_epoch > 0: #load_opt_state(mpnet, os.path.join(args.model_path, model_path)) load_opt_state(mpnet, os.path.join(model_dir, model_path)) # load train and test data print('loading...') if args.debug: obs, cost_dataset, cost_targets, env_indices, \ _, _, _, _ = data_loader.load_train_dataset_cost(N=args.no_env, NP=args.no_motion_paths, data_folder=args.path_folder, obs_f=True, direction=args.direction, dynamics=dynamics, enforce_bounds=enforce_bounds, system=system, step_sz=step_sz, num_steps=args.num_steps) # randomize the dataset before training data = list(zip(cost_dataset, cost_targets, env_indices)) random.shuffle(data) dataset, targets, env_indices = list(zip(*data)) dataset = list(dataset) targets = list(targets) env_indices = list(env_indices) dataset = np.array(dataset) targets = np.array(targets) env_indices = np.array(env_indices) # record bi = dataset.astype(np.float32) print('bi shape:') print(bi.shape) bt = targets bi = torch.FloatTensor(bi) bt = torch.FloatTensor(bt) bi = normalize(bi, args.world_size) bi = to_var(bi) bt = to_var(bt) if obs is None: bobs = None else: bobs = obs[env_indices].astype(np.float32) bobs = torch.FloatTensor(bobs) bobs = to_var(bobs) else: bobs = np.random.rand(1, 1, args.AE_input_size, args.AE_input_size) bobs = torch.from_numpy(bobs).type(torch.FloatTensor) bobs = to_var(bobs) bi = np.random.rand(1, args.total_input_size) bt = np.random.rand(1, args.output_size) bi = torch.from_numpy(bi).type(torch.FloatTensor) bt = torch.from_numpy(bt).type(torch.FloatTensor) bi = to_var(bi) bt = to_var(bt) # set to training model to enable dropout #mpnet.train() mpnet.eval() MLP = mpnet.mlp encoder = mpnet.encoder traced_encoder = torch.jit.trace(encoder, (bobs)) encoder_output = encoder(bobs) mlp_input = torch.cat((encoder_output, bi), 1) traced_MLP = torch.jit.trace(MLP, (mlp_input)) traced_encoder.save("costnet_%s_encoder_epoch_%d_step_%d.pt" % (args.env_type, args.start_epoch, args.num_steps)) traced_MLP.save("costnet_%s_MLP_epoch_%d_step_%d.pt" % (args.env_type, args.start_epoch, args.num_steps)) # test the traced model serilized_encoder = torch.jit.script(encoder) serilized_MLP = torch.jit.script(MLP) serilized_encoder_output = serilized_encoder(bobs) serilized_MLP_input = torch.cat((serilized_encoder_output, bi), 1) serilized_MLP_output = serilized_MLP(serilized_MLP_input) print('encoder output: ', serilized_encoder_output) print('MLP output: ', serilized_MLP_output) print('data: ', bt)