def main(args): # set seed torch_seed = np.random.randint(low=0, high=1000) np_seed = np.random.randint(low=0, high=1000) py_seed = np.random.randint(low=0, high=1000) np.random.seed(np_seed) random.seed(py_seed) # Build the models # setup evaluation function and load function if args.env_type == 'pendulum': obs_file = None obc_file = None obs_f = False #system = standard_cpp_systems.PSOPTPendulum() #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0) elif args.env_type == 'cartpole_obs': obs_file = None obc_file = None obs_f = True obs_width = 4.0 step_sz = 0.002 psopt_system = _sst_module.PSOPTCartPole() cpp_propagator = _sst_module.SystemPropagator() #system = standard_cpp_systems.RectangleObs(obs, 4., 'cartpole') dynamics = lambda x, u, t: cpp_propagator.propagate( psopt_system, x, u, t) normalize = cart_pole_obs.normalize unnormalize = cart_pole_obs.unnormalize system = _sst_module.PSOPTCartPole() mlp = mlp_cartpole.MLP cae = CAE_cartpole_voxel_2d dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = cart_pole_obs.enforce_bounds step_sz = 0.002 num_steps = 100 elif args.env_type == 'cartpole_obs_2': obs_file = None obc_file = None obs_f = True obs_width = 4.0 step_sz = 0.002 psopt_system = _sst_module.PSOPTCartPole() cpp_propagator = _sst_module.SystemPropagator() #system = standard_cpp_systems.RectangleObs(obs, 4., 'cartpole') dynamics = lambda x, u, t: cpp_propagator.propagate( psopt_system, x, u, t) normalize = cart_pole_obs.normalize unnormalize = cart_pole_obs.unnormalize system = _sst_module.PSOPTCartPole() mlp = mlp_cartpole.MLP2 cae = CAE_cartpole_voxel_2d dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = cart_pole_obs.enforce_bounds step_sz = 0.002 num_steps = 100 elif args.env_type == 'cartpole_obs_3': obs_file = None obc_file = None obs_f = True obs_width = 4.0 step_sz = 0.002 psopt_system = _sst_module.PSOPTCartPole() cpp_propagator = _sst_module.SystemPropagator() #system = standard_cpp_systems.RectangleObs(obs, 4., 'cartpole') dynamics = lambda x, u, t: cpp_propagator.propagate( psopt_system, x, u, t) normalize = cart_pole_obs.normalize unnormalize = cart_pole_obs.unnormalize system = _sst_module.PSOPTCartPole() mlp = mlp_cartpole.MLP4 cae = CAE_cartpole_voxel_2d dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = cart_pole_obs.enforce_bounds step_sz = 0.002 num_steps = 200 elif args.env_type == 'cartpole_obs_4': obs_file = None obc_file = None obs_f = True obs_width = 4.0 step_sz = 0.002 psopt_system = _sst_module.PSOPTCartPole() cpp_propagator = _sst_module.SystemPropagator() #system = standard_cpp_systems.RectangleObs(obs, 4., 'cartpole') dynamics = lambda x, u, t: cpp_propagator.propagate( psopt_system, x, u, t) normalize = cart_pole_obs.normalize unnormalize = cart_pole_obs.unnormalize system = _sst_module.PSOPTCartPole() mlp = mlp_cartpole.MLP3 cae = CAE_cartpole_voxel_2d dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = cart_pole_obs.enforce_bounds step_sz = 0.002 num_steps = 200 elif args.env_type == 'acrobot_obs': obs_file = None obc_file = None obs_f = True obs_width = 6.0 #system = standard_cpp_systems.RectangleObs(obs_list, args.obs_width, 'acrobot') #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) mpnet = KMPNet(args.total_input_size, args.AE_input_size, args.mlp_input_size, args.output_size, cae, mlp, None) # load net # load previously trained model if start epoch > 0 model_dir = args.model_dir if args.loss == 'mse': if args.multigoal == 0: model_dir = model_dir + args.env_type + "_lr%f_%s_step_%d/" % ( args.learning_rate, args.opt, args.num_steps) else: model_dir = model_dir + args.env_type + "_lr%f_%s_step_%d_multigoal/" % ( args.learning_rate, args.opt, args.num_steps) else: if args.multigoal == 0: model_dir = model_dir + args.env_type + "_lr%f_%s_loss_%s_step_%d/" % ( args.learning_rate, args.opt, args.loss, args.num_steps) else: model_dir = model_dir + args.env_type + "_lr%f_%s_loss_%s_step_%d_multigoal/" % ( args.learning_rate, args.opt, args.loss, args.num_steps) print(model_dir) if not os.path.exists(model_dir): os.makedirs(model_dir) model_path = 'kmpnet_epoch_%d_direction_%d_step_%d.pkl' % ( args.start_epoch, args.direction, args.num_steps) torch_seed, np_seed, py_seed = 0, 0, 0 if args.start_epoch > 0: #load_net_state(mpnet, os.path.join(args.model_path, model_path)) load_net_state(mpnet, os.path.join(model_dir, model_path)) #torch_seed, np_seed, py_seed = load_seed(os.path.join(args.model_path, model_path)) torch_seed, np_seed, py_seed = load_seed( os.path.join(model_dir, model_path)) # set seed after loading torch.manual_seed(torch_seed) np.random.seed(np_seed) random.seed(py_seed) if torch.cuda.is_available(): mpnet.cuda() mpnet.mlp.cuda() mpnet.encoder.cuda() if args.opt == 'Adagrad': mpnet.set_opt(torch.optim.Adagrad, lr=args.learning_rate) elif args.opt == 'Adam': mpnet.set_opt(torch.optim.Adam, lr=args.learning_rate) elif args.opt == 'SGD': mpnet.set_opt(torch.optim.SGD, lr=args.learning_rate, momentum=0.9) elif args.opt == 'ASGD': mpnet.set_opt(torch.optim.ASGD, lr=args.learning_rate) if args.start_epoch > 0: #load_opt_state(mpnet, os.path.join(args.model_path, model_path)) load_opt_state(mpnet, os.path.join(model_dir, model_path)) mpnet.eval() # load data print('loading...') if args.seen_N > 0: seen_test_data = data_loader.load_test_dataset(args.seen_N, args.seen_NP, args.data_folder, obs_f, args.seen_s, args.seen_sp) if args.unseen_N > 0: unseen_test_data = data_loader.load_test_dataset( args.unseen_N, args.unseen_NP, args.data_folder, obs_f, args.unseen_s, args.unseen_sp) # test # testing print('testing...') seen_test_suc_rate = 0. unseen_test_suc_rate = 0. # find path plt.ion() fig = plt.figure() ax = fig.add_subplot(111) ax.set_autoscale_on(True) hl, = ax.plot([], [], 'b') #hl_real, = ax.plot([], [], 'r') def update_line(h, ax, new_data): h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1])) #h.set_xdata(np.append(h.get_xdata(), new_data[0])) #h.set_ydata(np.append(h.get_ydata(), new_data[1])) def draw_update_line(ax): ax.relim() ax.autoscale_view() fig.canvas.draw() fig.canvas.flush_events() # randomly pick up a point in the data, and find similar data in the dataset # plot the next point obc, obs, paths, sgs, path_lengths, controls, costs = seen_test_data for envi in range(2): for pathi in range(10): obs_i = obs[envi] new_obs_i = [] obs_i = obs[envi] plan_res_path = [] plan_time_path = [] plan_cost_path = [] data_cost_path = [] for k in range(len(obs_i)): obs_pt = [] obs_pt.append(obs_i[k][0] - obs_width / 2) obs_pt.append(obs_i[k][1] - obs_width / 2) obs_pt.append(obs_i[k][0] - obs_width / 2) obs_pt.append(obs_i[k][1] + obs_width / 2) obs_pt.append(obs_i[k][0] + obs_width / 2) obs_pt.append(obs_i[k][1] + obs_width / 2) obs_pt.append(obs_i[k][0] + obs_width / 2) obs_pt.append(obs_i[k][1] - obs_width / 2) new_obs_i.append(obs_pt) obs_i = new_obs_i # visualization plt.ion() fig = plt.figure() ax = fig.add_subplot(121) ax_vel = fig.add_subplot(122) #ax.set_autoscale_on(True) ax.set_xlim(-30, 30) ax.set_ylim(-np.pi, np.pi) ax_vel.set_xlim(-40, 40) ax_vel.set_ylim(-2, 2) hl, = ax.plot([], [], 'b') #hl_real, = ax.plot([], [], 'r') hl_for, = ax.plot([], [], 'g') hl_back, = ax.plot([], [], 'r') hl_for_mpnet, = ax.plot([], [], 'lightgreen') hl_back_mpnet, = ax.plot([], [], 'salmon') #print(obs) def update_line(h, ax, new_data): new_data = wrap_angle(new_data, propagate_system) h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1])) #h.set_xdata(np.append(h.get_xdata(), new_data[0])) #h.set_ydata(np.append(h.get_ydata(), new_data[1])) def remove_last_k(h, ax, k): h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k]) def draw_update_line(ax): #ax.relim() #ax.autoscale_view() fig.canvas.draw() fig.canvas.flush_events() #plt.show() def wrap_angle(x, system): circular = system.is_circular_topology() res = np.array(x) for i in range(len(x)): if circular[i]: # use our previously saved version res[i] = x[i] - np.floor(x[i] / (2 * np.pi)) * (2 * np.pi) if res[i] > np.pi: res[i] = res[i] - 2 * np.pi return res dx = 1 dtheta = 0.1 feasible_points = [] infeasible_points = [] imin = 0 imax = int(2 * 30. / dx) jmin = 0 jmax = int(2 * np.pi / dtheta) for i in range(imin, imax): for j in range(jmin, jmax): x = np.array([dx * i - 30, 0., dtheta * j - np.pi, 0.]) if IsInCollision(x, obs_i): infeasible_points.append(x) else: feasible_points.append(x) feasible_points = np.array(feasible_points) infeasible_points = np.array(infeasible_points) print('feasible points') print(feasible_points) print('infeasible points') print(infeasible_points) ax.scatter(feasible_points[:, 0], feasible_points[:, 2], c='yellow') ax.scatter(infeasible_points[:, 0], infeasible_points[:, 2], c='pink') #for i in range(len(data)): # update_line(hl, ax, data[i]) draw_update_line(ax) #state_t = start_state xs = paths[envi][pathi] us = controls[envi][pathi] ts = costs[envi][pathi] # propagate data p_start = xs[0] detail_paths = [p_start] detail_controls = [] detail_costs = [] state = [p_start] control = [] cost = [] for k in range(len(us)): #state_i.append(len(detail_paths)-1) max_steps = int(ts[k] / step_sz) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][k]) # modify it because of small difference between data and actual propagation p_start = xs[k] state[-1] = xs[k] for step in range(1, max_steps + 1): p_start = dynamics(p_start, us[k], step_sz) p_start = enforce_bounds(p_start) detail_paths.append(p_start) accum_cost += step_sz if (step % 1 == 0) or (step == max_steps): state.append(p_start) #print('control') #print(controls[i][j]) cost.append(accum_cost) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][-1]) state[-1] = xs[-1] #print(len(state)) xs_to_plot = np.array(state) for i in range(len(xs_to_plot)): xs_to_plot[i] = wrap_angle(xs_to_plot[i], psopt_system) ax.scatter(xs_to_plot[:, 0], xs_to_plot[:, 2], c='green') # draw start and goal #ax.scatter(start_state[0], goal_state[0], marker='X') draw_update_line(ax) ax_vel.scatter(xs_to_plot[:, 1], xs_to_plot[:, 3], c='green', s=0.1) draw_update_line(ax_vel) plt.waitforbuttonpress() # visualize mPNet path mpnet_paths = [] state = xs[0] #for k in range(int(len(xs_to_plot)/args.num_steps)): for k in range(50): mpnet_paths.append(state) bi = np.concatenate([state, xs[-1]]) bi = np.array([bi]) bi = torch.from_numpy(bi).type(torch.FloatTensor) print(bi) bi = normalize(bi, args.world_size) bi = to_var(bi) if obc is None: bobs = None else: bobs = np.array([obc[envi]]).astype(np.float32) print(bobs.shape) bobs = torch.FloatTensor(bobs) bobs = to_var(bobs) bt = mpnet(bi, bobs).cpu() bt = unnormalize(bt, args.world_size) bt = bt.detach().numpy() print(bt.shape) state = bt[0] print(mpnet_paths) xs_to_plot = np.array(mpnet_paths) print(len(xs_to_plot)) for i in range(len(xs_to_plot)): xs_to_plot[i] = wrap_angle(xs_to_plot[i], psopt_system) ax.scatter(xs_to_plot[:, 0], xs_to_plot[:, 2], c='lightgreen') # draw start and goal #ax.scatter(start_state[0], goal_state[0], marker='X') draw_update_line(ax) ax_vel.scatter(xs_to_plot[:, 1], xs_to_plot[:, 3], c='lightgreen') draw_update_line(ax_vel) plt.waitforbuttonpress()
correct_cost_file = dir + 'cost_%d' % (j) + ".pkl" # store the corrected file file = open(correct_path_file, 'wb') pickle.dump(np.array(correct_path), file) file = open(correct_control_file, 'wb') pickle.dump(np.array(correct_control), file) file = open(correct_cost_file, 'wb') pickle.dump(np.array(correct_cost), file) cpp_propagator = _sst_module.SystemPropagator() acrobot_system = _sst_module.PSOPTAcrobot() acrobot_dynamics = lambda x, u, t: cpp_propagator.propagate( acrobot_system, x, u, t) cartpole_system = _sst_module.PSOPTCartPole() cartpole_dynamics = lambda x, u, t: cpp_propagator.propagate( cartpole_system, x, u, t) # use the following if don't want to modify the original dataset, but create new ones #correct_dataset(N=10, NP=1000, data_folder='../data/acrobot_obs/', obs_f=True, direction=0, dynamics=acrobot_dynamics, enforce_bounds=None, system=None, step_sz=0.02) #correct_dataset(N=10, NP=1000, data_folder='../data/cartpole_obs/', obs_f=True, direction=0, dynamics=cartpole_dynamics, enforce_bounds=None, system=None, step_sz=0.002) # use the following if want to change the original dataset rename_dataset(N=10, NP=1000, data_folder='../data/acrobot_obs/', obs_f=True, direction=0, dynamics=acrobot_dynamics,
def main(args): # set seed print(args.model_path) torch_seed = np.random.randint(low=0, high=1000) np_seed = np.random.randint(low=0, high=1000) py_seed = np.random.randint(low=0, high=1000) #torch.manual_seed(torch_seed) np.random.seed(np_seed) random.seed(py_seed) # Build the models #if torch.cuda.is_available(): # torch.cuda.set_device(args.device) # setup evaluation function and load function if args.env_type == 'pendulum': obs_file = None obc_file = None obs_f = False #system = standard_cpp_systems.PSOPTPendulum() #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0) elif args.env_type == 'cartpole_obs': step_sz = 0.002 num_steps = 21 goal_radius = 1.5 random_seed = 0 delta_near = 2.0 delta_drain = 1.2 cost_threshold = 1.2 min_time_steps = 10 max_time_steps = 200 integration_step = 0.002 obs_f = True obs_file = None obc_file = None system = _sst_module.PSOPTCartPole() cpp_propagator = _sst_module.SystemPropagator() dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) obs_width = 4.0 IsInCollision = cartpole_IsInCollision enforce_bounds = cartpole_enforce_bounds elif args.env_type == 'acrobot_obs': obs_file = None obc_file = None system = _sst_module.PSOPTAcrobot() cpp_propagator = _sst_module.SystemPropagator() dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) obs_f = True bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.02 num_steps = 21 traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve( x0, x1, 200, num_steps, step_sz * 1, step_sz * (num_steps - 1), x_init, u_init, t_init) goal_S0 = np.diag([1., 1., 0, 0]) #goal_S0 = np.identity(4) goal_rho0 = 1.0 IsInCollision = acrobot_IsInCollision enforce_bounds = acrobot_enforce_bounds if args.env_type == 'pendulum': step_sz = 0.002 num_steps = 20 elif args.env_type in [ 'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8' ]: #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot') obs_width = 6.0 step_sz = 0.02 num_steps = 21 goal_radius = 2.0 random_seed = 0 delta_near = 0.1 delta_drain = 0.05 # load previously trained model if start epoch > 0 #model_path='kmpnet_epoch_%d_direction_0_step_%d.pkl' %(args.start_epoch, args.num_steps) mlp_path = os.path.join(os.getcwd() + '/c++/', 'acrobot_mlp_annotated_test_gpu.pt') encoder_path = os.path.join(os.getcwd() + '/c++/', 'acrobot_encoder_annotated_test_cpu.pt') print('mlp_path:') print(mlp_path) ##################################################### def plan_one_path(obs_i, obs, obc, detailed_data_path, data_path, start_state, goal_state, goal_inform_state, cost_i, max_iteration, out_queue_t, out_queue_cost, random_seed): if args.env_type == 'pendulum': system = standard_cpp_systems.PSOPTPendulum() bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0) step_sz = 0.002 num_steps = 20 traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 200, num_steps, 1, 20, step_sz) elif args.env_type == 'cartpole_obs': #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole') obs_width = 4.0 psopt_system = _sst_module.PSOPTCartPole() propagate_system = standard_cpp_systems.RectangleObs( obs, 4., 'cartpole') #distance_computer = propagate_system.distance_computer() distance_computer = _sst_module.euclidean_distance( np.array(propagate_system.is_circular_topology())) step_sz = 0.002 num_steps = 21 goal_radius = 1.5 random_seed = 0 delta_near = 2.0 delta_drain = 1.2 #delta_near=.2 #delta_drain=.1 cost_threshold = 1.05 min_time_steps = 10 max_time_steps = 200 #min_time_steps = 5 #max_time_steps = 400 integration_step = 0.002 elif args.env_type in [ 'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8' ]: #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot') obs_width = 6.0 psopt_system = _sst_module.PSOPTAcrobot() propagate_system = standard_cpp_systems.RectangleObs( obs, 6., 'acrobot') distance_computer = propagate_system.distance_computer() #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology())) step_sz = 0.02 num_steps = 21 goal_radius = 2.0 random_seed = 0 delta_near = 1.0 delta_drain = 0.5 cost_threshold = 1.05 min_time_steps = 5 max_time_steps = 100 integration_step = 0.02 planner = _sst_module.SSTWrapper( state_bounds=propagate_system.get_state_bounds(), control_bounds=propagate_system.get_control_bounds(), distance=distance_computer, start_state=start_state, goal_state=goal_state, goal_radius=goal_radius, random_seed=random_seed, sst_delta_near=delta_near, sst_delta_drain=delta_drain) #print('creating planner...') # generate a path by using SST to plan for some maximal iterations time0 = time.time() for i in range(max_iteration): planner.step(propagate_system, min_time_steps, max_time_steps, integration_step) # early break for initial path solution = planner.get_solution() if solution is not None: #print('solution found already.') # based on cost break xs, us, ts = solution t_sst = np.sum(ts) #print(t_sst) #print(cost_i) if t_sst <= cost_i * cost_threshold: print('solved in %d iterations' % (i)) break plan_time = time.time() - time0 solution = planner.get_solution() xs, us, ts = solution print(np.linalg.norm(np.array(xs[-1]) - goal_state)) """ # visualization plt.ion() fig = plt.figure() ax = fig.add_subplot(111) #ax.set_autoscale_on(True) #ax.set_xlim(-30, 30) ax.set_xlim(-np.pi, np.pi) ax.set_ylim(-np.pi, np.pi) hl, = ax.plot([], [], 'b') #hl_real, = ax.plot([], [], 'r') hl_for, = ax.plot([], [], 'g') hl_back, = ax.plot([], [], 'r') hl_for_mpnet, = ax.plot([], [], 'lightgreen') hl_back_mpnet, = ax.plot([], [], 'salmon') #print(obs) def update_line(h, ax, new_data): new_data = wrap_angle(new_data, propagate_system) h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1])) #h.set_xdata(np.append(h.get_xdata(), new_data[0])) #h.set_ydata(np.append(h.get_ydata(), new_data[1])) def remove_last_k(h, ax, k): h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k]) def draw_update_line(ax): #ax.relim() #ax.autoscale_view() fig.canvas.draw() fig.canvas.flush_events() #plt.show() def wrap_angle(x, system): circular = system.is_circular_topology() res = np.array(x) for i in range(len(x)): if circular[i]: # use our previously saved version res[i] = x[i] - np.floor(x[i] / (2*np.pi))*(2*np.pi) if res[i] > np.pi: res[i] = res[i] - 2*np.pi return res dx = 1 dtheta = 0.1 feasible_points = [] infeasible_points = [] imin = 0 #imax = int(2*30./dx) imax = int(2*np.pi/dtheta) jmin = 0 jmax = int(2*np.pi/dtheta) for i in range(imin, imax): for j in range(jmin, jmax): x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.]) if IsInCollision(x, obs_i): infeasible_points.append(x) else: feasible_points.append(x) feasible_points = np.array(feasible_points) infeasible_points = np.array(infeasible_points) print('feasible points') print(feasible_points) print('infeasible points') print(infeasible_points) ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow') ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink') #for i in range(len(data)): # update_line(hl, ax, data[i]) draw_update_line(ax) #state_t = start_state xs_to_plot = np.array(detailed_data_path) for i in range(len(xs_to_plot)): xs_to_plot[i] = wrap_angle(detailed_data_path[i], propagate_system) ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='lightgreen', s=0.5) # draw start and goal #ax.scatter(start_state[0], goal_state[0], marker='X') draw_update_line(ax) #plt.waitforbuttonpress() if solution is not None: xs, us, ts = solution # propagate data p_start = xs[0] detail_paths = [p_start] detail_controls = [] detail_costs = [] state = [p_start] control = [] cost = [] for k in range(len(us)): #state_i.append(len(detail_paths)-1) print(ts[k]) max_steps = int(np.round(ts[k]/step_sz)) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][k]) # modify it because of small difference between data and actual propagation for step in range(1,max_steps+1): p_start = dynamics(p_start, us[k], step_sz) p_start = enforce_bounds(p_start) detail_paths.append(p_start) accum_cost += step_sz if (step % 1 == 0) or (step == max_steps): state.append(p_start) #print('control') #print(controls[i][j]) cost.append(accum_cost) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][-1]) #state[-1] = xs[-1] xs_to_plot = np.array(state) for i in range(len(xs_to_plot)): xs_to_plot[i] = wrap_angle(xs_to_plot[i], propagate_system) ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='green', s=0.5) start_state_np = np.array(start_state) goal_state_np = np.array(goal_state) ax.scatter([start_state_np[0]], [start_state_np[1]], c='blue', marker='*') ax.scatter([goal_state_np[0]], [goal_state_np[1]], c='red', marker='*') # draw start and goal #ax.scatter(start_state[0], goal_state[0], marker='X') draw_update_line(ax) plt.waitforbuttonpress() """ # validate if the path contains collision if solution is not None: res_x, res_u, res_t = solution print('solution_x:') print(res_x) print('path_x:') print(np.array(data_path)) # propagate data p_start = res_x[0] detail_paths = [p_start] detail_controls = [] detail_costs = [] state = [p_start] control = [] cost = [] for k in range(len(res_u)): #state_i.append(len(detail_paths)-1) max_steps = int(np.round(res_t[k] / step_sz)) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][k]) # modify it because of small difference between data and actual propagation #p_start = res_x[k] #state[-1] = res_x[k] for step in range(1, max_steps + 1): p_start = dynamics(p_start, res_u[k], step_sz) p_start = enforce_bounds(p_start) detail_paths.append(p_start) accum_cost += step_sz if (step % 1 == 0) or (step == max_steps): state.append(p_start) #print('control') #print(controls[i][j]) cost.append(accum_cost) accum_cost = 0. # check collision for the new state if IsInCollision(p_start, obs_i): print( 'collision happens at u_index: %d, step: %d' % (k, step)) assert not IsInCollision(p_start, obs_i) #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][-1]) #state[-1] = res_x[-1] # validation end print('plan time: %fs' % (plan_time)) if solution is None: print('failed.') out_queue_t.put(-1) out_queue_cost.put(-1) else: print('path succeeded.') out_queue_t.put(plan_time) out_queue_cost.put(t_sst) #################################################################################### # load data print('loading...') if args.seen_N > 0: seen_test_data = data_loader.load_test_dataset(args.seen_N, args.seen_NP, args.data_folder, obs_f, args.seen_s, args.seen_sp) if args.unseen_N > 0: unseen_test_data = data_loader.load_test_dataset( args.unseen_N, args.unseen_NP, args.data_folder, obs_f, args.unseen_s, args.unseen_sp) # test # testing queue_t = Queue(1) queue_cost = Queue(1) print('testing...') seen_test_suc_rate = 0. unseen_test_suc_rate = 0. obc, obs, paths, sgs, path_lengths, controls, costs = seen_test_data obc = obc.astype(np.float32) # for all planning, use a flattened vector to store plan_times = [] plan_res_all = [] plan_costs = [] data_costs = [] # store in a 2d vector, for env and path plan_res_env = [] plan_time_env = [] plan_cost_env = [] data_cost_env = [] # directory to save the results res_path = args.res_path res_path = res_path + args.env_type + "_sst_compare_with_mpnet/" if args.env_type == 'acrobot_obs': res_path = '/media/arclabdl1/HD1/YLmiao/mpc-mpnet-cuda-yinglong/results/cpp_full/acrobot_obs/default_small_model_batch/' elif args.env_type == 'cartpole_obs': res_path = '/media/arclabdl1/HD1/YLmiao/mpc-mpnet-cuda-yinglong/results/cpp_full/cartpole_obs/default_small_model_batch/' mpnet_tree_time = np.load(res_path + 'time_10_100.npy', allow_pickle=True) mpnet_tree_sr = np.load(res_path + 'sr_10_100.npy', allow_pickle=True) mpnet_tree_cost = np.load(res_path + 'costs_10_100.npy', allow_pickle=True) if not os.path.exists(res_path): os.makedirs(res_path) for i in range(len(paths)): new_obs_i = [] obs_i = obs[i] plan_res_path = [] plan_time_path = [] plan_cost_path = [] data_cost_path = [] for k in range(len(obs_i)): obs_pt = [] obs_pt.append(obs_i[k][0] - obs_width / 2) obs_pt.append(obs_i[k][1] - obs_width / 2) obs_pt.append(obs_i[k][0] - obs_width / 2) obs_pt.append(obs_i[k][1] + obs_width / 2) obs_pt.append(obs_i[k][0] + obs_width / 2) obs_pt.append(obs_i[k][1] + obs_width / 2) obs_pt.append(obs_i[k][0] + obs_width / 2) obs_pt.append(obs_i[k][1] - obs_width / 2) new_obs_i.append(obs_pt) obs_i = new_obs_i #print(obs_i) for j in range(len(paths[i])): start_state = sgs[i][j][0] #goal_inform_state = paths[i][j][-1] goal_inform_state = sgs[i][j][1] goal_state = sgs[i][j][1] # propagate data p_start = paths[i][j][0] detail_paths = [p_start] detail_controls = [] detail_costs = [] state = [p_start] control = [] cost = [] for k in range(len(controls[i][j])): #state_i.append(len(detail_paths)-1) #max_steps = int(costs[i][j][k]/step_sz) max_steps = 1000000 accum_cost = 0. for step in range(1, max_steps + 1): p_start = dynamics(p_start, controls[i][j][k], step_sz) p_start = enforce_bounds(p_start) detail_paths.append(p_start) accum_cost += step_sz if (step % 1 == 0) or (step == max_steps): state.append(p_start) #print('control') #print(controls[i][j]) cost.append(accum_cost) accum_cost = 0. # check collision for the new state if IsInCollision(p_start, obs_i): print( 'collision happens at u_index: %d, step: %d' % (k, step)) assert not IsInCollision(p_start, obs_i) if np.linalg.norm(p_start - paths[i][j][k + 1]) <= 1e-3: break # validation end cost_i = np.sum(cost) print('data cost:', cost_i) if mpnet_tree_sr[i][j] != 0: # use MPNet tree cost cost_i = mpnet_tree_cost[i][j] print('using mpnet cost: ', cost_i) #cost_i = 100000000. # acrobot: 300000 # cartpole: 500000 print('environment: %d/%d, path: %d/%d' % (i + 1, len(paths), j + 1, len(paths[i]))) plan_t_trials = [] plan_cost_trials = [] for trial in range(1): random_seed = random.randint(0, 100) #random_seed = 0 p = Process(target=plan_one_path, args=(obs_i, obs[i], obc[i], state, paths[i][j], start_state, goal_state, goal_inform_state, cost_i, args.num_iter, queue_t, queue_cost, random_seed)) #plan_one_path(obs_i, obs[i], obc[i], state, paths[i][j], start_state, goal_state, goal_inform_state, cost_i, args.num_iter, queue_t, queue_cost, random_seed) p.start() p.join() plan_t = queue_t.get() plan_cost = queue_cost.get() if plan_t != -1: plan_t_trials.append(plan_t) plan_cost_trials.append(plan_cost) #assert len(plan_ts) == 10 plan_t = np.mean(plan_t_trials) plan_cost = np.mean(plan_cost_trials) if plan_t == -1: # failed, do not record in the flattened list plan_res_all.append(0) # record in the 2d list plan_res_path.append(0) plan_time_path.append(plan_t) plan_cost_path.append(plan_cost) data_cost_path.append(-1.0) else: # record in the flattened list plan_res_all.append(1) plan_times.append(plan_t) plan_costs.append(plan_cost) data_costs.append(cost_i) # record in the 2d list plan_res_path.append(1) plan_time_path.append(plan_t) plan_cost_path.append(plan_cost) data_cost_path.append(cost_i) print('plan costs:') print(plan_costs) print('average accuracy up to now: %f' % (np.array(plan_res_all).flatten().mean())) print('plan average time: %f' % (np.array(plan_times).mean())) print('plan time std: %f' % (np.array(plan_times).std())) print('plan average cost: %f' % (np.array(plan_costs).mean())) print('plan cost std: %f' % (np.array(plan_costs).std())) print('data average cost: %f' % (np.array(data_costs).mean())) print('data cost std: %f' % (np.array(data_costs).std())) # store in the 2d list plan_res_env.append(plan_res_path) plan_time_env.append(plan_time_path) plan_cost_env.append(plan_cost_path) data_cost_env.append(data_cost_path) # for every environment planned, save # save the 2d list # save as numpy array #np.save(res_path+"plan_res.npy", np.array(plan_res_env)) #np.save(res_path+"plan_time.npy", np.array(plan_time_env)) #np.save(res_path+"plan_cost.npy", np.array(plan_cost_env)) #np.save(res_path+"data_cost.npy", np.array(data_cost_env)) print('plan accuracy: %f' % (np.array(plan_res_all).flatten().mean())) print('plan average time: %f' % (np.array(plan_times).mean())) print('plan time std: %f' % (np.array(plan_times).std())) print('plan average cost: %f' % (np.array(plan_costs).mean())) print('plan cost std: %f' % (np.array(plan_costs).std())) print('data average cost: %f' % (np.array(data_costs).mean())) print('data cost std: %f' % (np.array(data_costs).std())) # save the 2d list # save as numpy array plan_res_env = np.array(plan_res_env) plan_time_env = np.array(plan_time_env) plan_cost_env = np.array(plan_cost_env) data_cost_env = np.array(data_cost_env) np.save(res_path + "plan_res.npy", plan_res_env) np.save(res_path + "plan_time.npy", plan_time_env) np.save(res_path + "plan_cost.npy", plan_cost_env) np.save(res_path + "data_cost.npy", data_cost_env)
def plan_one_path(obs_i, obs, obc, detailed_data_path, data_path, start_state, goal_state, goal_inform_state, cost_i, max_iteration, out_queue_t, out_queue_cost, random_seed): if args.env_type == 'pendulum': system = standard_cpp_systems.PSOPTPendulum() bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0) step_sz = 0.002 num_steps = 20 traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 200, num_steps, 1, 20, step_sz) elif args.env_type == 'cartpole_obs': #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole') obs_width = 4.0 psopt_system = _sst_module.PSOPTCartPole() propagate_system = standard_cpp_systems.RectangleObs( obs, 4., 'cartpole') #distance_computer = propagate_system.distance_computer() distance_computer = _sst_module.euclidean_distance( np.array(propagate_system.is_circular_topology())) step_sz = 0.002 num_steps = 21 goal_radius = 1.5 random_seed = 0 delta_near = 2.0 delta_drain = 1.2 #delta_near=.2 #delta_drain=.1 cost_threshold = 1.05 min_time_steps = 10 max_time_steps = 200 #min_time_steps = 5 #max_time_steps = 400 integration_step = 0.002 elif args.env_type in [ 'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8' ]: #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot') obs_width = 6.0 psopt_system = _sst_module.PSOPTAcrobot() propagate_system = standard_cpp_systems.RectangleObs( obs, 6., 'acrobot') distance_computer = propagate_system.distance_computer() #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology())) step_sz = 0.02 num_steps = 21 goal_radius = 2.0 random_seed = 0 delta_near = 1.0 delta_drain = 0.5 cost_threshold = 1.05 min_time_steps = 5 max_time_steps = 100 integration_step = 0.02 planner = _sst_module.SSTWrapper( state_bounds=propagate_system.get_state_bounds(), control_bounds=propagate_system.get_control_bounds(), distance=distance_computer, start_state=start_state, goal_state=goal_state, goal_radius=goal_radius, random_seed=random_seed, sst_delta_near=delta_near, sst_delta_drain=delta_drain) #print('creating planner...') # generate a path by using SST to plan for some maximal iterations time0 = time.time() for i in range(max_iteration): planner.step(propagate_system, min_time_steps, max_time_steps, integration_step) # early break for initial path solution = planner.get_solution() if solution is not None: #print('solution found already.') # based on cost break xs, us, ts = solution t_sst = np.sum(ts) #print(t_sst) #print(cost_i) if t_sst <= cost_i * cost_threshold: print('solved in %d iterations' % (i)) break plan_time = time.time() - time0 solution = planner.get_solution() xs, us, ts = solution print(np.linalg.norm(np.array(xs[-1]) - goal_state)) """ # visualization plt.ion() fig = plt.figure() ax = fig.add_subplot(111) #ax.set_autoscale_on(True) #ax.set_xlim(-30, 30) ax.set_xlim(-np.pi, np.pi) ax.set_ylim(-np.pi, np.pi) hl, = ax.plot([], [], 'b') #hl_real, = ax.plot([], [], 'r') hl_for, = ax.plot([], [], 'g') hl_back, = ax.plot([], [], 'r') hl_for_mpnet, = ax.plot([], [], 'lightgreen') hl_back_mpnet, = ax.plot([], [], 'salmon') #print(obs) def update_line(h, ax, new_data): new_data = wrap_angle(new_data, propagate_system) h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1])) #h.set_xdata(np.append(h.get_xdata(), new_data[0])) #h.set_ydata(np.append(h.get_ydata(), new_data[1])) def remove_last_k(h, ax, k): h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k]) def draw_update_line(ax): #ax.relim() #ax.autoscale_view() fig.canvas.draw() fig.canvas.flush_events() #plt.show() def wrap_angle(x, system): circular = system.is_circular_topology() res = np.array(x) for i in range(len(x)): if circular[i]: # use our previously saved version res[i] = x[i] - np.floor(x[i] / (2*np.pi))*(2*np.pi) if res[i] > np.pi: res[i] = res[i] - 2*np.pi return res dx = 1 dtheta = 0.1 feasible_points = [] infeasible_points = [] imin = 0 #imax = int(2*30./dx) imax = int(2*np.pi/dtheta) jmin = 0 jmax = int(2*np.pi/dtheta) for i in range(imin, imax): for j in range(jmin, jmax): x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.]) if IsInCollision(x, obs_i): infeasible_points.append(x) else: feasible_points.append(x) feasible_points = np.array(feasible_points) infeasible_points = np.array(infeasible_points) print('feasible points') print(feasible_points) print('infeasible points') print(infeasible_points) ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow') ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink') #for i in range(len(data)): # update_line(hl, ax, data[i]) draw_update_line(ax) #state_t = start_state xs_to_plot = np.array(detailed_data_path) for i in range(len(xs_to_plot)): xs_to_plot[i] = wrap_angle(detailed_data_path[i], propagate_system) ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='lightgreen', s=0.5) # draw start and goal #ax.scatter(start_state[0], goal_state[0], marker='X') draw_update_line(ax) #plt.waitforbuttonpress() if solution is not None: xs, us, ts = solution # propagate data p_start = xs[0] detail_paths = [p_start] detail_controls = [] detail_costs = [] state = [p_start] control = [] cost = [] for k in range(len(us)): #state_i.append(len(detail_paths)-1) print(ts[k]) max_steps = int(np.round(ts[k]/step_sz)) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][k]) # modify it because of small difference between data and actual propagation for step in range(1,max_steps+1): p_start = dynamics(p_start, us[k], step_sz) p_start = enforce_bounds(p_start) detail_paths.append(p_start) accum_cost += step_sz if (step % 1 == 0) or (step == max_steps): state.append(p_start) #print('control') #print(controls[i][j]) cost.append(accum_cost) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][-1]) #state[-1] = xs[-1] xs_to_plot = np.array(state) for i in range(len(xs_to_plot)): xs_to_plot[i] = wrap_angle(xs_to_plot[i], propagate_system) ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='green', s=0.5) start_state_np = np.array(start_state) goal_state_np = np.array(goal_state) ax.scatter([start_state_np[0]], [start_state_np[1]], c='blue', marker='*') ax.scatter([goal_state_np[0]], [goal_state_np[1]], c='red', marker='*') # draw start and goal #ax.scatter(start_state[0], goal_state[0], marker='X') draw_update_line(ax) plt.waitforbuttonpress() """ # validate if the path contains collision if solution is not None: res_x, res_u, res_t = solution print('solution_x:') print(res_x) print('path_x:') print(np.array(data_path)) # propagate data p_start = res_x[0] detail_paths = [p_start] detail_controls = [] detail_costs = [] state = [p_start] control = [] cost = [] for k in range(len(res_u)): #state_i.append(len(detail_paths)-1) max_steps = int(np.round(res_t[k] / step_sz)) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][k]) # modify it because of small difference between data and actual propagation #p_start = res_x[k] #state[-1] = res_x[k] for step in range(1, max_steps + 1): p_start = dynamics(p_start, res_u[k], step_sz) p_start = enforce_bounds(p_start) detail_paths.append(p_start) accum_cost += step_sz if (step % 1 == 0) or (step == max_steps): state.append(p_start) #print('control') #print(controls[i][j]) cost.append(accum_cost) accum_cost = 0. # check collision for the new state if IsInCollision(p_start, obs_i): print( 'collision happens at u_index: %d, step: %d' % (k, step)) assert not IsInCollision(p_start, obs_i) #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][-1]) #state[-1] = res_x[-1] # validation end print('plan time: %fs' % (plan_time)) if solution is None: print('failed.') out_queue_t.put(-1) out_queue_cost.put(-1) else: print('path succeeded.') out_queue_t.put(plan_time) out_queue_cost.put(t_sst)
def plan_one_path(obs_i, obs, obc, start_state, goal_state, goal_inform_state, cost_i, max_iteration, data, out_queue): if args.env_type == 'pendulum': system = standard_cpp_systems.PSOPTPendulum() bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0) step_sz = 0.002 num_steps = 20 traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 200, num_steps, 1, 20, step_sz) elif args.env_type == 'cartpole_obs': obs_width = 4.0 psopt_system = _sst_module.PSOPTCartPole() propagate_system = standard_cpp_systems.RectangleObs( obs, obs_width, 'cartpole') distance_computer = propagate_system.distance_computer() #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology())) step_sz = 0.002 num_steps = 101 goal_radius = 1.5 random_seed = 0 delta_near = 2.0 delta_drain = 1.2 device = 3 num_sample = 10 min_time_steps = 10 max_time_steps = 200 mpnet_goal_threshold = 2.0 mpnet_length_threshold = 40 pick_goal_init_threshold = 0.1 pick_goal_end_threshold = 0.8 pick_goal_start_percent = 0.4 elif args.env_type in [ 'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8' ]: #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot') obs_width = 6.0 psopt_system = _sst_module.PSOPTAcrobot() propagate_system = standard_cpp_systems.RectangleObs( obs, 6., 'acrobot') distance_computer = propagate_system.distance_computer() #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology())) step_sz = 0.02 num_steps = 21 goal_radius = 2.0 random_seed = 0 delta_near = 1.0 delta_drain = 0.5 #print('creating planner...') planner = vis_planners.DeepSMPWrapper(mlp_path, encoder_path, cost_mlp_path, cost_encoder_path, \ 200, num_steps, step_sz, propagate_system, 3) #cost_threshold = cost_i * 1.1 cost_threshold = 100000000. """ # visualization plt.ion() fig = plt.figure() ax = fig.add_subplot(111) #ax.set_autoscale_on(True) ax.set_xlim(-np.pi, np.pi) ax.set_ylim(-np.pi, np.pi) hl, = ax.plot([], [], 'b') #hl_real, = ax.plot([], [], 'r') hl_for, = ax.plot([], [], 'g') hl_back, = ax.plot([], [], 'r') hl_for_mpnet, = ax.plot([], [], 'lightgreen') hl_back_mpnet, = ax.plot([], [], 'salmon') #print(obs) def update_line(h, ax, new_data): new_data = wrap_angle(new_data, propagate_system) h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1])) #h.set_xdata(np.append(h.get_xdata(), new_data[0])) #h.set_ydata(np.append(h.get_ydata(), new_data[1])) def remove_last_k(h, ax, k): h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k]) def draw_update_line(ax): #ax.relim() #ax.autoscale_view() fig.canvas.draw() fig.canvas.flush_events() #plt.show() def wrap_angle(x, system): circular = system.is_circular_topology() res = np.array(x) for i in range(len(x)): if circular[i]: # use our previously saved version res[i] = x[i] - np.floor(x[i] / (2*np.pi))*(2*np.pi) if res[i] > np.pi: res[i] = res[i] - 2*np.pi return res dtheta = 0.1 feasible_points = [] infeasible_points = [] imin = 0 imax = int(2*np.pi/dtheta) for i in range(imin, imax): for j in range(imin, imax): x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.]) if IsInCollision(x, obs_i): infeasible_points.append(x) else: feasible_points.append(x) feasible_points = np.array(feasible_points) infeasible_points = np.array(infeasible_points) print('feasible points') print(feasible_points) print('infeasible points') print(infeasible_points) ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow') ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink') for i in range(len(data)): update_line(hl, ax, data[i]) draw_update_line(ax) # visualization end """ # visualize for cartpole plt.ion() fig = plt.figure() ax = fig.add_subplot(111) ax.set_autoscale_on(True) hl, = ax.plot([], [], 'b') #hl_real, = ax.plot([], [], 'r') def update_line(h, ax, new_data): h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1])) #h.set_xdata(np.append(h.get_xdata(), new_data[0])) #h.set_ydata(np.append(h.get_ydata(), new_data[1])) def draw_update_line(ax): ax.relim() ax.autoscale_view() fig.canvas.draw() fig.canvas.flush_events() # randomly pick up a point in the data, and find similar data in the dataset # plot the next point #ax.set_autoscale_on(True) ax.set_xlim(-30, 30) ax.set_ylim(-np.pi, np.pi) hl, = ax.plot([], [], 'b') #hl_real, = ax.plot([], [], 'r') hl_for, = ax.plot([], [], 'g') hl_back, = ax.plot([], [], 'r') hl_for_mpnet, = ax.plot([], [], 'lightgreen') hl_back_mpnet, = ax.plot([], [], 'salmon') #print(obs) def update_line(h, ax, new_data): new_data = wrap_angle(new_data, propagate_system) h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1])) #h.set_xdata(np.append(h.get_xdata(), new_data[0])) #h.set_ydata(np.append(h.get_ydata(), new_data[1])) def remove_last_k(h, ax, k): h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k]) def draw_update_line(ax): #ax.relim() #ax.autoscale_view() fig.canvas.draw() fig.canvas.flush_events() #plt.show() def wrap_angle(x, system): circular = system.is_circular_topology() res = np.array(x) for i in range(len(x)): if circular[i]: # use our previously saved version res[i] = x[i] - np.floor(x[i] / (2 * np.pi)) * (2 * np.pi) if res[i] > np.pi: res[i] = res[i] - 2 * np.pi return res dx = 1 dtheta = 0.1 feasible_points = [] infeasible_points = [] imin = 0 imax = int(2 * 30. / dx) jmin = 0 jmax = int(2 * np.pi / dtheta) for i in range(imin, imax): for j in range(jmin, jmax): x = np.array([dx * i - 30, 0., dtheta * j - np.pi, 0.]) if IsInCollision(x, obs_i): infeasible_points.append(x) else: feasible_points.append(x) feasible_points = np.array(feasible_points) infeasible_points = np.array(infeasible_points) print('feasible points') print(feasible_points) print('infeasible points') print(infeasible_points) ax.scatter(feasible_points[:, 0], feasible_points[:, 2], c='yellow') ax.scatter(infeasible_points[:, 0], infeasible_points[:, 2], c='pink') #for i in range(len(data)): # update_line(hl, ax, data[i]) draw_update_line(ax) #state_t = start_state # visualization end # generate a path by using SST to plan for some maximal iterations state_t = start_state pick_goal_threshold = .0 ax.scatter(goal_inform_state[0], goal_inform_state[2], marker='*', c='red') # cartpole for i in range(max_iteration): time0 = time.time() # determine if picking goal based on iteration number goal_prob = random.random() #flag=1: using MPNet #flag=0: not using MPNet if goal_prob <= pick_goal_threshold: flag = 0 else: flag = 1 bvp_x, bvp_u, bvp_t, mpnet_res = planner.plan_tree_SMP_step("sst", propagate_system, psopt_system, obc.flatten(), state_t, goal_inform_state, goal_inform_state, \ flag, goal_radius, max_iteration, distance_computer, \ delta_near, delta_drain, cost_threshold) if len( bvp_u ) != 0: # and bvp_t[0] > 0.01: # turn bvp_t off if want to use step_bvp xw_scat = ax.scatter(mpnet_res[0], mpnet_res[2], c='lightgreen') draw_update_line(ax) # propagate data p_start = bvp_x[0] detail_paths = [p_start] detail_controls = [] detail_costs = [] state = [p_start] control = [] cost = [] for k in range(len(bvp_u)): #state_i.append(len(detail_paths)-1) max_steps = int(bvp_t[k] / step_sz) accum_cost = 0. for step in range(1, max_steps + 1): p_start = dynamics(p_start, bvp_u[k], step_sz) p_start = enforce_bounds(p_start) detail_paths.append(p_start) accum_cost += step_sz if (step % 1 == 0) or (step == max_steps): state.append(p_start) cost.append(accum_cost) accum_cost = 0. xs_to_plot = np.array(state) for j in range(len(xs_to_plot)): xs_to_plot[j] = wrap_angle(xs_to_plot[j], propagate_system) xs_to_plot = xs_to_plot[::5] #ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='green') # acrobot ax.scatter(xs_to_plot[:, 0], xs_to_plot[:, 2], c='green') # cartpole #ax.scatter(bvp_x[:,0], bvp_x[:,1], c='green') print('solution: x') print(bvp_x) print('solution: u') print(bvp_u) print('solution: t') print(bvp_t) # draw start and goal #ax.scatter(start_state[0], goal_state[0], marker='X') draw_update_line(ax) #state_t = state[-1] print('state_t:') print(state_t) print('mpnet_res') print(mpnet_res) # based on flag, determine how to change state_t if flag: # only change state_t if in MPNet inform mode #if len(bvp_u) != 0: if True: # try using steered result as next start #if not IsInCollision(state_t, obs_i): state_t = mpnet_res print('after copying to state_t:') print('state_t') print(state_t) # if in collision, then not using it else: print('failure') state_t = start_state # failed BVP, back to origin plan_time = time.time() - time0 print('plan time: %fs' % (plan_time)) if len(res_u) == 0: print('failed.') out_queue.put(-1) else: print('path succeeded.') print('cost: %f' % (np.sum(res_t))) print('cost_threshold: %f' % (cost_threshold)) print('data cost: %f' % (cost_i)) out_queue.put(plan_time)
def main(args): # load MPNet #global hl if torch.cuda.is_available(): torch.cuda.set_device(args.device) if args.debug: from sparse_rrt import _sst_module from plan_utility import cart_pole, cart_pole_obs, pendulum, acrobot_obs from tools import data_loader cpp_propagator = _sst_module.SystemPropagator() if args.env_type == 'pendulum': if args.debug: normalize = pendulum.normalize unnormalize = pendulum.unnormalize system = standard_cpp_systems.PSOPTPendulum() dynamics = None enforce_bounds = None step_sz = 0.002 num_steps = 20 elif args.env_type == 'cartpole': if args.debug: normalize = cart_pole.normalize unnormalize = cart_pole.unnormalize dynamics = cartpole.dynamics system = _sst_module.CartPole() enforce_bounds = cartpole.enforce_bounds step_sz = 0.002 num_steps = 20 elif args.env_type == 'cartpole_obs': if args.debug: normalize = cart_pole_obs.normalize unnormalize = cart_pole_obs.unnormalize system = _sst_module.PSOPTCartPole() dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = cart_pole_obs.enforce_bounds step_sz = 0.002 num_steps = 20 mlp = mlp_cartpole.MLP cae = CAE_cartpole_voxel_2d elif args.env_type == 'acrobot_obs': if args.debug: normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize system = _sst_module.PSOPTAcrobot() #dynamics = acrobot_obs.dynamics dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = acrobot_obs.enforce_bounds step_sz = 0.02 num_steps = 20 mlp = mlp_acrobot.MLP cae = CAE_acrobot_voxel_2d if args.loss == 'mse': loss_f = nn.MSELoss() #loss_f = mse_loss elif args.loss == 'l1_smooth': loss_f = nn.SmoothL1Loss() #loss_f = l1_smooth_loss elif args.loss == 'mse_decoupled': def mse_decoupled(y1, y2): # for angle terms, wrap it to -pi~pi l_0 = torch.abs(y1[:,0] - y2[:,0]) l_1 = torch.abs(y1[:,1] - y2[:,1]) l_2 = torch.abs(y1[:,2] - y2[:,2]) # angular dimension l_3 = torch.abs(y1[:,3] - y2[:,3]) cond = l_2 > np.pi l_2 = torch.where(cond, 2*np.pi-l_2, l_2) l_0 = torch.mean(l_0) l_1 = torch.mean(l_1) l_2 = torch.mean(l_2) l_3 = torch.mean(l_3) return torch.stack([l_0, l_1, l_2, l_3]) loss_f = mse_decoupled mpnet_pnet = KMPNet(args.total_input_size, args.AE_input_size, args.mlp_input_size, args.output_size // 2, cae, mlp, loss_f) mpnet_vnet = KMPNet(args.total_input_size, args.AE_input_size, args.mlp_input_size, args.output_size // 2, cae, mlp, loss_f) mpnet_pos_vel = PosVelKMPNet(mpnet_p, mpnet_v) # load net # load previously trained model if start epoch > 0 model_dir = args.model_dir if args.loss == 'mse': if args.multigoal == 0: model_dir = model_dir+args.env_type+"_lr%f_%s_step_%d/" % (args.learning_rate, args.opt, args.num_steps) else: model_dir = model_dir+args.env_type+"_lr%f_%s_step_%d_multigoal/" % (args.learning_rate, args.opt, args.num_steps) else: if args.multigoal == 0: model_dir = model_dir+args.env_type+"_lr%f_%s_loss_%s_step_%d/" % (args.learning_rate, args.opt, args.loss, args.num_steps) else: model_dir = model_dir+args.env_type+"_lr%f_%s_loss_%s_step_%d_multigoal/" % (args.learning_rate, args.opt, args.loss, args.num_steps) if not os.path.exists(model_dir): os.makedirs(model_dir) model_pnet_path='kmpnet_pnet_epoch_%d_direction_%d_step_%d.pkl' %(args.start_epoch, args.direction, args.num_steps) model_vnet_path='kmpnet_vnet_epoch_%d_direction_%d_step_%d.pkl' %(args.start_epoch, args.direction, args.num_steps) torch_seed, np_seed, py_seed = 0, 0, 0 if args.start_epoch > 0: #load_net_state(mpnet, os.path.join(args.model_path, model_path)) load_net_state(mpnet_p, os.path.join(model_dir, model_pnet_path)) load_net_state(mpnet_v, os.path.join(model_dir, model_vnet_path)) #torch_seed, np_seed, py_seed = load_seed(os.path.join(args.model_path, model_path)) torch_seed, np_seed, py_seed = load_seed(os.path.join(model_dir, model_pnet_path)) # set seed after loading torch.manual_seed(torch_seed) np.random.seed(np_seed) random.seed(py_seed) if torch.cuda.is_available(): mpnet_pnet.cuda() mpnet_pnet.mlp.cuda() mpnet_pnet.encoder.cuda() mpnet_vnet.cuda() mpnet_vnet.mlp.cuda() mpnet_vnet.encoder.cuda() # load train and test data print('loading...') if args.debug: obs, cost_dataset, cost_targets, env_indices, \ _, _, _, _ = data_loader.load_train_dataset_cost(N=args.no_env, NP=args.no_motion_paths, data_folder=args.path_folder, obs_f=True, direction=args.direction, dynamics=dynamics, enforce_bounds=enforce_bounds, system=system, step_sz=step_sz, num_steps=args.num_steps) # randomize the dataset before training data=list(zip(cost_dataset,cost_targets,env_indices)) random.shuffle(data) dataset,targets,env_indices=list(zip(*data)) dataset = list(dataset) targets = list(targets) env_indices = list(env_indices) dataset = np.array(dataset) targets = np.array(targets) env_indices = np.array(env_indices) # record bi = dataset.astype(np.float32) print('bi shape:') print(bi.shape) bt = targets bi = torch.FloatTensor(bi) bt = torch.FloatTensor(bt) bi = normalize(bi, args.world_size) bi=to_var(bi) bt=to_var(bt) if obs is None: bobs = None else: bobs = obs[env_indices].astype(np.float32) bobs = torch.FloatTensor(bobs) bobs = to_var(bobs) else: bobs = np.random.rand(1,1,args.AE_input_size,args.AE_input_size) bobs = torch.from_numpy(bobs).type(torch.FloatTensor) bobs = to_var(bobs) bi = np.random.rand(1, args.total_input_size) bt = np.random.rand(1, args.output_size) bi = torch.from_numpy(bi).type(torch.FloatTensor) bt = torch.from_numpy(bt).type(torch.FloatTensor) bi = to_var(bi) bt = to_var(bt) # set to training model to enable dropout mpnet.train() #mpnet.eval() MLP = mpnet.mlp encoder = mpnet.encoder traced_encoder = torch.jit.trace(encoder, (bobs)) encoder_output = encoder(bobs) mlp_input = torch.cat((encoder_output, bi), 1) traced_MLP = torch.jit.trace(MLP, (mlp_input)) traced_encoder.save('%s_encoder_lr%f_epoch_%d_step_%d.pt' % (args.env_type, args.learning_rate, args.start_epoch, args.num_steps)) traced_MLP.save('%s_MLP_lr%f_epoch_%d_step_%d.pt' % (args.env_type, args.learning_rate, args.start_epoch, args.num_steps)) #traced_encoder.save("%s_encoder_epoch_%d.pt" % (args.env_type, args.start_epoch)) #traced_MLP.save("%s_MLP_epoch_%d.pt" % (args.env_type, args.start_epoch)) # test the traced model serilized_encoder = torch.jit.script(encoder) serilized_MLP = torch.jit.script(MLP) serilized_encoder_output = serilized_encoder(bobs) serilized_MLP_input = torch.cat((serilized_encoder_output, bi), 1) serilized_MLP_output = serilized_MLP(serilized_MLP_input) print('encoder output: ', serilized_encoder_output) print('MLP output: ', serilized_MLP_output) print('data: ', bt)
def plan_one_path(obs_i, obs, obc, start_state, goal_state, goal_inform_state, cost_i, max_iteration, out_queue_t, out_queue_cost): if args.env_type in [ 'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8' ]: #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot') obs_width = 6.0 psopt_system = _sst_module.PSOPTAcrobot() propagate_system = standard_cpp_systems.RectangleObs( obs, 6., 'acrobot') distance_computer = propagate_system.distance_computer() #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology())) step_sz = 0.02 num_steps = 21 goal_radius = 2.0 random_seed = 0 delta_near = 1.0 delta_drain = 0.5 device = 3 num_sample = 10 min_time_steps = 5 max_time_steps = 100 mpnet_goal_threshold = 2.0 mpnet_length_threshold = 30 pick_goal_init_threshold = 0.1 pick_goal_end_threshold = 0.8 pick_goal_start_percent = 0.4 elif args.env_type == 'cartpole_obs': obs_width = 4.0 psopt_system = _sst_module.PSOPTCartPole() propagate_system = standard_cpp_systems.RectangleObs( obs, obs_width, 'cartpole') distance_computer = propagate_system.distance_computer() #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology())) step_sz = 0.002 integration_step = 0.002 num_steps = 101 goal_radius = 1.5 random_seed = 0 delta_near = 2.0 delta_drain = 1.2 #delta_near=1.0 #delta_drain=0.5 device = 3 num_sample = 10 min_time_steps = 10 max_time_steps = 200 mpnet_goal_threshold = 2.0 mpnet_length_threshold = 90 pick_goal_init_threshold = 0.1 pick_goal_end_threshold = 0.8 pick_goal_start_percent = 0.4 #print('creating planner...') #planner = vis_planners.DeepSMPWrapper(mlp_path, encoder_path, cost_mlp_path, cost_encoder_path, 200, num_steps, step_sz, propagate_system, device) planner = _sst_module.SSTWrapper( state_bounds=propagate_system.get_state_bounds(), control_bounds=propagate_system.get_control_bounds(), distance=distance_computer, start_state=start_state, goal_state=goal_state, goal_radius=goal_radius, random_seed=0, sst_delta_near=delta_near, sst_delta_drain=delta_drain) cost_threshold = cost_i * args.cost_threshold #cost_threshold = 100000000. # generate a path by using SST to plan for some maximal iterations time0 = time.time() print('before plan_tree_SMP...') print('start_state:') print(start_state) print('goal_state:') print(goal_inform_state) res_x, res_u, res_t = planner.plan_tree_SMP("sst", propagate_system, psopt_system, obc.flatten(), start_state, goal_state, goal_inform_state, \ goal_radius, max_iteration, distance_computer, \ delta_near, delta_drain, cost_threshold, \ num_sample, min_time_steps, max_time_steps, \ mpnet_goal_threshold, mpnet_length_threshold, \ pick_goal_init_threshold, pick_goal_end_threshold, \ pick_goal_start_percent) print('after plan_tree_SMP.') plan_time = time.time() - time0 """ # visualization plt.ion() fig = plt.figure() ax = fig.add_subplot(111) #ax.set_autoscale_on(True) ax.set_xlim(-np.pi, np.pi) ax.set_ylim(-np.pi, np.pi) hl, = ax.plot([], [], 'b') #hl_real, = ax.plot([], [], 'r') hl_for, = ax.plot([], [], 'g') hl_back, = ax.plot([], [], 'r') hl_for_mpnet, = ax.plot([], [], 'lightgreen') hl_back_mpnet, = ax.plot([], [], 'salmon') #print(obs) def update_line(h, ax, new_data): new_data = wrap_angle(new_data, propagate_system) h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1])) #h.set_xdata(np.append(h.get_xdata(), new_data[0])) #h.set_ydata(np.append(h.get_ydata(), new_data[1])) def remove_last_k(h, ax, k): h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k]) def draw_update_line(ax): #ax.relim() #ax.autoscale_view() fig.canvas.draw() fig.canvas.flush_events() #plt.show() def wrap_angle(x, system): circular = system.is_circular_topology() res = np.array(x) for i in range(len(x)): if circular[i]: # use our previously saved version res[i] = x[i] - np.floor(x[i] / (2*np.pi))*(2*np.pi) if res[i] > np.pi: res[i] = res[i] - 2*np.pi return res dtheta = 0.1 feasible_points = [] infeasible_points = [] imin = 0 imax = int(2*np.pi/dtheta) for i in range(imin, imax): for j in range(imin, imax): x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.]) if IsInCollision(x, obs_i): infeasible_points.append(x) else: feasible_points.append(x) feasible_points = np.array(feasible_points) infeasible_points = np.array(infeasible_points) print('feasible points') print(feasible_points) print('infeasible points') print(infeasible_points) ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow') ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink') #for i in range(len(data)): # update_line(hl, ax, data[i]) draw_update_line(ax) #state_t = start_state if len(res_u): # propagate data p_start = res_x[0] detail_paths = [p_start] detail_controls = [] detail_costs = [] state = [p_start] control = [] cost = [] for k in range(len(res_u)): #state_i.append(len(detail_paths)-1) max_steps = int(res_t[k]/step_sz) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][k]) # modify it because of small difference between data and actual propagation p_start = res_x[k] state[-1] = res_x[k] for step in range(1,max_steps+1): p_start = dynamics(p_start, res_u[k], step_sz) p_start = enforce_bounds(p_start) detail_paths.append(p_start) accum_cost += step_sz if (step % 1 == 0) or (step == max_steps): state.append(p_start) #print('control') #print(controls[i][j]) cost.append(accum_cost) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][-1]) state[-1] = res_x[-1] xs_to_plot = np.array(state) for i in range(len(xs_to_plot)): xs_to_plot[i] = wrap_angle(xs_to_plot[i], propagate_system) if IsInCollision(xs_to_plot[i], obs_i): print('in collision') ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='green') # draw start and goal #ax.scatter(start_state[0], goal_state[0], marker='X') draw_update_line(ax) plt.waitforbuttonpress() """ #im = planner.visualize_nodes(propagate_system) #sec = input('Let us wait for user input') #show_image_opencv(im, "planning_tree", wait=True) # validate if the path contains collision """ if len(res_u): # propagate data p_start = res_x[0] detail_paths = [p_start] detail_controls = [] detail_costs = [] state = [p_start] control = [] cost = [] for k in range(len(res_u)): #state_i.append(len(detail_paths)-1) max_steps = int(res_t[k]/step_sz) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][k]) # modify it because of small difference between data and actual propagation p_start = res_x[k] state[-1] = res_x[k] for step in range(1,max_steps+1): p_start = dynamics(p_start, res_u[k], step_sz) p_start = enforce_bounds(p_start) detail_paths.append(p_start) accum_cost += step_sz if (step % 1 == 0) or (step == max_steps): state.append(p_start) #print('control') #print(controls[i][j]) cost.append(accum_cost) accum_cost = 0. # check collision for the new state assert not IsInCollision(p_start, obs_i) #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][-1]) state[-1] = res_x[-1] # validation end """ print('plan time: %fs' % (plan_time)) if len(res_x) == 0: print('failed.') out_queue_t.put(-1) out_queue_cost.put(-1.0) else: print('path succeeded.') print('cost: %f' % (np.sum(res_t))) print('cost_threshold: %f' % (cost_threshold)) print('data cost: %f' % (cost_i)) out_queue_t.put(plan_time) out_queue_cost.put(np.sum(res_t))
def main(args): # set seed print(args.model_path) torch_seed = np.random.randint(low=0, high=1000) np_seed = np.random.randint(low=0, high=1000) py_seed = np.random.randint(low=0, high=1000) #torch.manual_seed(torch_seed) np.random.seed(np_seed) random.seed(py_seed) # Build the models #if torch.cuda.is_available(): # torch.cuda.set_device(args.device) # setup evaluation function and load function if args.env_type == 'pendulum': obs_file = None obc_file = None obs_f = False #system = standard_cpp_systems.PSOPTPendulum() #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0) elif args.env_type == 'cartpole_obs': step_sz = 0.002 num_steps = 21 goal_radius = 1.5 random_seed = 0 delta_near = 2.0 delta_drain = 1.2 cost_threshold = 1.2 min_time_steps = 10 max_time_steps = 200 integration_step = 0.002 obs_width = 4.0 obs_f = True system = _sst_module.PSOPTCartPole() cpp_propagator = _sst_module.SystemPropagator() dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) #system = standard_cpp_systems.RectangleObs(obs_list, args.obs_width, 'cartpole') #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) elif args.env_type == 'acrobot_obs': obs_file = None obc_file = None system = _sst_module.PSOPTAcrobot() cpp_propagator = _sst_module.SystemPropagator() dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) obs_f = True bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.02 num_steps = 21 traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve( x0, x1, 200, num_steps, step_sz * 1, step_sz * (num_steps - 1), x_init, u_init, t_init) goal_S0 = np.diag([1., 1., 0, 0]) #goal_S0 = np.identity(4) goal_rho0 = 1.0 if args.env_type == 'pendulum': step_sz = 0.002 num_steps = 20 elif args.env_type == 'cartpole_obs': #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole') step_sz = 0.002 num_steps = 21 goal_radius = 1.5 random_seed = 0 delta_near = 2.0 delta_drain = 1.2 cost_threshold = 1.2 min_time_steps = 10 max_time_steps = 200 integration_step = 0.002 obs_width = 4.0 obs_f = True IsInCollision = cartpole_IsInCollision enforce_bounds = cartpole_enforce_bounds elif args.env_type in [ 'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8' ]: #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot') obs_width = 6.0 step_sz = 0.02 num_steps = 21 goal_radius = 2.0 random_seed = 0 delta_near = 0.1 delta_drain = 0.05 # load previously trained model if start epoch > 0 #model_path='kmpnet_epoch_%d_direction_0_step_%d.pkl' %(args.start_epoch, args.num_steps) mlp_path = os.path.join( os.getcwd() + '/c++/', '%s_MLP_lr%f_epoch_%d_step_%d.pt' % (args.env_type, args.learning_rate, args.start_epoch, args.num_steps)) encoder_path = os.path.join( os.getcwd() + '/c++/', '%s_encoder_lr%f_epoch_%d_step_%d.pt' % (args.env_type, args.learning_rate, args.start_epoch, args.num_steps)) #mlp_path = os.path.join(os.getcwd()+'/c++/','acrobot_obs_MLP_epoch_5000.pt') #encoder_path = os.path.join(os.getcwd()+'/c++/','acrobot_obs_encoder_epoch_5000.pt') #cost_mlp_path = os.path.join(os.getcwd()+'/c++/','costnet_%s_MLP_lr%f_epoch_%d_step_%d.pt' % (args.env_type, args.learning_rate, args.start_epoch, args.num_steps)) #cost_encoder_path = os.path.join(os.getcwd()+'/c++/','costnet_%s_encoder_lr%f_epoch_%d_step_%d.pt' % (args.env_type, args.learning_rate, args.start_epoch, args.num_steps)) cost_mlp_path = os.path.join( os.getcwd() + '/c++/', 'costnet_acrobot_obs_MLP_epoch_800_step_10.pt') cost_encoder_path = os.path.join( os.getcwd() + '/c++/', 'costnet_acrobot_obs_encoder_epoch_800_step_10.pt') print('mlp_path:') print(mlp_path) ##################################################### def plan_one_path(obs_i, obs, obc, start_state, goal_state, goal_inform_state, cost_i, max_iteration, data, out_queue): if args.env_type == 'pendulum': system = standard_cpp_systems.PSOPTPendulum() bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0) step_sz = 0.002 num_steps = 20 traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 200, num_steps, 1, 20, step_sz) elif args.env_type == 'cartpole_obs': obs_width = 4.0 psopt_system = _sst_module.PSOPTCartPole() propagate_system = standard_cpp_systems.RectangleObs( obs, obs_width, 'cartpole') distance_computer = propagate_system.distance_computer() #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology())) step_sz = 0.002 num_steps = 101 goal_radius = 1.5 random_seed = 0 delta_near = 2.0 delta_drain = 1.2 device = 3 num_sample = 10 min_time_steps = 10 max_time_steps = 200 mpnet_goal_threshold = 2.0 mpnet_length_threshold = 40 pick_goal_init_threshold = 0.1 pick_goal_end_threshold = 0.8 pick_goal_start_percent = 0.4 elif args.env_type in [ 'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8' ]: #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot') obs_width = 6.0 psopt_system = _sst_module.PSOPTAcrobot() propagate_system = standard_cpp_systems.RectangleObs( obs, 6., 'acrobot') distance_computer = propagate_system.distance_computer() #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology())) step_sz = 0.02 num_steps = 21 goal_radius = 2.0 random_seed = 0 delta_near = 1.0 delta_drain = 0.5 #print('creating planner...') planner = vis_planners.DeepSMPWrapper(mlp_path, encoder_path, cost_mlp_path, cost_encoder_path, \ 200, num_steps, step_sz, propagate_system, 3) #cost_threshold = cost_i * 1.1 cost_threshold = 100000000. """ # visualization plt.ion() fig = plt.figure() ax = fig.add_subplot(111) #ax.set_autoscale_on(True) ax.set_xlim(-np.pi, np.pi) ax.set_ylim(-np.pi, np.pi) hl, = ax.plot([], [], 'b') #hl_real, = ax.plot([], [], 'r') hl_for, = ax.plot([], [], 'g') hl_back, = ax.plot([], [], 'r') hl_for_mpnet, = ax.plot([], [], 'lightgreen') hl_back_mpnet, = ax.plot([], [], 'salmon') #print(obs) def update_line(h, ax, new_data): new_data = wrap_angle(new_data, propagate_system) h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1])) #h.set_xdata(np.append(h.get_xdata(), new_data[0])) #h.set_ydata(np.append(h.get_ydata(), new_data[1])) def remove_last_k(h, ax, k): h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k]) def draw_update_line(ax): #ax.relim() #ax.autoscale_view() fig.canvas.draw() fig.canvas.flush_events() #plt.show() def wrap_angle(x, system): circular = system.is_circular_topology() res = np.array(x) for i in range(len(x)): if circular[i]: # use our previously saved version res[i] = x[i] - np.floor(x[i] / (2*np.pi))*(2*np.pi) if res[i] > np.pi: res[i] = res[i] - 2*np.pi return res dtheta = 0.1 feasible_points = [] infeasible_points = [] imin = 0 imax = int(2*np.pi/dtheta) for i in range(imin, imax): for j in range(imin, imax): x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.]) if IsInCollision(x, obs_i): infeasible_points.append(x) else: feasible_points.append(x) feasible_points = np.array(feasible_points) infeasible_points = np.array(infeasible_points) print('feasible points') print(feasible_points) print('infeasible points') print(infeasible_points) ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow') ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink') for i in range(len(data)): update_line(hl, ax, data[i]) draw_update_line(ax) # visualization end """ # visualize for cartpole plt.ion() fig = plt.figure() ax = fig.add_subplot(111) ax.set_autoscale_on(True) hl, = ax.plot([], [], 'b') #hl_real, = ax.plot([], [], 'r') def update_line(h, ax, new_data): h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1])) #h.set_xdata(np.append(h.get_xdata(), new_data[0])) #h.set_ydata(np.append(h.get_ydata(), new_data[1])) def draw_update_line(ax): ax.relim() ax.autoscale_view() fig.canvas.draw() fig.canvas.flush_events() # randomly pick up a point in the data, and find similar data in the dataset # plot the next point #ax.set_autoscale_on(True) ax.set_xlim(-30, 30) ax.set_ylim(-np.pi, np.pi) hl, = ax.plot([], [], 'b') #hl_real, = ax.plot([], [], 'r') hl_for, = ax.plot([], [], 'g') hl_back, = ax.plot([], [], 'r') hl_for_mpnet, = ax.plot([], [], 'lightgreen') hl_back_mpnet, = ax.plot([], [], 'salmon') #print(obs) def update_line(h, ax, new_data): new_data = wrap_angle(new_data, propagate_system) h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1])) #h.set_xdata(np.append(h.get_xdata(), new_data[0])) #h.set_ydata(np.append(h.get_ydata(), new_data[1])) def remove_last_k(h, ax, k): h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k]) def draw_update_line(ax): #ax.relim() #ax.autoscale_view() fig.canvas.draw() fig.canvas.flush_events() #plt.show() def wrap_angle(x, system): circular = system.is_circular_topology() res = np.array(x) for i in range(len(x)): if circular[i]: # use our previously saved version res[i] = x[i] - np.floor(x[i] / (2 * np.pi)) * (2 * np.pi) if res[i] > np.pi: res[i] = res[i] - 2 * np.pi return res dx = 1 dtheta = 0.1 feasible_points = [] infeasible_points = [] imin = 0 imax = int(2 * 30. / dx) jmin = 0 jmax = int(2 * np.pi / dtheta) for i in range(imin, imax): for j in range(jmin, jmax): x = np.array([dx * i - 30, 0., dtheta * j - np.pi, 0.]) if IsInCollision(x, obs_i): infeasible_points.append(x) else: feasible_points.append(x) feasible_points = np.array(feasible_points) infeasible_points = np.array(infeasible_points) print('feasible points') print(feasible_points) print('infeasible points') print(infeasible_points) ax.scatter(feasible_points[:, 0], feasible_points[:, 2], c='yellow') ax.scatter(infeasible_points[:, 0], infeasible_points[:, 2], c='pink') #for i in range(len(data)): # update_line(hl, ax, data[i]) draw_update_line(ax) #state_t = start_state # visualization end # generate a path by using SST to plan for some maximal iterations state_t = start_state pick_goal_threshold = .0 ax.scatter(goal_inform_state[0], goal_inform_state[2], marker='*', c='red') # cartpole for i in range(max_iteration): time0 = time.time() # determine if picking goal based on iteration number goal_prob = random.random() #flag=1: using MPNet #flag=0: not using MPNet if goal_prob <= pick_goal_threshold: flag = 0 else: flag = 1 bvp_x, bvp_u, bvp_t, mpnet_res = planner.plan_tree_SMP_step("sst", propagate_system, psopt_system, obc.flatten(), state_t, goal_inform_state, goal_inform_state, \ flag, goal_radius, max_iteration, distance_computer, \ delta_near, delta_drain, cost_threshold) if len( bvp_u ) != 0: # and bvp_t[0] > 0.01: # turn bvp_t off if want to use step_bvp xw_scat = ax.scatter(mpnet_res[0], mpnet_res[2], c='lightgreen') draw_update_line(ax) # propagate data p_start = bvp_x[0] detail_paths = [p_start] detail_controls = [] detail_costs = [] state = [p_start] control = [] cost = [] for k in range(len(bvp_u)): #state_i.append(len(detail_paths)-1) max_steps = int(bvp_t[k] / step_sz) accum_cost = 0. for step in range(1, max_steps + 1): p_start = dynamics(p_start, bvp_u[k], step_sz) p_start = enforce_bounds(p_start) detail_paths.append(p_start) accum_cost += step_sz if (step % 1 == 0) or (step == max_steps): state.append(p_start) cost.append(accum_cost) accum_cost = 0. xs_to_plot = np.array(state) for j in range(len(xs_to_plot)): xs_to_plot[j] = wrap_angle(xs_to_plot[j], propagate_system) xs_to_plot = xs_to_plot[::5] #ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='green') # acrobot ax.scatter(xs_to_plot[:, 0], xs_to_plot[:, 2], c='green') # cartpole #ax.scatter(bvp_x[:,0], bvp_x[:,1], c='green') print('solution: x') print(bvp_x) print('solution: u') print(bvp_u) print('solution: t') print(bvp_t) # draw start and goal #ax.scatter(start_state[0], goal_state[0], marker='X') draw_update_line(ax) #state_t = state[-1] print('state_t:') print(state_t) print('mpnet_res') print(mpnet_res) # based on flag, determine how to change state_t if flag: # only change state_t if in MPNet inform mode #if len(bvp_u) != 0: if True: # try using steered result as next start #if not IsInCollision(state_t, obs_i): state_t = mpnet_res print('after copying to state_t:') print('state_t') print(state_t) # if in collision, then not using it else: print('failure') state_t = start_state # failed BVP, back to origin plan_time = time.time() - time0 print('plan time: %fs' % (plan_time)) if len(res_u) == 0: print('failed.') out_queue.put(-1) else: print('path succeeded.') print('cost: %f' % (np.sum(res_t))) print('cost_threshold: %f' % (cost_threshold)) print('data cost: %f' % (cost_i)) out_queue.put(plan_time) #################################################################################### # load data print('loading...') if args.seen_N > 0: seen_test_data = data_loader.load_test_dataset(args.seen_N, args.seen_NP, args.data_folder, obs_f, args.seen_s, args.seen_sp) if args.unseen_N > 0: unseen_test_data = data_loader.load_test_dataset( args.unseen_N, args.unseen_NP, args.data_folder, obs_f, args.unseen_s, args.unseen_sp) # test # testing queue = Queue(1) print('testing...') seen_test_suc_rate = 0. unseen_test_suc_rate = 0. obc, obs, paths, sgs, path_lengths, controls, costs = seen_test_data obc = obc.astype(np.float32) #obc = torch.from_numpy(obc) #if torch.cuda.is_available(): # obc = obc.cuda() plan_res = [] plan_times = [] plan_res_all = [] for i in range(len(paths)): new_obs_i = [] obs_i = obs[i] plan_res_env = [] plan_time_env = [] for k in range(len(obs_i)): obs_pt = [] obs_pt.append(obs_i[k][0] - obs_width / 2) obs_pt.append(obs_i[k][1] - obs_width / 2) obs_pt.append(obs_i[k][0] - obs_width / 2) obs_pt.append(obs_i[k][1] + obs_width / 2) obs_pt.append(obs_i[k][0] + obs_width / 2) obs_pt.append(obs_i[k][1] + obs_width / 2) obs_pt.append(obs_i[k][0] + obs_width / 2) obs_pt.append(obs_i[k][1] - obs_width / 2) new_obs_i.append(obs_pt) obs_i = new_obs_i #print(obs_i) for j in range(len(paths[i])): start_state = sgs[i][j][0] goal_inform_state = paths[i][j][-1] goal_state = sgs[i][j][1] cost_i = costs[i][j].sum() #cost_i = 100000000. # propagate data p_start = paths[i][j][0] detail_paths = [p_start] detail_controls = [] detail_costs = [] state = [p_start] control = [] cost = [] for k in range(len(controls[i][j])): #state_i.append(len(detail_paths)-1) max_steps = int(costs[i][j][k] / step_sz) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][k]) # modify it because of small difference between data and actual propagation p_start = paths[i][j][k] state[-1] = paths[i][j][k] for step in range(1, max_steps + 1): p_start = dynamics(p_start, controls[i][j][k], step_sz) p_start = enforce_bounds(p_start) detail_paths.append(p_start) detail_controls.append(controls[i][j]) detail_costs.append(step_sz) accum_cost += step_sz if (step % 1 == 0) or (step == max_steps): state.append(p_start) #print('control') #print(controls[i][j]) control.append(controls[i][j][k]) cost.append(accum_cost) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][-1]) state[-1] = paths[i][j][-1] data = state # end of propagation print('environment: %d/%d, path: %d/%d' % (i + 1, len(paths), j + 1, len(paths[i]))) #p = Process(target=plan_one_path, args=(obs_i, obs[i], obc[i], start_state, goal_state, goal_inform_state, cost_i, 300000, data, queue)) plan_one_path(obs_i, obs[i], obc[i], start_state, goal_state, goal_inform_state, cost_i, 300000, data, queue) #p.start() #p.join() res = queue.get() if res == -1: plan_res_env.append(0) plan_res_all.append(0) else: plan_res_env.append(1) plan_times.append(res) plan_res_all.append(1) print('average accuracy up to now: %f' % (np.array(plan_res_all).flatten().mean())) print('plan average time: %f' % (np.array(plan_times).mean())) print('plan time std: %f' % (np.array(plan_times).std())) plan_res.append(plan_res_env) print('plan accuracy: %f' % (np.array(plan_res).flatten().mean())) print('plan average time: %f' % (np.array(plan_times).mean())) print('plan time std: %f' % (np.array(plan_times).std()))
def main(args): # set seed print(args.model_path) torch_seed = np.random.randint(low=0, high=1000) np_seed = np.random.randint(low=0, high=1000) py_seed = np.random.randint(low=0, high=1000) #torch.manual_seed(torch_seed) np.random.seed(np_seed) random.seed(py_seed) # Build the models #if torch.cuda.is_available(): # torch.cuda.set_device(args.device) # setup evaluation function and load function if args.env_type == 'pendulum': obs_file = None obc_file = None obs_f = False #system = standard_cpp_systems.PSOPTPendulum() #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0) elif args.env_type == 'cartpole_obs': #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot') obs_width = 4.0 step_sz = 0.002 num_steps = 200 goal_radius = 2.0 random_seed = 0 delta_near = 0.01 delta_drain = 0.005 obs_f = True psopt_system = _sst_module.PSOPTCartPole() cpp_propagator = _sst_module.SystemPropagator() #system = standard_cpp_systems.RectangleObs(obs, 4., 'cartpole') dynamics = lambda x, u, t: cpp_propagator.propagate( psopt_system, x, u, t) elif args.env_type in [ 'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8' ]: #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot') obs_width = 6.0 step_sz = 0.02 num_steps = 20 goal_radius = 2.0 random_seed = 0 delta_near = 0.1 delta_drain = 0.05 # load previously trained model if start epoch > 0 #model_path='kmpnet_epoch_%d_direction_0_step_%d.pkl' %(args.start_epoch, args.num_steps) mlp_path = os.path.join( os.getcwd() + '/c++/', 'acrobot_obs_MLP_lr0.010000_epoch_2850_step_20.pt') encoder_path = os.path.join( os.getcwd() + '/c++/', 'acrobot_obs_encoder_lr0.010000_epoch_2850_step_20.pt') cost_mlp_path = os.path.join( os.getcwd() + '/c++/', 'costnet_acrobot_obs_8_MLP_epoch_300_step_20.pt') cost_encoder_path = os.path.join( os.getcwd() + '/c++/', 'costnet_acrobot_obs_8_encoder_epoch_300_step_20.pt') print('mlp_path:') print(mlp_path) ##################################################### def plan_one_path(obs_i, obs, obc, start_state, goal_state, goal_inform_state, max_iteration, data, out_queue): if args.env_type == 'pendulum': system = standard_cpp_systems.PSOPTPendulum() bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0) step_sz = 0.002 num_steps = 20 traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 200, num_steps, 1, 20, step_sz) elif args.env_type == 'cartpole_obs': #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot') obs_width = 4.0 psopt_system = _sst_module.PSOPTAcrobot() propagate_system = standard_cpp_systems.RectangleObs( obs, 4., 'cartpole') distance_computer = propagate_system.distance_computer() #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology())) bvp_wrapper = _sst_module.PSOPTBVPWrapper(psopt_system, 4, 1, 0) step_sz = 0.002 num_steps = 20 psopt_num_steps = 20 psopt_step_sz = 0.002 goal_radius = 2 random_seed = 0 #delta_near=1.0 #delta_drain=0.5 delta_near = 0.01 delta_drain = 0.005 elif args.env_type in [ 'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8' ]: #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot') obs_width = 6.0 psopt_system = _sst_module.PSOPTAcrobot() propagate_system = standard_cpp_systems.RectangleObs( obs, 6., 'acrobot') distance_computer = propagate_system.distance_computer() #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology())) bvp_wrapper = _sst_module.PSOPTBVPWrapper(psopt_system, 4, 1, 0) step_sz = 0.02 num_steps = 21 psopt_num_steps = 21 psopt_step_sz = 0.02 goal_radius = 2 random_seed = 0 #delta_near=1.0 #delta_drain=0.5 delta_near = 0.1 delta_drain = 0.05 #print('creating planner...') planner = vis_planners.DeepSMPWrapper(mlp_path, encoder_path, cost_mlp_path, cost_encoder_path, 20, psopt_num_steps, psopt_step_sz, step_sz, propagate_system, args.device) # generate a path by using SST to plan for some maximal iterations time0 = time.time() #print('obc:') #print(obc.shape) #print(delta_near) #print(delta_drain) #print('start_state:') #print(start_state) #print('goal_state:') #print(goal_state) plt.ion() fig = plt.figure() ax = fig.add_subplot(111) #ax.set_autoscale_on(True) ax.set_xlim(-30, 30) ax.set_ylim(-np.pi, np.pi) hl, = ax.plot([], [], 'b') #hl_real, = ax.plot([], [], 'r') hl_for, = ax.plot([], [], 'g') hl_back, = ax.plot([], [], 'r') hl_for_mpnet, = ax.plot([], [], 'lightgreen') hl_back_mpnet, = ax.plot([], [], 'salmon') #print(obs) def update_line(h, ax, new_data): new_data = wrap_angle(new_data, propagate_system) h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1])) #h.set_xdata(np.append(h.get_xdata(), new_data[0])) #h.set_ydata(np.append(h.get_ydata(), new_data[1])) def remove_last_k(h, ax, k): h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k]) def draw_update_line(ax): #ax.relim() #ax.autoscale_view() fig.canvas.draw() fig.canvas.flush_events() #plt.show() def wrap_angle(x, system): circular = system.is_circular_topology() res = np.array(x) for i in range(len(x)): if circular[i]: # use our previously saved version res[i] = x[i] - np.floor(x[i] / (2 * np.pi)) * (2 * np.pi) if res[i] > np.pi: res[i] = res[i] - 2 * np.pi return res dx = 1 dtheta = 0.1 feasible_points = [] infeasible_points = [] imin = 0 imax = int(2 * 30. / dx) jmin = 0 jmax = int(2 * np.pi / dtheta) for i in range(imin, imax): for j in range(jmin, jmax): x = np.array([dx * i - 30, 0., dtheta * j - np.pi, 0.]) if IsInCollision(x, obs_i): infeasible_points.append(x) else: feasible_points.append(x) feasible_points = np.array(feasible_points) infeasible_points = np.array(infeasible_points) print('feasible points') print(feasible_points) print('infeasible points') print(infeasible_points) ax.scatter(feasible_points[:, 0], feasible_points[:, 2], c='yellow') ax.scatter(infeasible_points[:, 0], infeasible_points[:, 2], c='pink') #for i in range(len(data)): # update_line(hl, ax, data[i]) data = np.array(data) ax.scatter(data[:, 0], data[:, 2], c='lightblue', s=10) ax.scatter(data[-1, 0], data[-1, 2], c='red', s=10, marker='*') draw_update_line(ax) state_t = start_state state_t = data[0] for data_i in range(num_steps, len(data), num_steps): print('iteration: %d' % (data_i)) print('state_t:') print(state_t) x_init, u_init, t_init = init_informer(propagate_system, state_t, data[data_i], psopt_num_steps + 1, psopt_step_sz) print('x_init:') print(x_init) bvp_x, bvp_u, bvp_t = bvp_wrapper.solve(state_t, x_init[-1], 20, psopt_num_steps+1, 0.8*step_sz*num_steps, 2*psopt_step_sz*psopt_num_steps, \ x_init, u_init, t_init) print('bvp_x:') print(bvp_x) print('bvp_u:') print(bvp_u) print('bvp_t:') print(bvp_t) if len( bvp_u ) != 0: # and bvp_t[0] > 0.01: # turn bvp_t off if want to use step_bvp # propagate data #p_start = bvp_x[0] p_start = state_t detail_paths = [p_start] detail_controls = [] detail_costs = [] state = [p_start] control = [] cost = [] for k in range(len(bvp_t)): #state_i.append(len(detail_paths)-1) max_steps = int(np.round(bvp_t[k] / step_sz)) accum_cost = 0. for step in range(1, max_steps + 1): p_start = dynamics(p_start, bvp_u[k], step_sz) p_start = enforce_bounds(p_start) detail_paths.append(p_start) accum_cost += step_sz if (step % 1 == 0) or (step == max_steps): state.append(p_start) cost.append(accum_cost) accum_cost = 0. xs_to_plot = np.array(state) for i in range(len(xs_to_plot)): xs_to_plot[i] = wrap_angle(xs_to_plot[i], propagate_system) #ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='green') ax.scatter(xs_to_plot[:, 0], xs_to_plot[:, 2], c='green', s=10.0) print('solution: x') print(bvp_x) print('solution: u') print(bvp_u) print('solution: t') print(bvp_t) # draw start and goal #ax.scatter(start_state[0], goal_state[0], marker='X') draw_update_line(ax) #state_t = state[-1] # try using mpnet_res as new start state_t = data[data_i] #state_t = xs_to_plot[-1] print('data_i:') print(data[data_i]) else: # in incollision state_t = data[data_i] #if len(res_x) == 0: # print('failed.') out_queue.put(0) #else: # print('path succeeded.') # out_queue.put(1) #################################################################################### # load data print('loading...') if args.seen_N > 0: seen_test_data = data_loader.load_test_dataset(args.seen_N, args.seen_NP, args.data_folder, obs_f, args.seen_s, args.seen_sp) if args.unseen_N > 0: unseen_test_data = data_loader.load_test_dataset( args.unseen_N, args.unseen_NP, args.data_folder, obs_f, args.unseen_s, args.unseen_sp) # test # testing queue = Queue(1) print('testing...') seen_test_suc_rate = 0. unseen_test_suc_rate = 0. obc, obs, paths, sgs, path_lengths, controls, costs = seen_test_data obc = obc.astype(np.float32) #obc = torch.from_numpy(obc) #if torch.cuda.is_available(): # obc = obc.cuda() for i in range(len(paths)): new_obs_i = [] obs_i = obs[i] for k in range(len(obs_i)): obs_pt = [] obs_pt.append(obs_i[k][0] - obs_width / 2) obs_pt.append(obs_i[k][1] - obs_width / 2) obs_pt.append(obs_i[k][0] - obs_width / 2) obs_pt.append(obs_i[k][1] + obs_width / 2) obs_pt.append(obs_i[k][0] + obs_width / 2) obs_pt.append(obs_i[k][1] + obs_width / 2) obs_pt.append(obs_i[k][0] + obs_width / 2) obs_pt.append(obs_i[k][1] - obs_width / 2) new_obs_i.append(obs_pt) obs_i = new_obs_i #print(obs_i) for j in range(len(paths[i])): start_state = sgs[i][j][0] goal_inform_state = paths[i][j][-1] goal_state = sgs[i][j][1] #p = Process(target=plan_one_path, args=(obs[i], obc[i], start_state, goal_state, 500, queue)) # propagate data p_start = paths[i][j][0] detail_paths = [p_start] detail_controls = [] detail_costs = [] state = [p_start] control = [] cost = [] for k in range(len(controls[i][j])): #state_i.append(len(detail_paths)-1) max_steps = int(costs[i][j][k] / step_sz) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][k]) # modify it because of small difference between data and actual propagation p_start = paths[i][j][k] state[-1] = paths[i][j][k] for step in range(1, max_steps + 1): p_start = dynamics(p_start, controls[i][j][k], step_sz) p_start = enforce_bounds(p_start) detail_paths.append(p_start) detail_controls.append(controls[i][j]) detail_costs.append(step_sz) accum_cost += step_sz if (step % 1 == 0) or (step == max_steps): state.append(p_start) #print('control') #print(controls[i][j]) control.append(controls[i][j][k]) cost.append(accum_cost) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][-1]) state[-1] = paths[i][j][-1] data = state plan_one_path(obs_i, obs[i], obc[i], start_state, goal_state, goal_inform_state, 1000, data, queue)
def main(args): #global hl if torch.cuda.is_available(): torch.cuda.set_device(args.device) # environment setting cae = cae_identity mlp = MLP cpp_propagator = _sst_module.SystemPropagator() if args.env_type == 'pendulum': normalize = pendulum.normalize unnormalize = pendulum.unnormalize system = standard_cpp_systems.PSOPTPendulum() dynamics = None enforce_bounds = None step_sz = 0.002 num_steps = 20 elif args.env_type == 'cartpole': normalize = cart_pole.normalize unnormalize = cart_pole.unnormalize dynamics = cartpole.dynamics system = _sst_module.CartPole() enforce_bounds = cartpole.enforce_bounds step_sz = 0.002 num_steps = 20 elif args.env_type == 'cartpole_obs': normalize = cart_pole_obs.normalize unnormalize = cart_pole_obs.unnormalize system = _sst_module.PSOPTCartPole() mlp = mlp_cartpole.MLP cae = CAE_cartpole_voxel_2d dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = cart_pole_obs.enforce_bounds step_sz = 0.002 num_steps = 20 pos_indices = [0, 2] vel_indices = [1, 3] elif args.env_type == 'cartpole_obs_2': normalize = cart_pole_obs.normalize unnormalize = cart_pole_obs.unnormalize system = _sst_module.PSOPTCartPole() mlp = mlp_cartpole.MLP2 cae = CAE_cartpole_voxel_2d dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = cart_pole_obs.enforce_bounds step_sz = 0.002 num_steps = 20 pos_indices = [0, 2] vel_indices = [1, 3] elif args.env_type == 'cartpole_obs_3': normalize = cart_pole_obs.normalize unnormalize = cart_pole_obs.unnormalize system = _sst_module.PSOPTCartPole() mlp = mlp_cartpole.MLP4 cae = CAE_cartpole_voxel_2d dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = cart_pole_obs.enforce_bounds step_sz = 0.002 num_steps = 20 pos_indices = [0, 2] vel_indices = [1, 3] elif args.env_type == 'cartpole_obs_4_small': normalize = cart_pole_obs.normalize unnormalize = cart_pole_obs.unnormalize system = _sst_module.PSOPTCartPole() mlp = mlp_cartpole.MLP3 cae = CAE_cartpole_voxel_2d # dynamics: None -- without integration to dense trajectory dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) #dynamics = None enforce_bounds = cart_pole_obs.enforce_bounds step_sz = 0.002 num_steps = 20 pos_indices = np.array([0, 2]) vel_indices = np.array([1, 3]) elif args.env_type == 'cartpole_obs_4_big': normalize = cart_pole_obs.normalize unnormalize = cart_pole_obs.unnormalize system = _sst_module.PSOPTCartPole() mlp = mlp_cartpole.MLP3 cae = CAE_cartpole_voxel_2d # dynamics: None -- without integration to dense trajectory dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) #dynamics = None enforce_bounds = cart_pole_obs.enforce_bounds step_sz = 0.002 num_steps = 20 pos_indices = np.array([0, 2]) vel_indices = np.array([1, 3]) elif args.env_type == 'cartpole_obs_4_small_x_theta': normalize = cart_pole_obs.normalize unnormalize = cart_pole_obs.unnormalize system = _sst_module.PSOPTCartPole() mlp = mlp_cartpole.MLP3 cae = CAE_cartpole_voxel_2d # dynamics: None -- without integration to dense trajectory dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) #dynamics = None enforce_bounds = cart_pole_obs.enforce_bounds step_sz = 0.002 num_steps = 20 pos_indices = np.array([0, 1]) vel_indices = np.array([2, 3]) elif args.env_type == 'cartpole_obs_4_big_x_theta': normalize = cart_pole_obs.normalize unnormalize = cart_pole_obs.unnormalize system = _sst_module.PSOPTCartPole() mlp = mlp_cartpole.MLP3 cae = CAE_cartpole_voxel_2d # dynamics: None -- without integration to dense trajectory dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) #dynamics = None enforce_bounds = cart_pole_obs.enforce_bounds step_sz = 0.002 num_steps = 20 pos_indices = np.array([0, 1]) vel_indices = np.array([2, 3]) elif args.env_type == 'cartpole_obs_4_small_decouple_output': normalize = cart_pole_obs.normalize unnormalize = cart_pole_obs.unnormalize system = _sst_module.PSOPTCartPole() mlp = mlp_cartpole.MLP3 cae = CAE_cartpole_voxel_2d # dynamics: None -- without integration to dense trajectory dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) #dynamics = None enforce_bounds = cart_pole_obs.enforce_bounds step_sz = 0.002 num_steps = 20 pos_indices = np.array([0, 2]) vel_indices = np.array([1, 3]) elif args.env_type == 'cartpole_obs_4_big_decouple_output': normalize = cart_pole_obs.normalize unnormalize = cart_pole_obs.unnormalize system = _sst_module.PSOPTCartPole() mlp = mlp_cartpole.MLP3 cae = CAE_cartpole_voxel_2d # dynamics: None -- without integration to dense trajectory dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) #dynamics = None enforce_bounds = cart_pole_obs.enforce_bounds step_sz = 0.002 num_steps = 20 pos_indices = np.array([0, 2]) vel_indices = np.array([1, 3]) elif args.env_type == 'acrobot_obs': normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize system = _sst_module.PSOPTAcrobot() mlp = mlp_acrobot.MLP cae = CAE_acrobot_voxel_2d #dynamics = acrobot_obs.dynamics dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = acrobot_obs.enforce_bounds step_sz = 0.02 num_steps = 20 pos_indices = [0, 1] vel_indices = [2, 3] elif args.env_type == 'acrobot_obs_2': normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize system = _sst_module.PSOPTAcrobot() mlp = mlp_acrobot.MLP2 cae = CAE_acrobot_voxel_2d_2 #dynamics = acrobot_obs.dynamics dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = acrobot_obs.enforce_bounds step_sz = 0.02 num_steps = 20 pos_indices = [0, 1] vel_indices = [2, 3] elif args.env_type == 'acrobot_obs_3': normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize system = _sst_module.PSOPTAcrobot() mlp = mlp_acrobot.MLP3 cae = CAE_acrobot_voxel_2d_2 #dynamics = acrobot_obs.dynamics dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = acrobot_obs.enforce_bounds step_sz = 0.02 num_steps = 20 pos_indices = [0, 1] vel_indices = [2, 3] elif args.env_type == 'acrobot_obs_4': normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize system = _sst_module.PSOPTAcrobot() mlp = mlp_acrobot.MLP3 cae = CAE_acrobot_voxel_2d_3 #dynamics = acrobot_obs.dynamics dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = acrobot_obs.enforce_bounds step_sz = 0.02 num_steps = 20 pos_indices = [0, 1] vel_indices = [2, 3] elif args.env_type == 'acrobot_obs_5': normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize system = _sst_module.PSOPTAcrobot() mlp = mlp_acrobot.MLP cae = CAE_acrobot_voxel_2d_3 #dynamics = acrobot_obs.dynamics dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = acrobot_obs.enforce_bounds step_sz = 0.02 num_steps = 20 pos_indices = [0, 1] vel_indices = [2, 3] elif args.env_type == 'acrobot_obs_6': normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize system = _sst_module.PSOPTAcrobot() mlp = mlp_acrobot.MLP4 cae = CAE_acrobot_voxel_2d_3 #dynamics = acrobot_obs.dynamics dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = acrobot_obs.enforce_bounds step_sz = 0.02 num_steps = 20 pos_indices = [0, 1] vel_indices = [2, 3] elif args.env_type == 'acrobot_obs_7': normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize system = _sst_module.PSOPTAcrobot() mlp = mlp_acrobot.MLP5 cae = CAE_acrobot_voxel_2d_3 #dynamics = acrobot_obs.dynamics dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = acrobot_obs.enforce_bounds step_sz = 0.02 num_steps = 20 elif args.env_type == 'acrobot_obs_8': normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize system = _sst_module.PSOPTAcrobot() mlp = mlp_acrobot.MLP6 cae = CAE_acrobot_voxel_2d_3 #dynamics = acrobot_obs.dynamics dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = acrobot_obs.enforce_bounds step_sz = 0.02 num_steps = 20 # set loss for mpnet if args.loss == 'mse': #mpnet.loss_f = nn.MSELoss() def mse_loss(y1, y2): l = (y1 - y2) ** 2 l = torch.mean(l, dim=0) # sum alone the batch dimension, now the dimension is the same as input dimension return l loss_f_p = mse_loss loss_f_v = mse_loss elif args.loss == 'l1_smooth': #mpnet.loss_f = nn.SmoothL1Loss() def l1_smooth_loss(y1, y2): l1 = torch.abs(y1 - y2) cond = l1 < 1 l = torch.where(cond, 0.5 * l1 ** 2, l1) l = torch.mean(l, dim=0) # sum alone the batch dimension, now the dimension is the same as input dimension return l loss_f_p = l1_smooth_loss loss_f_v = l1_smooth_loss elif args.loss == 'mse_decoupled': def mse_decoupled(y1, y2): # for angle terms, wrap it to -pi~pi l_0 = torch.abs(y1[:,0] - y2[:,0]) ** 2 l_1 = torch.abs(y1[:,1] - y2[:,1]) ** 2 l_2 = torch.abs(y1[:,2] - y2[:,2]) # angular dimension l_3 = torch.abs(y1[:,3] - y2[:,3]) ** 2 cond = (l_2 > 1.0) * (l_2 <= 2.0) l_2 = torch.where(cond, 2*1.0-l_2, l_2) l_2 = l_2 ** 2 l_0 = torch.mean(l_0) l_1 = torch.mean(l_1) l_2 = torch.mean(l_2) l_3 = torch.mean(l_3) return torch.stack([l_0, l_1, l_2, l_3]) loss_f_p = mse_decoupled loss_f_v = mse_decoupled elif args.loss == 'l1_smooth_decoupled': # this only is for cartpole, need to adapt to other systems #TODO def l1_smooth_decoupled(y1, y2): # for angle terms, wrap it to -pi~pi l_0 = torch.abs(y1[:,0] - y2[:,0]) l_1 = torch.abs(y1[:,1] - y2[:,1]) # angular dimension cond = (l_1 > 1.0) * (l_1 <= 2.0) l_1 = torch.where(cond, 2*1.0-l_1, l_1) # then change to l1_smooth_loss cond = l_0 < 1 l_0 = torch.where(cond, 0.5 * l_0 ** 2, l_0) cond = l_1 < 1 l_1 = torch.where(cond, 0.5 * l_1 ** 2, l_1) l_0 = torch.mean(l_0) l_1 = torch.mean(l_1) return torch.stack([l_0, l_1]) def l1_smooth_loss(y1, y2): l1 = torch.abs(y1 - y2) cond = l1 < 1 l = torch.where(cond, 0.5 * l1 ** 2, l1) l = torch.mean(l, dim=0) # sum alone the batch dimension, now the dimension is the same as input dimension return l loss_f_p = l1_smooth_decoupled loss_f_v = l1_smooth_loss if 'decouple_output' in args.env_type: print('mpnet using decoupled output') mpnet_pnet = KMPNet(args.total_input_size, args.AE_input_size, args.mlp_input_size, args.output_size//2, cae, mlp, loss_f_p) mpnet_vnet = KMPNet(args.total_input_size, args.AE_input_size, args.mlp_input_size, args.output_size//2, cae, mlp, loss_f_v) else: mpnet_pnet = KMPNet(args.total_input_size//2, args.AE_input_size, args.mlp_input_size, args.output_size//2, cae, mlp, loss_f_p) mpnet_vnet = KMPNet(args.total_input_size//2, args.AE_input_size, args.mlp_input_size, args.output_size//2, cae, mlp, loss_f_v) # load net # load previously trained model if start epoch > 0 model_dir = args.model_dir if args.loss == 'mse': if args.multigoal == 0: model_dir = model_dir+args.env_type+"_lr%f_%s_step_%d/" % (args.learning_rate, args.opt, args.num_steps) else: model_dir = model_dir+args.env_type+"_lr%f_%s_step_%d_multigoal/" % (args.learning_rate, args.opt, args.num_steps) else: if args.multigoal == 0: model_dir = model_dir+args.env_type+"_lr%f_%s_loss_%s_step_%d/" % (args.learning_rate, args.opt, args.loss, args.num_steps) else: model_dir = model_dir+args.env_type+"_lr%f_%s_loss_%s_step_%d_multigoal/" % (args.learning_rate, args.opt, args.loss, args.num_steps) if not os.path.exists(model_dir): os.makedirs(model_dir) model_pnet_path='kmpnet_pnet_epoch_%d_direction_%d_step_%d.pkl' %(args.start_epoch, args.direction, args.num_steps) model_vnet_path='kmpnet_vnet_epoch_%d_direction_%d_step_%d.pkl' %(args.start_epoch, args.direction, args.num_steps) torch_seed, np_seed, py_seed = 0, 0, 0 if args.start_epoch > 0: #load_net_state(mpnet, os.path.join(args.model_path, model_path)) load_net_state(mpnet_pnet, os.path.join(model_dir, model_pnet_path)) load_net_state(mpnet_vnet, os.path.join(model_dir, model_vnet_path)) #torch_seed, np_seed, py_seed = load_seed(os.path.join(args.model_path, model_path)) torch_seed, np_seed, py_seed = load_seed(os.path.join(model_dir, model_pnet_path)) # set seed after loading torch.manual_seed(torch_seed) np.random.seed(np_seed) random.seed(py_seed) if torch.cuda.is_available(): mpnet_pnet.cuda() mpnet_pnet.mlp.cuda() mpnet_pnet.encoder.cuda() mpnet_vnet.cuda() mpnet_vnet.mlp.cuda() mpnet_vnet.encoder.cuda() if args.opt == 'Adagrad': mpnet_pnet.set_opt(torch.optim.Adagrad, lr=args.learning_rate) elif args.opt == 'Adam': mpnet_pnet.set_opt(torch.optim.Adam, lr=args.learning_rate) elif args.opt == 'SGD': mpnet_pnet.set_opt(torch.optim.SGD, lr=args.learning_rate, momentum=0.9) elif args.opt == 'ASGD': mpnet_pnet.set_opt(torch.optim.ASGD, lr=args.learning_rate) if args.opt == 'Adagrad': mpnet_vnet.set_opt(torch.optim.Adagrad, lr=args.learning_rate) elif args.opt == 'Adam': mpnet_vnet.set_opt(torch.optim.Adam, lr=args.learning_rate) elif args.opt == 'SGD': mpnet_vnet.set_opt(torch.optim.SGD, lr=args.learning_rate, momentum=0.9) elif args.opt == 'ASGD': mpnet_vnet.set_opt(torch.optim.ASGD, lr=args.learning_rate) if args.start_epoch > 0: #load_opt_state(mpnet, os.path.join(args.model_path, model_path)) load_opt_state(mpnet_pnet, os.path.join(model_dir, model_path)) load_opt_state(mpnet_vnet, os.path.join(model_dir, model_path)) # load train and test data print('loading...') obs, waypoint_dataset, waypoint_targets, env_indices, \ _, _, _, _ = data_loader.load_train_dataset(N=args.no_env, NP=args.no_motion_paths, data_folder=args.path_folder, obs_f=True, direction=args.direction, dynamics=dynamics, enforce_bounds=enforce_bounds, system=system, step_sz=step_sz, num_steps=args.num_steps, multigoal=args.multigoal) # randomize the dataset before training data=list(zip(waypoint_dataset,waypoint_targets,env_indices)) random.shuffle(data) dataset,targets,env_indices=list(zip(*data)) dataset = list(dataset) dataset = np.array(dataset) targets = np.array(targets) print(np.concatenate([pos_indices, pos_indices+args.total_input_size//2])) p_dataset = dataset[:, np.concatenate([pos_indices, pos_indices+args.total_input_size//2])] v_dataset = dataset[:, np.concatenate([vel_indices, vel_indices+args.total_input_size//2])] if 'decouple_output' in args.env_type: # only decouple output print('only decouple output but not input') p_dataset = dataset v_dataset = dataset print(p_dataset.shape) print(v_dataset.shape) p_targets = targets[:,pos_indices] v_targets = targets[:,vel_indices] # this is only for cartpole # TODO: add string for choosing env p_targets = list(p_targets) v_targets = list(v_targets) #targets = list(targets) env_indices = list(env_indices) dataset = np.array(dataset) #targets = np.array(targets) env_indices = np.array(env_indices) # use 5% as validation dataset val_len = int(len(dataset) * 0.05) val_p_dataset = p_dataset[-val_len:] val_v_dataset = v_dataset[-val_len:] val_p_targets = p_targets[-val_len:] val_v_targets = v_targets[-val_len:] val_env_indices = env_indices[-val_len:] p_dataset = p_dataset[:-val_len] v_dataset = v_dataset[:-val_len] p_targets = p_targets[:-val_len] v_targets = v_targets[:-val_len] env_indices = env_indices[:-val_len] # Train the Models print('training...') if args.loss == 'mse': if args.multigoal == 0: writer_fname = 'pos_vel_%s_%f_%s_direction_%d_step_%d' % (args.env_type, args.learning_rate, args.opt, args.direction, args.num_steps, ) else: writer_fname = 'pos_vel_%s_%f_%s_direction_%d_step_%d_multigoal' % (args.env_type, args.learning_rate, args.opt, args.direction, args.num_steps, ) else: if args.multigoal == 0: writer_fname = 'pos_vel_%s_%f_%s_direction_%d_step_%d_loss_%s' % (args.env_type, args.learning_rate, args.opt, args.direction, args.num_steps, args.loss, ) else: writer_fname = 'pos_vel_%s_%f_%s_direction_%d_step_%d_loss_%s_multigoal' % (args.env_type, args.learning_rate, args.opt, args.direction, args.num_steps, args.loss, ) writer = SummaryWriter('./runs/'+writer_fname) record_i = 0 val_record_i = 0 p_loss_avg_i = 0 p_val_loss_avg_i = 0 p_loss_avg = 0. p_val_loss_avg = 0. v_loss_avg_i = 0 v_val_loss_avg_i = 0 v_loss_avg = 0. v_val_loss_avg = 0. loss_steps = 100 # record every 100 loss world_size = np.array(args.world_size) pos_world_size = list(world_size[pos_indices]) vel_world_size = list(world_size[vel_indices]) for epoch in range(args.start_epoch+1,args.num_epochs+1): print('epoch' + str(epoch)) val_i = 0 for i in range(0,len(p_dataset),args.batch_size): print('epoch: %d, training... path: %d' % (epoch, i+1)) p_dataset_i = p_dataset[i:i+args.batch_size] v_dataset_i = v_dataset[i:i+args.batch_size] p_targets_i = p_targets[i:i+args.batch_size] v_targets_i = v_targets[i:i+args.batch_size] env_indices_i = env_indices[i:i+args.batch_size] # record p_bi = p_dataset_i.astype(np.float32) v_bi = v_dataset_i.astype(np.float32) print('p_bi shape:') print(p_bi.shape) print('v_bi shape:') print(v_bi.shape) p_bt = p_targets_i v_bt = v_targets_i p_bi = torch.FloatTensor(p_bi) v_bi = torch.FloatTensor(v_bi) p_bt = torch.FloatTensor(p_bt) v_bt = torch.FloatTensor(v_bt) # edit: disable this for investigation of the good weights for training, and for wrapping if 'decouple_output' in args.env_type: print('using normalizatino of decoupled output') # only decouple output but not input p_bi, v_bi, p_bt, v_bt = normalize(p_bi, args.world_size), normalize(v_bi, args.world_size), normalize(p_bt, pos_world_size), normalize(v_bt, vel_world_size) else: p_bi, v_bi, p_bt, v_bt = normalize(p_bi, pos_world_size), normalize(v_bi, vel_world_size), normalize(p_bt, pos_world_size), normalize(v_bt, vel_world_size) mpnet_pnet.zero_grad() mpnet_vnet.zero_grad() p_bi=to_var(p_bi) v_bi=to_var(v_bi) p_bt=to_var(p_bt) v_bt=to_var(v_bt) if obs is None: bobs = None else: bobs = obs[env_indices_i].astype(np.float32) bobs = torch.FloatTensor(bobs) bobs = to_var(bobs) print('-------pnet-------') print('before training losses:') print(mpnet_pnet.loss(mpnet_pnet(p_bi, bobs), p_bt)) mpnet_pnet.step(p_bi, bobs, p_bt) print('after training losses:') print(mpnet_pnet.loss(mpnet_pnet(p_bi, bobs), p_bt)) p_loss = mpnet_pnet.loss(mpnet_pnet(p_bi, bobs), p_bt) #update_line(hl, ax, [i//args.batch_size, loss.data.numpy()]) p_loss_avg += p_loss.cpu().data p_loss_avg_i += 1 print('-------vnet-------') print('before training losses:') print(mpnet_vnet.loss(mpnet_vnet(v_bi, bobs), v_bt)) mpnet_vnet.step(v_bi, bobs, v_bt) print('after training losses:') print(mpnet_vnet.loss(mpnet_vnet(v_bi, bobs), v_bt)) v_loss = mpnet_vnet.loss(mpnet_vnet(v_bi, bobs), v_bt) #update_line(hl, ax, [i//args.batch_size, loss.data.numpy()]) v_loss_avg += v_loss.cpu().data v_loss_avg_i += 1 if p_loss_avg_i >= loss_steps: p_loss_avg = p_loss_avg / p_loss_avg_i writer.add_scalar('p_train_loss_0', p_loss_avg[0], record_i) writer.add_scalar('p_train_loss_1', p_loss_avg[1], record_i) v_loss_avg = v_loss_avg / v_loss_avg_i writer.add_scalar('v_train_loss_0', v_loss_avg[0], record_i) writer.add_scalar('v_train_loss_1', v_loss_avg[1], record_i) record_i += 1 p_loss_avg = 0. p_loss_avg_i = 0 v_loss_avg = 0. v_loss_avg_i = 0 # validation # calculate the corresponding batch in val_dataset p_dataset_i = val_p_dataset[val_i:val_i+args.batch_size] v_dataset_i = val_v_dataset[val_i:val_i+args.batch_size] p_targets_i = val_p_targets[val_i:val_i+args.batch_size] v_targets_i = val_v_targets[val_i:val_i+args.batch_size] env_indices_i = val_env_indices[val_i:val_i+args.batch_size] val_i = val_i + args.batch_size if val_i > val_len: val_i = 0 # record p_bi = p_dataset_i.astype(np.float32) v_bi = v_dataset_i.astype(np.float32) print('p_bi shape:') print(p_bi.shape) print('v_bi shape:') print(v_bi.shape) p_bt = p_targets_i v_bt = v_targets_i p_bi = torch.FloatTensor(p_bi) v_bi = torch.FloatTensor(v_bi) p_bt = torch.FloatTensor(p_bt) v_bt = torch.FloatTensor(v_bt) if 'decouple_output' in args.env_type: # only decouple output but not input p_bi, v_bi, p_bt, v_bt = normalize(p_bi, args.world_size), normalize(v_bi, args.world_size), normalize(p_bt, pos_world_size), normalize(v_bt, vel_world_size) else: p_bi, v_bi, p_bt, v_bt = normalize(p_bi, pos_world_size), normalize(v_bi, vel_world_size), normalize(p_bt, pos_world_size), normalize(v_bt, vel_world_size) p_bi=to_var(p_bi) v_bi=to_var(v_bi) p_bt=to_var(p_bt) v_bt=to_var(v_bt) if obs is None: bobs = None else: bobs = obs[env_indices_i].astype(np.float32) bobs = torch.FloatTensor(bobs) bobs = to_var(bobs) print('-------pnet loss--------') p_loss = mpnet_pnet.loss(mpnet_pnet(p_bi, bobs), p_bt) print('validation loss: ' % (p_loss.cpu().data)) p_val_loss_avg += p_loss.cpu().data p_val_loss_avg_i += 1 print('-------vnet loss--------') v_loss = mpnet_vnet.loss(mpnet_vnet(v_bi, bobs), v_bt) print('validation loss: ' % (v_loss.cpu().data)) v_val_loss_avg += v_loss.cpu().data v_val_loss_avg_i += 1 if p_val_loss_avg_i >= loss_steps: p_val_loss_avg = p_val_loss_avg / p_val_loss_avg_i writer.add_scalar('p_val_loss_0', p_val_loss_avg[0], val_record_i) writer.add_scalar('p_val_loss_1', p_val_loss_avg[1], val_record_i) v_val_loss_avg = v_val_loss_avg / v_val_loss_avg_i writer.add_scalar('v_val_loss_0', v_val_loss_avg[0], val_record_i) writer.add_scalar('v_val_loss_1', v_val_loss_avg[1], val_record_i) val_record_i += 1 p_val_loss_avg = 0. p_val_loss_avg_i = 0 v_val_loss_avg = 0. v_val_loss_avg_i = 0 # Save the models if epoch > 0 and epoch % 50 == 0: model_pnet_path='kmpnet_pnet_epoch_%d_direction_%d_step_%d.pkl' %(epoch, args.direction, args.num_steps) model_vnet_path='kmpnet_vnet_epoch_%d_direction_%d_step_%d.pkl' %(epoch, args.direction, args.num_steps) #save_state(mpnet, torch_seed, np_seed, py_seed, os.path.join(args.model_path,model_path)) save_state(mpnet_pnet, torch_seed, np_seed, py_seed, os.path.join(model_dir,model_pnet_path)) save_state(mpnet_vnet, torch_seed, np_seed, py_seed, os.path.join(model_dir,model_vnet_path)) writer.export_scalars_to_json("./all_scalars.json") writer.close()
def main(args): #global hl if torch.cuda.is_available(): torch.cuda.set_device(args.device) # environment setting multigoal = False cpp_propagator = _sst_module.SystemPropagator() if args.env_type == 'pendulum': normalize = pendulum.normalize unnormalize = pendulum.unnormalize system = standard_cpp_systems.PSOPTPendulum() dynamics = None enforce_bounds = None step_sz = 0.002 num_steps = 20 elif args.env_type == 'cartpole': normalize = cart_pole.normalize unnormalize = cart_pole.unnormalize dynamics = cartpole.dynamics system = _sst_module.CartPole() enforce_bounds = cartpole.enforce_bounds step_sz = 0.002 num_steps = 20 elif args.env_type == 'cartpole_obs': normalize = cart_pole_obs.normalize unnormalize = cart_pole_obs.unnormalize system = _sst_module.CartPole() dynamics = cartpole.dynamics enforce_bounds = cartpole.enforce_bounds step_sz = 0.002 num_steps = 20 cae = cae_identity mlp = MLP elif args.env_type == 'cartpole_obs_4': normalize = cart_pole_obs.normalize unnormalize = cart_pole_obs.unnormalize system = _sst_module.PSOPTCartPole() mlp = mlp_cartpole.MLP3_no_dropout cae = CAE_cartpole_voxel_2d dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) multigoal = False enforce_bounds = cart_pole_obs.enforce_bounds step_sz = 0.002 num_steps = 20 elif args.env_type == 'cartpole_obs_4_multigoal': normalize = cart_pole_obs.normalize unnormalize = cart_pole_obs.unnormalize system = _sst_module.PSOPTCartPole() mlp = mlp_cartpole.MLP3_no_dropout cae = CAE_cartpole_voxel_2d dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) #dynamics = None multigoal = True enforce_bounds = cart_pole_obs.enforce_bounds step_sz = 0.002 num_steps = 20 elif args.env_type == 'acrobot_obs': normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize system = _sst_module.PSOPTAcrobot() mlp = mlp_acrobot.MLP cae = CAE_acrobot_voxel_2d #dynamics = acrobot_obs.dynamics dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = acrobot_obs.enforce_bounds step_sz = 0.02 num_steps = 20 elif args.env_type == 'acrobot_obs_2': normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize system = _sst_module.PSOPTAcrobot() mlp = mlp_acrobot.MLP2 cae = CAE_acrobot_voxel_2d_2 #dynamics = acrobot_obs.dynamics dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = acrobot_obs.enforce_bounds step_sz = 0.02 num_steps = 20 elif args.env_type == 'acrobot_obs_3': normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize system = _sst_module.PSOPTAcrobot() mlp = mlp_acrobot.MLP3 cae = CAE_acrobot_voxel_2d_2 #dynamics = acrobot_obs.dynamics dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = acrobot_obs.enforce_bounds step_sz = 0.02 num_steps = 20 elif args.env_type == 'acrobot_obs_4': normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize system = _sst_module.PSOPTAcrobot() mlp = mlp_acrobot.MLP3 cae = CAE_acrobot_voxel_2d_3 #dynamics = acrobot_obs.dynamics dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = acrobot_obs.enforce_bounds step_sz = 0.02 num_steps = 20 elif args.env_type == 'acrobot_obs_5': normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize system = _sst_module.PSOPTAcrobot() mlp = mlp_acrobot.MLP cae = CAE_acrobot_voxel_2d_3 #dynamics = acrobot_obs.dynamics dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = acrobot_obs.enforce_bounds step_sz = 0.02 num_steps = 20 elif args.env_type == 'acrobot_obs_6': normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize system = _sst_module.PSOPTAcrobot() mlp = mlp_acrobot.MLP4 cae = CAE_acrobot_voxel_2d_3 #dynamics = acrobot_obs.dynamics dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = acrobot_obs.enforce_bounds step_sz = 0.02 num_steps = 20 elif args.env_type == 'acrobot_obs_7': normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize system = _sst_module.PSOPTAcrobot() mlp = mlp_acrobot.MLP5 cae = CAE_acrobot_voxel_2d_3 #dynamics = acrobot_obs.dynamics dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = acrobot_obs.enforce_bounds step_sz = 0.02 num_steps = 20 elif args.env_type == 'acrobot_obs_8': normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize system = _sst_module.PSOPTAcrobot() mlp = mlp_acrobot.MLP6 cae = CAE_acrobot_voxel_2d_3 #dynamics = acrobot_obs.dynamics dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) enforce_bounds = acrobot_obs.enforce_bounds step_sz = 0.02 num_steps = 20 # set loss for mpnet if args.loss == 'mse': #mpnet.loss_f = nn.MSELoss() def mse_loss(y1, y2): l = (y1 - y2)**2 l = torch.mean( l, dim=0 ) # sum alone the batch dimension, now the dimension is the same as input dimension return l loss_f = mse_loss elif args.loss == 'l1_smooth': #mpnet.loss_f = nn.SmoothL1Loss() def l1_smooth_loss(y1, y2): l1 = torch.abs(y1 - y2) cond = l1 < 1 l = torch.where(cond, 0.5 * l1**2, l1) l = torch.mean( l, dim=0 ) # sum alone the batch dimension, now the dimension is the same as input dimension loss_f = l1_smooth_loss elif args.loss == 'mse_decoupled': def mse_decoupled(y1, y2): # for angle terms, wrap it to -pi~pi l_0 = torch.abs(y1[:, 0] - y2[:, 0])**2 l_1 = torch.abs(y1[:, 1] - y2[:, 1])**2 l_2 = torch.abs(y1[:, 2] - y2[:, 2]) # angular dimension l_3 = torch.abs(y1[:, 3] - y2[:, 3])**2 cond = (l_2 > 1.0) * (l_2 <= 2.0 ) # np.pi after normalization is 1.0 l_2 = torch.where(cond, 2.0 - l_2, l_2) l_2 = l_2**2 l_0 = torch.mean(l_0) l_1 = torch.mean(l_1) l_2 = torch.mean(l_2) l_3 = torch.mean(l_3) return torch.stack([l_0, l_1, l_2, l_3]) loss_f = mse_decoupled mpnet = KMPNet(args.total_input_size, args.AE_input_size, args.mlp_input_size, args.output_size, cae, mlp, loss_f) # load net # load previously trained model if start epoch > 0 model_dir = args.model_dir model_dir = model_dir + 'cost_' + args.env_type + "_lr%f_%s_step_%d/" % ( args.learning_rate, args.opt, args.num_steps) if not os.path.exists(model_dir): os.makedirs(model_dir) model_path = 'cost_kmpnet_epoch_%d_direction_%d_step_%d.pkl' % ( args.start_epoch, args.direction, args.num_steps) torch_seed, np_seed, py_seed = 0, 0, 0 if args.start_epoch > 0: #load_net_state(mpnet, os.path.join(args.model_path, model_path)) load_net_state(mpnet, os.path.join(model_dir, model_path)) #torch_seed, np_seed, py_seed = load_seed(os.path.join(args.model_path, model_path)) torch_seed, np_seed, py_seed = load_seed( os.path.join(model_dir, model_path)) # set seed after loading torch.manual_seed(torch_seed) np.random.seed(np_seed) random.seed(py_seed) if torch.cuda.is_available(): mpnet.cuda() mpnet.mlp.cuda() mpnet.encoder.cuda() if args.opt == 'Adagrad': mpnet.set_opt(torch.optim.Adagrad, lr=args.learning_rate) elif args.opt == 'Adam': mpnet.set_opt(torch.optim.Adam, lr=args.learning_rate) elif args.opt == 'SGD': mpnet.set_opt(torch.optim.SGD, lr=args.learning_rate, momentum=0.9) elif args.opt == 'ASGD': mpnet.set_opt(torch.optim.ASGD, lr=args.learning_rate) if args.start_epoch > 0: #load_opt_state(mpnet, os.path.join(args.model_path, model_path)) load_opt_state(mpnet, os.path.join(model_dir, model_path)) # load train and test data print('loading...') obs, cost_dataset, cost_targets, env_indices, \ _, _, _, _ = data_loader.load_train_dataset_cost(N=args.no_env, NP=args.no_motion_paths, data_folder=args.path_folder, obs_f=True, direction=args.direction, dynamics=dynamics, enforce_bounds=enforce_bounds, system=system, step_sz=step_sz, num_steps=args.num_steps, multigoal=multigoal) # randomize the dataset before training data = list(zip(cost_dataset, cost_targets, env_indices)) random.shuffle(data) dataset, targets, env_indices = list(zip(*data)) dataset = list(dataset) targets = list(targets) env_indices = list(env_indices) dataset = np.array(dataset) targets = np.array(targets) env_indices = np.array(env_indices) # use 5% as validation dataset val_len = int(len(dataset) * 0.05) val_dataset = dataset[-val_len:] val_targets = targets[-val_len:] val_env_indices = env_indices[-val_len:] dataset = dataset[:-val_len] targets = targets[:-val_len] env_indices = env_indices[:-val_len] # Train the Models print('training...') writer_fname = 'cost_%s_%f_%s_direction_%d_step_%d' % ( args.env_type, args.learning_rate, args.opt, args.direction, args.num_steps) writer = SummaryWriter('./runs/' + writer_fname) record_i = 0 val_record_i = 0 loss_avg_i = 0 val_loss_avg_i = 0 loss_avg = 0. val_loss_avg = 0. loss_steps = 100 # record every 100 loss for epoch in range(args.start_epoch + 1, args.num_epochs + 1): print('epoch' + str(epoch)) val_i = 0 for i in range(0, len(dataset), args.batch_size): print('epoch: %d, training... path: %d' % (epoch, i + 1)) dataset_i = dataset[i:i + args.batch_size] targets_i = targets[i:i + args.batch_size] env_indices_i = env_indices[i:i + args.batch_size] # record bi = dataset_i.astype(np.float32) print('bi shape:') print(bi.shape) bt = targets_i bi = torch.FloatTensor(bi) bt = torch.FloatTensor(bt) bi = normalize(bi, args.world_size) mpnet.zero_grad() bi = to_var(bi) bt = to_var(bt) if obs is None: bobs = None else: bobs = obs[env_indices_i].astype(np.float32) bobs = torch.FloatTensor(bobs) bobs = to_var(bobs) print('before training losses:') print(mpnet.loss(mpnet(bi, bobs), bt)) mpnet.step(bi, bobs, bt) print('after training losses:') print(mpnet.loss(mpnet(bi, bobs), bt)) loss = mpnet.loss(mpnet(bi, bobs), bt) #update_line(hl, ax, [i//args.batch_size, loss.data.numpy()]) loss_avg += loss.cpu().data loss_avg_i += 1 if loss_avg_i >= loss_steps: loss_avg = loss_avg / loss_avg_i writer.add_scalar('train_loss', loss_avg, record_i) record_i += 1 loss_avg = 0. loss_avg_i = 0 # validation # calculate the corresponding batch in val_dataset dataset_i = val_dataset[val_i:val_i + args.batch_size] targets_i = val_targets[val_i:val_i + args.batch_size] env_indices_i = val_env_indices[val_i:val_i + args.batch_size] val_i = val_i + args.batch_size if val_i > val_len: val_i = 0 # record bi = dataset_i.astype(np.float32) print('bi shape:') print(bi.shape) bt = targets_i bi = torch.FloatTensor(bi) bt = torch.FloatTensor(bt) bi = normalize(bi, args.world_size) bi = to_var(bi) bt = to_var(bt) if obs is None: bobs = None else: bobs = obs[env_indices_i].astype(np.float32) bobs = torch.FloatTensor(bobs) bobs = to_var(bobs) loss = mpnet.loss(mpnet(bi, bobs), bt) print('validation loss: %f' % (loss.cpu().data)) val_loss_avg += loss.cpu().data val_loss_avg_i += 1 if val_loss_avg_i >= loss_steps: val_loss_avg = val_loss_avg / val_loss_avg_i writer.add_scalar('val_loss', val_loss_avg, val_record_i) val_record_i += 1 val_loss_avg = 0. val_loss_avg_i = 0 # Save the models if epoch > 0 and epoch % 50 == 0: model_path = 'cost_kmpnet_epoch_%d_direction_%d_step_%d.pkl' % ( epoch, args.direction, args.num_steps) #save_state(mpnet, torch_seed, np_seed, py_seed, os.path.join(args.model_path,model_path)) save_state(mpnet, torch_seed, np_seed, py_seed, os.path.join(model_dir, model_path)) writer.export_scalars_to_json("./all_scalars.json") writer.close()
controls = open( '../data/cartpole_obs/%d/control_%d.pkl' % (obs_idx, p_idx), 'rb') controls = pickle.load(controls) costs = open('../data/cartpole_obs/%d/cost_%d.pkl' % (obs_idx, p_idx), 'rb') costs = pickle.load(costs) params = {} params['pole_l'] = 2.5 params['pole_w'] = 0.1 params['cart_w'] = 1. params['cart_h'] = 0.5 params['obs_w'] = 4 params['obs_h'] = 4 params['integration_step'] = 0.002 #system = CartPole(obs_list) system = _sst_module.PSOPTCartPole() cpp_propagator = _sst_module.SystemPropagator() dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) vis = CartPoleVisualizer(dynamics, params) states = path actions = controls anim = vis.animate(np.array(states), np.array(actions), np.array(costs), obs_list) #HTML(anim.to_html5_video()) anim.save('../cartpole_env%d_path%d_data.mp4' % (obs_idx, p_idx), writer=writer) # for sst # Create custom system
def main(args): # set seed torch_seed = np.random.randint(low=0, high=1000) np_seed = np.random.randint(low=0, high=1000) py_seed = np.random.randint(low=0, high=1000) np.random.seed(np_seed) random.seed(py_seed) # Build the models # setup evaluation function and load function if args.env_type == 'pendulum': obs_file = None obc_file = None obs_f = False #system = standard_cpp_systems.PSOPTPendulum() #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0) elif args.env_type == 'cartpole_obs': obs_file = None obc_file = None obs_f = True obs_width = 4.0 step_sz = 0.002 psopt_system = _sst_module.PSOPTCartPole() cpp_propagator = _sst_module.SystemPropagator() #system = standard_cpp_systems.RectangleObs(obs, 4., 'cartpole') dynamics = lambda x, u, t: cpp_propagator.propagate(psopt_system, x, u, t) cpp_state_validator = lambda x, obs: cpp_propagator.cartpole_validate(x, obs, obs_width) #system = standard_cpp_systems.RectangleObs(obs_list, args.obs_width, 'cartpole') #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) elif args.env_type == 'acrobot_obs': obs_file = None obc_file = None obs_f = True obs_width = 6.0 #system = standard_cpp_systems.RectangleObs(obs_list, args.obs_width, 'acrobot') #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) # load data print('loading...') if args.seen_N > 0: seen_test_data = data_loader.load_test_dataset(args.seen_N, args.seen_NP, args.data_folder, obs_f, args.seen_s, args.seen_sp) if args.unseen_N > 0: unseen_test_data = data_loader.load_test_dataset(args.unseen_N, args.unseen_NP, args.data_folder, obs_f, args.unseen_s, args.unseen_sp) # test # testing print('testing...') seen_test_suc_rate = 0. unseen_test_suc_rate = 0. # find path # randomly pick up a point in the data, and find similar data in the dataset # plot the next point obc, obs, paths, sgs, path_lengths, controls, costs = seen_test_data for envi in range(10): for pathi in range(20): obs_i = obs[envi] new_obs_i = [] obs_i = obs[envi] plan_res_path = [] plan_time_path = [] plan_cost_path = [] data_cost_path = [] for k in range(len(obs_i)): obs_pt = [] obs_pt.append(obs_i[k][0]-obs_width/2) obs_pt.append(obs_i[k][1]-obs_width/2) obs_pt.append(obs_i[k][0]-obs_width/2) obs_pt.append(obs_i[k][1]+obs_width/2) obs_pt.append(obs_i[k][0]+obs_width/2) obs_pt.append(obs_i[k][1]+obs_width/2) obs_pt.append(obs_i[k][0]+obs_width/2) obs_pt.append(obs_i[k][1]-obs_width/2) new_obs_i.append(obs_pt) obs_i = new_obs_i """ # visualization plt.ion() fig = plt.figure() ax = fig.add_subplot(111) #ax.set_autoscale_on(True) ax.set_xlim(-30, 30) ax.set_ylim(-np.pi, np.pi) hl, = ax.plot([], [], 'b') #hl_real, = ax.plot([], [], 'r') hl_for, = ax.plot([], [], 'g') hl_back, = ax.plot([], [], 'r') hl_for_mpnet, = ax.plot([], [], 'lightgreen') hl_back_mpnet, = ax.plot([], [], 'salmon') #print(obs) def update_line(h, ax, new_data): new_data = wrap_angle(new_data, propagate_system) h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1])) #h.set_xdata(np.append(h.get_xdata(), new_data[0])) #h.set_ydata(np.append(h.get_ydata(), new_data[1])) def remove_last_k(h, ax, k): h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k]) def draw_update_line(ax): #ax.relim() #ax.autoscale_view() fig.canvas.draw() fig.canvas.flush_events() #plt.show() def wrap_angle(x, system): circular = system.is_circular_topology() res = np.array(x) for i in range(len(x)): if circular[i]: # use our previously saved version res[i] = x[i] - np.floor(x[i] / (2*np.pi))*(2*np.pi) if res[i] > np.pi: res[i] = res[i] - 2*np.pi return res dx = 1 dtheta = 0.1 feasible_points = [] infeasible_points = [] imin = 0 imax = int(2*30./dx) jmin = 0 jmax = int(2*np.pi/dtheta) for i in range(imin, imax): for j in range(jmin, jmax): x = np.array([dx*i-30, 0., dtheta*j-np.pi, 0.]) if IsInCollision(x, obs_i): infeasible_points.append(x) print('state:', x) print('python collison') print("cpp collision result: ", cpp_state_validator(x, obs[envi])) else: feasible_points.append(x) print('state:', x) print('python not in collison') print("cpp collision result: ", cpp_state_validator(x, obs[envi])) feasible_points = np.array(feasible_points) infeasible_points = np.array(infeasible_points) print('feasible points') print(feasible_points) print('infeasible points') print(infeasible_points) ax.scatter(feasible_points[:,0], feasible_points[:,2], c='yellow') ax.scatter(infeasible_points[:,0], infeasible_points[:,2], c='pink') #for i in range(len(data)): # update_line(hl, ax, data[i]) draw_update_line(ax) #state_t = start_state """ xs = paths[envi][pathi] us = controls[envi][pathi] ts = costs[envi][pathi] # propagate data p_start = xs[0] detail_paths = [p_start] detail_controls = [] detail_costs = [] state = [p_start] control = [] cost = [] for k in range(len(us)): #state_i.append(len(detail_paths)-1) max_steps = int(np.round(ts[k]/step_sz)) accum_cost = 0. print('p_start:') print(p_start) print('data:') print(paths[envi][pathi][k]) # comment this out to test corrected data versus previous data #p_start = xs[k] #state[-1] = xs[k] for step in range(1,max_steps+1): p_start = dynamics(p_start, us[k], step_sz) p_start = enforce_bounds(p_start) detail_paths.append(p_start) accum_cost += step_sz if (step % 1 == 0) or (step == max_steps): state.append(p_start) #print('control') #print(controls[i][j]) cost.append(accum_cost) accum_cost = 0. print('state:', p_start) print('python IsInCollison result: ', IsInCollision(p_start, obs_i)) print("cpp validate result: ", cpp_state_validator(p_start, obs[envi])) assert not IsInCollision(p_start, obs_i) print('p_start:') print(p_start) print('data:') print(paths[envi][pathi][-1]) #state[-1] = xs[-1] """