# Dataset-statistics script. Assumes numpy as np, matplotlib.pyplot as plt,
# and the repo's Param, CityMap, datahandler, and plotter are imported at
# the top of the original file.
def main():
    param = Param()
    env = CityMap(param)

    if param.make_dataset_on:
        print('   making dataset...')
        train_dataset, test_dataset = datahandler.make_dataset(env)
        datahandler.write_dataset(env, train_dataset, test_dataset)

    datahandler.load_dataset(env)
    train_dataset = env.train_dataset
    test_dataset = env.test_dataset

    train_w = env.get_customer_demand(train_dataset, train_dataset[-1, 0])
    test_w = env.get_customer_demand(test_dataset, test_dataset[-1, 0])

    print(train_dataset[0, :])
    print(np.linalg.norm([
        train_dataset[0, 4] - train_dataset[0, 2],
        train_dataset[0, 5] - train_dataset[0, 3]
    ]))
    print(test_dataset[0, :])
    print(np.linalg.norm([
        test_dataset[0, 4] - test_dataset[0, 2],
        test_dataset[0, 5] - test_dataset[0, 3]
    ]))

    # dataset columns:
    #   start time [s],
    #   duration [s],
    #   x_p (pickup longitude) [degrees],
    #   y_p (pickup latitude) [degrees],
    #   x_d (dropoff longitude) [degrees],
    #   y_d (dropoff latitude) [degrees]
    print('train_dataset.shape', train_dataset.shape)
    print('test_dataset.shape', test_dataset.shape)
    print('train ave_length: ', ave_length(train_dataset))
    print('test ave_length: ', ave_length(test_dataset))
    print('train ave_duration: ', ave_duration(train_dataset))
    print('test ave_duration: ', ave_duration(test_dataset))
    print('train ave_customer_per_time: ', ave_customer_per_time(train_dataset))
    print('test ave_customer_per_time: ', ave_customer_per_time(test_dataset))
    print('train ave taxi speed: ', ave_taxi_speed(train_dataset))
    print('test ave taxi speed: ', ave_taxi_speed(test_dataset))
    print('train_dataset[0:1,:]:', train_dataset[0:1, :])
    print('test_dataset[0:1,:]:', test_dataset[0:1, :])

    fig, ax = plt.subplots()
    ax.plot(train_dataset[:, 0])
    ax.set_ylabel('pickup time')
    ax.set_xlabel('customer request #')

    fig, ax = plt.subplots()
    ax.plot(test_dataset[:, 0])
    ax.set_ylabel('pickup time')
    ax.set_xlabel('customer request #')

    fig, ax = plt.subplots()
    ax.plot(train_dataset[:, 1])
    ax.set_ylabel('train duration')
    ax.set_xlabel('customer request #')

    fig, ax = plt.subplots()
    ax.plot(test_dataset[:, 1])
    ax.set_ylabel('test duration')
    ax.set_xlabel('customer request #')

    fig, ax = plt.subplots()
    ax.plot(np.linalg.norm([
        train_dataset[:, 4] - train_dataset[:, 2],
        train_dataset[:, 5] - train_dataset[:, 3]
    ], axis=0))
    ax.set_ylabel('train length')
    ax.set_xlabel('customer request #')

    fig, ax = plt.subplots()
    ax.plot(np.linalg.norm([
        test_dataset[:, 4] - test_dataset[:, 2],
        test_dataset[:, 5] - test_dataset[:, 3]
    ], axis=0))
    ax.set_ylabel('test length')
    ax.set_xlabel('customer request #')

    print('saving and opening figs...')
    plotter.save_figs('dataset_plots.pdf')
    plotter.open_figs('dataset_plots.pdf')
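# The statistics helpers called above (ave_length, ave_duration,
# ave_customer_per_time, ave_taxi_speed) are not defined in this file.
# Below is a minimal sketch, assuming only the dataset column layout
# documented above; the originals elsewhere in the repo may differ.
import numpy as np


def ave_length(dataset):
    # mean straight-line pickup-to-dropoff distance [degrees]
    return np.mean(np.linalg.norm(dataset[:, 4:6] - dataset[:, 2:4], axis=1))


def ave_duration(dataset):
    # mean trip duration [s]
    return np.mean(dataset[:, 1])


def ave_customer_per_time(dataset):
    # requests per unit time over the span of the dataset
    span = dataset[-1, 0] - dataset[0, 0]
    return dataset.shape[0] / span if span > 0 else 0.0


def ave_taxi_speed(dataset):
    # mean straight-line distance over duration [degrees/s];
    # assumes durations are strictly positive
    lengths = np.linalg.norm(dataset[:, 4:6] - dataset[:, 2:4], axis=1)
    return np.mean(lengths / dataset[:, 1])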
# Fragment from a rollout/data-generation script; `times`, `states`,
# `steps`, `data`, and `trajectory_rollout_count` are defined earlier in
# the original file.

# plotting
if True:
    env.render()
    for i_config in range(1):  # range(env.state_dim_per_agent):
        fig, ax = plotter.make_fig()
        # ax.set_title(env.states_name[i_config])
        for agent in env.agents:
            if env.good_nodes[agent.i]:
                color = 'blue'
            else:
                color = 'red'
            ax.plot(
                times[0:steps],
                states[0:steps, env.agent_idx_to_state_idx(agent.i) + i_config],
                color=color)
        ax.axhline(env.desired_ave, label='desired', color='green')

print('Number of Trajectories:', trajectory_rollout_count)
print('Data Shape: ', data.shape)
file = os.path.join(param.il_load_dataset, 'rl_data.npy')
np.save(file, data)

plotter.save_figs(param.plots_fn)
plotter.open_figs(param.plots_fn)
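# Hedged companion note (an assumption, not in the original file): the
# rollout data saved above can later be reloaded for imitation learning
# with the matching call
#   data = np.load(os.path.join(param.il_load_dataset, 'rl_data.npy'))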
# Single/multi-agent simulation driver. Assumes numpy as np,
# collections.namedtuple, and the repo's plotter and util modules are
# imported at the top of the original file.
def sim(param, env, controllers, initial_state, visualize):

    def run_sim(controller, initial_state):
        states = np.zeros((len(times), env.n))
        actions = np.zeros((len(times) - 1, env.m))
        states[0] = env.reset(initial_state)
        reward = 0
        for step, time in enumerate(times[:-1]):
            state = states[step]
            if param.pomdp_on:
                observation = env.observe()
            else:
                observation = state
            action = controller.policy(observation)
            states[step + 1], r, done, _ = env.step(action)
            reward += r
            actions[step] = action.flatten()
            if done:
                break
        print('reward: ', reward)
        env.close()
        return states, actions, step

    # -------------------------------------------

    # environment
    times = param.sim_times
    device = "cpu"

    if initial_state is None:
        initial_state = env.reset()
    print("Initial State: ", initial_state)

    # run sim
    SimResult = namedtuple('SimResult', ['states', 'actions', 'steps', 'name'])

    sim_results = []
    for name, controller in controllers.items():
        print("Running simulation with " + name)
        if hasattr(controller, 'policy'):
            result = SimResult._make(
                run_sim(controller, initial_state) + (name, ))
        else:
            result = SimResult._make(
                (controller.states, controller.actions, controller.steps,
                 name))
        sim_results.append(result)

    if param.sim_render_on:
        env.render()

    # plot time varying states
    if param.single_agent_sim:
        for i in range(env.n):
            fig, ax = plotter.subplots()
            ax.set_title(env.states_name[i])
            for result in sim_results:
                ax.plot(times[0:result.steps],
                        result.states[0:result.steps, i],
                        label=result.name)
            ax.legend()

        for i, name in enumerate(env.deduced_state_names):
            fig, ax = plotter.subplots()
            ax.set_title(name)
            for result in sim_results:
                deduce_states = np.empty(
                    (result.steps, len(env.deduced_state_names)))
                for j in range(result.steps):
                    deduce_states[j] = env.deduce_state(result.states[j])
                ax.plot(times[0:result.steps],
                        deduce_states[:, i],
                        label=result.name)
            ax.legend()

        for i in range(env.m):
            fig, ax = plotter.subplots()
            ax.set_title(env.actions_name[i])
            for result in sim_results:
                ax.plot(times[0:result.steps],
                        result.actions[0:result.steps, i],
                        label=result.name)
            ax.legend()

    elif param.multi_agent_sim:
        for i_config in range(1):  # range(env.state_dim_per_agent):
            fig, ax = plotter.make_fig()
            ax.set_title(env.states_name[i_config])
            for agent in env.agents:
                if env.good_nodes[agent.i]:
                    color = 'blue'
                else:
                    color = 'red'
                for result in sim_results:
                    ax.plot(
                        times[0:result.steps],
                        result.states[0:result.steps,
                                      env.agent_idx_to_state_idx(agent.i) +
                                      i_config],
                        label=result.name,
                        color=color)
            ax.axhline(env.desired_ave, label='desired', color='green')

    # # plot state space
    # if param.multi_agent_sim:
    #     fig, ax = plotter.make_fig()
    #     for agent in env.agents:
    #         ax.set_title('State Space')
    #         for result in sim_results:
    #             ax.plot(
    #                 result.states[0:result.steps, env.agent_idx_to_state_idx(agent.i)],
    #                 result.states[0:result.steps, env.agent_idx_to_state_idx(agent.i) + 1],
    #                 label=result.name)

    # extract gains
    if param.il_controller_class in ['PID_wRef', 'PID']:
        controller = controllers['IL']
        for result in sim_results:
            if result.name == 'IL':
                break
        kp, kd = util.extract_gains(controller, result.states[0:result.steps])
        fig, ax = plotter.plot(times[1:result.steps],
                               kp[0:result.steps, 0],
                               title='Kp pos')
        fig, ax = plotter.plot(times[1:result.steps],
                               kp[0:result.steps, 1],
                               title='Kp theta')
        fig, ax = plotter.plot(times[1:result.steps],
                               kd[0:result.steps, 0],
                               title='Kd pos')
        fig, ax = plotter.plot(times[1:result.steps],
                               kd[0:result.steps, 1],
                               title='Kd theta')

    # extract reference trajectory
    if param.il_controller_class in ['PID_wRef', 'Ref']:
        controller = controllers['IL']
        for result in sim_results:
            if result.name == 'IL':
                break
        ref_state = util.extract_ref_state(controller, result.states)
        for i in range(env.n):
            fig, ax = plotter.plot(times[1:result.steps + 1],
                                   ref_state[0:result.steps, i],
                                   title="ref " + env.states_name[i])

    plotter.save_figs(param.plots_fn)
    plotter.open_figs(param.plots_fn)

    # visualize
    if visualize:
        # plotter.visualize(param, env, states_deeprl)
        env.visualize(sim_results[0].states[0:result.steps], 0.1)
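# Hedged usage sketch (an assumption, not in the original file): sim()
# expects a dict of controllers keyed by name, where each entry either
# exposes a .policy(observation) method or carries precomputed
# states/actions/steps attributes, e.g.
#   sim(param, env, {'IL': il_controller}, initial_state=None,
#       visualize=False)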
def main():
    param = Param()
    param.desired_env_ncell = 2000
    param.xmin_thresh = None
    param.xmax_thresh = None
    param.ymin_thresh = None
    param.ymax_thresh = None
    param.update()

    env = CityMap(param)

    # init datasets
    if param.make_dataset_on:
        print('   making dataset...')
        train_dataset, test_dataset = datahandler.make_dataset(env)
        datahandler.write_dataset(env, train_dataset, test_dataset)

    print('   loading dataset...')
    datahandler.load_dataset(env)

    # env.init_map()
    # fig, ax = plotter.make_fig()
    # env.print_map(fig=fig, ax=ax)

    train_heatmap = env.heatmap(env.train_dataset)
    test_heatmap = env.heatmap(env.test_dataset)

    fig = plt.figure()
    gs = fig.add_gridspec(2, 3)
    ax1 = fig.add_subplot(gs[0, 0])
    ax2 = fig.add_subplot(gs[0, 1])
    ax3 = fig.add_subplot(gs[0, 2])
    ax4 = fig.add_subplot(gs[1, :])

    plotter.plot_heatmap(env, train_heatmap, ax1)
    plotter.plot_heatmap(env, train_heatmap, ax2)
    plotter.plot_heatmap(env, test_heatmap, ax3)
    plotter.plot_cell_demand_over_time(env, ax4)

    ax2.set_ylim(bottom=41.85, top=42.)
    ax2.set_xlim(left=-87.7, right=-87.6)
    ax3.set_ylim(bottom=41.85, top=42.)
    ax3.set_xlim(left=-87.7, right=-87.6)

    ax1.axis("off")
    ax2.axis("off")
    ax3.axis("off")

    ax1.set_title('Chicago', fontsize=10)
    ax2.set_title('October 29, 2016', fontsize=7.5)
    ax3.set_title('October 30, 2016', fontsize=7.5)

    # ax1.get_yaxis().set_visible(False)
    # ax2.get_xaxis().set_visible(False)
    # ax2.get_yaxis().set_visible(False)

    print('saving and opening figs...')
    plotter.save_figs(param.plot_fn)
    plotter.open_figs(param.plot_fn)
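# Standard entry-point guard (assumed; not shown in the original
# snippet) so the script runs main() when executed directly.
if __name__ == '__main__':
    main()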
# Simulation driver for the integrator/consensus environments. Assumes
# numpy as np, collections.namedtuple, matplotlib.patches Rectangle and
# Circle, run_sim, and the repo's plotter and util modules are imported
# at the top of the original file.
def sim(param, env, controllers, initial_state, visualize):

    # environment
    times = param.sim_times
    device = "cpu"

    if initial_state is None:
        initial_state = env.reset()

    # run sim
    SimResult = namedtuple(
        'SimResult', ['states', 'observations', 'actions', 'steps', 'name'])

    sim_results = []
    for name, controller in controllers.items():
        print("Running simulation with " + name)
        print("Initial State: ", initial_state)
        if hasattr(controller, 'policy'):
            result = SimResult._make(
                run_sim(param, env, controller, initial_state) + (name, ))
        else:
            observations = []
            result = SimResult._make(
                (controller.states, observations, controller.actions,
                 controller.steps, name))
        sim_results.append(result)

    # plot state space
    if param.env_name in [
            'SingleIntegrator', 'SingleIntegratorVelSensing',
            'DoubleIntegrator'
    ]:
        fig, ax = plotter.make_fig()
        ax.set_title('State Space')
        ax.set_aspect('equal')

        for o in env.obstacles:
            ax.add_patch(Rectangle(o, 1.0, 1.0, facecolor='gray', alpha=0.5))

        for agent in env.agents:
            line = ax.plot(
                result.states[0:result.steps,
                              env.agent_idx_to_state_idx(agent.i)],
                result.states[0:result.steps,
                              env.agent_idx_to_state_idx(agent.i) + 1],
                alpha=0.5)
            color = line[0].get_color()

            # plot velocity vectors:
            X = []
            Y = []
            U = []
            V = []
            for k in np.arange(0, result.steps, 100):
                X.append(result.states[k, env.agent_idx_to_state_idx(agent.i)])
                Y.append(
                    result.states[k, env.agent_idx_to_state_idx(agent.i) + 1])
                if param.env_name in [
                        'SingleIntegrator', 'SingleIntegratorVelSensing'
                ]:
                    # SingleIntegrator: plot actions
                    U.append(result.actions[k, 2 * agent.i + 0])
                    V.append(result.actions[k, 2 * agent.i + 1])
                elif param.env_name in ['DoubleIntegrator']:
                    # DoubleIntegrator: plot velocities
                    U.append(result.states[
                        k, env.agent_idx_to_state_idx(agent.i) + 2])
                    V.append(result.states[
                        k, env.agent_idx_to_state_idx(agent.i) + 3])
            ax.quiver(X, Y, U, V,
                      angles='xy', scale_units='xy', scale=0.5,
                      color=color, width=0.005)

            # num_obstacles = (result.observations.shape[1] - result.observations[0][0] - 5) / 2
            # print(num_obstacles)
            # print(result.observations)

            plotter.plot_circle(
                result.states[1, env.agent_idx_to_state_idx(agent.i)],
                result.states[1, env.agent_idx_to_state_idx(agent.i) + 1],
                param.r_agent, fig=fig, ax=ax, color=color)
            # plotter.plot_square(result.states[-1, env.agent_idx_to_state_idx(agent.i)],
            #                     result.states[-1, env.agent_idx_to_state_idx(agent.i) + 1],
            #                     param.r_agent, fig=fig, ax=ax, color=color)
            plotter.plot_square(agent.s_g[0], agent.s_g[1], param.r_agent,
                                angle=45, fig=fig, ax=ax, color=color)

        # draw state for each time step
        robot = 1
        if param.env_name in [
                'SingleIntegrator', 'SingleIntegratorVelSensing'
        ]:
            for step in np.arange(0, result.steps, 1000):
                fig, ax = plotter.make_fig()
                ax.set_title('State at t={} for robot={}'.format(
                    times[step], robot))
                ax.set_aspect('equal')

                # plot all obstacles
                for o in env.obstacles:
                    ax.add_patch(
                        Rectangle(o, 1.0, 1.0, facecolor='gray', alpha=0.5))

                # plot overall trajectory
                line = ax.plot(
                    result.states[0:result.steps,
                                  env.agent_idx_to_state_idx(robot)],
                    result.states[0:result.steps,
                                  env.agent_idx_to_state_idx(robot) + 1],
                    "--")
                color = line[0].get_color()

                # plot current position
                plotter.plot_circle(
                    result.states[step, env.agent_idx_to_state_idx(robot)],
                    result.states[step,
                                  env.agent_idx_to_state_idx(robot) + 1],
                    param.r_agent, fig=fig, ax=ax, color=color)

                # plot current observation
                observation = result.observations[step][robot][0]
                num_neighbors = int(observation[0])
                num_obstacles = int(
                    (observation.shape[0] - 3 - 2 * num_neighbors) / 2)
                robot_pos = result.states[
                    step,
                    env.agent_idx_to_state_idx(robot):
                    env.agent_idx_to_state_idx(robot) + 2]

                idx = 3
                for i in range(num_neighbors):
                    pos = observation[idx:idx + 2] + robot_pos
                    ax.add_patch(
                        Circle(pos, 0.25, facecolor='gray', edgecolor='red',
                               alpha=0.5))
                    idx += 2

                for i in range(num_obstacles):
                    # pos = observation[idx:idx + 2] + robot_pos - np.array([0.5, 0.5])
                    # ax.add_patch(Rectangle(pos, 1.0, 1.0, facecolor='gray', edgecolor='red', alpha=0.5))
                    pos = observation[idx:idx + 2] + robot_pos
                    ax.add_patch(
                        Circle(pos, 0.5, facecolor='gray', edgecolor='red',
                               alpha=0.5))
                    idx += 2

                # plot goal
                goal = observation[1:3] + robot_pos
                ax.add_patch(
                    Rectangle(goal - np.array([0.2, 0.2]), 0.4, 0.4,
                              alpha=0.5, color=color))

                # import matplotlib.pyplot as plt
                # plt.savefig("test.svg")
                # exit()

        elif param.env_name in ['DoubleIntegrator']:
            for step in np.arange(0, result.steps, 1000):
                fig, ax = plotter.make_fig()
                ax.set_title('State at t={} for robot={}'.format(
                    times[step], robot))
                ax.set_aspect('equal')

                # plot all obstacles
                for o in env.obstacles:
                    ax.add_patch(
                        Rectangle(o, 1.0, 1.0, facecolor='gray', alpha=0.5))

                # plot overall trajectory
                line = ax.plot(
                    result.states[0:result.steps,
                                  env.agent_idx_to_state_idx(robot)],
                    result.states[0:result.steps,
                                  env.agent_idx_to_state_idx(robot) + 1],
                    "--")
                color = line[0].get_color()

                # plot current position
                plotter.plot_circle(
                    result.states[step, env.agent_idx_to_state_idx(robot)],
                    result.states[step,
                                  env.agent_idx_to_state_idx(robot) + 1],
                    param.r_agent, fig=fig, ax=ax, color=color)

                # plot current observation
                observation = result.observations[step][robot][0]
                num_neighbors = int(observation[0])
                num_obstacles = int(
                    (observation.shape[0] - 5 - 4 * num_neighbors) / 2)
                robot_pos = result.states[
                    step,
                    env.agent_idx_to_state_idx(robot):
                    env.agent_idx_to_state_idx(robot) + 2]

                X = []
                Y = []
                U = []
                V = []
                idx = 5
                for i in range(num_neighbors):
                    pos = observation[idx:idx + 2] + robot_pos
                    X.append(pos[0])
                    Y.append(pos[1])
                    U.append(observation[idx + 2])
                    V.append(observation[idx + 3])
                    # print(np.linalg.norm(observation[idx + 2:idx + 4]))
                    ax.add_patch(
                        Circle(pos, param.r_agent, facecolor='gray',
                               edgecolor='red', alpha=0.5))
                    idx += 4

                for i in range(num_obstacles):
                    pos = observation[idx:idx + 2] + robot_pos - np.array(
                        [0.5, 0.5])
                    ax.add_patch(
                        Rectangle(pos, 1.0, 1.0, facecolor='gray',
                                  edgecolor='red', alpha=0.5))
                    # pos = observation[idx:idx + 2] + robot_pos
                    # ax.add_patch(Circle(pos, 0.5, facecolor='gray', edgecolor='red', alpha=0.5))
                    idx += 2

                # plot goal
                goal = observation[1:3] + robot_pos
                ax.add_patch(
                    Rectangle(goal - np.array([0.2, 0.2]), 0.4, 0.4,
                              alpha=0.5, color=color))

                X.append(robot_pos[0])
                Y.append(robot_pos[1])
                U.append(observation[3])
                V.append(observation[4])

                # plot velocity vectors
                ax.quiver(X, Y, U, V,
                          angles='xy', scale_units='xy', scale=0.5,
                          color='red', width=0.005)

    elif param.env_name == 'Consensus' and param.sim_render_on:
        env.render()
    elif param.sim_render_on:
        env.render()

    # plot time varying states
    if param.single_agent_sim:
        for i in range(env.n):
            fig, ax = plotter.subplots()
            ax.set_title(env.states_name[i])
            for result in sim_results:
                ax.plot(times[0:result.steps],
                        result.states[0:result.steps, i],
                        label=result.name)
            ax.legend()

        for i, name in enumerate(env.deduced_state_names):
            fig, ax = plotter.subplots()
            ax.set_title(name)
            for result in sim_results:
                deduce_states = np.empty(
                    (result.steps, len(env.deduced_state_names)))
                for j in range(result.steps):
                    deduce_states[j] = env.deduce_state(result.states[j])
                ax.plot(times[0:result.steps],
                        deduce_states[:, i],
                        label=result.name)
            ax.legend()

        for i in range(env.m):
            fig, ax = plotter.subplots()
            ax.set_title(env.actions_name[i])
            for result in sim_results:
                ax.plot(times[0:result.steps],
                        result.actions[0:result.steps, i],
                        label=result.name)
            ax.legend()

    elif param.env_name == 'Consensus':
        for i_config in range(1):  # range(env.state_dim_per_agent):
            fig, ax = plotter.make_fig()
            # ax.set_title(env.states_name[i_config])
            for agent in env.agents:
                if env.good_nodes[agent.i]:
                    color = 'blue'
                else:
                    color = 'red'
                for result in sim_results:
                    ax.plot(
                        times[0:result.steps],
                        result.states[0:result.steps,
                                      env.agent_idx_to_state_idx(agent.i) +
                                      i_config],
                        label=result.name,
                        color=color)
            ax.axhline(env.desired_ave, label='desired', color='green')

    elif param.env_name in ['SingleIntegrator', 'DoubleIntegrator']:
        for i_config in range(env.state_dim_per_agent):
            fig, ax = plotter.make_fig()
            ax.set_title(env.states_name[i_config])
            for agent in env.agents:
                for result in sim_results:
                    ax.plot(
                        times[1:result.steps],
                        result.states[1:result.steps,
                                      env.agent_idx_to_state_idx(agent.i) +
                                      i_config],
                        label=result.name)

    # plot time varying actions
    if param.env_name in ['SingleIntegrator', 'DoubleIntegrator']:
        for i_config in range(env.action_dim_per_agent):
            fig, ax = plotter.make_fig()
            ax.set_title(env.actions_name[i_config])
            for agent in env.agents:
                for result in sim_results:
                    ax.plot(
                        times[1:result.steps],
                        result.actions[1:result.steps,
                                       agent.i * env.action_dim_per_agent +
                                       i_config],
                        label=result.name)

    # extract gains
    if name in ['PID_wRef', 'PID']:
        controller = controllers['IL']
        for result in sim_results:
            if result.name == 'IL':
                break
        kp, kd = util.extract_gains(controller, result.states[0:result.steps])
        fig, ax = plotter.plot(times[1:result.steps],
                               kp[0:result.steps, 0],
                               title='Kp pos')
        fig, ax = plotter.plot(times[1:result.steps],
                               kp[0:result.steps, 1],
                               title='Kp theta')
        fig, ax = plotter.plot(times[1:result.steps],
                               kd[0:result.steps, 0],
                               title='Kd pos')
        fig, ax = plotter.plot(times[1:result.steps],
                               kd[0:result.steps, 1],
                               title='Kd theta')

    # extract reference trajectory
    if name in ['PID_wRef', 'Ref']:
        controller = controllers['IL']
        for result in sim_results:
            if result.name == 'IL':
                break
        ref_state = util.extract_ref_state(controller, result.states)
        for i in range(env.n):
            fig, ax = plotter.plot(times[1:result.steps + 1],
                                   ref_state[0:result.steps, i],
                                   title="ref " + env.states_name[i])

    # extract belief topology
    if name in ['IL'] and param.env_name == 'Consensus':
        for result in sim_results:
            if result.name == 'IL':
                break
        fig, ax = plotter.make_fig()
        belief_topology = util.extract_belief_topology(
            controller, result.observations)
        label_on = True
        if param.n_agents * param.n_agents > 10:
            label_on = False
        for i_agent in range(param.n_agents):
            for j_agent in range(param.n_agents):
                if not i_agent == j_agent and env.good_nodes[i_agent]:
                    if label_on:
                        plotter.plot(times[0:result.steps + 1],
                                     belief_topology[:, i_agent, j_agent],
                                     fig=fig, ax=ax,
                                     label="K:{}{}".format(i_agent, j_agent))
                    else:
                        plotter.plot(times[0:result.steps + 1],
                                     belief_topology[:, i_agent, j_agent],
                                     fig=fig, ax=ax)

    plotter.save_figs(param.plots_fn)
    plotter.open_figs(param.plots_fn)

    # visualize
    if visualize:
        # plotter.visualize(param, env, states_deeprl)
        env.visualize(sim_results[0].states[0:result.steps], 0.1)
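# run_sim is defined elsewhere in the repo. Below is a minimal sketch,
# adapted from the nested run_sim in the other sim() above and extended
# to return the observation history that SimResult expects here; the
# original implementation may differ.
import numpy as np


def run_sim(param, env, controller, initial_state):
    times = param.sim_times
    states = np.zeros((len(times), env.n))
    actions = np.zeros((len(times) - 1, env.m))
    observations = []
    states[0] = env.reset(initial_state)
    reward = 0
    step = 0
    for step, time in enumerate(times[:-1]):
        # observe (POMDP) or use the full state, as in the nested run_sim
        observation = env.observe() if param.pomdp_on else states[step]
        observations.append(observation)
        action = controller.policy(observation)
        states[step + 1], r, done, _ = env.step(action)
        reward += r
        actions[step] = np.asarray(action).flatten()
        if done:
            break
    print('reward: ', reward)
    env.close()
    # 4-tuple matching SimResult(states, observations, actions, steps, name)
    # once the caller appends (name,)
    return states, observations, actions, step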
# Fragment from a macro/micro simulation driver; `sim_results` and
# `default_param` are defined earlier in the original file.
    else:
        # micro sim

        # - plot sim
        if default_param.plot_sim_over_time:
            for sim_result in sim_results:
                controller_name = sim_result["controller_name"]
                times = sim_result["times"]
                plotter.sim_plot_over_time(controller_name, sim_result, times)
        else:
            for sim_result in sim_results:
                for timestep, time in enumerate(sim_result["times"]):
                    controller_name = sim_result["controller_name"]
                    plotter.render(controller_name, sim_result, timestep)
                    # break

        # - plot reward
        plotter.plot_cumulative_reward(sim_results)

        # - plot q value estimation
        for controller_name in default_param.controller_names:
            if 'bellman' in controller_name:
                plotter.plot_q_error(sim_results)
                break

    print('saving and opening figs...')
    plotter.save_figs(default_param.plot_fn)
    plotter.open_figs(default_param.plot_fn)
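# Hedged reference (inferred from the fragment above, not confirmed by
# the original file): each sim_result is a dict carrying at least
#   {'controller_name': str, 'times': sequence of timestamps, ...}
# plus whatever additional fields plotter.sim_plot_over_time,
# plotter.render, and plotter.plot_cumulative_reward read.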
def sim(param, env, controllers, initial_state, visualize):

    # environment
    times = param.sim_times
    device = "cpu"

    if initial_state is None:
        initial_state = env.reset()

    # run sim
    SimResult = namedtuple(
        'SimResult', ['states', 'observations', 'actions', 'steps', 'name'])

    sim_results = []
    for name, controller in controllers.items():
        print("Running simulation with " + name)
        print("Initial State: ", initial_state)
        if hasattr(controller, 'policy'):
            result = SimResult._make(
                run_sim(param, env, controller, initial_state) + (name, ))
        else:
            observations = []
            result = SimResult._make(
                (controller.states, observations, controller.actions,
                 controller.steps, name))
        sim_results.append(result)

    # plot state space
    if param.env_name in [
            'SingleIntegrator', 'SingleIntegratorVelSensing',
            'DoubleIntegrator'
    ]:
        fig, ax = plotter.make_fig()
        ax.set_title('State Space')
        ax.set_aspect('equal')

        for o in env.obstacles:
            ax.add_patch(Rectangle(o, 1.0, 1.0, facecolor='gray', alpha=0.5))

        for agent in env.agents:
            line = ax.plot(
                result.states[0:result.steps,
                              env.agent_idx_to_state_idx(agent.i)],
                result.states[0:result.steps,
                              env.agent_idx_to_state_idx(agent.i) + 1],
                alpha=0.5)
            color = line[0].get_color()

            # plot velocity vectors:
            X = []
            Y = []
            U = []
            V = []
            for k in np.arange(0, result.steps, 100):
                X.append(result.states[k, env.agent_idx_to_state_idx(agent.i)])
                Y.append(
                    result.states[k, env.agent_idx_to_state_idx(agent.i) + 1])
                if param.env_name in [
                        'SingleIntegrator', 'SingleIntegratorVelSensing'
                ]:
                    # SingleIntegrator: plot actions
                    U.append(result.actions[k, 2 * agent.i + 0])
                    V.append(result.actions[k, 2 * agent.i + 1])
                elif param.env_name in ['DoubleIntegrator']:
                    # DoubleIntegrator: plot velocities
                    U.append(result.states[
                        k, env.agent_idx_to_state_idx(agent.i) + 2])
                    V.append(result.states[
                        k, env.agent_idx_to_state_idx(agent.i) + 3])
            ax.quiver(X, Y, U, V,
                      angles='xy', scale_units='xy', scale=0.5,
                      color=color, width=0.005)

            plotter.plot_circle(
                result.states[1, env.agent_idx_to_state_idx(agent.i)],
                result.states[1, env.agent_idx_to_state_idx(agent.i) + 1],
                param.r_agent, fig=fig, ax=ax, color=color)
            plotter.plot_square(agent.s_g[0], agent.s_g[1], param.r_agent,
                                angle=45, fig=fig, ax=ax, color=color)

        # draw state for each time step
        robot = 0
        if param.env_name in ['SingleIntegrator']:
            for step in np.arange(0, result.steps, 1000):
                fig, ax = plotter.make_fig()
                ax.set_title('State at t={} for robot={}'.format(
                    times[step], robot))
                ax.set_aspect('equal')

                # plot all obstacles
                for o in env.obstacles:
                    ax.add_patch(
                        Rectangle(o, 1.0, 1.0, facecolor='gray', alpha=0.5))

                # plot overall trajectory
                line = ax.plot(
                    result.states[0:result.steps,
                                  env.agent_idx_to_state_idx(robot)],
                    result.states[0:result.steps,
                                  env.agent_idx_to_state_idx(robot) + 1],
                    "--")
                color = line[0].get_color()

                # plot current position
                plotter.plot_circle(
                    result.states[step, env.agent_idx_to_state_idx(robot)],
                    result.states[step,
                                  env.agent_idx_to_state_idx(robot) + 1],
                    param.r_agent, fig=fig, ax=ax, color=color)

                # plot current observation
                observation = result.observations[step][robot][0]
                num_neighbors = int(observation[0])
                num_obstacles = int(
                    (observation.shape[0] - 3 - 2 * num_neighbors) / 2)
                robot_pos = result.states[
                    step,
                    env.agent_idx_to_state_idx(robot):
                    env.agent_idx_to_state_idx(robot) + 2]

                idx = 3
                for i in range(num_neighbors):
                    pos = observation[idx:idx + 2] + robot_pos
                    ax.add_patch(
                        Circle(pos, 0.25, facecolor='gray', edgecolor='red',
                               alpha=0.5))
                    idx += 2

                for i in range(num_obstacles):
                    # pos = observation[idx:idx + 2] + robot_pos - np.array([0.5, 0.5])
                    # ax.add_patch(Rectangle(pos, 1.0, 1.0, facecolor='gray', edgecolor='red', alpha=0.5))
                    pos = observation[idx:idx + 2] + robot_pos
                    ax.add_patch(
                        Circle(pos, 0.5, facecolor='gray', edgecolor='red',
                               alpha=0.5))
                    idx += 2

                # plot goal
                goal = observation[1:3] + robot_pos
                ax.add_patch(
                    Rectangle(goal - np.array([0.2, 0.2]), 0.4, 0.4,
                              alpha=0.5, color=color))

                # import matplotlib.pyplot as plt
                # plt.savefig("test.svg")
                # exit()

        elif param.env_name in ['DoubleIntegrator']:
            for step in np.arange(0, result.steps, 1000):
                fig, ax = plotter.make_fig()
                ax.set_title('State at t={} for robot={}'.format(
                    times[step], robot))
                ax.set_aspect('equal')

                # plot all obstacles
                for o in env.obstacles:
                    ax.add_patch(
                        Rectangle(o, 1.0, 1.0, facecolor='gray', alpha=0.5))

                # plot overall trajectory
                line = ax.plot(
                    result.states[0:result.steps,
                                  env.agent_idx_to_state_idx(robot)],
                    result.states[0:result.steps,
                                  env.agent_idx_to_state_idx(robot) + 1],
                    "--")
                color = line[0].get_color()

                # plot current position
                plotter.plot_circle(
                    result.states[step, env.agent_idx_to_state_idx(robot)],
                    result.states[step,
                                  env.agent_idx_to_state_idx(robot) + 1],
                    param.r_agent, fig=fig, ax=ax, color=color)

                # plot current observation
                observation = result.observations[step][robot][0]
                num_neighbors = int(observation[0])
                num_obstacles = int(
                    (observation.shape[0] - 5 - 4 * num_neighbors) / 2)
                robot_pos = result.states[
                    step,
                    env.agent_idx_to_state_idx(robot):
                    env.agent_idx_to_state_idx(robot) + 2]

                X = []
                Y = []
                U = []
                V = []
                idx = 5
                for i in range(num_neighbors):
                    pos = observation[idx:idx + 2] + robot_pos
                    X.append(pos[0])
                    Y.append(pos[1])
                    U.append(observation[idx + 2])
                    V.append(observation[idx + 3])
                    # print(np.linalg.norm(observation[idx + 2:idx + 4]))
                    ax.add_patch(
                        Circle(pos, param.r_agent, facecolor='gray',
                               edgecolor='red', alpha=0.5))
                    idx += 4

                for i in range(num_obstacles):
                    pos = observation[idx:idx + 2] + robot_pos - np.array(
                        [0.5, 0.5])
                    ax.add_patch(
                        Rectangle(pos, 1.0, 1.0, facecolor='gray',
                                  edgecolor='red', alpha=0.5))
                    # pos = observation[idx:idx + 2] + robot_pos
                    # ax.add_patch(Circle(pos, 0.5, facecolor='gray', edgecolor='red', alpha=0.5))
                    idx += 2

                # plot goal
                goal = observation[1:3] + robot_pos
                ax.add_patch(
                    Rectangle(goal - np.array([0.2, 0.2]), 0.4, 0.4,
                              alpha=0.5, color=color))

                X.append(robot_pos[0])
                Y.append(robot_pos[1])
                U.append(observation[3])
                V.append(observation[4])

                # plot velocity vectors
                ax.quiver(X, Y, U, V,
                          angles='xy', scale_units='xy', scale=0.5,
                          color='red', width=0.005)

    # plot time varying states
    if param.env_name in ['SingleIntegrator', 'DoubleIntegrator']:
        for i_config in range(env.state_dim_per_agent):
            fig, ax = plotter.make_fig()
            ax.set_title(env.states_name[i_config])
            for agent in env.agents:
                for result in sim_results:
                    ax.plot(
                        times[1:result.steps],
                        result.states[1:result.steps,
                                      env.agent_idx_to_state_idx(agent.i) +
                                      i_config],
                        label=result.name)

    # plot time varying actions
    if param.env_name in ['SingleIntegrator', 'DoubleIntegrator']:
        for i_config in range(env.action_dim_per_agent):
            fig, ax = plotter.make_fig()
            ax.set_title(env.actions_name[i_config])
            for agent in env.agents:
                for result in sim_results:
                    ax.plot(
                        times[1:result.steps],
                        result.actions[1:result.steps,
                                       agent.i * env.action_dim_per_agent +
                                       i_config],
                        label=result.name)
            # if i_config == 5: ax.set_yscale('log')

    plotter.save_figs(param.plots_fn)
    plotter.open_figs(param.plots_fn)

    # visualize
    if visualize:
        # plotter.visualize(param, env, states_deeprl)
        env.visualize(sim_results[0].states[0:result.steps], 0.1)
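# The per-step plots above decode a flat per-robot observation vector by
# index arithmetic. Below is a hedged helper capturing that layout
# (header sizes 3/5 and neighbor widths 2/4 are read off the code above;
# the function name and return shape are hypothetical, not part of the
# repo):
import numpy as np


def decode_observation(observation, double_integrator=False):
    num_neighbors = int(observation[0])
    header = 5 if double_integrator else 3        # [n, goal_x, goal_y(, vx, vy)]
    per_neighbor = 4 if double_integrator else 2  # relative pos (+ relative vel)
    num_obstacles = int(
        (observation.shape[0] - header - per_neighbor * num_neighbors) / 2)
    goal = observation[1:3]                       # goal relative to the robot
    idx = header
    neighbors = [
        observation[idx + k * per_neighbor:idx + (k + 1) * per_neighbor]
        for k in range(num_neighbors)
    ]
    idx += per_neighbor * num_neighbors
    obstacles = [
        observation[idx + 2 * k:idx + 2 * k + 2] for k in range(num_obstacles)
    ]
    return goal, neighbors, obstacles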