Example No. 1
def main():

    param = Param()
    env = CityMap(param)

    if param.make_dataset_on:
        print('   making dataset...')
        train_dataset, test_dataset = datahandler.make_dataset(env)
        datahandler.write_dataset(env, train_dataset, test_dataset)

    datahandler.load_dataset(env)

    train_dataset = env.train_dataset
    test_dataset = env.test_dataset

    train_w = env.get_customer_demand(train_dataset, train_dataset[-1, 0])
    test_w = env.get_customer_demand(test_dataset, test_dataset[-1, 0])

    print(train_dataset[0, :])
    print(
        np.linalg.norm([
            train_dataset[0, 4] - train_dataset[0, 2],
            train_dataset[0, 5] - train_dataset[0, 3]
        ]))
    print(test_dataset[0, :])
    print(
        np.linalg.norm([
            test_dataset[0, 4] - test_dataset[0, 2],
            test_dataset[0, 5] - test_dataset[0, 3]
        ]))

    # dataset = [
    #     starttime [s],
    #     duration [s],
    #     x_p (pickup longitude) [degrees],
    #     y_p (pickup latitude) [degrees],
    #     x_d (dropoff longitude) [degrees],
    #     y_d (dropoff latitude) [degrees],
    # ]
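
    # Sanity-check one row against the column layout documented above.
    # (The unpacking below assumes exactly these six columns, in this order.)
    start_t, dur, x_p, y_p, x_d, y_d = train_dataset[0]
    print('request at t={:.0f}s, {:.0f}s long, '
          'pickup ({:.4f},{:.4f}) -> dropoff ({:.4f},{:.4f})'.format(
              start_t, dur, x_p, y_p, x_d, y_d))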

    print('train_dataset.shape', train_dataset.shape)
    print('test_dataset.shape', test_dataset.shape)

    print('train ave_length: ', ave_length(train_dataset))
    print('test ave_length: ', ave_length(test_dataset))

    print('train ave_duration: ', ave_duration(train_dataset))
    print('test ave_duration: ', ave_duration(test_dataset))

    print('train ave_customer_per_time: ',
          ave_customer_per_time(train_dataset))
    print('test ave_customer_per_time: ', ave_customer_per_time(test_dataset))

    print('train ave taxi speed: ', ave_taxi_speed(train_dataset))
    print('test ave taxi speed: ', ave_taxi_speed(test_dataset))

    print('train_dataset[0:1,:]:', train_dataset[0:1, :])
    print('test_dataset[0:1,:]:', test_dataset[0:1, :])

    fig, ax = plt.subplots()
    ax.plot(train_dataset[:, 0])
    ax.set_ylabel('pickup time')
    ax.set_xlabel('customer request #')

    fig, ax = plt.subplots()
    ax.plot(test_dataset[:, 0])
    ax.set_ylabel('pickup time')
    ax.set_xlabel('customer request #')

    fig, ax = plt.subplots()
    ax.plot(train_dataset[:, 1])
    ax.set_ylabel('train duration')
    ax.set_xlabel('customer request #')

    fig, ax = plt.subplots()
    ax.plot(test_dataset[:, 1])
    ax.set_ylabel('test duration')
    ax.set_xlabel('customer request #')

    fig, ax = plt.subplots()
    ax.plot(
        np.linalg.norm([
            train_dataset[:, 4] - train_dataset[:, 2],
            train_dataset[:, 5] - train_dataset[:, 3]
        ],
                       axis=0))
    ax.set_ylabel('train length')
    ax.set_xlabel('customer request #')

    fig, ax = plt.subplots()
    ax.plot(
        np.linalg.norm([
            test_dataset[:, 4] - test_dataset[:, 2],
            test_dataset[:, 5] - test_dataset[:, 3]
        ],
                       axis=0))
    ax.set_ylabel('test length')
    ax.set_xlabel('customer request #')

    print('saving and opening figs...')
    plotter.save_figs('dataset_plots.pdf')
    plotter.open_figs('dataset_plots.pdf')
    # plotting
    # (this tail assumes `times`, `states`, `steps`, `data`, and
    # `trajectory_rollout_count` were produced by an earlier rollout loop
    # that is not part of this listing)
    if True:
        env.render()

        for i_config in range(1):  # range(env.state_dim_per_agent):
            fig, ax = plotter.make_fig()
            # ax.set_title(env.states_name[i_config])
            for agent in env.agents:
                if env.good_nodes[agent.i]:
                    color = 'blue'
                else:
                    color = 'red'
                ax.plot(
                    times[0:steps],
                    states[0:steps, env.agent_idx_to_state_idx(agent.i) + i_config],
                    color=color)
            ax.axhline(
                env.desired_ave,
                label='desired',
                color='green')

    print('Number of Trajectories:', trajectory_rollout_count)
    print('Data Shape: ', data.shape)

    file = os.path.join(param.il_load_dataset, 'rl_data.npy')
    np.save(file, data)

    plotter.save_figs(param.plots_fn)
    plotter.open_figs(param.plots_fn)
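
The listing above calls `ave_length`, `ave_duration`, `ave_customer_per_time`, and `ave_taxi_speed` without defining them. A minimal sketch of what they might compute, assuming the six-column layout documented in the listing (times in seconds, positions in degrees); the names match the calls above but the bodies are guesses:

import numpy as np

def ave_duration(dataset):
    # column 1 is the trip duration in seconds
    return np.mean(dataset[:, 1])

def ave_length(dataset):
    # straight-line pickup-to-dropoff distance in degrees,
    # matching the np.linalg.norm computation in the listing
    return np.mean(np.linalg.norm(dataset[:, 4:6] - dataset[:, 2:4], axis=1))

def ave_customer_per_time(dataset):
    # requests per second over the time span of the dataset
    span = dataset[-1, 0] - dataset[0, 0]
    return dataset.shape[0] / span

def ave_taxi_speed(dataset):
    # per-trip length over duration, averaged; units: degrees per second
    lengths = np.linalg.norm(dataset[:, 4:6] - dataset[:, 2:4], axis=1)
    return np.mean(lengths / dataset[:, 1])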
Example No. 3
def sim(param, env, controllers, initial_state, visualize):
    def run_sim(controller, initial_state):
        states = np.zeros((len(times), env.n))
        actions = np.zeros((len(times) - 1, env.m))
        states[0] = env.reset(initial_state)
        reward = 0
        for step, time in enumerate(times[:-1]):
            state = states[step]
            if param.pomdp_on:
                observation = env.observe()
            else:
                observation = state

            action = controller.policy(observation)
            states[step + 1], r, done, _ = env.step(action)
            reward += r
            actions[step] = action.flatten()
            if done:
                break

        print('reward: ', reward)
        env.close()
        return states, actions, step

    # -------------------------------------------

    # environment
    times = param.sim_times
    device = "cpu"

    if initial_state is None:
        initial_state = env.reset()

    print("Initial State: ", initial_state)

    # run sim
    SimResult = namedtuple('SimResult', ['states', 'actions', 'steps', 'name'])

    sim_results = []
    for name, controller in controllers.items():
        print("Running simulation with " + name)
        if hasattr(controller, 'policy'):
            result = SimResult._make(
                run_sim(controller, initial_state) + (name, ))
        else:
            result = SimResult._make((controller.states, controller.actions,
                                      controller.steps, name))
        sim_results.append(result)

        if param.sim_render_on:
            env.render()

        # plot time varying states
        if param.single_agent_sim:
            for i in range(env.n):
                fig, ax = plotter.subplots()
                ax.set_title(env.states_name[i])
                for result in sim_results:
                    ax.plot(times[0:result.steps],
                            result.states[0:result.steps, i],
                            label=result.name)
                ax.legend()

            for i, name in enumerate(env.deduced_state_names):
                fig, ax = plotter.subplots()
                ax.set_title(name)
                for result in sim_results:
                    deduce_states = np.empty(
                        (result.steps, len(env.deduced_state_names)))
                    for j in range(result.steps):
                        deduce_states[j] = env.deduce_state(result.states[j])

                    ax.plot(times[0:result.steps],
                            deduce_states[:, i],
                            label=result.name)
                ax.legend()

            for i in range(env.m):
                fig, ax = plotter.subplots()
                ax.set_title(env.actions_name[i])
                for result in sim_results:
                    ax.plot(times[0:result.steps],
                            result.actions[0:result.steps, i],
                            label=result.name)
                ax.legend()

        elif param.multi_agent_sim:
            for i_config in range(1):  #range(env.state_dim_per_agent):
                fig, ax = plotter.make_fig()
                ax.set_title(env.states_name[i_config])
                for agent in env.agents:
                    if env.good_nodes[agent.i]:
                        color = 'blue'
                    else:
                        color = 'red'
                    for result in sim_results:
                        ax.plot(
                            times[0:result.steps],
                            result.states[0:result.steps,
                                          env.agent_idx_to_state_idx(agent.i) +
                                          i_config],
                            label=result.name,
                            color=color)
                ax.axhline(env.desired_ave, label='desired', color='green')

        # # plot state space
        # if param.multi_agent_sim:
        # 	fig,ax = plotter.make_fig()
        # 	for agent in env.agents:
        # 		ax.set_title('State Space')
        # 		for result in sim_results:
        # 			ax.plot(
        # 				result.states[0:result.steps,env.agent_idx_to_state_idx(agent.i)],
        # 				result.states[0:result.steps,env.agent_idx_to_state_idx(agent.i)+1],
        # 				label=result.name)

        # extract gains
        if param.il_controller_class in ['PID_wRef', 'PID']:
            controller = controllers['IL']
            for result in sim_results:
                if result.name == 'IL':
                    break
            kp, kd = util.extract_gains(controller,
                                        result.states[0:result.steps])
            fig, ax = plotter.plot(times[1:result.steps],
                                   kp[0:result.steps, 0],
                                   title='Kp pos')
            fig, ax = plotter.plot(times[1:result.steps],
                                   kp[0:result.steps, 1],
                                   title='Kp theta')
            fig, ax = plotter.plot(times[1:result.steps],
                                   kd[0:result.steps, 0],
                                   title='Kd pos')
            fig, ax = plotter.plot(times[1:result.steps],
                                   kd[0:result.steps, 1],
                                   title='Kd theta')

        # extract reference trajectory
        if param.il_controller_class in ['PID_wRef', 'Ref']:
            controller = controllers['IL']
            for result in sim_results:
                if result.name == 'IL':
                    break
            ref_state = util.extract_ref_state(controller, result.states)
            for i in range(env.n):
                fig, ax = plotter.plot(times[1:result.steps + 1],
                                       ref_state[0:result.steps, i],
                                       title="ref " + env.states_name[i])

    plotter.save_figs(param.plots_fn)
    plotter.open_figs(param.plots_fn)

    # visualize
    if visualize:
        # plotter.visualize(param, env, states_deeprl)
        env.visualize(sim_results[0].states[0:result.steps], 0.1)
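
`sim` accepts either a live controller (anything exposing a `policy(observation)` method) or a pre-recorded rollout exposing `states`, `actions`, and `steps`. A minimal sketch of both interfaces under those assumptions; the class names here are illustrative, not from the source:

import numpy as np

class RandomController:
    # live controller: sim calls policy(observation) once per step
    def __init__(self, m):
        self.m = m

    def policy(self, observation):
        return np.random.uniform(-1.0, 1.0, (self.m, 1))

class ReplayController:
    # pre-recorded rollout: sim reads these attributes directly
    def __init__(self, states, actions):
        self.states = states
        self.actions = actions
        self.steps = len(actions)

With these, `sim(param, env, {'Random': RandomController(env.m)}, None, visualize=False)` would exercise the `hasattr(controller, 'policy')` branch, and a `ReplayController` instance the other.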
Example No. 4
def main():

	param = Param()

	param.desired_env_ncell = 2000
	param.xmin_thresh = None
	param.xmax_thresh = None
	param.ymin_thresh = None
	param.ymax_thresh = None

	param.update()

	env = CityMap(param)

	# init datasets
	if param.make_dataset_on:
		print('   making dataset...')
		train_dataset, test_dataset = datahandler.make_dataset(env)
		datahandler.write_dataset(env, train_dataset, test_dataset)
	print('   loading dataset...')
	datahandler.load_dataset(env)

	# env.init_map()

	# fig,ax=plotter.make_fig()
	# env.print_map(fig=fig,ax=ax)

	train_heatmap = env.heatmap(env.train_dataset)
	test_heatmap = env.heatmap(env.test_dataset)

	fig = plt.figure()
	gs = fig.add_gridspec(2,3)
	ax1 = fig.add_subplot(gs[0, 0])
	ax2 = fig.add_subplot(gs[0, 1])
	ax3 = fig.add_subplot(gs[0, 2])
	ax4 = fig.add_subplot(gs[1, :])

	plotter.plot_heatmap(env,train_heatmap,ax1)
	plotter.plot_heatmap(env,train_heatmap,ax2)
	plotter.plot_heatmap(env,test_heatmap,ax3)
	plotter.plot_cell_demand_over_time(env, ax4)
	
	ax2.set_ylim(41.85, 42.)
	ax2.set_xlim(-87.7, -87.6)

	ax3.set_ylim(41.85, 42.)
	ax3.set_xlim(-87.7, -87.6)

	ax1.axis("off") 
	ax2.axis("off")
	ax3.axis("off")

	ax1.set_title('Chicago',fontsize=10)
	ax2.set_title('October 29, 2016',fontsize=7.5)
	ax3.set_title('October 30, 2016',fontsize=7.5)

	# ax1.get_yaxis().set_visible(False)
	# ax2.get_xaxis().set_visible(False)
	# ax2.get_yaxis().set_visible(False)


	print('saving and opening figs...')
	plotter.save_figs(param.plot_fn)
	plotter.open_figs(param.plot_fn)
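
`env.heatmap` is not shown here. A plausible sketch, assuming it bins pickup longitude/latitude (dataset columns 2 and 3) over the map's bounding box; the `env.param.xmin`-style attribute names are hypothetical:

import numpy as np

def heatmap(env, dataset, nbins=100):
    # 2D histogram of pickup locations over the map extent
    counts, _, _ = np.histogram2d(
        dataset[:, 2], dataset[:, 3], bins=nbins,
        range=[[env.param.xmin, env.param.xmax],
               [env.param.ymin, env.param.ymax]])
    return counts.T  # rows index latitude, as imshow expects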
Example No. 5
def sim(param, env, controllers, initial_state, visualize):

    # environment
    times = param.sim_times
    device = "cpu"

    if initial_state is None:
        initial_state = env.reset()

    # run sim
    SimResult = namedtuple(
        'SimResult', ['states', 'observations', 'actions', 'steps', 'name'])

    sim_results = []
    for name, controller in controllers.items():
        print("Running simulation with " + name)
        print("Initial State: ", initial_state)
        if hasattr(controller, 'policy'):
            result = SimResult._make(
                run_sim(param, env, controller, initial_state) + (name, ))
        else:
            observations = []
            result = SimResult._make(
                (controller.states, observations, controller.actions,
                 controller.steps, name))
        sim_results.append(result)

        # plot state space
        if param.env_name in [
                'SingleIntegrator', 'SingleIntegratorVelSensing',
                'DoubleIntegrator'
        ]:
            fig, ax = plotter.make_fig()
            ax.set_title('State Space')
            ax.set_aspect('equal')

            for o in env.obstacles:
                ax.add_patch(
                    Rectangle(o, 1.0, 1.0, facecolor='gray', alpha=0.5))

            for agent in env.agents:

                line = ax.plot(
                    result.states[0:result.steps,
                                  env.agent_idx_to_state_idx(agent.i)],
                    result.states[0:result.steps,
                                  env.agent_idx_to_state_idx(agent.i) + 1],
                    alpha=0.5)
                color = line[0].get_color()

                # plot velocity vectors:
                X = []
                Y = []
                U = []
                V = []
                for k in np.arange(0, result.steps, 100):
                    X.append(
                        result.states[k,
                                      env.agent_idx_to_state_idx(agent.i)])
                    Y.append(
                        result.states[k,
                                      env.agent_idx_to_state_idx(agent.i) + 1])
                    if param.env_name in [
                            'SingleIntegrator', 'SingleIntegratorVelSensing'
                    ]:
                        # Singleintegrator: plot actions
                        U.append(result.actions[k, 2 * agent.i + 0])
                        V.append(result.actions[k, 2 * agent.i + 1])
                    elif param.env_name in ['DoubleIntegrator']:
                        # doubleintegrator: plot velocities
                        U.append(
                            result.states[k,
                                          env.agent_idx_to_state_idx(agent.i) +
                                          2])
                        V.append(
                            result.states[k,
                                          env.agent_idx_to_state_idx(agent.i) +
                                          3])

                ax.quiver(X,
                          Y,
                          U,
                          V,
                          angles='xy',
                          scale_units='xy',
                          scale=0.5,
                          color=color,
                          width=0.005)

                # num_obstacles = (result.observations.shape[1] - result.observations[0][0] - 5) / 2
                # print(num_obstacles)
                #
                # print(result.observations)

                plotter.plot_circle(
                    result.states[1, env.agent_idx_to_state_idx(agent.i)],
                    result.states[1,
                                  env.agent_idx_to_state_idx(agent.i) + 1],
                    param.r_agent,
                    fig=fig,
                    ax=ax,
                    color=color)
                # plotter.plot_square(result.states[-1,env.agent_idx_to_state_idx(agent.i)],
                # 	result.states[-1,env.agent_idx_to_state_idx(agent.i)+1],param.r_agent,fig=fig,ax=ax,color=color)
                plotter.plot_square(agent.s_g[0],
                                    agent.s_g[1],
                                    param.r_agent,
                                    angle=45,
                                    fig=fig,
                                    ax=ax,
                                    color=color)

            # draw state for each time step
            robot = 1
            if param.env_name in [
                    'SingleIntegrator', 'SingleIntegratorVelSensing'
            ]:
                for step in np.arange(0, result.steps, 1000):
                    fig, ax = plotter.make_fig()
                    ax.set_title('State at t={} for robot={}'.format(
                        times[step], robot))
                    ax.set_aspect('equal')

                    # plot all obstacles
                    for o in env.obstacles:
                        ax.add_patch(
                            Rectangle(o, 1.0, 1.0, facecolor='gray',
                                      alpha=0.5))

                    # plot overall trajectory
                    line = ax.plot(
                        result.states[0:result.steps,
                                      env.agent_idx_to_state_idx(robot)],
                        result.states[0:result.steps,
                                      env.agent_idx_to_state_idx(robot) + 1],
                        "--")
                    color = line[0].get_color()

                    # plot current position
                    plotter.plot_circle(
                        result.states[step,
                                      env.agent_idx_to_state_idx(robot)],
                        result.states[step,
                                      env.agent_idx_to_state_idx(robot) + 1],
                        param.r_agent,
                        fig=fig,
                        ax=ax,
                        color=color)

                    # plot current observation
                    observation = result.observations[step][robot][0]
                    num_neighbors = int(observation[0])
                    num_obstacles = int(
                        (observation.shape[0] - 3 - 2 * num_neighbors) / 2)

                    robot_pos = result.states[
                        step,
                        env.agent_idx_to_state_idx(robot):env.
                        agent_idx_to_state_idx(robot) + 2]

                    idx = 3
                    for i in range(num_neighbors):
                        pos = observation[idx:idx + 2] + robot_pos
                        ax.add_patch(
                            Circle(pos,
                                   0.25,
                                   facecolor='gray',
                                   edgecolor='red',
                                   alpha=0.5))
                        idx += 2

                    for i in range(num_obstacles):
                        # pos = observation[idx : idx+2] + robot_pos - np.array([0.5,0.5])
                        # ax.add_patch(Rectangle(pos, 1.0, 1.0, facecolor='gray', edgecolor='red', alpha=0.5))
                        pos = observation[idx:idx + 2] + robot_pos
                        ax.add_patch(
                            Circle(pos,
                                   0.5,
                                   facecolor='gray',
                                   edgecolor='red',
                                   alpha=0.5))
                        idx += 2

                    # plot goal
                    goal = observation[1:3] + robot_pos
                    ax.add_patch(
                        Rectangle(goal - np.array([0.2, 0.2]),
                                  0.4,
                                  0.4,
                                  alpha=0.5,
                                  color=color))

                # 	# import matplotlib.pyplot as plt
                # 	# plt.savefig("test.svg")
                # 	# exit()
            elif param.env_name in ['DoubleIntegrator']:
                for step in np.arange(0, result.steps, 1000):
                    fig, ax = plotter.make_fig()
                    ax.set_title('State at t={} for robot={}'.format(
                        times[step], robot))
                    ax.set_aspect('equal')

                    # plot all obstacles
                    for o in env.obstacles:
                        ax.add_patch(
                            Rectangle(o, 1.0, 1.0, facecolor='gray',
                                      alpha=0.5))

                    # plot overall trajectory
                    line = ax.plot(
                        result.states[0:result.steps,
                                      env.agent_idx_to_state_idx(robot)],
                        result.states[0:result.steps,
                                      env.agent_idx_to_state_idx(robot) + 1],
                        "--")
                    color = line[0].get_color()

                    # plot current position
                    plotter.plot_circle(
                        result.states[step,
                                      env.agent_idx_to_state_idx(robot)],
                        result.states[step,
                                      env.agent_idx_to_state_idx(robot) + 1],
                        param.r_agent,
                        fig=fig,
                        ax=ax,
                        color=color)

                    # plot current observation
                    observation = result.observations[step][robot][0]
                    num_neighbors = int(observation[0])
                    num_obstacles = int(
                        (observation.shape[0] - 5 - 4 * num_neighbors) / 2)

                    robot_pos = result.states[
                        step,
                        env.agent_idx_to_state_idx(robot):env.
                        agent_idx_to_state_idx(robot) + 2]

                    X = []
                    Y = []
                    U = []
                    V = []

                    idx = 5
                    for i in range(num_neighbors):
                        pos = observation[idx:idx + 2] + robot_pos
                        X.append(pos[0])
                        Y.append(pos[1])
                        U.append(observation[idx + 2])
                        V.append(observation[idx + 3])
                        # print(np.linalg.norm(observation[idx+2:idx+4]))
                        ax.add_patch(
                            Circle(pos,
                                   param.r_agent,
                                   facecolor='gray',
                                   edgecolor='red',
                                   alpha=0.5))
                        idx += 4

                    for i in range(num_obstacles):
                        pos = observation[idx:idx + 2] + robot_pos - np.array(
                            [0.5, 0.5])
                        ax.add_patch(
                            Rectangle(pos,
                                      1.0,
                                      1.0,
                                      facecolor='gray',
                                      edgecolor='red',
                                      alpha=0.5))
                        # pos = observation[idx : idx+2] + robot_pos
                        # ax.add_patch(Circle(pos, 0.5, facecolor='gray', edgecolor='red', alpha=0.5))
                        idx += 2

                    # plot goal
                    goal = observation[1:3] + robot_pos
                    ax.add_patch(
                        Rectangle(goal - np.array([0.2, 0.2]),
                                  0.4,
                                  0.4,
                                  alpha=0.5,
                                  color=color))
                    X.append(robot_pos[0])
                    Y.append(robot_pos[1])
                    U.append(observation[3])
                    V.append(observation[4])

                    # plot velocity vectors
                    ax.quiver(X,
                              Y,
                              U,
                              V,
                              angles='xy',
                              scale_units='xy',
                              scale=0.5,
                              color='red',
                              width=0.005)

        elif param.sim_render_on:
            env.render()

        # plot time varying states
        if param.single_agent_sim:
            for i in range(env.n):
                fig, ax = plotter.subplots()
                ax.set_title(env.states_name[i])
                for result in sim_results:
                    ax.plot(times[0:result.steps],
                            result.states[0:result.steps, i],
                            label=result.name)
                ax.legend()

            for i, state_name in enumerate(env.deduced_state_names):
                fig, ax = plotter.subplots()
                ax.set_title(state_name)
                for result in sim_results:
                    deduce_states = np.empty(
                        (result.steps, len(env.deduced_state_names)))
                    for j in range(result.steps):
                        deduce_states[j] = env.deduce_state(result.states[j])

                    ax.plot(times[0:result.steps],
                            deduce_states[:, i],
                            label=result.name)
                ax.legend()

            for i in range(env.m):
                fig, ax = plotter.subplots()
                ax.set_title(env.actions_name[i])
                for result in sim_results:
                    ax.plot(times[0:result.steps],
                            result.actions[0:result.steps, i],
                            label=result.name)
                ax.legend()

        elif param.env_name == 'Consensus':
            for i_config in range(1):  #range(env.state_dim_per_agent):
                fig, ax = plotter.make_fig()
                # ax.set_title(env.states_name[i_config])
                for agent in env.agents:
                    if env.good_nodes[agent.i]:
                        color = 'blue'
                    else:
                        color = 'red'
                    for result in sim_results:
                        ax.plot(
                            times[0:result.steps],
                            result.states[0:result.steps,
                                          env.agent_idx_to_state_idx(agent.i) +
                                          i_config],
                            label=result.name,
                            color=color)
                ax.axhline(env.desired_ave, label='desired', color='green')

        elif param.env_name in ['SingleIntegrator', 'DoubleIntegrator']:
            for i_config in range(env.state_dim_per_agent):
                fig, ax = plotter.make_fig()
                ax.set_title(env.states_name[i_config])
                for agent in env.agents:
                    for result in sim_results:
                        ax.plot(
                            times[1:result.steps],
                            result.states[1:result.steps,
                                          env.agent_idx_to_state_idx(agent.i) +
                                          i_config],
                            label=result.name)

        # plot time varying actions
        if param.env_name in ['SingleIntegrator', 'DoubleIntegrator']:
            for i_config in range(env.action_dim_per_agent):
                fig, ax = plotter.make_fig()
                ax.set_title(env.actions_name[i_config])
                for agent in env.agents:
                    for result in sim_results:
                        ax.plot(
                            times[1:result.steps],
                            result.actions[1:result.steps,
                                           agent.i * env.action_dim_per_agent +
                                           i_config],
                            label=result.name)

        # extract gains
        if name in ['PID_wRef', 'PID']:
            controller = controllers['IL']
            for result in sim_results:
                if result.name == 'IL':
                    break
            kp, kd = util.extract_gains(controller,
                                        result.states[0:result.steps])
            fig, ax = plotter.plot(times[1:result.steps],
                                   kp[0:result.steps, 0],
                                   title='Kp pos')
            fig, ax = plotter.plot(times[1:result.steps],
                                   kp[0:result.steps, 1],
                                   title='Kp theta')
            fig, ax = plotter.plot(times[1:result.steps],
                                   kd[0:result.steps, 0],
                                   title='Kd pos')
            fig, ax = plotter.plot(times[1:result.steps],
                                   kd[0:result.steps, 1],
                                   title='Kd theta')

        # extract reference trajectory
        if name in ['PID_wRef', 'Ref']:
            controller = controllers['IL']
            for result in sim_results:
                if result.name == 'IL':
                    break
            ref_state = util.extract_ref_state(controller, result.states)
            for i in range(env.n):
                fig, ax = plotter.plot(times[1:result.steps + 1],
                                       ref_state[0:result.steps, i],
                                       title="ref " + env.states_name[i])

        # extract belief topology
        if name in ['IL'] and param.env_name == 'Consensus':
            for result in sim_results:
                if result.name == 'IL':
                    break

            fig, ax = plotter.make_fig()
            belief_topology = util.extract_belief_topology(
                controller, result.observations)
            label_on = True
            if param.n_agents * param.n_agents > 10:
                label_on = False
            for i_agent in range(param.n_agents):
                for j_agent in range(param.n_agents):
                    if not i_agent == j_agent and env.good_nodes[i_agent]:
                        if label_on:
                            plotter.plot(times[0:result.steps + 1],
                                         belief_topology[:, i_agent, j_agent],
                                         fig=fig,
                                         ax=ax,
                                         label="K:{}{}".format(
                                             i_agent, j_agent))
                        else:
                            plotter.plot(times[0:result.steps + 1],
                                         belief_topology[:, i_agent, j_agent],
                                         fig=fig,
                                         ax=ax)

    plotter.save_figs(param.plots_fn)
    plotter.open_figs(param.plots_fn)

    # visualize
    if visualize:
        # plotter.visualize(param, env, states_deeprl)
        env.visualize(sim_results[0].states[0:result.steps], 0.1)
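
The observation indexing above (goal at `[1:3]`, neighbors starting at index 3 with two values each, obstacles filling the rest) implies a flat layout for the single-integrator case. A decoding helper reconstructed from those offsets; the layout is inferred from this plotting code, not taken from the environment source:

import numpy as np

def decode_single_integrator_obs(observation):
    # inferred layout: [num_neighbors, goal_dx, goal_dy,
    #                   (dx, dy) per neighbor, (dx, dy) per obstacle]
    nn = int(observation[0])
    goal = observation[1:3]
    neighbors = observation[3:3 + 2 * nn].reshape(nn, 2)
    obstacles = observation[3 + 2 * nn:].reshape(-1, 2)
    return goal, neighbors, obstacles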
Example No. 6
	# (fragment: the matching `if` branch, presumably the macro-sim case,
	# is not part of this listing)
	else:
		# micro sim 
		# 	- plot sim 
		if default_param.plot_sim_over_time:
			for sim_result in sim_results:
				controller_name = sim_result["controller_name"]
				times = sim_result["times"]
				plotter.sim_plot_over_time(controller_name,sim_result,times)
		else:
			for sim_result in sim_results:
				for timestep,time in enumerate(sim_result["times"]):
					controller_name = sim_result["controller_name"]
					plotter.render(controller_name,sim_result,timestep)
					# break
				
		# 	- plot reward
		plotter.plot_cumulative_reward(sim_results)

		# 	- plot q value estimation 
		for controller_name in default_param.controller_names:
			if 'bellman' in controller_name:
				plotter.plot_q_error(sim_results)
				break 

	print('saving and opening figs...')
	plotter.save_figs(default_param.plot_fn)
	plotter.open_figs(default_param.plot_fn)
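
Each `sim_result` in this fragment is read as a dict. From the keys it touches, a minimal sketch of the expected shape; anything beyond these two keys is whatever the plotter functions consume:

sim_result = {
    "controller_name": "bellman",  # matched against default_param.controller_names
    "times": [0.0, 0.1, 0.2],      # one entry per timestep; indexes render()
}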
Example No. 7
def sim(param, env, controllers, initial_state, visualize):

    # environment
    times = param.sim_times
    device = "cpu"

    if initial_state is None:
        initial_state = env.reset()

    # run sim
    SimResult = namedtuple(
        'SimResult', ['states', 'observations', 'actions', 'steps', 'name'])

    sim_results = []
    for name, controller in controllers.items():
        print("Running simulation with " + name)
        print("Initial State: ", initial_state)
        if hasattr(controller, 'policy'):
            result = SimResult._make(
                run_sim(param, env, controller, initial_state) + (name, ))
        else:
            observations = []
            result = SimResult._make(
                (controller.states, observations, controller.actions,
                 controller.steps, name))
        sim_results.append(result)

        # plot state space
        if param.env_name in [
                'SingleIntegrator', 'SingleIntegratorVelSensing',
                'DoubleIntegrator'
        ]:
            fig, ax = plotter.make_fig()
            ax.set_title('State Space')
            ax.set_aspect('equal')

            for o in env.obstacles:
                ax.add_patch(
                    Rectangle(o, 1.0, 1.0, facecolor='gray', alpha=0.5))

            for agent in env.agents:

                line = ax.plot(
                    result.states[0:result.steps,
                                  env.agent_idx_to_state_idx(agent.i)],
                    result.states[0:result.steps,
                                  env.agent_idx_to_state_idx(agent.i) + 1],
                    alpha=0.5)
                color = line[0].get_color()

                # plot velocity vectors:
                X = []
                Y = []
                U = []
                V = []
                for k in np.arange(0, result.steps, 100):
                    X.append(
                        result.states[k,
                                      env.agent_idx_to_state_idx(agent.i)])
                    Y.append(
                        result.states[k,
                                      env.agent_idx_to_state_idx(agent.i) + 1])
                    if param.env_name in [
                            'SingleIntegrator', 'SingleIntegratorVelSensing'
                    ]:
                        # Singleintegrator: plot actions
                        U.append(result.actions[k, 2 * agent.i + 0])
                        V.append(result.actions[k, 2 * agent.i + 1])
                    elif param.env_name in ['DoubleIntegrator']:
                        # doubleintegrator: plot velocities
                        U.append(
                            result.states[k,
                                          env.agent_idx_to_state_idx(agent.i) +
                                          2])
                        V.append(
                            result.states[k,
                                          env.agent_idx_to_state_idx(agent.i) +
                                          3])

                ax.quiver(X,
                          Y,
                          U,
                          V,
                          angles='xy',
                          scale_units='xy',
                          scale=0.5,
                          color=color,
                          width=0.005)
                plotter.plot_circle(
                    result.states[1, env.agent_idx_to_state_idx(agent.i)],
                    result.states[1,
                                  env.agent_idx_to_state_idx(agent.i) + 1],
                    param.r_agent,
                    fig=fig,
                    ax=ax,
                    color=color)
                plotter.plot_square(agent.s_g[0],
                                    agent.s_g[1],
                                    param.r_agent,
                                    angle=45,
                                    fig=fig,
                                    ax=ax,
                                    color=color)

            # draw state for each time step
            robot = 0
            if param.env_name in ['SingleIntegrator']:
                for step in np.arange(0, result.steps, 1000):
                    fig, ax = plotter.make_fig()
                    ax.set_title('State at t={} for robot={}'.format(
                        times[step], robot))
                    ax.set_aspect('equal')

                    # plot all obstacles
                    for o in env.obstacles:
                        ax.add_patch(
                            Rectangle(o, 1.0, 1.0, facecolor='gray',
                                      alpha=0.5))

                    # plot overall trajectory
                    line = ax.plot(
                        result.states[0:result.steps,
                                      env.agent_idx_to_state_idx(robot)],
                        result.states[0:result.steps,
                                      env.agent_idx_to_state_idx(robot) + 1],
                        "--")
                    color = line[0].get_color()

                    # plot current position
                    plotter.plot_circle(
                        result.states[step,
                                      env.agent_idx_to_state_idx(robot)],
                        result.states[step,
                                      env.agent_idx_to_state_idx(robot) + 1],
                        param.r_agent,
                        fig=fig,
                        ax=ax,
                        color=color)

                    # plot current observation
                    observation = result.observations[step][robot][0]
                    num_neighbors = int(observation[0])
                    num_obstacles = int(
                        (observation.shape[0] - 3 - 2 * num_neighbors) / 2)

                    robot_pos = result.states[
                        step,
                        env.agent_idx_to_state_idx(robot):env.
                        agent_idx_to_state_idx(robot) + 2]

                    idx = 3
                    for i in range(num_neighbors):
                        pos = observation[idx:idx + 2] + robot_pos
                        ax.add_patch(
                            Circle(pos,
                                   0.25,
                                   facecolor='gray',
                                   edgecolor='red',
                                   alpha=0.5))
                        idx += 2

                    for i in range(num_obstacles):
                        # pos = observation[idx : idx+2] + robot_pos - np.array([0.5,0.5])
                        # ax.add_patch(Rectangle(pos, 1.0, 1.0, facecolor='gray', edgecolor='red', alpha=0.5))
                        pos = observation[idx:idx + 2] + robot_pos
                        ax.add_patch(
                            Circle(pos,
                                   0.5,
                                   facecolor='gray',
                                   edgecolor='red',
                                   alpha=0.5))
                        idx += 2

                    # plot goal
                    goal = observation[1:3] + robot_pos
                    ax.add_patch(
                        Rectangle(goal - np.array([0.2, 0.2]),
                                  0.4,
                                  0.4,
                                  alpha=0.5,
                                  color=color))

                # 	# import matplotlib.pyplot as plt
                # 	# plt.savefig("test.svg")
                # 	# exit()
            elif param.env_name in ['DoubleIntegrator']:
                for step in np.arange(0, result.steps, 1000):
                    fig, ax = plotter.make_fig()
                    ax.set_title('State at t={} for robot={}'.format(
                        times[step], robot))
                    ax.set_aspect('equal')

                    # plot all obstacles
                    for o in env.obstacles:
                        ax.add_patch(
                            Rectangle(o, 1.0, 1.0, facecolor='gray',
                                      alpha=0.5))

                    # plot overall trajectory
                    line = ax.plot(
                        result.states[0:result.steps,
                                      env.agent_idx_to_state_idx(robot)],
                        result.states[0:result.steps,
                                      env.agent_idx_to_state_idx(robot) + 1],
                        "--")
                    color = line[0].get_color()

                    # plot current position
                    plotter.plot_circle(
                        result.states[step,
                                      env.agent_idx_to_state_idx(robot)],
                        result.states[step,
                                      env.agent_idx_to_state_idx(robot) + 1],
                        param.r_agent,
                        fig=fig,
                        ax=ax,
                        color=color)

                    # plot current observation
                    observation = result.observations[step][robot][0]
                    num_neighbors = int(observation[0])
                    num_obstacles = int(
                        (observation.shape[0] - 5 - 4 * num_neighbors) / 2)

                    robot_pos = result.states[
                        step,
                        env.agent_idx_to_state_idx(robot):env.
                        agent_idx_to_state_idx(robot) + 2]

                    X = []
                    Y = []
                    U = []
                    V = []

                    idx = 5
                    for i in range(num_neighbors):
                        pos = observation[idx:idx + 2] + robot_pos
                        X.append(pos[0])
                        Y.append(pos[1])
                        U.append(observation[idx + 2])
                        V.append(observation[idx + 3])
                        # print(np.linalg.norm(observation[idx+2:idx+4]))
                        ax.add_patch(
                            Circle(pos,
                                   param.r_agent,
                                   facecolor='gray',
                                   edgecolor='red',
                                   alpha=0.5))
                        idx += 4

                    for i in range(num_obstacles):
                        pos = observation[idx:idx + 2] + robot_pos - np.array(
                            [0.5, 0.5])
                        ax.add_patch(
                            Rectangle(pos,
                                      1.0,
                                      1.0,
                                      facecolor='gray',
                                      edgecolor='red',
                                      alpha=0.5))
                        # pos = observation[idx : idx+2] + robot_pos
                        # ax.add_patch(Circle(pos, 0.5, facecolor='gray', edgecolor='red', alpha=0.5))
                        idx += 2

                    # plot goal
                    goal = observation[1:3] + robot_pos
                    ax.add_patch(
                        Rectangle(goal - np.array([0.2, 0.2]),
                                  0.4,
                                  0.4,
                                  alpha=0.5,
                                  color=color))
                    X.append(robot_pos[0])
                    Y.append(robot_pos[1])
                    U.append(observation[3])
                    V.append(observation[4])

                    # plot velocity vectors
                    ax.quiver(X,
                              Y,
                              U,
                              V,
                              angles='xy',
                              scale_units='xy',
                              scale=0.5,
                              color='red',
                              width=0.005)

        # plot time varying states
        if param.env_name in ['SingleIntegrator', 'DoubleIntegrator']:
            for i_config in range(env.state_dim_per_agent):
                fig, ax = plotter.make_fig()
                ax.set_title(env.states_name[i_config])
                for agent in env.agents:
                    for result in sim_results:
                        ax.plot(
                            times[1:result.steps],
                            result.states[1:result.steps,
                                          env.agent_idx_to_state_idx(agent.i) +
                                          i_config],
                            label=result.name)

        # plot time varying actions
        if param.env_name in ['SingleIntegrator', 'DoubleIntegrator']:
            for i_config in range(env.action_dim_per_agent):
                fig, ax = plotter.make_fig()
                ax.set_title(env.actions_name[i_config])
                for agent in env.agents:
                    for result in sim_results:
                        ax.plot(
                            times[1:result.steps],
                            result.actions[1:result.steps,
                                           agent.i * env.action_dim_per_agent +
                                           i_config],
                            label=result.name)

                        # use a log scale for the sixth action component
                        if i_config == 5:
                            ax.set_yscale('log')

    plotter.save_figs(param.plots_fn)
    plotter.open_figs(param.plots_fn)

    # visualize
    if visualize:
        # plotter.visualize(param, env, states_deeprl)
        env.visualize(sim_results[0].states[0:result.steps], 0.1)
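
For the double-integrator case the offsets differ: the robot's own velocity sits at `[3:5]`, neighbors start at index 5 with four values each (relative position and velocity), and obstacles fill the rest with two values each. The corresponding decoding sketch, again inferred from this plotting code rather than the environment source:

import numpy as np

def decode_double_integrator_obs(observation):
    # inferred layout: [num_neighbors, goal_dx, goal_dy, own_vx, own_vy,
    #                   (dx, dy, vx, vy) per neighbor, (dx, dy) per obstacle]
    nn = int(observation[0])
    goal = observation[1:3]
    own_vel = observation[3:5]
    neighbors = observation[5:5 + 4 * nn].reshape(nn, 4)
    obstacles = observation[5 + 4 * nn:].reshape(-1, 2)
    return goal, own_vel, neighbors, obstacles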