print("Evaluation Number : {}".format(evaluation_number)) local_env = remote_client.env solver = r2_solver.Solver(evaluation_number) time_taken_by_controller = [] time_taken_per_step = [] steps = 0 while True: time_start = time.time() moves = solver.GetMoves(local_env.agents, observation) time_taken = time.time() - time_start time_taken_by_controller.append(time_taken) time_start = time.time() observation, all_rewards, done, info = remote_client.env_step(moves) steps += 1 time_taken = time.time() - time_start time_taken_per_step.append(time_taken) if done['__all__']: print("Reward : ", sum(list(all_rewards.values()))) break np_time_taken_by_controller = np.array(time_taken_by_controller) np_time_taken_per_step = np.array(time_taken_per_step) print("="*100) print("="*100) print("Evaluation Number : ", evaluation_number) print("Current Env Path : ", remote_client.current_env_path) print("Env Creation Time : ", env_creation_time)
            nb_hit += 1
        else:
            # otherwise, run normalization and inference
            norm_obs = normalize_observation(observation[agent],
                                             tree_depth=observation_tree_depth,
                                             observation_radius=observation_radius)
            action = policy.act(norm_obs, eps=0.0)

        action_dict[agent] = action

        if USE_ACTION_CACHE:
            agent_last_obs[agent] = observation[agent]
            agent_last_action[agent] = action

    agent_time = time.time() - time_start
    time_taken_by_controller.append(agent_time)

    time_start = time.time()
    _, all_rewards, done, info = remote_client.env_step(action_dict)
    step_time = time.time() - time_start
    time_taken_per_step.append(step_time)

    time_start = time.time()
    observation = tree_observation.get_many(list(range(nb_agents)))
    obs_time = time.time() - time_start

else:
    # Fully deadlocked: perform no-ops
    no_ops_mode = True

    time_start = time.time()
    _, all_rewards, done, info = remote_client.env_step({})
    step_time = time.time() - time_start
    time_taken_per_step.append(step_time)
        else:
            action_dict.update({
                cur_conflict_agent: next_shortest_actions[cur_conflict_agent]
            })

    time_taken = time.time() - time_start
    time_taken_by_controller.append(time_taken)

    # Perform the chosen actions on the environment.
    # The actions are applied to both the local and the remote copy of the
    # environment instance: the observation is returned by the local copy,
    # while the rewards, done and info come from the remote copy.
    time_start = time.time()
    observation, all_rewards, done, info = remote_client.env_step(action_dict)
    # print("Rewards: ", all_rewards, " [done=", done, "]")

    for a in range(env.get_num_agents()):
        score += all_rewards[a] / env.get_num_agents()

    steps += 1
    time_taken = time.time() - time_start
    time_taken_per_step.append(time_taken)

    if done['__all__'] or steps >= max_steps:
        print("Reward : ", sum(list(all_rewards.values())))
        # When done['__all__'] == True, the evaluation of this
        # particular Env instantiation is complete, and we can break out
    # #####################################################################
    # Compute the action for this step by using the previously
    # defined controller
    time_start = time.time()
    action, _ = controller.act(observation)
    time_taken = time.time() - time_start
    time_taken_by_controller.append(time_taken)

    # Perform the chosen action on the environment.
    # The action is applied to both the local and the remote copy of the
    # environment instance: the observation is returned by the local copy,
    # while the rewards, done and info come from the remote copy.
    time_start = time.time()
    observation, all_rewards, done, _ = remote_client.env_step(action)
    steps += 1
    time_taken = time.time() - time_start
    time_taken_per_step.append(time_taken)

    if RENDER:
        env_renderer.render_env(show=True, show_observations=True, show_predictions=True)

    if done['__all__']:
        print("Reward : ", sum(list(all_rewards.values())))
        # When done['__all__'] == True, the evaluation of this
        # particular Env instantiation is complete, and we can break out
        # of this loop and move on to the next Env evaluation
env_creation_time = time.time() - time_start
print("Evaluation Number : {}".format(evaluation_number))

local_env = remote_client.env
number_of_agents = len(local_env.agents)
time_taken_by_controller = []
time_taken_per_step = []
steps = 0

# First step: make every agent move forward (RailEnv action 2)
for a in range(number_of_agents):
    action = 2
    railenv_action_dict.update({a: action})
obs, all_rewards, done, info = remote_client.env_step(railenv_action_dict)

while True:  # Evaluation of a single episode
    time_start = time.time()

    # Pick actions
    for a in range(number_of_agents):
        if info['action_required'][a]:
            network_action = controller.act(obs[a])
            railenv_action = observation_builder.choose_railenv_action(a, network_action)
        else:
            railenv_action = 0
        railenv_action_dict.update({a: railenv_action})

    time_taken = time.time() - time_start
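The integer actions used above (2 for the initial step, 0 when no action is required) refer to the RailEnvActions values from flatland-rl. A quick sketch to print the mapping, assuming the standard flatland.envs.rail_env module is importable:

from flatland.envs.rail_env import RailEnvActions

# Expected mapping: 0 DO_NOTHING, 1 MOVE_LEFT, 2 MOVE_FORWARD, 3 MOVE_RIGHT, 4 STOP_MOVING
for action in RailEnvActions:
    print(int(action), action.name)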
time_taken_by_controller = []
time_taken_per_step = []
steps = 0
env_renderer = RenderTool(env)

while True:
    time_start = time.time()
    _action = observation_builder.get_action_dict_safety(obs)
    time_taken = time.time() - time_start
    time_taken_by_controller.append(time_taken)

    time_start = time.time()
    next_obs, all_rewards, done, _ = remote_client.env_step(_action)
    time_taken = time.time() - time_start
    time_taken_per_step.append(time_taken)

    obs = next_obs
    steps += 1

    if obs is None or done['__all__']:
        break

np_time_taken_by_controller = np.array(time_taken_by_controller)
np_time_taken_per_step = np.array(time_taken_per_step)
print("=" * 100)
print("=" * 100)
print("Done Status : ", done)
print("Evaluation Number : ", evaluation_number)
def evaluate_remote():
    remote_client = FlatlandRemoteClient()
    my_observation_builder = SimpleObservation(max_depth=3,
                                               neighbours_depth=3,
                                               timetable=Judge(LinearOnAgentNumberSizeGenerator(0.03, 5),
                                                               lr=0,
                                                               batch_size=0,
                                                               optimization_epochs=0,
                                                               device=torch.device("cpu")),
                                               deadlock_checker=DeadlockChecker(),
                                               greedy_checker=GreedyChecker(),
                                               parallel=False,
                                               eval=True)
    params = torch.load("generated/params.torch")
    params.neighbours_depth = my_observation_builder.neighbours_depth
    controller = PPOController(params, torch.device("cpu"))
    controller.load_controller("generated/controller.torch")
    my_observation_builder.timetable.load_judge("generated/judge.torch")

    render = False
    sum_reward, sum_percent_done = 0., 0.

    for evaluation_number in itertools.count():
        time_start = time.time()
        observation, info = remote_client.env_create(obs_builder_object=my_observation_builder)
        if not observation:
            # env_create() returns a falsy observation once all evaluation
            # environments have been served, so we are done.
            break

        local_env = FlatlandWrapper(remote_client.env, FakeRewardShaper())
        local_env.n_agents = len(local_env.agents)
        log().check_time()

        if render:
            env_renderer = RenderTool(
                local_env.env,
                agent_render_variant=AgentRenderVariant.ONE_STEP_BEHIND,
                show_debug=True,
                screen_height=600,
                screen_width=800)

        env_creation_time = time.time() - time_start
        print("Evaluation Number : {}".format(evaluation_number))

        time_taken_by_controller = []
        time_taken_per_step = []
        steps = 0
        done = defaultdict(lambda: False)

        while True:
            try:
                if render:
                    env_renderer.render_env(show=True, show_observations=False, show_predictions=False)

                time_start = time.time()
                action_dict = dict()
                handles_to_ask = list()
                observation = {k: torch.tensor(v, dtype=torch.float)
                               for k, v in observation.items() if v is not None}
                for i in range(local_env.n_agents):
                    if not done[i]:
                        if local_env.obs_builder.greedy_checker.greedy_position(i):
                            action_dict[i] = 0
                        elif i in observation:
                            handles_to_ask.append(i)

                for handle in handles_to_ask:
                    for opp_handle in local_env.obs_builder.encountered[handle]:
                        if opp_handle != -1 and opp_handle not in observation:
                            observation[opp_handle] = torch.tensor(
                                local_env.obs_builder._get_internal(opp_handle), dtype=torch.float)
                time_taken_per_step.append(time.time() - time_start)

                time_start = time.time()
                controller_actions = controller.fast_select_actions(handles_to_ask,
                                                                    observation,
                                                                    local_env.obs_builder.encountered,
                                                                    train=True)
                action_dict.update(controller_actions)
                action_dict = {k: local_env.transform_action(k, v) for k, v in action_dict.items()}
                action_dict = {handle: action for handle, action in action_dict.items() if action != -1}
                time_taken = time.time() - time_start
                time_taken_by_controller.append(time_taken)

                time_start = time.time()
                observation, all_rewards, done, info = remote_client.env_step(action_dict)

                num_done = sum([1 for agent in local_env.agents
                                if agent.status == RailAgentStatus.DONE_REMOVED])
                num_started = sum([1 for handle in range(len(local_env.agents))
                                   if local_env.obs_builder.timetable.is_ready(handle)])
                finished_handles = [handle for handle in range(len(local_env.agents))
                                    if local_env.obs_builder.timetable.ready_to_depart[handle] == 2]
                reward = torch.sum(local_env._max_episode_steps
                                   - local_env.obs_builder.timetable.end_time[finished_handles])
                reward /= len(local_env.agents) * local_env._max_episode_steps
                percent_done = float(num_done) / len(local_env.agents)
                deadlocked = int(sum(local_env.obs_builder.deadlock_checker._is_deadlocked) + 0.5)

                steps += 1
                time_taken = time.time() - time_start
                time_taken_per_step.append(time_taken)

                if done['__all__']:
                    print("Done agents {}/{}".format(num_done, len(local_env.agents)))
print("Started agents {}/{}".format(num_started, len(local_env.agents))) print("Deadlocked agents {}/{}".format(deadlocked, len(local_env.agents))) print("Reward: {} Percent done: {}".format(reward, percent_done)) sum_reward += reward sum_percent_done += percent_done print("Total reward: {} Avg percent done: {}".format(sum_reward, sum_percent_done / (evaluation_number + 1))) if render: env_renderer.close_window() break except TimeoutException as err: print("Timeout! Will skip this episode and go to the next.", err) break np_time_taken_by_controller = np.array(time_taken_by_controller) np_time_taken_per_step = np.array(time_taken_per_step) print("="*100) print("="*100) print("Evaluation Number : ", evaluation_number) print("Current Env Path : ", remote_client.current_env_path) print("Env Creation Time : ", env_creation_time) print("Number of Steps : {}/{}".format(steps, local_env._max_episode_steps)) print("Mean/Std/Sum of Time taken by Controller : ", np_time_taken_by_controller.mean(), np_time_taken_by_controller.std(), np_time_taken_by_controller.sum()) print("Mean/Std/Sum of Time per Step : ", np_time_taken_per_step.mean(), np_time_taken_per_step.std(), np_time_taken_per_step.sum()) log().print_time_metrics() log().zero_time_metrics() print("="*100) print("\n\n") print("Evaluation of all environments complete...") print(remote_client.submit())
    state_machine_action = sm.act(triggers)  # State machine picks action

    for a in range(number_of_agents):
        # state_machine_action = act(prediction_depth, state[a])  # State machine picks action
        railenv_action = observation_builder.choose_railenv_action(a, state_machine_action)
        # state_machine_action_dict.update({a: state_machine_action})
        railenv_action_dict.update({a: railenv_action})

    time_taken = time.time() - time_start
    time_taken_by_controller.append(time_taken)

    # Perform the chosen actions on the environment.
    # The actions are applied to both the local and the remote copy of the
    # environment instance: the observation is returned by the local copy,
    # while the rewards, done and info come from the remote copy.
    time_start = time.time()
    state, reward, done, info = remote_client.env_step(railenv_action_dict)
    steps += 1
    time_taken = time.time() - time_start
    time_taken_per_step.append(time_taken)
    reward_sum += sum(list(reward.values()))

    if steps % 1 == 0:  # log every step
        print("Step / Max Steps: {}/{}".format(steps, max_time_steps),
              'time_taken_by_controller', round(time_taken_by_controller[-1], 3),
              'time_taken_per_step', round(time_taken_per_step[-1], 1),
              'reward_step', round(sum(list(reward.values())), 1),
              'reward_sum', round(reward_sum))

    if steps > max_time_steps:
        # Avoid all dones being set to 0 after reaching max_time_steps
        break

    if done['__all__']:
        # print("Reward : ", sum(list(reward.values())))
        # When done['__all__'] == True, the evaluation of this
        # particular Env instantiation is complete, and we can break out
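All of the fragments above implement the same remote-evaluation pattern against the Flatland evaluator. The sketch below condenses that pattern into a minimal, self-contained loop: the random_controller helper and the TreeObsForRailEnv observation builder are illustrative assumptions only, while env_create, env_step and submit are the FlatlandRemoteClient calls already used in the fragments.

import time

import numpy as np
from flatland.envs.observations import TreeObsForRailEnv
from flatland.evaluators.client import FlatlandRemoteClient


def random_controller(observation, n_agents):
    # Hypothetical stand-in for the solutions' controllers: pick a random
    # RailEnv action (0..4) for every agent.
    return {a: np.random.randint(0, 5) for a in range(n_agents)}


remote_client = FlatlandRemoteClient()
observation_builder = TreeObsForRailEnv(max_depth=2)

while True:
    observation, info = remote_client.env_create(obs_builder_object=observation_builder)
    if not observation:
        # A falsy observation from env_create() means all evaluation
        # environments have been served; stop and submit.
        break

    local_env = remote_client.env
    n_agents = len(local_env.agents)
    time_taken_by_controller, time_taken_per_step = [], []

    while True:
        # Time the controller
        time_start = time.time()
        action_dict = random_controller(observation, n_agents)
        time_taken_by_controller.append(time.time() - time_start)

        # Time the environment step
        time_start = time.time()
        observation, all_rewards, done, info = remote_client.env_step(action_dict)
        time_taken_per_step.append(time.time() - time_start)

        if done['__all__']:
            break

print(remote_client.submit())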