Example #1
    print("Evaluation Number : {}".format(evaluation_number))

    local_env = remote_client.env
    solver = r2_solver.Solver(evaluation_number)

    time_taken_by_controller = []
    time_taken_per_step = []
    steps = 0
    while True:
        time_start = time.time()
        moves = solver.GetMoves(local_env.agents, observation)
        time_taken = time.time() - time_start
        time_taken_by_controller.append(time_taken)

        time_start = time.time()
        observation, all_rewards, done, info = remote_client.env_step(moves)
        steps += 1
        time_taken = time.time() - time_start
        time_taken_per_step.append(time_taken)

        if done['__all__']:
            print("Reward : ", sum(list(all_rewards.values())))
            break
    
    np_time_taken_by_controller = np.array(time_taken_by_controller)
    np_time_taken_per_step = np.array(time_taken_per_step)
    print("="*100)
    print("="*100)
    print("Evaluation Number : ", evaluation_number)
    print("Current Env Path : ", remote_client.current_env_path)
    print("Env Creation Time : ", env_creation_time)
Example #2
                            nb_hit += 1
                        else:
                            # otherwise, run normalization and inference
                            norm_obs = normalize_observation(observation[agent], tree_depth=observation_tree_depth, observation_radius=observation_radius)
                            action = policy.act(norm_obs, eps=0.0)

                        action_dict[agent] = action

                        if USE_ACTION_CACHE:
                            agent_last_obs[agent] = observation[agent]
                            agent_last_action[agent] = action
                agent_time = time.time() - time_start
                time_taken_by_controller.append(agent_time)

                time_start = time.time()
                _, all_rewards, done, info = remote_client.env_step(action_dict)
                step_time = time.time() - time_start
                time_taken_per_step.append(step_time)

                time_start = time.time()
                observation = tree_observation.get_many(list(range(nb_agents)))
                obs_time = time.time() - time_start

            else:
                # Fully deadlocked: perform no-ops
                no_ops_mode = True

                time_start = time.time()
                _, all_rewards, done, info = remote_client.env_step({})
                step_time = time.time() - time_start
                time_taken_per_step.append(step_time)
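Example #2 is cut off just above its cache-hit branch. The following self-contained sketch illustrates the action-cache pattern it uses, reusing the same names (USE_ACTION_CACHE, agent_last_obs, agent_last_action, nb_hit); dummy_policy is a stand-in for the normalization plus policy.act(...) call, not the original network.

from typing import Any, Dict

USE_ACTION_CACHE = True
agent_last_obs: Dict[int, Any] = {}
agent_last_action: Dict[int, int] = {}
nb_hit = 0

def dummy_policy(obs) -> int:
    # Stand-in for normalize_observation(...) followed by policy.act(...)
    return hash(str(obs)) % 5

def select_action(agent: int, obs) -> int:
    global nb_hit
    if USE_ACTION_CACHE and agent in agent_last_obs and str(agent_last_obs[agent]) == str(obs):
        # Observation unchanged since the last step: reuse the cached action
        nb_hit += 1
        action = agent_last_action[agent]
    else:
        # Otherwise run (normalization and) inference
        action = dummy_policy(obs)
    if USE_ACTION_CACHE:
        agent_last_obs[agent] = obs
        agent_last_action[agent] = action
    return action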
Example #3
                            else:
                                action_dict.update({
                                    cur_conflict_agent:
                                    next_shortest_actions[cur_conflict_agent]
                                })

        time_taken = time.time() - time_start
        time_taken_by_controller.append(time_taken)

        # Perform the chosen action on the environment.
        # The action is applied to both the local and the remote copy of the
        # environment instance; the observation is returned by the local copy
        # of the env, while the rewards, done flags, and info are returned by
        # the remote copy of the env.
        time_start = time.time()
        observation, all_rewards, done, info = remote_client.env_step(
            action_dict)

        #print("Rewards: ", all_rewards, "  [done=", done, "]")

        for a in range(env.get_num_agents()):
            score += all_rewards[a] / env.get_num_agents()

        steps += 1
        time_taken = time.time() - time_start
        time_taken_per_step.append(time_taken)

        if done['__all__'] or steps >= max_steps:
            print("Reward : ", sum(list(all_rewards.values())))
            #
            # When done['__all__'] == True, then the evaluation of this
            # particular Env instantiation is complete, and we can break out
Example #4
        #
        #####################################################################
        # Compute the action for this step by using the previously
        # defined controller
        time_start = time.time()
        action, _ = controller.act(observation)
        time_taken = time.time() - time_start
        time_taken_by_controller.append(time_taken)

        # Perform the chosen action on the environment.
        # The action is applied to both the local and the remote copy of the
        # environment instance; the observation is returned by the local copy
        # of the env, while the rewards, done flags, and info are returned by
        # the remote copy of the env.
        time_start = time.time()
        observation, all_rewards, done, _ = remote_client.env_step(action)
        steps += 1
        time_taken = time.time() - time_start
        time_taken_per_step.append(time_taken)

        if RENDER:
            env_renderer.render_env(show=True,
                                    show_observations=True,
                                    show_predictions=True)

        if done['__all__']:
            print("Reward : ", sum(list(all_rewards.values())))
            #
            # When done['__all__'] == True, then the evaluation of this
            # particular Env instantiation is complete, and we can break out
            # of this loop, and move onto the next Env evaluation
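Example #4 gates rendering behind a RENDER flag, but the excerpt does not show the renderer being created. A minimal setup sketch that mirrors the RenderTool call in Example #7; the render variant and screen size are arbitrary choices, and remote_client is assumed to come from the outer evaluation loop.

from flatland.utils.rendertools import RenderTool, AgentRenderVariant

RENDER = True
if RENDER:
    env_renderer = RenderTool(
        remote_client.env,
        agent_render_variant=AgentRenderVariant.ONE_STEP_BEHIND,
        show_debug=True,
        screen_height=600,
        screen_width=800,
    )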
Example #5
    
    env_creation_time = time.time() - time_start
    
    print("Evaluation Number : {}".format(evaluation_number))

    local_env = remote_client.env
    number_of_agents = len(local_env.agents)
    
    time_taken_by_controller = []
    time_taken_per_step = []
    steps = 0
    # First action for every agent (2 == RailEnvActions.MOVE_FORWARD)
    for a in range(number_of_agents):
        action = 2
        railenv_action_dict.update({a: action})
    obs, all_rewards, done, info = remote_client.env_step(railenv_action_dict)

    while True:
        # Evaluation of a single episode
    
        time_start = time.time()
        # Pick actions
        for a in range(number_of_agents):
            if info['action_required'][a]:
                network_action = controller.act(obs[a])
                railenv_action = observation_builder.choose_railenv_action(a, network_action)
            else:
                railenv_action = 0  # 0 == RailEnvActions.DO_NOTHING
            railenv_action_dict.update({a: railenv_action})
                
        time_taken = time.time() - time_start
Example #6
        time_taken_by_controller = []
        time_taken_per_step = []
        steps = 0

        env_renderer = RenderTool(env)

        while True:

            time_start = time.time()
            _action = observation_builder.get_action_dict_safety(obs)
            time_taken = time.time() - time_start
            time_taken_by_controller.append(time_taken)

            time_start = time.time()
            next_obs, all_rewards, done, _ = remote_client.env_step(_action)

            time_taken = time.time() - time_start
            time_taken_per_step.append(time_taken)

            obs = next_obs
            steps += 1
            if obs is None or done['__all__']:
                break

        np_time_taken_by_controller = np.array(time_taken_by_controller)
        np_time_taken_per_step = np.array(time_taken_per_step)
        print("="*100)
        print("="*100)
        print("Done Status : ", done)
        print("Evaluation Number : ", evaluation_number)
Example #7
def evaluate_remote():
    remote_client = FlatlandRemoteClient()
    my_observation_builder = SimpleObservation(max_depth=3, neighbours_depth=3,
            timetable=Judge(LinearOnAgentNumberSizeGenerator(0.03, 5), lr=0,
                    batch_size=0, optimization_epochs=0, device=torch.device("cpu")),
            deadlock_checker=DeadlockChecker(), greedy_checker=GreedyChecker(), parallel=False, eval=True)

    params = torch.load("generated/params.torch")
    params.neighbours_depth=my_observation_builder.neighbours_depth
    controller = PPOController(params, torch.device("cpu"))
    controller.load_controller("generated/controller.torch")
    my_observation_builder.timetable.load_judge("generated/judge.torch")

    render = False

    sum_reward, sum_percent_done = 0., 0.
    for evaluation_number in itertools.count():
        time_start = time.time()
        observation, info = remote_client.env_create(obs_builder_object=my_observation_builder)
        if not observation:
            break

        local_env = FlatlandWrapper(remote_client.env, FakeRewardShaper())
        local_env.n_agents = len(local_env.agents)
        log().check_time()
        if render:
            env_renderer = RenderTool(
                local_env.env,
                agent_render_variant=AgentRenderVariant.ONE_STEP_BEHIND,
                show_debug=True,
                screen_height=600,
                screen_width=800
            )

        env_creation_time = time.time() - time_start

        print("Evaluation Number : {}".format(evaluation_number))

        time_taken_by_controller = []
        time_taken_per_step = []
        steps = 0
        done = defaultdict(lambda: False)
        while True:
            try:
                if render:
                    env_renderer.render_env(show=True, show_observations=False, show_predictions=False)
                time_start = time.time()
                action_dict = dict()
                handles_to_ask = list()
                observation = {k: torch.tensor(v, dtype=torch.float) for k, v in observation.items() if v is not None}
                for i in range(local_env.n_agents):
                    if not done[i]:
                        if local_env.obs_builder.greedy_checker.greedy_position(i):
                            action_dict[i] = 0
                        elif i in observation:
                            handles_to_ask.append(i)

                for handle in handles_to_ask:
                    for opp_handle in local_env.obs_builder.encountered[handle]:
                        if opp_handle != -1 and opp_handle not in observation:
                            observation[opp_handle] = torch.tensor(local_env.obs_builder._get_internal(opp_handle), dtype=torch.float)

                time_taken_per_step.append(time.time() - time_start)
                time_start = time.time()

                controller_actions = controller.fast_select_actions(handles_to_ask, observation,
                        local_env.obs_builder.encountered, train=True)
                action_dict.update(controller_actions)
                action_dict = {k: local_env.transform_action(k, v) for k, v in action_dict.items()}
                action_dict = {handle: action for handle, action in action_dict.items() if action != -1}

                time_taken = time.time() - time_start
                time_taken_by_controller.append(time_taken)

                time_start = time.time()
                observation, all_rewards, done, info = remote_client.env_step(action_dict)
                num_done = sum([1 for agent in local_env.agents if agent.status == RailAgentStatus.DONE_REMOVED])
                num_started = sum([1 for handle in range(len(local_env.agents)) if local_env.obs_builder.timetable.is_ready(handle)])

                finished_handles = [handle for handle in range(len(local_env.agents))
                        if local_env.obs_builder.timetable.ready_to_depart[handle] == 2]
                reward = torch.sum(local_env._max_episode_steps - local_env.obs_builder.timetable.end_time[finished_handles])
                reward /= len(local_env.agents) * local_env._max_episode_steps
                percent_done = float(num_done) / len(local_env.agents)
                deadlocked = int(sum(local_env.obs_builder.deadlock_checker._is_deadlocked) + 0.5)

                steps += 1
                time_taken = time.time() - time_start
                time_taken_per_step.append(time_taken)

                if done['__all__']:
                    print("Done agents {}/{}".format(num_done, len(local_env.agents)))
                    print("Started agents {}/{}".format(num_started, len(local_env.agents)))
                    print("Deadlocked agents {}/{}".format(deadlocked, len(local_env.agents)))
                    print("Reward: {}        Percent done: {}".format(reward, percent_done))
                    sum_reward += reward
                    sum_percent_done += percent_done
                    print("Total reward: {}        Avg percent done: {}".format(sum_reward, sum_percent_done / (evaluation_number + 1)))
                    if render:
                        env_renderer.close_window()
                    break
            except TimeoutException as err:
                print("Timeout! Will skip this episode and go to the next.", err)
                break

        
        np_time_taken_by_controller = np.array(time_taken_by_controller)
        np_time_taken_per_step = np.array(time_taken_per_step)
        print("="*100)
        print("="*100)
        print("Evaluation Number : ", evaluation_number)
        print("Current Env Path : ", remote_client.current_env_path)
        print("Env Creation Time : ", env_creation_time)
        print("Number of Steps : {}/{}".format(steps, local_env._max_episode_steps))
        print("Mean/Std/Sum of Time taken by Controller : ", np_time_taken_by_controller.mean(), np_time_taken_by_controller.std(), np_time_taken_by_controller.sum())
        print("Mean/Std/Sum of Time per Step : ", np_time_taken_per_step.mean(), np_time_taken_per_step.std(), np_time_taken_per_step.sum())
        log().print_time_metrics()
        log().zero_time_metrics()
        print("="*100)
        print("\n\n")

    print("Evaluation of all environments complete...")
    print(remote_client.submit())
Example #8
        state_machine_action = sm.act(triggers) # State machine picks action

        for a in range(number_of_agents):
            #state_machine_action = act(prediction_depth, state[a])  # State machine picks action
            railenv_action = observation_builder.choose_railenv_action(a, state_machine_action)
            # state_machine_action_dict.update({a: state_machine_action})
            railenv_action_dict.update({a: railenv_action})
        time_taken = time.time() - time_start
        time_taken_by_controller.append(time_taken)
        # Perform the chosen action on the environment.
        # The action is applied to both the local and the remote copy of the
        # environment instance; the observation is returned by the local copy
        # of the env, while the rewards, done flags, and info are returned by
        # the remote copy of the env.
        time_start = time.time()
        state, reward, done, info = remote_client.env_step(railenv_action_dict)
        steps += 1
        time_taken = time.time() - time_start
        time_taken_per_step.append(time_taken)
        reward_sum += sum(list(reward.values()))

        if steps % 1 == 0:  # Print every step; raise the modulus to log less often
            print("Step / Max Steps: {}/{}".format(steps, max_time_steps),
                  'time_taken_by_controller', round(time_taken_by_controller[-1], 3),
                  'time_taken_per_step', round(time_taken_per_step[-1], 1),
                  'reward_step', round(sum(list(reward.values())), 1),
                  'reward_sum', round(reward_sum))

        if steps > max_time_steps:  # To avoid all dones being set to 0 after reaching max_time_steps
            break
        if done['__all__']:
            # print("Reward : ", sum(list(reward.values())))
            #
            # When done['__all__'] == True, then the evaluation of this 
            # particular Env instantiation is complete, and we can break out