Example #1
        def run_experiment():
            total_step = 0
            agent, env, spec = prepare_test_env_agent(headless=True)
            timestep_sec = env.timestep_sec
            policy_class = "ultra.baselines.sac:sac-v0"
            log_dir = "tests/output_eval_check_logs"

            for episode in episodes(1, etag=policy_class, log_dir=log_dir):
                observations = env.reset()
                state = observations[AGENT_ID]
                dones, infos = {"__all__": False}, None
                episode.reset()
                experiment_dir = episode.experiment_dir

                if not os.path.exists(f"{experiment_dir}/spec.pkl"):
                    if not os.path.exists(experiment_dir):
                        os.makedirs(experiment_dir)
                    with open(f"{experiment_dir}/spec.pkl",
                              "wb") as spec_output:
                        dill.dump(spec, spec_output, pickle.HIGHEST_PROTOCOL)

                while not dones["__all__"]:
                    evaluation_check(
                        agent=agent,
                        agent_id=AGENT_ID,
                        episode=episode,
                        eval_rate=10,
                        eval_episodes=1,
                        max_episode_steps=2,
                        policy_class=policy_class,
                        scenario_info=("00", "eval_test"),
                        timestep_sec=0.1,
                        headless=True,
                        log_dir=log_dir,
                    )
                    action = agent.act(state, explore=True)
                    observations, rewards, dones, infos = env.step(
                        {AGENT_ID: action})
                    next_state = observations[AGENT_ID]

                    # Retrieve some relevant information from the reward processor.
                    # observations[AGENT_ID]["ego"].update(rewards[AGENT_ID]["log"])
                    loss_output = agent.step(
                        state=state,
                        action=action,
                        reward=rewards[AGENT_ID],
                        next_state=next_state,
                        done=dones[AGENT_ID],
                    )
                    episode.record_step(
                        agent_id=AGENT_ID,
                        infos=infos,
                        rewards=rewards,
                        total_step=total_step,
                        loss_output=loss_output,
                    )
                    total_step += 1
                    state = next_state

            env.close()
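
A minimal sketch (not part of the original test) of restoring the spec that Example #1 pickles, assuming `experiment_dir` still points at the same episode directory:

    import dill

    # Hypothetical: reload the AgentSpec saved to <experiment_dir>/spec.pkl above.
    with open(f"{experiment_dir}/spec.pkl", "rb") as spec_input:
        restored_spec = dill.load(spec_input)
    restored_agent = restored_spec.build_agent()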
Example #2
 def run_experiment():
     agent, env = prepare_test_env_agent()
     log_dir = os.path.join(EpisodeTest.OUTPUT_DIRECTORY, "logs/")
     episode = Episode(0)
     for episode in episodes(2, etag="Train", log_dir=log_dir):
         observations = env.reset()
         total_step = 0
         episode.reset()
         dones, infos = {"__all__": False}, None
         state = observations[AGENT_ID]
         while not dones["__all__"]:
             action = agent.act(state, explore=True)
             observations, rewards, dones, infos = env.step(
                 {AGENT_ID: action})
             next_state = observations[AGENT_ID]
             # observations[AGENT_ID].update(rewards[AGENT_ID])
             loss_output = agent.step(
                 state=state,
                 action=action,
                 reward=rewards[AGENT_ID],
                 next_state=next_state,
                 done=dones[AGENT_ID],
                 info=infos[AGENT_ID],
             )
             episode.record_step(
                 agent_ids_to_record=AGENT_ID,
                 infos=infos,
                 rewards=rewards,
                 total_step=total_step,
                 loss_outputs=loss_output,
             )
             state = next_state
             total_step += 1
     env.close()
     return episode.index
Example #3
 def run_experiment():
     agent, env = prepare_test_env_agent()
     episode_count = 0
     log_dir = "tests/logs"
     for episode in episodes(2, etag="Train", dir=log_dir):
         observations = env.reset()
         total_step = 0
         episode.reset()
         dones, infos = {"__all__": False}, None
         state = observations[AGENT_ID]
         while not dones["__all__"]:
             action = agent.act(state, explore=True)
             observations, rewards, dones, infos = env.step(
                 {AGENT_ID: action})
             next_state = observations[AGENT_ID]
             # observations[AGENT_ID].update(rewards[AGENT_ID])
             loss_output = agent.step(
                 state=state,
                 action=action,
                 reward=rewards[AGENT_ID],
                 next_state=next_state,
                 done=dones[AGENT_ID],
             )
             episode.record_step(
                 agent_id=AGENT_ID,
                 infos=infos,
                 rewards=rewards,
                 total_step=total_step,
                 loss_output=loss_output,
             )
             state = next_state
             total_step += 1
         episode_count += 1
     env.close()
     return episode_count
Example #4
        def run_experiment():
            agent, env = prepare_test_env_agent()
            result = {
                "episode_reward": 0,
                "dist_center": 0,
                "goal_dist": 0,
                "speed": 0,
                "ego_num_violations": 0,
                "linear_jerk": 0,
                "angular_jerk": 0,
                "collision": 0,
                "off_road": 0,
                "off_route": 0,
                "reached_goal": 0,
            }
            for episode in episodes(1, etag="Train"):
                observations = env.reset()
                total_step = 0
                episode.reset()
                dones, infos = {"__all__": False}, None
                state = observations[AGENT_ID]

                while not dones["__all__"] and total_step < 4:
                    action = agent.act(state, explore=True)
                    observations, rewards, dones, infos = env.step(
                        {AGENT_ID: action})
                    next_state = observations[AGENT_ID]
                    # observations[AGENT_ID]["ego"].update(rewards[AGENT_ID]["log"])
                    loss_output = agent.step(
                        state=state,
                        action=action,
                        reward=rewards[AGENT_ID],
                        next_state=next_state,
                        done=dones[AGENT_ID],
                    )

                    for key in result.keys():
                        if key in observations[AGENT_ID]:
                            if key == "goal_dist":
                                result[key] = observations[AGENT_ID][key]
                            else:
                                result[key] += observations[AGENT_ID][key]
                        elif key == "episode_reward":
                            result[key] += rewards[AGENT_ID]

                    episode.record_step(
                        agent_id=AGENT_ID,
                        infos=infos,
                        rewards=rewards,
                        total_step=total_step,
                        loss_output=loss_output,
                    )

                    state = next_state
                    total_step += 1
            env.close()
            episode.record_episode()
            return result, episode
Example #5
def train(
    scenario_info,
    num_episodes,
    policy_classes,
    max_episode_steps,
    eval_info,
    timestep_sec,
    headless,
    seed,
    log_dir,
    policy_ids=None,
):
    torch.set_num_threads(1)
    total_step = 0
    finished = False

    # Make agent_ids in the form of 000, 001, ..., 010, 011, ..., 999, 1000, ...;
    # or use the provided policy_ids if available.
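    # (The zero-padding below is equivalent to str(i).zfill(3).)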
    agent_ids = (
        ["0" * max(0, 3 - len(str(i))) + str(i) for i in range(len(policy_classes))]
        if not policy_ids
        else policy_ids
    )
    # Ensure there is an ID for each policy, and a policy for each ID.
    assert len(agent_ids) == len(policy_classes), (
        "The number of agent IDs provided ({}) must be equal to "
        "the number of policy classes provided ({}).".format(
            len(agent_ids), len(policy_classes)
        )
    )

    # Assign the policy classes to their associated ID.
    agent_classes = {
        agent_id: policy_class
        for agent_id, policy_class in zip(agent_ids, policy_classes)
    }
    # Create the agent specifications matched with their associated ID.
    agent_specs = {
        agent_id: make(locator=policy_class, max_episode_steps=max_episode_steps)
        for agent_id, policy_class in agent_classes.items()
    }
    # Create the agents matched with their associated ID.
    agents = {
        agent_id: agent_spec.build_agent()
        for agent_id, agent_spec in agent_specs.items()
    }

    # Create the environment.
    env = gym.make(
        "ultra.env:ultra-v0",
        agent_specs=agent_specs,
        scenario_info=scenario_info,
        headless=headless,
        timestep_sec=timestep_sec,
        seed=seed,
    )

    # Define an 'etag' for this experiment's data directory based off policy_classes.
    # E.g. From a ["ultra.baselines.dqn:dqn-v0", "ultra.baselines.ppo:ppo-v0"]
    # policy_classes list, transform it to an etag of "dqn-v0:ppo-v0".
    etag = ":".join([policy_class.split(":")[-1] for policy_class in policy_classes])

    for episode in episodes(num_episodes, etag=etag, log_dir=log_dir):
        # Reset the environment and retrieve the initial observations.
        observations = env.reset()
        dones = {"__all__": False}
        infos = None
        episode.reset()
        experiment_dir = episode.experiment_dir

        # Save relevant agent metadata.
        if not os.path.exists(f"{experiment_dir}/agent_metadata.pkl"):
            if not os.path.exists(experiment_dir):
                os.makedirs(experiment_dir)
            with open(f"{experiment_dir}/agent_metadata.pkl", "wb") as metadata_file:
                dill.dump(
                    {
                        "agent_ids": agent_ids,
                        "agent_classes": agent_classes,
                        "agent_specs": agent_specs,
                    },
                    metadata_file,
                    pickle.HIGHEST_PROTOCOL,
                )

        while not dones["__all__"]:
            # Break if any agent's step count is 1000000 or greater.
            if any([episode.get_itr(agent_id) >= 1000000 for agent_id in agents]):
                finished = True
                break

            # Perform the evaluation check.
            evaluation_check(
                agents=agents,
                agent_ids=agent_ids,
                policy_classes=agent_classes,
                episode=episode,
                log_dir=log_dir,
                max_episode_steps=max_episode_steps,
                **eval_info,
                **env.info,
            )

            # Request and perform actions on each agent that received an observation.
            actions = {
                agent_id: agents[agent_id].act(observation, explore=True)
                for agent_id, observation in observations.items()
            }
            next_observations, rewards, dones, infos = env.step(actions)

            # Active agents are those that receive observations in this step and the next
            # step. Step each active agent (obtaining their network loss if applicable).
            active_agent_ids = observations.keys() & next_observations.keys()
            loss_outputs = {
                agent_id: agents[agent_id].step(
                    state=observations[agent_id],
                    action=actions[agent_id],
                    reward=rewards[agent_id],
                    next_state=next_observations[agent_id],
                    done=dones[agent_id],
                    info=infos[agent_id],
                )
                for agent_id in active_agent_ids
            }

            # Record the data from this episode.
            episode.record_step(
                agent_ids_to_record=active_agent_ids,
                infos=infos,
                rewards=rewards,
                total_step=total_step,
                loss_outputs=loss_outputs,
            )

            # Update variables for the next step.
            total_step += 1
            observations = next_observations

        # Normalize the data and record this episode on tensorboard.
        episode.record_episode()
        episode.record_tensorboard()

        if finished:
            break

    env.close()
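
A sketch of how this train() might be invoked; every value below is an illustrative assumption (the eval_info keys are guessed from the evaluation_check call in Example #1), not taken from the repository:

    # Hypothetical call; adjust the values to your own experiment.
    train(
        scenario_info=("00", "easy"),
        num_episodes=100,
        policy_classes=["ultra.baselines.sac:sac-v0"],
        max_episode_steps=200,
        eval_info={"eval_rate": 10, "eval_episodes": 2},
        timestep_sec=0.1,
        headless=True,
        seed=2,
        log_dir="logs",
    )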
Example #6
def evaluate_saved_models(
    experiment_dir: str,
    log_dir: str,
    headless: bool,
    max_episode_steps: int,
    agents: Sequence[str],
    num_episodes: int,
    scenario_info: Tuple[str, str],
    timestep: float,
    models_to_evaluate: Optional[str] = None,
):

    # If no agents are explicitly given, then by default all agents are
    # enabled for evaluation.
    if not agents:
        agents = os.listdir(os.path.join(experiment_dir, "models"))

    # Model path for each agent id
    model_paths = [
        os.path.join(experiment_dir, "models", agent) for agent in agents
    ]

    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    if not all([os.path.exists(model_path) for model_path in model_paths]):
        raise "At least one path to a model is invalid"
    if not all([os.listdir(model_path) for model_path in model_paths]):
        raise "There are no models to evaluate in at least one model path"

    # Get agent IDs from the models to be evaluated.
    agent_ids_from_models = [
        os.path.basename(os.path.normpath(model_path))
        for model_path in model_paths
    ]

    # Load relevant agent metadata.
    with open(os.path.join(experiment_dir, "agent_metadata.pkl"),
              "rb") as metadata_file:
        agent_metadata = pickle.load(metadata_file)

    # Extract the agent IDs and policy classes from the metadata and given models.
    agent_ids = [
        agent_id for agent_id in agent_metadata["agent_ids"]
        if agent_id in agent_ids_from_models
    ]
    policy_classes = {
        agent_id: agent_metadata["agent_classes"][agent_id]
        for agent_id in agent_ids
    }

    # From a base model directory such as logs/<experiment_name>/models/*, map each agent ID to its
    # checkpoint directories, sorted by checkpoint iteration. The agent IDs are
    # obtained from the direct child folders of the model directory given. As an example result:
    # {
    #     '000': ['logs/<experiment_name>/models/000/1042', 'logs/<experiment_name>/models/000/2062'],
    #     '001': ['logs/<experiment_name>/models/001/999', 'logs/<experiment_name>/models/001/1999'],
    #     '003': ['logs/<experiment_name>/models/003/1009', 'logs/<experiment_name>/models/003/2120'],
    #     '002': ['logs/<experiment_name>/models/002/1053', 'logs/<experiment_name>/models/002/2041'],
    # }
    agent_checkpoint_directories = {
        agent_id: sorted(
            glob.glob(os.path.join(experiment_dir, "models", agent_id, "*")),
            key=lambda x: int(x.split("/")[-1]),
        )
        for agent_id in agent_ids
    }

    # If models are explicitly given through the CLI, then their respective model
    # directory paths are calculated.
    if models_to_evaluate:
        custom_checkpoint_directories = {}
        # Iterate through each model to be evaluated (an exception is raised if a model does not exist).
        for model in models_to_evaluate:
            agent_id = model.split("/")[0]
            model_observation_number = model.split("/")[-1]
            if agent_id in agent_checkpoint_directories.keys():
                model_directories = {
                    model_directory.split("/")[-1]: model_directory
                    for model_directory in
                    agent_checkpoint_directories[agent_id]
                }
                if model_observation_number in model_directories:
                    if agent_id in custom_checkpoint_directories:
                        custom_checkpoint_directories[agent_id].append(
                            model_directories[model_observation_number])
                    else:
                        custom_checkpoint_directories[agent_id] = [
                            model_directories[model_observation_number]
                        ]
                else:
                    raise Exception(
                        f"The agent with id: {agent_id} does not contain the provided observation number: {model_observation_number}"
                    )
            else:
                raise Exception(
                    f"The agent id: {agent_id} is not in the specified agent IDs"
                )

        # agent_checkpoint_directories now contains only the specified model
        # directories for the specified agents.
        agent_checkpoint_directories = custom_checkpoint_directories

    etag = (":".join(
        [policy_classes[agent_id].split(":")[-1]
         for agent_id in agent_ids]) + "-evaluation")

    for agent_id, checkpoint_directories in agent_checkpoint_directories.items(
    ):
        num_of_checkpoints = len(checkpoint_directories)
        ray.init()
        try:
            for episode in episodes(
                    num_of_checkpoints,
                    etag=etag,
                    log_dir=log_dir,
            ):
                # Obtain a checkpoint directory for each agent.
                checkpoint_directory = {
                    agent_id: checkpoint_directories[episode.index]
                }
                episode.eval_mode()
                episode.info[episode.active_tag] = ray.get([
                    evaluate.remote(
                        experiment_dir=experiment_dir,
                        agent_ids=[agent_id],
                        policy_classes=policy_classes,
                        seed=episode.eval_count,
                        checkpoint_dirs=checkpoint_directory,
                        scenario_info=scenario_info,
                        num_episodes=num_episodes,
                        max_episode_steps=max_episode_steps,
                        timestep_sec=timestep,
                        headless=headless,
                        log_dir=log_dir,
                    )
                ])[0]
                episode.record_tensorboard(recording_step=episode.index)
                episode.eval_count += 1
        finally:
            time.sleep(1)
            ray.shutdown()
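
A hypothetical call to evaluate_saved_models(); all paths and values below are illustrative assumptions:

    evaluate_saved_models(
        experiment_dir="logs/<experiment_name>",
        log_dir="logs/evaluation",
        headless=True,
        max_episode_steps=200,
        agents=["000", "001"],
        num_episodes=10,
        scenario_info=("00", "easy"),
        timestep=0.1,
        models_to_evaluate=None,  # or e.g. ["000/1042"] to evaluate specific checkpoints
    )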
Example #7
def evaluate(
    experiment_dir,
    seed,
    agent_ids,
    policy_classes,
    checkpoint_dirs,
    scenario_info,
    num_episodes,
    max_episode_steps,
    headless,
    timestep_sec,
    log_dir,
    eval_mode=True,
):
    torch.set_num_threads(1)

    # Create the agent specifications matched with their associated ID.
    agent_specs = {
        agent_id: make(
            locator=policy_classes[agent_id],
            checkpoint_dir=checkpoint_dirs[agent_id],
            experiment_dir=experiment_dir,
            max_episode_steps=max_episode_steps,
            agent_id=agent_id,
        )
        for agent_id in agent_ids
    }

    # Create the environment with the specified agents.
    env = gym.make(
        "ultra.env:ultra-v0",
        agent_specs=agent_specs,
        scenario_info=scenario_info,
        headless=headless,
        timestep_sec=timestep_sec,
        seed=seed,
        eval_mode=eval_mode,
    )

    # Build each agent from its specification.
    agents = {
        agent_id: agent_spec.build_agent()
        for agent_id, agent_spec in agent_specs.items()
    }

    # A dictionary to hold the evaluation data for each agent.
    summary_log = {agent_id: LogInfo() for agent_id in agent_ids}

    # Define an 'etag' for this experiment's data directory based off policy_classes.
    # E.g. From a ["ultra.baselines.dqn:dqn-v0", "ultra.baselines.ppo:ppo-v0"]
    # policy_classes list, transform it to an etag of "dqn-v0:ppo-v0".
    etag = ":".join(
        [policy_class.split(":")[-1] for policy_class in policy_classes])

    for episode in episodes(num_episodes, etag=etag, log_dir=log_dir):
        # Reset the environment and retrieve the initial observations.
        observations = env.reset()
        dones = {"__all__": False}
        infos = None
        episode.reset(mode="Evaluation")

        while not dones["__all__"]:
            # Get and perform the available agents' actions.
            actions = {
                agent_id: agents[agent_id].act(observation, explore=False)
                for agent_id, observation in observations.items()
            }
            observations, rewards, dones, infos = env.step(actions)

            # Record the data from this episode.
            episode.record_step(agent_ids_to_record=infos.keys(),
                                infos=infos,
                                rewards=rewards)

        episode.record_episode()

        for agent_id, agent_data in episode.info[episode.active_tag].items():
            for key, value in agent_data.data.items():
                if not isinstance(value, (list, tuple, np.ndarray)):
                    summary_log[agent_id].data[key] += value

    # Normalize by the number of evaluation episodes.
    for agent_id, agent_data in summary_log.items():
        for key, value in agent_data.data.items():
            if not isinstance(value, (list, tuple, np.ndarray)):
                summary_log[agent_id].data[key] /= num_episodes

    env.close()

    return summary_log
Example #8
def evaluate(
    experiment_dir,
    seed,
    agent_id,
    policy_class,
    itr_count,
    checkpoint_dir,
    scenario_info,
    num_episodes,
    headless,
    timestep_sec,
):

    torch.set_num_threads(1)
    spec = make(
        locator=policy_class,
        checkpoint_dir=checkpoint_dir,
        experiment_dir=experiment_dir,
    )

    env = gym.make(
        "ultra.env:ultra-v0",
        agent_specs={agent_id: spec},
        scenario_info=scenario_info,
        headless=headless,
        timestep_sec=timestep_sec,
        seed=seed,
        eval_mode=True,
    )

    agent = spec.build_agent()
    summary_log = LogInfo()
    logs = []

    for episode in episodes(num_episodes):
        observations = env.reset()
        state = observations[agent_id]
        dones, infos = {"__all__": False}, None

        episode.reset(mode="Evaluation")
        while not dones["__all__"]:
            action = agent.act(state, explore=False)
            observations, rewards, dones, infos = env.step({agent_id: action})

            next_state = observations[agent_id]

            state = next_state

            episode.record_step(agent_id=agent_id, infos=infos, rewards=rewards)

        episode.record_episode()
        logs.append(episode.info[episode.active_tag].data)

        for key, value in episode.info[episode.active_tag].data.items():
            if not isinstance(value, (list, tuple, np.ndarray)):
                summary_log.data[key] += value

    for key, val in summary_log.data.items():
        if not isinstance(val, (list, tuple, np.ndarray)):
            summary_log.data[key] /= num_episodes

    env.close()

    return summary_log
Example #9
        if args.policy in data["agents"].keys():
            policy_path = data["agents"][args.policy]["path"]
            policy_locator = data["agents"][args.policy]["locator"]
        else:
            raise ImportError("Invalid policy name. Please try again")

    # Required string for smarts' class registry
    policy_class = str(policy_path) + ":" + str(policy_locator)
    num_cpus = max(
        1, psutil.cpu_count(logical=False) - 1
    )  # remove `logical=False` to use all cpus
    ray_kwargs = default_ray_kwargs(num_cpus=num_cpus, num_gpus=num_gpus)
    ray.init(**ray_kwargs)
    try:
        agent_id = "AGENT_008"
        for episode in episodes(len(sorted_models), etag=args.policy):
            model = sorted_models[episode.index]
            print("model: ", model)
            episode_count = model.split("/")[-1]
            episode.eval_mode()
            episode.info[episode.active_tag] = ray.get(
                [
                    evaluate.remote(
                        experiment_dir=args.experiment_dir,
                        agent_id=agent_id,
                        policy_class=policy_class,
                        seed=episode.eval_count,
                        itr_count=0,
                        checkpoint_dir=model,
                        scenario_info=(args.task, args.level),
                        num_episodes=int(args.episodes),
Example #10
def train(
    scenario_info,
    num_episodes,
    max_episode_steps,
    policy_class,
    eval_info,
    timestep_sec,
    headless,
    seed,
    log_dir,
):
    torch.set_num_threads(1)
    total_step = 0
    finished = False

    AGENT_ID = "007"

    spec = make(locator=policy_class, max_episode_steps=max_episode_steps)
    env = gym.make(
        "ultra.env:ultra-v0",
        agent_specs={AGENT_ID: spec},
        scenario_info=scenario_info,
        headless=headless,
        timestep_sec=timestep_sec,
        seed=seed,
    )

    agent = spec.build_agent()

    for episode in episodes(num_episodes, etag=policy_class, log_dir=log_dir):
        observations = env.reset()
        state = observations[AGENT_ID]
        dones, infos = {"__all__": False}, None
        episode.reset()
        experiment_dir = episode.experiment_dir

        # Save the entire spec [policy_params, reward_adapter, observation_adapter].
        if not os.path.exists(f"{experiment_dir}/spec.pkl"):
            if not os.path.exists(experiment_dir):
                os.makedirs(experiment_dir)
            with open(f"{experiment_dir}/spec.pkl", "wb") as spec_output:
                dill.dump(spec, spec_output, pickle.HIGHEST_PROTOCOL)

        while not dones["__all__"]:
            if episode.get_itr(AGENT_ID) >= 1000000:
                finished = True
                break
            evaluation_check(
                agent=agent,
                agent_id=AGENT_ID,
                policy_class=policy_class,
                episode=episode,
                log_dir=log_dir,
                max_episode_steps=max_episode_steps,
                **eval_info,
                **env.info,
            )
            action = agent.act(state, explore=True)
            observations, rewards, dones, infos = env.step({AGENT_ID: action})
            next_state = observations[AGENT_ID]

            loss_output = agent.step(
                state=state,
                action=action,
                reward=rewards[AGENT_ID],
                next_state=next_state,
                done=dones[AGENT_ID],
            )
            episode.record_step(
                agent_id=AGENT_ID,
                infos=infos,
                rewards=rewards,
                total_step=total_step,
                loss_output=loss_output,
            )
            total_step += 1
            state = next_state

        episode.record_episode()
        episode.record_tensorboard(agent_id=AGENT_ID)
        if finished:
            break

    env.close()
Example #11
def run_experiment(scenario_info, num_agents, log_dir, headless=True):
    agent_ids = [
        "0" * max(0, 3 - len(str(i))) + str(i) for i in range(num_agents)
    ]
    agent_classes = {
        agent_id: "ultra.baselines.sac:sac-v0"
        for agent_id in agent_ids
    }
    agent_specs = {
        agent_id: BaselineAgentSpec(policy_class=SACPolicy,
                                    max_episode_steps=2)
        for agent_id in agent_ids
    }

    env = gym.make(
        "ultra.env:ultra-v0",
        agent_specs=agent_specs,
        scenario_info=scenario_info,
        headless=headless,
        timestep_sec=0.1,
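        # NOTE: `seed` is assumed to be defined elsewhere (e.g. as a module-level constant) in the original test.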
        seed=seed,
    )

    agents = {
        agent_id: agent_spec.build_agent()
        for agent_id, agent_spec in agent_specs.items()
    }

    total_step = 0
    etag = ":".join(
        [policy_class.split(":")[-1] for policy_class in agent_classes])
    evaluation_task_ids = dict()

    for episode in episodes(1, etag=etag, log_dir=log_dir):
        observations = env.reset()
        dones = {"__all__": False}
        infos = None
        episode.reset()
        experiment_dir = episode.experiment_dir

        if not os.path.exists(f"{experiment_dir}/agent_metadata.pkl"):
            if not os.path.exists(experiment_dir):
                os.makedirs(experiment_dir)
            with open(f"{experiment_dir}/agent_metadata.pkl",
                      "wb") as metadata_file:
                dill.dump(
                    {
                        "agent_ids": agent_ids,
                        "agent_classes": agent_classes,
                        "agent_specs": agent_specs,
                    },
                    metadata_file,
                    pickle.HIGHEST_PROTOCOL,
                )

        while not dones["__all__"]:
            evaluation_check(
                agents=agents,
                agent_ids=agent_ids,
                episode=episode,
                eval_rate=10,
                eval_episodes=1,
                max_episode_steps=2,
                policy_classes=agent_classes,
                scenario_info=scenario_info,
                evaluation_task_ids=evaluation_task_ids,
                timestep_sec=0.1,
                headless=True,
                log_dir=log_dir,
            )
            collect_evaluations(evaluation_task_ids=evaluation_task_ids)

            actions = {
                agent_id: agents[agent_id].act(observation, explore=True)
                for agent_id, observation in observations.items()
            }
            next_observations, rewards, dones, infos = env.step(actions)

            active_agent_ids = observations.keys() & next_observations.keys()
            loss_outputs = {
                agent_id: agents[agent_id].step(
                    state=observations[agent_id],
                    action=actions[agent_id],
                    reward=rewards[agent_id],
                    next_state=next_observations[agent_id],
                    done=dones[agent_id],
                    info=infos[agent_id],
                )
                for agent_id in active_agent_ids
            }

            episode.record_step(
                agent_ids_to_record=active_agent_ids,
                infos=infos,
                rewards=rewards,
                total_step=total_step,
                loss_outputs=loss_outputs,
            )

            total_step += 1
            observations = next_observations

    # Wait on the remaining evaluations to finish.
    while collect_evaluations(evaluation_task_ids):
        time.sleep(0.1)

    env.close()
Example #12
def train(
    scenario_info,
    num_episodes,
    policy_classes,
    max_episode_steps,
    max_steps,
    eval_info,
    timestep_sec,
    headless,
    seed,
    log_dir,
    policy_ids=None,
):
    torch.set_num_threads(1)
    total_step = 0
    finished = False
    evaluation_task_ids = dict()

    agent_ids, agent_classes, agent_specs, agents, etag = build_agents(
        policy_classes, policy_ids, max_episode_steps)

    # Create the environment.
    env = gym.make(
        "ultra.env:ultra-v0",
        agent_specs=agent_specs,
        scenario_info=scenario_info,
        headless=headless,
        timestep_sec=timestep_sec,
        seed=seed,
    )

    for episode in episodes(num_episodes, etag=etag, log_dir=log_dir):

        # Reset the environment and retrieve the initial observations.
        observations = env.reset()
        dones = {"__all__": False}
        infos = None
        episode.reset()

        experiment_dir = episode.experiment_dir
        # Name of agent metadata pickle file
        filename = "agent_metadata.pkl"
        if not os.path.exists(os.path.join(experiment_dir, filename)):
            _save_agent_metadata(
                experiment_dir,
                filename,
                agent_ids,
                agent_classes,
                agent_specs,
            )

        evaluation_check(
            agents=agents,
            agent_ids=agent_ids,
            policy_classes=agent_classes,
            episode=episode,
            log_dir=log_dir,
            max_episode_steps=max_episode_steps,
            evaluation_task_ids=evaluation_task_ids,
            **eval_info,
            **env.info,
        )

        collect_evaluations(evaluation_task_ids=evaluation_task_ids)

        while not dones["__all__"]:
            # Break if any agent's step count is max_steps (default 1000000) or greater.
            if any([
                    episode.get_itr(agent_id) >= max_steps
                    for agent_id in agents
            ]):
                finished = True
                break
            # Request and perform actions on each agent that received an observation.
            actions = {
                agent_id: agents[agent_id].act(observation, explore=True)
                for agent_id, observation in observations.items()
            }
            next_observations, rewards, dones, infos = env.step(actions)

            # Active agents are those that receive observations in this step and the next
            # step. Step each active agent (obtaining their network loss if applicable).
            active_agent_ids = observations.keys() & next_observations.keys()
            loss_outputs = {
                agent_id: agents[agent_id].step(
                    state=observations[agent_id],
                    action=actions[agent_id],
                    reward=rewards[agent_id],
                    next_state=next_observations[agent_id],
                    done=dones[agent_id],
                    info=infos[agent_id],
                )
                for agent_id in active_agent_ids
            }

            # Record the data from this episode.
            episode.record_step(
                agent_ids_to_record=active_agent_ids,
                infos=infos,
                rewards=rewards,
                total_step=total_step,
                loss_outputs=loss_outputs,
            )

            # Update variables for the next step.
            total_step += 1
            observations = next_observations

        episode.record_episode()
        episode.record_tensorboard(recording_step=episode.index)

        if finished:
            break

    # Wait on the remaining evaluations to finish.
    while collect_evaluations(evaluation_task_ids):
        time.sleep(0.1)

    env.close()
Example #13
        raise "Path to model is invalid"

    if not os.listdir(args.models):
        raise "No models to evaluate"

    sorted_models = sorted(
        glob.glob(f"{args.models}/*"), key=lambda x: int(x.split("/")[-1])
    )
    base_dir = os.path.dirname(__file__)
    pool_path = os.path.join(base_dir, "agent_pool.json")

    ray.init()
    try:
        agent_id = "AGENT_008"
        for episode in episodes(
            len(sorted_models), etag=policy_class, log_dir=args.log_dir
        ):
            model = sorted_models[episode.index]
            print("model: ", model)
            episode_count = model.split("/")[-1]
            episode.eval_mode()
            episode.info[episode.active_tag] = ray.get(
                [
                    evaluate.remote(
                        experiment_dir=args.experiment_dir,
                        agent_id=agent_id,
                        policy_class=policy_class,
                        seed=episode.eval_count,
                        itr_count=0,
                        checkpoint_dir=model,
                        scenario_info=(args.task, args.level),
Example #14
        len(checkpoint_directory) == number_of_checkpoints
        for checkpoint_directory in directories_iterator
    ), "Not all agents have the same number of checkpoints saved"

    # Define an 'etag' for this experiment's data directory based off policy_classes.
    # E.g. From a {"000": "ultra.baselines.dqn:dqn-v0", "001": "ultra.baselines.ppo:ppo-v0"]
    # policy_classes dict, transform it to an etag of "dqn-v0:ppo-v0-evaluation".
    etag = (":".join(
        [policy_classes[agent_id].split(":")[-1]
         for agent_id in agent_ids]) + "-evaluation")

    ray.init()
    try:
        for episode in episodes(
                number_of_checkpoints,
                etag=etag,
                log_dir=args.log_dir,
        ):
            # Obtain a checkpoint directory for each agent.
            current_checkpoint_directories = {
                agent_id: agent_directories[episode.index]
                for agent_id, agent_directories in
                agent_checkpoint_directories.items()
            }
            episode.eval_mode()
            episode.info[episode.active_tag] = ray.get([
                evaluate.remote(
                    experiment_dir=args.experiment_dir,
                    agent_ids=agent_ids,
                    policy_classes=policy_classes,
                    seed=episode.eval_count,
Example #15
File: tune.py  Project: valaxkong/SMARTS
def tune_train(
    config,
    scenario_info,
    num_episodes,
    policy_classes,
    max_episode_steps,
    save_rate,
    timestep_sec,
    headless,
    seed,
    log_dir,
    metric,
):
    torch.set_num_threads(1)
    total_step = 0
    finished = False

    assert len(
        policy_classes) == 1, "Can only tune with single agent experiments."

    # Make agent_ids in the form of 000, 001, ..., 010, 011, ..., 999, 1000, ...
    agent_ids = [
        "0" * max(0, 3 - len(str(i))) + str(i)
        for i in range(len(policy_classes))
    ]
    # Assign the policy classes to their associated ID.
    agent_classes = {
        agent_id: policy_class
        for agent_id, policy_class in zip(agent_ids, policy_classes)
    }
    # Create the agent specifications matched with their associated ID.
    agent_specs = {
        agent_id: make(
            locator=policy_class,
            agent_params=config,
            max_episode_steps=max_episode_steps,
        )
        for agent_id, policy_class in agent_classes.items()
    }
    # Create the agents matched with their associated ID.
    agents = {
        agent_id: agent_spec.build_agent()
        for agent_id, agent_spec in agent_specs.items()
    }

    # Create the environment.
    env = gym.make(
        "ultra.env:ultra-v0",
        agent_specs=agent_specs,
        scenario_info=scenario_info,
        headless=headless,
        timestep_sec=timestep_sec,
        seed=seed,
    )

    # Define an 'etag' for this experiment's data directory based off policy_classes.
    # E.g. From a ["ultra.baselines.dqn:dqn-v0", "ultra.baselines.ppo:ppo-v0"]
    # policy_classes list, transform it to an etag of "dqn-v0:ppo-v0".
    etag = ":".join(
        [policy_class.split(":")[-1] for policy_class in policy_classes])

    for episode in episodes(num_episodes, etag=etag, log_dir=log_dir):
        # Reset the environment and retrieve the initial observations.
        observations = env.reset()
        dones = {"__all__": False}
        infos = None
        episode.reset()
        experiment_dir = episode.experiment_dir

        # Save relevant agent metadata.
        if not os.path.exists(f"{experiment_dir}/agent_metadata.pkl"):
            if not os.path.exists(experiment_dir):
                os.makedirs(experiment_dir)
            with open(f"{experiment_dir}/agent_metadata.pkl",
                      "wb") as metadata_file:
                dill.dump(
                    {
                        "agent_ids": agent_ids,
                        "agent_classes": agent_classes,
                        "agent_specs": agent_specs,
                    },
                    metadata_file,
                    pickle.HIGHEST_PROTOCOL,
                )

        while not dones["__all__"]:
            # Break if any agent's step count is 1000000 or greater.
            if any(
                [episode.get_itr(agent_id) >= 1000000 for agent_id in agents]):
                finished = True
                break

            # Request and perform actions on each agent that received an observation.
            actions = {
                agent_id: agents[agent_id].act(observation, explore=True)
                for agent_id, observation in observations.items()
            }
            next_observations, rewards, dones, infos = env.step(actions)

            # Active agents are those that receive observations in this step and the next
            # step. Step each active agent (obtaining their network loss if applicable).
            active_agent_ids = observations.keys() & next_observations.keys()
            loss_outputs = {
                agent_id: agents[agent_id].step(
                    state=observations[agent_id],
                    action=actions[agent_id],
                    reward=rewards[agent_id],
                    next_state=next_observations[agent_id],
                    done=dones[agent_id],
                    info=infos[agent_id],
                )
                for agent_id in active_agent_ids
            }

            # Record the data from this episode.
            episode.record_step(
                agent_ids_to_record=active_agent_ids,
                infos=infos,
                rewards=rewards,
                total_step=total_step,
                loss_outputs=loss_outputs,
            )

            # Update variables for the next step.
            total_step += 1
            observations = next_observations

        # Normalize the data and record this episode on tensorboard.
        episode.record_episode()
        episode.record_tensorboard(recording_step=episode.index)

        # Save the agent if we have reached its save rate.
        if (episode.index + 1) % save_rate == 0:
            for agent_id in agent_ids:
                checkpoint_directory = episode.checkpoint_dir(
                    agent_id, episode.index)
                agents[agent_id].save(checkpoint_directory)

        # Average the metric over the number of agents (1 agent).
        tune_value = sum([
            episode.info[episode.active_tag][agent_id].data[metric]
            for agent_id in agent_ids
        ]) / len(agent_ids)
        tune.report(**{metric: tune_value})

        if finished:
            break

    env.close()
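
A sketch of how tune_train() might be launched with Ray Tune; the search space and fixed arguments below are illustrative assumptions, not taken from the project's tune.py:

    from ray import tune

    # Hypothetical launch; Tune samples `config` (the hyperparameters passed to the
    # agent spec) and with_parameters supplies the remaining fixed arguments.
    analysis = tune.run(
        tune.with_parameters(
            tune_train,
            scenario_info=("00", "easy"),
            num_episodes=100,
            policy_classes=["ultra.baselines.sac:sac-v0"],
            max_episode_steps=200,
            save_rate=10,
            timestep_sec=0.1,
            headless=True,
            seed=2,
            log_dir="logs",
            metric="episode_reward",
        ),
        config={"lr": tune.loguniform(1e-4, 1e-2)},
        metric="episode_reward",
        mode="max",
    )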