Example #1
import gym

from smarts.core.controllers import ActionSpaceType
from ultra.baselines.agent_spec import BaselineAgentSpec
from ultra.baselines.sac.sac.policy import SACPolicy


def prepare_test_env_agent(headless=True):
    # `AGENT_ID` and `seed` are module-level constants defined elsewhere in
    # the original module; they are not shown in this snippet.
    timestep_sec = 0.1
    spec = BaselineAgentSpec(
        action_type=ActionSpaceType.Continuous,
        policy_class=SACPolicy,
        max_episode_steps=2,
    )
    env = gym.make(
        "ultra.env:ultra-v0",
        agent_specs={AGENT_ID: spec},
        scenario_info=("00", "eval_test"),
        headless=headless,
        timestep_sec=timestep_sec,
        seed=seed,
    )
    agent = spec.build_agent()
    return agent, env, spec
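
# Hypothetical usage of the helper above (not part of the original snippet);
# the act/step call pattern mirrors Example #2 below.
agent, env, spec = prepare_test_env_agent(headless=True)
observations = env.reset()
action = agent.act(observations[AGENT_ID], explore=True)
observations, rewards, dones, infos = env.step({AGENT_ID: action})
env.close()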
Example #2
# This snippet relies on names imported elsewhere in the original module
# (os, pickle, time, dill, gym, BaselineAgentSpec, SACPolicy, episodes,
# evaluation_check, collect_evaluations) and on a module-level `seed`
# constant; none of these are shown here.
def run_experiment(scenario_info, num_agents, log_dir, headless=True):
    # Zero-pad agent ids to three characters: "000", "001", ...
    agent_ids = [
        "0" * max(0, 3 - len(str(i))) + str(i) for i in range(num_agents)
    ]
    agent_classes = {
        agent_id: "ultra.baselines.sac:sac-v0"
        for agent_id in agent_ids
    }
    agent_specs = {
        agent_id: BaselineAgentSpec(policy_class=SACPolicy,
                                    max_episode_steps=2)
        for agent_id in agent_ids
    }

    env = gym.make(
        "ultra.env:ultra-v0",
        agent_specs=agent_specs,
        scenario_info=scenario_info,
        headless=headless,
        timestep_sec=0.1,
        seed=seed,
    )

    agents = {
        agent_id: agent_spec.build_agent()
        for agent_id, agent_spec in agent_specs.items()
    }

    total_step = 0
    etag = ":".join(
        [policy_class.split(":")[-1] for policy_class in agent_classes])
    evaluation_task_ids = dict()

    for episode in episodes(1, etag=etag, log_dir=log_dir):
        observations = env.reset()
        dones = {"__all__": False}
        infos = None
        episode.reset()
        experiment_dir = episode.experiment_dir

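        # Persist the agent metadata (ids, locators, and specs) to the
        # experiment directory once, creating the directory if needed.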
        if not os.path.exists(f"{experiment_dir}/agent_metadata.pkl"):
            if not os.path.exists(experiment_dir):
                os.makedirs(experiment_dir)
            with open(f"{experiment_dir}/agent_metadata.pkl",
                      "wb") as metadata_file:
                dill.dump(
                    {
                        "agent_ids": agent_ids,
                        "agent_classes": agent_classes,
                        "agent_specs": agent_specs,
                    },
                    metadata_file,
                    pickle.HIGHEST_PROTOCOL,
                )

        while not dones["__all__"]:
            evaluation_check(
                agents=agents,
                agent_ids=agent_ids,
                episode=episode,
                eval_rate=10,
                eval_episodes=1,
                max_episode_steps=2,
                policy_classes=agent_classes,
                scenario_info=scenario_info,
                evaluation_task_ids=evaluation_task_ids,
                timestep_sec=0.1,
                headless=True,
                log_dir=log_dir,
            )
            collect_evaluations(evaluation_task_ids=evaluation_task_ids)

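            # Query each agent's policy for an action (exploration enabled),
            # then advance the environment one step.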
            actions = {
                agent_id: agents[agent_id].act(observation, explore=True)
                for agent_id, observation in observations.items()
            }
            next_observations, rewards, dones, infos = env.step(actions)

            active_agent_ids = observations.keys() & next_observations.keys()
            loss_outputs = {
                agent_id: agents[agent_id].step(
                    state=observations[agent_id],
                    action=actions[agent_id],
                    reward=rewards[agent_id],
                    next_state=next_observations[agent_id],
                    done=dones[agent_id],
                    info=infos[agent_id],
                )
                for agent_id in active_agent_ids
            }

            episode.record_step(
                agent_ids_to_record=active_agent_ids,
                infos=infos,
                rewards=rewards,
                total_step=total_step,
                loss_outputs=loss_outputs,
            )

            total_step += 1
            observations = next_observations

    # Wait on the remaining evaluations to finish.
    while collect_evaluations(evaluation_task_ids):
        time.sleep(0.1)

    env.close()
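
# A hypothetical invocation of the runner above (not part of the original
# snippet); the scenario tuple mirrors Example #1, while num_agents and
# log_dir are made-up values.
run_experiment(
    scenario_info=("00", "eval_test"),
    num_agents=2,
    log_dir="logs",
    headless=True,
)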
Example #3
from smarts.zoo.registry import register
from .sac.sac.policy import SACPolicy
from .ppo.ppo.policy import PPOPolicy
from .dqn.dqn.policy import DQNPolicy
from .ddpg.ddpg.policy import TD3Policy
from .bdqn.bdqn.policy import BehavioralDQNPolicy
from smarts.core.controllers import ActionSpaceType
from ultra.baselines.agent_spec import BaselineAgentSpec

register(
    locator="sac-v0",
    entry_point=lambda **kwargs: BaselineAgentSpec(
        action_type=ActionSpaceType.Continuous,
        policy_class=SACPolicy,
        **kwargs
    ),
)
register(
    locator="ppo-v0",
    entry_point=lambda **kwargs: BaselineAgentSpec(
        action_type=ActionSpaceType.Continuous,
        policy_class=PPOPolicy,
        **kwargs
    ),
)
register(
    locator="ddpg-v0",
    entry_point=lambda **kwargs: BaselineAgentSpec(
        action_type=ActionSpaceType.Continuous,
        policy_class=TD3Policy,
        **kwargs
    ),
)
Example #4
from smarts.zoo.registry import register
from .sac.sac.policy import SACPolicy
from .ppo.ppo.policy import PPOPolicy
from .dqn.dqn.policy import DQNPolicy
from .td3.td3.policy import TD3Policy
from .bdqn.bdqn.policy import BehavioralDQNPolicy
from smarts.core.controllers import ActionSpaceType
from ultra.baselines.agent_spec import BaselineAgentSpec

register(
    locator="sac-v0",
    entry_point=lambda **kwargs: BaselineAgentSpec(policy_class=SACPolicy,
                                                   **kwargs),
)
register(
    locator="ppo-v0",
    entry_point=lambda **kwargs: BaselineAgentSpec(policy_class=PPOPolicy,
                                                   **kwargs),
)
register(
    locator="td3-v0",
    entry_point=lambda **kwargs: BaselineAgentSpec(policy_class=TD3Policy,
                                                   **kwargs),
)
register(
    locator="dqn-v0",
    entry_point=lambda **kwargs: BaselineAgentSpec(policy_class=DQNPolicy,
                                                   **kwargs),
)
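
# A minimal sketch of how a registered locator might be resolved (assumes
# smarts.zoo.registry.make, the counterpart to register above; the
# max_episode_steps value is only an example).
from smarts.zoo.registry import make

spec = make("ultra.baselines.sac:sac-v0", max_episode_steps=2)
agent = spec.build_agent()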