def __init__(self,
                 verbose=0,
                 save_log_flag=False,
                 log_num=None,
                 do_proc_simulation=True,
                 custom_num_proc=0):
        """Build the module-selection environment around a pretrained SAC driver.

        Loads the saved hyperparameters and VAE, creates the wrapped donkey-car
        test environment, loads the SAC model, and instantiates one
        ``VAESACModule`` per selectable module (``module0`` .. ``module4``).

        Args:
            verbose: verbosity level (0 = silent).
            save_log_flag: when True (and ``log_num`` is given), open a log file.
            log_num: identifier passed to ``_init_log_to_write``; ignored if None.
            do_proc_simulation: whether to simulate per-module processing.
            custom_num_proc: number of processes to use (0 = default).
        """
        super(ModuleSelectEnv, self).__init__()
        self.verbose = verbose
        self.save_log_flag = save_log_flag
        # Only open a log file when both the flag and a log id are provided.
        if self.save_log_flag and log_num is not None:
            self._init_log_to_write(log_num)
        self.do_proc_simulation = do_proc_simulation
        self.num_proc = custom_num_proc

        stats_path = "modules/logs/sac/DonkeyVae-v0-level-0_6/DonkeyVae-v0-level-0"
        hyperparams, stats_path = get_saved_hyperparams(stats_path,
                                                        norm_reward=False)
        hyperparams['vae_path'] = "modules/logs/vae-level-0-dim-32.pkl"
        self.inner_env = create_test_env(stats_path=stats_path,
                                         seed=0,
                                         log_dir="modules/logs",
                                         hyperparams=hyperparams)

        model_path = "modules/logs/sac/DonkeyVae-v0-level-0_6/DonkeyVae-v0-level-0.pkl"
        self.model = ALGOS["sac"].load(model_path)

        self.num_modules = 5
        # Create module0..module4 in a loop instead of five copy-pasted
        # assignments. setattr keeps the attribute names identical, so any
        # caller that accesses self.module0 .. self.module4 still works.
        for i in range(self.num_modules):
            setattr(self, "module{}".format(i),
                    VAESACModule(self.inner_env, self.model,
                                 delay_weights[i], static_terms[i]))

        # NOTE(review): self.continuous is read here but never assigned in this
        # __init__ — presumably a class attribute or set by a subclass; confirm.
        if self.continuous:
            # the probability of selection of end-to-end module
            self.action_space = spaces.Box(low=-1,
                                           high=1,
                                           shape=(self.num_modules, ))
        else:
            # lane detection, end-to-end
            self.action_space = spaces.Discrete(self.num_modules)

        # Single-scalar observation.
        # NOTE(review): the original comment claimed "0 to 11, total 12 dim",
        # which contradicts shape=(1,) — verify the intended observation size.
        self.observation_space = spaces.Box(
            low=np.finfo(np.float32).min,
            high=np.finfo(np.float32).max,
            shape=(1, ),
            dtype=np.float32)
# Example #2 (scraped snippet separator)
    def __init__(self):
        set_global_seeds(0)

        hyperparams, stats_path = get_saved_hyperparams(STATS_PATH,
                                                        norm_reward=False)
        hyperparams["vae_path"] = LEVEL_NAME.vae()
        self.env = create_test_env(
            stats_path=stats_path,
            seed=0,
            log_dir=None,
            hyperparams=hyperparams,
        )
        self.model = DDPG.load(LEVEL_NAME.model())
# Example #3 (scraped snippet separator)
    def __init__(self, service: AIExchangeService):
        set_global_seeds(0)

        hyperparams, stats_path = get_saved_hyperparams(STATS_PATH,
                                                        norm_reward=False)
        hyperparams["vae_path"] = LEVEL_NAME.vae()
        self.service = service
        self.simulation = Simulation(service)
        self.env = create_test_env(
            stats_path=stats_path,
            seed=0,
            log_dir=None,
            hyperparams=hyperparams,
            simulation=self.simulation,
        )
        self.model = DDPG.load(LEVEL_NAME.model())
# Example #4 (scraped snippet separator)
    # NOTE(review): this fragment begins mid-function — the enclosing `def`
    # and the definitions of `algo`, `env_id`, `model_path`, `log_path`,
    # `n_envs`, `seed`, `args` and `video_folder` lie outside this excerpt.
    # Fail fast if the requested checkpoint does not exist.
    found = os.path.isfile(model_path)
    if not found:
        raise ValueError(
            f"No model found for {algo} on {env_id}, path: {model_path}")

    # Restore the hyperparameters (and a possibly rewritten stats path)
    # saved alongside the trained agent.
    stats_path = os.path.join(log_path, env_id)
    hyperparams, stats_path = get_saved_hyperparams(stats_path)

    is_atari = ExperimentManager.is_atari(env_id)

    # Rebuild the evaluation environment with the saved hyperparameters.
    env = create_test_env(
        env_id,
        n_envs=n_envs,
        stats_path=stats_path,
        seed=seed,
        log_dir=None,
        should_render=not args.no_render,
        hyperparams=hyperparams,
    )

    model = ALGOS[algo].load(model_path, env=env)

    obs = env.reset()

    # Default the video output directory to <run dir>/videos.
    if video_folder is None:
        video_folder = os.path.join(log_path, "videos")

    # Note: apparently it renders by default
    env = VecVideoRecorder(
        env,
    # NOTE(review): the snippet is truncated here, mid-call — the remaining
    # VecVideoRecorder arguments were lost in the scrape.
    best_path = '_best'
# NOTE(review): the indented assignment above was spliced in by the scrape
# (it sits inside the truncated call from the previous fragment); the flat
# script below comes from an enjoy-style evaluation script whose argument
# parsing (`args`, `algo`, `log_path`, `ENV_ID`) is outside this excerpt.

model_path = os.path.join(log_path, "{}{}.pkl".format(ENV_ID, best_path))

# Sanity checks: the run directory and the checkpoint must both exist.
assert os.path.isdir(log_path), "The {} folder was not found".format(log_path)
assert os.path.isfile(model_path), "No model found for {} on {}, path: {}".format(algo, ENV_ID, model_path)

set_global_seeds(args.seed)

# Restore the saved hyperparameters and point them at the requested VAE.
stats_path = os.path.join(log_path, ENV_ID)
hyperparams, stats_path = get_saved_hyperparams(stats_path, norm_reward=args.norm_reward)
hyperparams['vae_path'] = args.vae_path

# An empty --reward-log argument means "do not write episode rewards to disk".
log_dir = args.reward_log if args.reward_log != '' else None

env = create_test_env(stats_path=stats_path, seed=args.seed, log_dir=log_dir,
                      hyperparams=hyperparams)

model = ALGOS[algo].load(model_path)

obs = env.reset()

# Force deterministic for SAC and DDPG
deterministic = args.deterministic or algo in ['ddpg', 'sac']
if args.verbose >= 1:
    print("Deterministic actions: {}".format(deterministic))

running_reward = 0.0
ep_len = 0
for _ in range(args.n_timesteps):
    action, _ = model.predict(obs, deterministic=deterministic)
    # Clip Action to avoid out of bound errors
    # NOTE(review): snippet truncated here — the loop body continues outside
    # this excerpt.
def record_video(env_id: str = "CartPole-v1",
                 algo: str = "ppo",
                 folder: str = "rl-trained-agents",
                 video_folder: str = "logs/videos/",
                 video_length: int = 1000,
                 n_envs: int = 1,
                 deterministic: bool = False,
                 seed: int = 0,
                 no_render: bool = False,
                 exp_id: int = 0):
    """Roll out a trained agent and record a video of the episode.

    Loads the checkpoint for ``algo`` on ``env_id`` from ``folder``, rebuilds
    the evaluation environment with the saved hyperparameters, wraps it in a
    ``VecVideoRecorder``, and steps the policy for ``video_length`` frames.

    Args:
        env_id: gym environment id to load.
        algo: algorithm key used to look up the model class in ``ALGOS``.
        folder: root directory of trained agents.
        video_folder: where the recorded video is written.
        video_length: number of environment steps to record.
        n_envs: number of parallel environments.
        deterministic: use deterministic policy actions.
        seed: environment seed.
        no_render: disable on-screen rendering.
        exp_id: experiment run id; 0 means "use the latest run".
    """
    # exp_id == 0 is a sentinel for "most recent run of this algo/env pair".
    if exp_id == 0:
        exp_id = get_latest_run_id(os.path.join(folder, algo), env_id)
        print(f"Loading latest experiment, id={exp_id}")
    # Sanity checks
    run_dir = (os.path.join(folder, algo, f"{env_id}_{exp_id}")
               if exp_id > 0
               else os.path.join(folder, algo))

    checkpoint = os.path.join(run_dir, f"{env_id}.zip")

    # Restore the hyperparameters (and possibly rewritten stats path) saved
    # alongside the trained agent.
    hyperparams, resolved_stats = get_saved_hyperparams(
        os.path.join(run_dir, env_id))

    is_atari = "NoFrameskip" in env_id

    env = create_test_env(
        env_id,
        n_envs=n_envs,
        stats_path=resolved_stats,
        seed=seed,
        log_dir=None,
        should_render=not no_render,
        hyperparams=hyperparams,
    )

    model = ALGOS[algo].load(checkpoint)

    obs = env.reset()

    # Note: apparently it renders by default
    env = VecVideoRecorder(
        env,
        video_folder,
        record_video_trigger=lambda x: x == 0,
        video_length=video_length,
        name_prefix=f"{algo}-{env_id}",
    )

    env.reset()
    # +1 step so the recorder sees a full video_length worth of frames.
    for _ in range(video_length + 1):
        action, _ = model.predict(obs, deterministic=deterministic)
        obs, _, _, _ = env.step(action)

    # Workaround for https://github.com/openai/gym/issues/893
    if n_envs != 1 or "Bullet" in env_id or is_atari:
        # SubprocVecEnv
        env.close()
    else:
        # Peel off every VecEnvWrapper until we reach the base vec-env.
        inner = env.venv
        while isinstance(inner, VecEnvWrapper):
            inner = inner.venv
        if isinstance(inner, DummyVecEnv):
            inner.envs[0].env.close()
        else:
            inner.close()