Пример #1
0
def random_episodes(env_ctor, num_episodes, output_dir=None):
    """Run episodes with randomly sampled actions and return their data.

    Args:
        env_ctor: Zero-argument callable that builds the environment.
        num_episodes: Number of episodes to run until ``done`` is reported.
        output_dir: Forwarded unchanged to ``wrappers.CollectGymDataset``
            (presumably the directory episodes are written to; confirm
            against the wrapper).

    Returns:
        A list with one ``info['episode']`` entry per episode, read from the
        ``info`` dict of the step that ended the episode.
    """
    env = wrappers.CollectGymDataset(env_ctor(), output_dir)
    episodes = []
    try:
        for _ in range(num_episodes):
            done = False
            env.reset()
            while not done:
                # Random policy: the observation is ignored on purpose.
                action = env.action_space.sample()
                _, _, done, info = env.step(action)
            episodes.append(info['episode'])
    finally:
        # The environment is created in this function, so it is released
        # here too, even when a rollout raises. Closing is best effort:
        # environments without a close() method are tolerated.
        try:
            env.close()
        except AttributeError:
            pass
    return episodes
Пример #2
0
def random_episodes(env_ctor, num_episodes, outdir=None):
    """Run episodes with randomly sampled actions.

    Args:
        env_ctor: Zero-argument callable that builds the environment.
        num_episodes: Number of episodes to run until ``done`` is reported.
        outdir: Forwarded unchanged to ``wrappers.CollectGymDataset``
            (presumably the directory episodes are written to; confirm
            against the wrapper).

    Returns:
        When ``outdir`` is None, a list with one ``info['episode']`` entry per
        episode. When ``outdir`` is given, an empty list: episodes are not
        accumulated in memory in that case.
    """
    env = wrappers.CollectGymDataset(env_ctor(), outdir)
    # Always start from a list. The previous code bound ``episodes`` to None
    # exactly in the case (outdir is None) where it later called .append(),
    # so the default call crashed with AttributeError.
    episodes = []
    keep_in_memory = outdir is None
    try:
        for _ in range(num_episodes):
            done = False
            env.reset()
            while not done:
                # Random policy: the observation is ignored on purpose.
                action = env.action_space.sample()
                _, _, done, info = env.step(action)
            if keep_in_memory:
                episodes.append(info['episode'])
    finally:
        # Release the environment even when a rollout raises. Closing is
        # best effort: environments without a close() method are tolerated.
        try:
            env.close()
        except AttributeError:
            pass
    return episodes
Пример #3
0
def random_episodes(env_ctor, num_episodes, output_dir=None):
    """Run episodes with randomly sampled actions and return their data.

    Args:
        env_ctor: Zero-argument callable that builds the environment.
            NOTE(review): the original author noted that this yields an
            ``ExternalProcess`` object; not verifiable from this function.
        num_episodes: Number of episodes to run until ``done`` is reported.
        output_dir: Forwarded unchanged to ``wrappers.CollectGymDataset``.

    Returns:
        A list with one ``info['episode']`` entry per episode, read from the
        ``info`` dict of the step that ended the episode.
    """
    env = wrappers.CollectGymDataset(env_ctor(), output_dir)
    episodes = []
    try:
        for _ in range(num_episodes):
            done = False
            env.reset()
            while not done:
                # Random policy: the observation is ignored on purpose.
                action = env.action_space.sample()
                _, _, done, info = env.step(action)
            # Per the original author's note: once done is True, info holds
            # the 'episode' data, which is also written to a file (e.g.
            # "~/planet/log_debug/00001/test_episodes"). Presumably the
            # wrapper does the writing; confirm against CollectGymDataset.
            episodes.append(info['episode'])
    finally:
        # The environment is created in this function, so it is released
        # here too, even when a rollout raises. Closing is best effort:
        # environments without a close() method are tolerated.
        try:
            env.close()
        except AttributeError:
            pass
    return episodes