Пример #1
0
    def make_env(process_idx, test):
        """Build a (possibly wrapped) gym env for one worker process."""
        env = gym.make(args.env)
        # Strip the TimeLimit wrapper so the episode length is not capped here
        assert isinstance(env, gym.wrappers.TimeLimit)
        env = env.env
        # Derive distinct seeds for train vs. test from the per-process seed
        worker_seed = int(process_seeds[process_idx])
        if test:
            env_seed = 2**32 - 1 - worker_seed
        else:
            env_seed = worker_seed
        env.seed(env_seed)

        if isinstance(env.observation_space, Box):
            # The model works in float32, so cast observations accordingly
            env = chainerrl.wrappers.CastObservationToFloat32(env)
        else:
            # Non-Box observations: rebuild as a DeepMind-preprocessed Atari
            # env. NOTE(review): this discards the env seeded above and the
            # seed is not re-applied to the new env — confirm that is intended.
            env = atari_wrappers.wrap_deepmind(
                atari_wrappers.make_atari(args.env, max_frames=None),
                episode_life=not test,
                clip_rewards=not test)

        if isinstance(env.action_space, Box):
            # Rescale continuous actions into [-1, 1]^n
            env = wrappers.NormalizeActionSpace(env)
        if args.monitor:
            env = gym.wrappers.Monitor(env, args.outdir)
        if args.render:
            env = chainerrl.wrappers.Render(env)
        return env
Пример #2
0
 def make_env_check():
     """Create a DeepMind-wrapped Atari env seeded from args.seed."""
     env_seed = args.seed
     raw = atari_wrappers.make_atari(args.env, max_frames=args.max_frames)
     env = atari_wrappers.wrap_deepmind(raw,
                                        episode_life=True,
                                        clip_rewards=True)
     env.seed(int(env_seed))
     return env
Пример #3
0
def make_env():
    """Build an Atari env with evaluation-style wrapping and rendering."""
    raw = atari_wrappers.make_atari(env_name)
    env = atari_wrappers.wrap_deepmind(raw,
                                       episode_life=False,
                                       clip_rewards=False)
    env.seed(seed)
    # Enable rendering via the in-place env modifier
    misc.env_modifiers.make_rendered(env)
    return env
Пример #4
0
 def make_env(test):
     """Create the Atari env, seeded differently for train vs. test."""
     if test:
         env_seed = test_seed
     else:
         env_seed = train_seed
     env = atari_wrappers.wrap_deepmind(
         atari_wrappers.make_atari(args.env),
         episode_life=not test,
         clip_rewards=not test)
     env.seed(int(env_seed))
     if args.monitor:
         # Record evaluation vs. training episodes under separate modes
         env = gym.wrappers.Monitor(
             env, args.outdir, mode='evaluation' if test else 'training')
     if args.render:
         env = chainerrl.wrappers.Render(env)
     return env
Пример #5
0
 def make_env():
     """Build an evaluation Atari env with epsilon-random actions."""
     env = atari_wrappers.wrap_deepmind(
         atari_wrappers.make_atari(args.env, max_frames=None),
         episode_life=False,
         clip_rewards=False)
     env.seed(int(args.seed))
     # Take a random action with probability 0.01, mimicking epsilon-greedy
     env = chainerrl.wrappers.RandomizeAction(env, 0.01)
     if args.monitor:
         env = gym.wrappers.Monitor(env, args.outdir, mode='evaluation')
     if args.render:
         env = chainerrl.wrappers.Render(env)
     return env
Пример #6
0
 def make_env(process_idx, test):
     """Create a per-process Atari env; test envs get mirrored seeds."""
     base_seed = process_seeds[process_idx]
     # Reflect the seed about 2**31 - 1 so train/test seeds never collide
     if test:
         env_seed = 2**31 - 1 - base_seed
     else:
         env_seed = base_seed
     env = atari_wrappers.wrap_deepmind(
         atari_wrappers.make_atari(args.env, max_frames=args.max_frames),
         episode_life=not test,
         clip_rewards=not test)
     env.seed(int(env_seed))
     if args.monitor:
         env = gym.wrappers.Monitor(
             env, args.outdir, mode='evaluation' if test else 'training')
     if args.render:
         env = chainerrl.wrappers.Render(env)
     return env
Пример #7
0
 def make_env(test):
     """Build the Atari env; evaluation adds epsilon-random actions."""
     if test:
         env_seed = test_seed
     else:
         env_seed = train_seed
     env = atari_wrappers.wrap_deepmind(
         atari_wrappers.make_atari(args.env, max_frames=None),
         episode_life=not test,
         clip_rewards=not test)
     env.seed(int(env_seed))
     if test:
         # Keep a 5% random-action rate during evaluation as well
         env = chainerrl.wrappers.RandomizeAction(env, 0.05)
     if args.monitor:
         env = chainerrl.wrappers.Monitor(
             env, args.outdir, mode='evaluation' if test else 'training')
     if args.render:
         env = chainerrl.wrappers.Render(env)
     return env
Пример #8
0
 def make_env(idx, test):
     """Create env #idx; test envs use complementary seeds and eval epsilon."""
     base_seed = int(process_seeds[idx])
     if test:
         env_seed = 2**32 - 1 - base_seed
     else:
         env_seed = base_seed
     env = atari_wrappers.wrap_deepmind(
         atari_wrappers.make_atari(args.env),
         episode_life=not test,
         clip_rewards=not test)
     if test:
         # Evaluation also takes random actions with prob. args.eval_epsilon
         env = chainerrl.wrappers.RandomizeAction(env, args.eval_epsilon)
     # Seed is applied after wrapping, matching the original statement order
     env.seed(env_seed)
     if args.monitor:
         env = gym.wrappers.Monitor(
             env, args.outdir, mode='evaluation' if test else 'training')
     if args.render:
         env = chainerrl.wrappers.Render(env)
     return env
Пример #9
0
def make_env():
    """Build a DeepMind-wrapped Atari env with eval-style settings."""
    raw = atari_wrappers.make_atari(env_name)
    env = atari_wrappers.wrap_deepmind(raw,
                                       episode_life=False,
                                       clip_rewards=False)
    env.seed(seed)
    return env