def make_env(env_id, env_type, mpi_rank=0, subrank=0, seed=None,
             reward_scale=1.0, gamestate=None, flatten_dict_observations=True,
             wrapper_kwargs=None, logger_dir=None):
    wrapper_kwargs = wrapper_kwargs or {}
    if env_type == 'atari':
        env = make_atari(env_id)
    elif env_type == 'retro':
        import retro
        gamestate = gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(game=env_id, max_episode_steps=10000,
                                        use_restricted_actions=retro.Actions.DISCRETE,
                                        state=gamestate)
    else:
        env = gym.make(env_id)

    if flatten_dict_observations and isinstance(env.observation_space, gym.spaces.Dict):
        keys = env.observation_space.spaces.keys()
        env = gym.wrappers.FlattenDictWrapper(env, dict_keys=list(keys))

    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger_dir and os.path.join(logger_dir, str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)

    if env_type == 'atari':
        env = wrap_deepmind(env, **wrapper_kwargs)
    elif env_type == 'retro':
        env = retro_wrappers.wrap_deepmind_retro(env, **wrapper_kwargs)

    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)

    return env
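For orientation, here is a minimal usage sketch of this helper, assuming the usual baselines-style dependencies (gym, make_atari, wrap_deepmind, Monitor, retro_wrappers) are importable and the Atari ROMs are installed; the env id and log directory are illustrative only:

# Hypothetical call: build one seeded, Monitor-wrapped, DeepMind-preprocessed Atari env.
env = make_env('PongNoFrameskip-v4', 'atari',
               mpi_rank=0, subrank=0, seed=42,
               wrapper_kwargs={'frame_stack': True},
               logger_dir='/tmp/run0')
obs = env.reset()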
Example #2
File: train.py  Project: oidelima/ppo
    def make_env(env_id, seed, rank, add_timestep, time_limit, evaluation):
        env = gym.make(env_id)
        is_atari = hasattr(gym.envs, "atari") and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        env.seed(seed + rank)
        obs_shape = env.observation_space.shape
        if add_timestep and len(
                obs_shape) == 1 and str(env).find("TimeLimit") > -1:
            env = AddTimestep(env)
        if is_atari and len(env.observation_space.shape) == 3:
            env = wrap_deepmind(env)

        # elif len(env.observation_space.shape) == 3:
        #     raise NotImplementedError(
        #         "CNN models work only for atari,\n"
        #         "please use a custom wrapper for a custom pixel input env.\n"
        #         "See wrap_deepmind for an example.")

        # If the observation is an image with 1 or 3 channels (H, W, C),
        # transpose it for PyTorch convolutions (a TransposeImage sketch follows this example)
        obs_shape = env.observation_space.shape
        if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
            env = TransposeImage(env)

        if time_limit is not None:
            env = TimeLimit(env, max_episode_steps=time_limit)

        return env
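TransposeImage is used above but not defined in this snippet; a minimal sketch of what such a wrapper typically looks like (an assumption about its behavior, not the project's actual implementation) permutes (H, W, C) observations to (C, H, W) for PyTorch convolutions:

import gym
import numpy as np

class TransposeImage(gym.ObservationWrapper):
    # Assumed behavior: move the channel axis first so images fit torch.nn.Conv2d.
    def __init__(self, env):
        super().__init__(env)
        obs_space = env.observation_space
        self.observation_space = gym.spaces.Box(
            low=obs_space.low.transpose(2, 0, 1),
            high=obs_space.high.transpose(2, 0, 1),
            dtype=obs_space.dtype)

    def observation(self, observation):
        return np.transpose(observation, (2, 0, 1))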
Example #3
def create_atari_env(env_id,
                     monitor_logdir=None,
                     wrappers='deepmind',
                     policy='rnn',
                     num_buffer_frames=4,
                     max_repeats=0,
                     **_):
    env = gym.make(env_id)

    # start monitor to record statistics and videos
    if monitor_logdir:
        env = Monitor(env, monitor_logdir, video_callable=False, resume=True)

    if wrappers == 'deepmind':
        from common.atari_wrappers import wrap_deepmind
        env = wrap_deepmind(env)
    elif wrappers == 'universe':
        from universe.wrappers import Vectorize, Unvectorize
        from common.universe_wrappers import AtariRescale42x42, DiagnosticsInfo
        env = Vectorize(env)
        env = AtariRescale42x42(env)
        env = DiagnosticsInfo(env)
        env = Unvectorize(env)

    if policy == 'cnn' and num_buffer_frames > 0:
        env = ObservationBuffer(env, num_buffer_frames)

    if max_repeats > 0:
        env = FrameskipWrapper(env, max_repeats)

    return env
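ObservationBuffer and FrameskipWrapper are project-specific wrappers not shown here; below is a minimal sketch of a frame-buffering wrapper of this kind, under the assumption that it stacks the last num_frames observations along the channel axis:

import gym
import numpy as np
from collections import deque

class ObservationBuffer(gym.Wrapper):
    # Assumed behavior: return the last `num_frames` observations concatenated channel-wise.
    def __init__(self, env, num_frames):
        super().__init__(env)
        self.num_frames = num_frames
        self.frames = deque(maxlen=num_frames)
        h, w, c = env.observation_space.shape
        self.observation_space = gym.spaces.Box(
            low=0, high=255, shape=(h, w, c * num_frames), dtype=np.uint8)

    def reset(self, **kwargs):
        obs = self.env.reset(**kwargs)
        for _ in range(self.num_frames):
            self.frames.append(obs)
        return np.concatenate(self.frames, axis=-1)

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        self.frames.append(obs)
        return np.concatenate(self.frames, axis=-1), reward, done, info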
Example #4
 def _thunk():
     env = make_atari(env_id)
     env.seed(seed + rank)
     env = Monitor(
         env,
         logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
     return wrap_deepmind(env, **wrapper_kwargs)
Example #5
def wrap_atari_dqn(env):
    """
    wrap the environment in atari wrappers for DQN
    :param env: (Gym Environment) the environment
    :return: (Gym Environment) the wrapped environment
    """
    from common.atari_wrappers import wrap_deepmind
    return wrap_deepmind(env, frame_stack=True, scale=False)
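A short usage sketch for this wrapper, assuming make_atari is available from the same common.atari_wrappers module and a NoFrameskip env id is used:

from common.atari_wrappers import make_atari

env = make_atari('BreakoutNoFrameskip-v4')  # noop reset + frame skipping
env = wrap_atari_dqn(env)                   # DeepMind preprocessing, 4-frame stack, no scaling
obs = env.reset()                           # stacked 84x84x4 observation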
Example #6
 def _thunk():
     env = gym.make(env_id)
     assert 'NoFrameskip' in env.spec.id
     env = NoopResetEnv(env, noop_max=30)
     env = MaxAndSkipEnv(env, skip=4)
     env.seed(seed + rank)
     env = Monitor(env)
     return wrap_deepmind(env)
Example #7
    def make_env(self, rank):
        env = make_atari(self.config.env_name)
        env.seed(self.config.seed + rank)
        gym.logger.setLevel(logger.WARN)
        env = wrap_deepmind(env)

        # wrap the env once more so the Monitor can record the total episode reward
        env = Monitor(env, rank)
        return env
Example #8
 def _thunk():
     env = gym.make(env_id)
     env.seed(seed + rank)
     env = bench.Monitor(
         env, os.path.join(log_dir, "{}.monitor.json".format(rank)))
     # Ugly hack to detect atari.
     if hasattr(env.env, 'env') and hasattr(env.env.env, 'ale'):
         env = wrap_deepmind(env)
         env = WrapPyTorch(env)
     return env
Example #9
        def _thunk():
            env = make_atari(env_id) if env_type == 'atari' else gym.make(
                env_id)
            env.seed(seed + 10000 * mpi_rank +
                     rank if seed is not None else None)
            env = Monitor(env,
                          logger.get_dir()
                          and os.path.join(logger.get_dir(),
                                           str(mpi_rank) + '.' + str(rank)),
                          allow_early_resets=True)

            if env_type == 'atari': return wrap_deepmind(env, **wrapper_kwargs)
            elif reward_scale != 1: return RewardScaler(env, reward_scale)
            else: return env
Example #10
def create_gvgai_environment(env_id):
    from common.atari_wrappers import wrap_deepmind, make_atari, ActionDirectionEnv
    initial_direction = {'gvgai-testgame1': 3, 'gvgai-testgame2': 3}
    logger.configure()
    game_name = env_id.split('-lvl')[0]
    does_need_action_direction = False

    # Environment creation
    env = make_atari(env_id)
    env = bench.Monitor(env, logger.get_dir())
    env = wrap_deepmind(env, episode_life=False, clip_rewards=False, frame_stack=False, scale=True)
    if game_name in initial_direction:
        print("We should model with action direction")
        env = ActionDirectionEnv(env, initial_direction=initial_direction[game_name])
        does_need_action_direction = True
    return env, does_need_action_direction, game_name
Example #11
def make_env(env_id,
             env_type,
             mpi_rank=0,
             subrank=0,
             seed=None,
             reward_scale=1.0,
             gamestate=None,
             flatten_dict_observations=True,
             wrapper_kwargs=None,
             logger_dir=None,
             cloth_cfg_path=None,
             render_path=None,
             start_state_path=None):
    """Daniel: make single instance of env, to be wrapped in VecEnv for parallelism.

    We need to have a special if case for the clothenv, which doesn't actually
    use `gym.make(...)` because we have a custom configuration.
    """
    wrapper_kwargs = wrapper_kwargs or {}

    if env_type == 'cloth':
        print("Env Type is Cloth")
        assert cloth_cfg_path is not None
        from gym_cloth.envs import ClothEnv
        env = ClothEnv(cloth_cfg_path,
                       subrank=subrank,
                       start_state_path=start_state_path)
        print('Created ClothEnv, seed {}, mpi_rank {}, subrank {}.'.format(
            seed, mpi_rank, subrank))
        print('start_state_path: {}'.format(start_state_path))
        # Daniel: render, but currently only works if we have one env, not a vec ...
        if render_path is not None:
            env.render(filepath=render_path)
    elif env_type == 'atari':
        env = make_atari(env_id)
    elif env_type == 'retro':
        import retro
        gamestate = gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(
            game=env_id,
            max_episode_steps=10000,
            use_restricted_actions=retro.Actions.DISCRETE,
            state=gamestate)
    else:
        print("USING WRONG COMMAND")
        env = gym.make(env_id)

    if flatten_dict_observations and isinstance(env.observation_space,
                                                gym.spaces.Dict):
        keys = env.observation_space.spaces.keys()
        env = gym.wrappers.FlattenDictWrapper(env, dict_keys=list(keys))

    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger_dir
                  and os.path.join(logger_dir,
                                   str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)

    if env_type == 'atari':
        env = wrap_deepmind(env, **wrapper_kwargs)
    elif env_type == 'retro':
        if 'frame_stack' not in wrapper_kwargs:
            wrapper_kwargs['frame_stack'] = 1
        env = retro_wrappers.wrap_deepmind_retro(env, **wrapper_kwargs)

    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)

    #Adi: Let's return the actual env for now instead of the wrapped version for simplicity.  Can change this back later.
    env = env.unwrapped

    return env
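As the docstring notes, this builds one env instance that is then wrapped in a VecEnv for parallelism; here is a minimal sketch of that outer step, assuming baselines' DummyVecEnv and a hypothetical cloth config path:

from baselines.common.vec_env.dummy_vec_env import DummyVecEnv

def make_thunk(subrank):
    # Each worker gets its own subrank so per-env seeds and log files differ.
    return lambda: make_env('cloth', env_type='cloth', subrank=subrank, seed=0,
                            cloth_cfg_path='cfg/demo.yaml')  # hypothetical path

venv = DummyVecEnv([make_thunk(i) for i in range(4)])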
Example #12
def wrap_train(env):
    from common.atari_wrappers import (wrap_deepmind, FrameStack)
    env = wrap_deepmind(env, clip_rewards=True)
    env = FrameStack(env, 4)
    return env
Example #13
def main(args):
    # mpi communicator.
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    # seed.
    workerseed = args.seed + 10000 * comm.Get_rank() if args.seed is not None else None
    if workerseed is not None:
        tc.manual_seed(workerseed % 2 ** 32)
        np.random.seed(workerseed % 2 ** 32)
        random.seed(workerseed % 2 ** 32)

    # logger.
    if rank == 0:
        logger.configure()
    else:
        logger.configure(format_strs=[])

    # env.
    env = make_atari(args.env_name)
    env.seed(workerseed)
    env = Monitor(env, logger.get_dir() and
              os.path.join(logger.get_dir(), str(rank)))
    print(f"frame_stacking: {args.frame_stacking}")
    env = wrap_deepmind(env, frame_stack=args.frame_stacking,
                        clip_rewards=(args.mode =='train'),
                        episode_life=(args.mode =='train'))  # See Mnih et al., 2015 -> Methods -> Training Details.
    env.seed(workerseed)

    # agent.
    agent = CnnPolicy(
        img_channels=env.observation_space.shape[-1],
        num_actions=env.action_space.n,
        kind=args.model_type)

    # optimizer and scheduler.
    max_grad_steps = args.optim_epochs * args.env_steps // (comm.Get_size() * args.optim_batchsize)

    optimizer = tc.optim.Adam(agent.parameters(), lr=args.optim_stepsize, eps=1e-5)
    scheduler = tc.optim.lr_scheduler.OneCycleLR(
        optimizer=optimizer, max_lr=args.optim_stepsize, total_steps=max_grad_steps,
        pct_start=0.0, anneal_strategy='linear', cycle_momentum=False,
        div_factor=1.0)

    # checkpoint.
    if rank == 0:
        try:
            state_dict = tc.load(os.path.join(args.checkpoint_dir, args.model_name, 'model.pth'))
            agent.load_state_dict(state_dict)
            print(f"Continuing from checkpoint found at {os.path.join(args.checkpoint_dir, args.model_name, 'model.pth')}")
        except FileNotFoundError:
            print("Bad checkpoint or none on process 0. Continuing from scratch.")

    # sync.
    with tc.no_grad():
        for p in agent.parameters():
            p_data = p.data.numpy()
            comm.Bcast(p_data, root=0)
            p.data.copy_(tc.tensor(p_data).float())

    # operations.
    if args.mode == 'train':
        learn(env=env, agent=agent, optimizer=optimizer, scheduler=scheduler, comm=comm,
              timesteps_per_actorbatch=args.timesteps_per_actorbatch, max_timesteps=args.env_steps,
              optim_epochs=args.optim_epochs, optim_batchsize=args.optim_batchsize,
              gamma=args.gamma, lam=args.lam, clip_param=args.epsilon, entcoeff=args.ent_coef,
              checkpoint_dir=args.checkpoint_dir, model_name=args.model_name)
        env.close()

    elif args.mode == 'play':
        if comm.Get_rank() == 0:
            play(env=env, agent=agent, args=args)
            env.close()

    elif args.mode == 'movie':
        if comm.Get_rank() == 0:
            movie(env=env, agent=agent, args=args)
            env.close()

    else:
        raise NotImplementedError("Mode of operation not supported!")
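The scheduler's total_steps comes from the gradient-step arithmetic above; a quick worked example with assumed settings (4 optimization epochs, 10M env steps, 8 MPI workers, batch size 64):

optim_epochs, env_steps = 4, 10_000_000
world_size, optim_batchsize = 8, 64
max_grad_steps = optim_epochs * env_steps // (world_size * optim_batchsize)
print(max_grad_steps)  # 78125 gradient steps; OneCycleLR anneals the learning rate linearly over them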
Example #14
def wrap_atari_dqn(env):
    from common.atari_wrappers import wrap_deepmind
    return wrap_deepmind(env, frame_stack=True, scale=False)
Example #15
def make_env(env_id,
             env_type,
             mpi_rank=0,
             subrank=0,
             seed=None,
             reward_scale=1.0,
             gamestate=None,
             flatten_dict_observations=True,
             wrapper_kwargs=None,
             env_kwargs=None,
             logger_dir=None,
             initializer=None):
    if initializer is not None:
        initializer(mpi_rank=mpi_rank, subrank=subrank)

    wrapper_kwargs = wrapper_kwargs or {}
    env_kwargs = env_kwargs or {}
    if ':' in env_id:
        import re
        import importlib
        module_name = re.sub(':.*', '', env_id)
        env_id = re.sub('.*:', '', env_id)
        importlib.import_module(module_name)
    if env_type == 'atari':
        env = make_atari(env_id)
    elif env_type == 'retro':
        import retro
        gamestate = gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(
            game=env_id,
            max_episode_steps=10000,
            use_restricted_actions=retro.Actions.DISCRETE,
            state=gamestate)
    else:
        env = gym.make(env_id, **env_kwargs)

    if flatten_dict_observations and isinstance(env.observation_space,
                                                gym.spaces.Dict):
        env = FlattenObservation(env)

    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger_dir
                  and os.path.join(logger_dir,
                                   str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)

    if env_type == 'atari':
        env = wrap_deepmind(env, **wrapper_kwargs)
    elif env_type == 'retro':
        if 'frame_stack' not in wrapper_kwargs:
            wrapper_kwargs['frame_stack'] = 1
        env = retro_wrappers.wrap_deepmind_retro(env, **wrapper_kwargs)

    if isinstance(env.action_space, gym.spaces.Box):
        env = ClipActionsWrapper(env)

    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)

    return env
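The 'module:EnvId' convention above imports the named module first so it can register custom envs with gym before gym.make runs; a usage sketch with a hypothetical package and env id:

# Hypothetical: importing my_envs registers 'MyGrid-v0' with gym's registry.
env = make_env('my_envs:MyGrid-v0', env_type='custom', seed=0,
               env_kwargs={'size': 8})  # env_kwargs are forwarded to gym.make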
Example #16
def make_env(env_id, seed=None, max_episode_steps=None, wrapper_kwargs=None):
    wrapper_kwargs = wrapper_kwargs or {}
    env = make_atari(env_id, max_episode_steps)
    env.seed(seed)
    env = wrap_deepmind(env, **wrapper_kwargs)
    return env