示例#1
0
def make_neyboy_environment(env_id,
                            seed=0,
                            rank=0,
                            allow_early_resets=False,
                            frame_skip=4,
                            save_obs=False):
    """Build a frame-skipping, monitored gym environment for one worker.

    Args:
        env_id: Identifier passed to ``gym.make``.
        seed: Base random seed.
        rank: Worker index; added to ``seed`` and used as the name of the
            monitor sub-directory.
        allow_early_resets: Forwarded to ``Monitor``.
        frame_skip: Forwarded to ``MaxAndSkipEnv`` as ``skip``.
        save_obs: Accepted but not used by this function.

    Returns:
        The environment wrapped in ``MaxAndSkipEnv`` and then ``Monitor``.
    """
    skipped = MaxAndSkipEnv(gym.make(env_id), skip=frame_skip)
    skipped.seed(seed + rank)

    # When the logger has no directory, its falsy value is handed to Monitor
    # unchanged; otherwise each rank gets its own sub-path.
    base_dir = logger.get_dir()
    if base_dir:
        monitor_path = os.path.join(logger.get_dir(), str(rank))
    else:
        monitor_path = base_dir

    return Monitor(skipped, monitor_path, allow_early_resets=allow_early_resets)
示例#2
0
def wrap_env_dqn(env):
    """Apply the DQN preprocessing chain to ``env`` and return the result.

    Wrapper order (innermost first): ThresholdResizeFrame, ClipRewardEnv,
    MaxAndSkipEnv (skip=4), Monitor (early resets allowed), FrameStack (4).
    """
    resized = ThresholdResizeFrame(env)
    clipped = ClipRewardEnv(resized)
    skipped = MaxAndSkipEnv(clipped, skip=4)
    monitored = Monitor(skipped, logger.get_dir(), allow_early_resets=True)
    return FrameStack(monitored, 4)
示例#3
0
def atari_wrap(env, max_episode_steps=None):
    """Wrap ``env`` with no-op resets and frame skipping.

    Args:
        env: Environment to wrap.
        max_episode_steps: When not ``None``, the result is additionally
            wrapped in ``TimeLimit`` with this limit and then ``TimeLimitMask``.

    Returns:
        The wrapped environment.
    """
    wrapped = MaxAndSkipEnv(NoopResetEnv(env, noop_max=30), skip=4)
    if max_episode_steps is None:
        return wrapped
    limited = TimeLimit(wrapped, max_episode_steps=max_episode_steps)
    return TimeLimitMask(limited)
示例#4
0
 def wrap_deepmind_n64(env, reward_scale=1 / 100.0, frame_stack=1, grayscale=False):
     """Apply the N64 preprocessing chain to ``env``.

     Args:
         env: Environment to wrap.
         reward_scale: Forwarded to ``RewardScaler`` as ``scale``.
         frame_stack: Number of frames passed to ``FrameStack``.
         grayscale: Forwarded to ``WarpFrame``.

     Returns:
         The environment wrapped (innermost first) in MaxAndSkipEnv (skip=4),
         WarpFrame (150x100), FrameStack, ScaledFloatFrame and RewardScaler.
     """
     env = MaxAndSkipEnv(env, skip=4)
     env = WarpFrame(env, width=150, height=100, grayscale=grayscale)
     env = FrameStack(env, frame_stack)
     env = ScaledFloatFrame(env)
     # Use the caller-supplied scale; it was previously ignored in favour of a
     # hard-coded 1/100 (which is still the default).
     env = RewardScaler(env, scale=reward_scale)
     return env
示例#5
0
 def wrap_deepmind_n64(env, reward_scale=1 / 100.0, frame_stack=1, normalize_observations=True):
     """Apply the colour 450x300 N64 preprocessing chain to ``env``.

     Args:
         env: Environment to wrap.
         reward_scale: Forwarded to ``RewardScaler`` as ``scale``.
         frame_stack: Accepted but not used by this function.
         normalize_observations: When true, ``ImageNormalizer`` is applied
             with ``SSB64_IMAGE_MEAN`` as the mean.

     Returns:
         The wrapped environment.
     """
     warped = WarpFrame(MaxAndSkipEnv(env, skip=4), width=450, height=300, grayscale=False)
     pipeline = ScaledFloatFrame(warped)
     if normalize_observations:
         pipeline = ImageNormalizer(pipeline, mean=SSB64_IMAGE_MEAN)
     return RewardScaler(pipeline, scale=reward_scale)
def make_env(env_name='PongNoFrameskip-v4', size=42, skip=4, is_train=True):
    """Create ``env_name`` with gym and apply the preprocessing wrappers.

    Args:
        env_name: Identifier passed to ``gym.make``.
        size: Width and height requested from ``WarpFrame``.
        skip: Frame skip for ``MaxAndSkipEnv``.
        is_train: ``MaxAndSkipEnv`` is only applied when this is true.

    Returns:
        The environment wrapped in NoopResetEnv (noop_max=300), optionally
        MaxAndSkipEnv, WarpFrame (grayscale), ScaledFloatFrame and
        ChannelFirstFrameStack (4).
    """
    base = NoopResetEnv(gym.make(env_name), noop_max=300)
    skipped = MaxAndSkipEnv(base, skip=skip) if is_train else base
    # Grayscale frames are requested at size x size.
    warped = WarpFrame(skipped, width=size, height=size, grayscale=True)
    return ChannelFirstFrameStack(ScaledFloatFrame(warped), 4)
示例#7
0
def _wrap_deepmind_ram(env):
    """Wrap ``env`` with the Atari-specific wrappers used for RAM observations.

    Applies EpisodicLifeEnv, NoopResetEnv (noop_max=30) and MaxAndSkipEnv
    (skip=4); adds FireResetEnv when the underlying environment lists a
    'FIRE' action; finishes with ClipRewardEnv.
    """
    wrapped = MaxAndSkipEnv(
        NoopResetEnv(EpisodicLifeEnv(env), noop_max=30), skip=4)
    action_names = wrapped.unwrapped.get_action_meanings()
    if 'FIRE' in action_names:
        wrapped = FireResetEnv(wrapped)
    return ClipRewardEnv(wrapped)
示例#8
0
def make_atari(env_id, frame_skip=4):
    """Create a NoFrameskip Atari environment with no-op resets and frame skip.

    Args:
        env_id: Identifier looked up in ``gym_registry``.
        frame_skip: Forwarded to ``MaxAndSkipEnv`` as ``skip``. Previously
            this name was referenced without being defined in the function.

    Returns:
        The environment wrapped in NoopResetEnv (noop_max=30) and
        MaxAndSkipEnv.

    Raises:
        AssertionError: If the created environment's spec id does not contain
            'NoFrameskip'.
    """
    spec = gym_registry.spec(env_id)
    # not actually needed, but we feel safer
    # NOTE: these assignments modify the spec object returned by the registry.
    spec.max_episode_steps = None
    spec.max_episode_time = None
    env = spec.make()
    assert 'NoFrameskip' in env.spec.id
    env = NoopResetEnv(env, noop_max=30)
    env = MaxAndSkipEnv(env, skip=frame_skip)
    return env
示例#9
0
def wrap_env_ppo(env):
    """Apply the PPO preprocessing chain and return a stacked vector env.

    Wrapper order (innermost first): ThresholdResizeFrame, ClipRewardEnv,
    MaxAndSkipEnv (skip=4), Monitor, DummyVecEnv, VecFrameStack (4).
    """
    env = Monitor(
        MaxAndSkipEnv(ClipRewardEnv(ThresholdResizeFrame(env)), skip=4),
        logger.get_dir())
    # The factory closes over the local name ``env``, exactly as before.
    factories = [lambda: env]
    env = DummyVecEnv(factories)
    env = VecFrameStack(env, 4)
    return env
def wrap_env(env, episode_life=False):
    """Wrap ``env`` with the standard frame-preprocessing chain.

    Args:
        env: Environment to wrap.
        episode_life: When true, ``EpisodicLifeEnv`` is applied first.

    Returns:
        The environment wrapped in NoopResetEnv (30), MaxAndSkipEnv (4),
        FireResetEnv (only when the action at index 1 is named 'FIRE'),
        WarpFrame, FrameStack (4) and ScaledFloatFrame.
    """
    base = EpisodicLifeEnv(env) if episode_life else env
    skipped = MaxAndSkipEnv(NoopResetEnv(base, 30), 4)
    needs_fire = skipped.unwrapped.get_action_meanings()[1] == 'FIRE'
    if needs_fire:
        skipped = FireResetEnv(skipped)
    stacked = FrameStack(WarpFrame(skipped), 4)
    return ScaledFloatFrame(stacked)
示例#11
0
 def _thunk():
     """Create the environment described by ``env_base`` and wrap it.

     ``env_base`` comes from the enclosing scope and is expanded as keyword
     arguments to ``gym.make``.
     """
     raw = gym.make(**env_base)
     skipped = MaxAndSkipEnv(NoopResetEnv(raw, noop_max=30), skip=4)
     rewarded = ClipRewardEnv(EpisodicLifeEnv(RewardCollector(skipped)))
     frames = TransposeImage(ScaledFloatFrame(WarpFrame(rewarded)))
     return UnrealEnvBaseWrapper(frames)
示例#12
0
 def create_env(self, env):
     """Create the gym environment named ``env`` and wrap it.

     Args:
         env: Identifier passed to ``gym.make``.

     Returns:
         The environment wrapped (innermost first) in NoopResetEnv,
         MaxAndSkipEnv, RewardCollector, EpisodicLifeEnv, ClipRewardEnv,
         WarpFrame, FrameStack (4), ConvertToNumpy, TransposeImage and
         ScaledFloatFrame.
     """
     game = gym.make(env)
     game = MaxAndSkipEnv(NoopResetEnv(game, noop_max=30), skip=4)
     game = ClipRewardEnv(EpisodicLifeEnv(RewardCollector(game)))
     frames = FrameStack(WarpFrame(game), 4)
     frames = TransposeImage(ConvertToNumpy(frames))
     return ScaledFloatFrame(frames)
示例#13
0
    def _thunk():
        """Create the environment selected by ``env_id`` and wrap it.

        Reads ``env_id``, ``obs_type``, ``mode``, ``seed``, ``rank`` and
        ``skip_frames`` from the enclosing scope.

        Returns:
            The wrapped environment.

        Raises:
            NotImplementedError: For a non-Atari, non-tetris environment whose
                observation space has three dimensions.
        """
        # Pick the backend: "dm.<domain>.<task>" ids go to dm_control2gym,
        # "tetris_single" to TetrisSingleEnv, everything else to gym.
        if env_id.startswith("dm"):
            _, domain, task = env_id.split('.')
            env = dm_control2gym.make(domain_name=domain, task_name=task)

        elif env_id == "tetris_single":
            env = TetrisSingleEnv(obs_type=obs_type, mode=mode)
        else:
            env = gym.make(env_id)

        # Atari environments are detected on the instance created above and
        # then re-created through make_atari.
        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        if is_atari:
            env = make_atari(env_id)

        # Give each rank its own seed.
        env.seed(seed + rank)

        # NOTE(review): this value is not read before it is reassigned below.
        obs_shape = env.observation_space.shape

        # Only applies when the outermost wrapper's class name contains
        # 'TimeLimit'.
        if str(env.__class__.__name__).find('TimeLimit') >= 0:
            env = TimeLimitMask(env)

        # Monitoring is currently disabled:
        # if log_dir is not None:
        #     env = bench.Monitor(
        #         env,
        #         os.path.join(log_dir, str(rank)),
        #         allow_early_resets=allow_early_resets)

        if is_atari:
            if len(env.observation_space.shape) == 3:
                env = wrap_deepmind(env)
        elif env_id.startswith("tetris"):
            # Tetris only gets frame skipping, plus a 224x224 warp for image
            # observations; the wrap_deepmind / NoopResetEnv variants are
            # disabled.
            # env = wrap_deepmind(env, episode_life=False, clip_rewards=False, frame_stack=False, scale=False)
            # env = NoopResetEnv(env, noop_max=30)
            env = MaxAndSkipEnv(env, skip=skip_frames)
            # pass
            if obs_type == "image":
                env = WarpFrame(env, 224, 224)
        elif len(env.observation_space.shape) == 3:
            raise NotImplementedError(
                "CNN models work only for atari,\n"
                "please use a custom wrapper for a custom pixel input env.\n"
                "See wrap_deepmind for an example.")

        # If the input has shape (W,H,3), wrap for PyTorch convolutions
        # (the check also accepts a last dimension of 1).
        obs_shape = env.observation_space.shape
        if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
            env = TransposeImage(env, op=[2, 0, 1])

        return env
示例#14
0
def wrap_n64(env,
             reward_scale=1 / 100.0,
             frame_skip=4,
             width=150,
             height=100,
             grayscale=True,
             normalize_observations=True):
    """Apply the configurable N64 preprocessing chain to ``env``.

    Args:
        env: Environment to wrap.
        reward_scale: Forwarded to ``RewardScaler`` as ``scale``.
        frame_skip: Forwarded to ``MaxAndSkipEnv`` as ``skip``.
        width: Frame width requested from ``WarpFrame``.
        height: Frame height requested from ``WarpFrame``.
        grayscale: Forwarded to ``WarpFrame``.
        normalize_observations: When true, ``ImageNormalizer`` is applied
            with ``SSB64_IMAGE_MEAN`` as the mean.

    Returns:
        The wrapped environment.
    """
    env = MaxAndSkipEnv(env, skip=frame_skip)
    env = WarpFrame(env, width=width, height=height, grayscale=grayscale)
    env = ScaledFloatFrame(env)
    if normalize_observations:
        env = ImageNormalizer(env, mean=SSB64_IMAGE_MEAN)
    # Use the caller-supplied scale; it was previously ignored in favour of a
    # hard-coded 1/100 (which is still the default).
    env = RewardScaler(env, scale=reward_scale)
    return env
示例#15
0
文件: envs.py 项目: biggzlar/i2a
 def _thunk():
     """Create ``env_id`` with gym, seed it, and apply the frame wrappers.

     Reads ``env_id``, ``seed``, ``rank`` and ``eval`` from the enclosing
     scope.
     """
     base = gym.make(env_id)
     base.seed(seed + rank)
     warped = WarpFrame(MaxAndSkipEnv(NoopResetEnv(base, noop_max=30), skip=2))
     # Workaround kept from the original: overwrite the wrapper's width and
     # height attributes with 50 (intended to produce 50x50 frames).
     warped.width = 50
     warped.height = 50
     pipeline = ScaledFloatFrame(warped)
     # NOTE(review): if no enclosing scope defines ``eval``, this name resolves
     # to the builtin, which is always truthy - confirm against the caller.
     if not eval:
         pipeline = EpisodicLifeEnv(ClipRewardEnv(pipeline))
     return TransposeOb(FrameStack(pipeline, 3))
示例#16
0
def make_env(env_name='PongNoFrameskip-v4',
             size=84,
             skip=4,
             scale=True,
             is_train=True):
    """Create ``env_name`` with gym and apply the Atari preprocessing chain.

    Args:
        env_name: Identifier passed to ``gym.make``.
        size: Width and height requested from ``WarpFrame``.
        skip: Frame skip for ``MaxAndSkipEnv``.
        scale: When true, ``ScaledFloatFrame`` is applied.
        is_train: ``MaxAndSkipEnv`` is only applied when this is true.

    Returns:
        The wrapped environment, finishing with ChannelFirstFrameStack (4).
    """
    wrapped = NoopResetEnv(gym.make(env_name), noop_max=30)
    if is_train:
        wrapped = MaxAndSkipEnv(wrapped, skip=skip)

    # Episodic-life handling only for games that report a positive life count.
    has_lives = wrapped.unwrapped.ale.lives() > 0
    if has_lives:
        wrapped = EpisodicLifeEnv(wrapped)

    action_names = wrapped.unwrapped.get_action_meanings()
    if 'FIRE' in action_names:
        wrapped = FireResetEnv(wrapped)

    # Grayscale frames are requested at size x size.
    warped = WarpFrame(wrapped, width=size, height=size, grayscale=True)
    frames = ScaledFloatFrame(warped) if scale else warped
    return ChannelFirstFrameStack(frames, 4)
示例#17
0
    def _thunk():
        """Create ``env_id`` with gym, seed it, and apply the MiniPacman wrappers.

        Reads ``env_id``, ``seed``, ``rank``, ``log_dir`` and ``grey_scale``
        from the enclosing scope.
        """
        use_episodic_life = True
        game = gym.make(env_id)
        game.seed(seed + rank)
        game.frameskip = 1

        if log_dir is not None:
            game = bench.Monitor(game, os.path.join(log_dir, str(rank)))
        if use_episodic_life:
            game = EpisodicLifeEnv(game)

        game = MaxAndSkipEnv(NoopResetEnv(game, noop_max=30), skip=1)

        if 'FIRE' in game.unwrapped.get_action_meanings():
            game = FireResetEnv(game)

        # Pick the frame warper matching the requested colour mode.
        warp_cls = WarpMiniPacmanFrameGreyScale if grey_scale else WarpMiniPacmanFrameRGB
        return warp_cls(game)
示例#18
0
# Number of atoms handed to the C51 model constructor.
num_atoms = 51

# Command-line interface for the training script.
parser = argparse.ArgumentParser(description='C51-DQN Implementation Using Pytorch')
parser.add_argument('env_name', type=str, help='gym id')
parser.add_argument('--no-cuda', action='store_true', help='use to disable available CUDA')
parser.add_argument('--minibatch-size', type=int, default=32, help='size of minibatch')
parser.add_argument('--total-steps', type=int, default=int(4e7), help='Total steps taken during training')
parser.add_argument('--lr', type=float, default=0.0001, help='learning rate')
parser.add_argument('--render', action='store_true', help='render training environments')
# NOTE(review): the type, default and help text of --gamma describe a render
# interval rather than a discount factor - confirm the intended meaning.
parser.add_argument('--gamma', type=int, default=4, help='number of steps between environment renders')
parser.add_argument('--initial_epsilon', type=float, default=1.0, help='probability of selecting random action')
# Must be float: with type=int, argparse rejects any fractional value given
# on the command line even though the default itself is fractional.
parser.add_argument('--final_epsilon', type=float, default=0.0001, help='eventual decision randomness')

# Parsed once at import time from sys.argv.
args = parser.parse_args()

def preprocessImage(img):
    """Return ``img`` with axis 2 moved to the front.

    Args:
        img: Array with at least three dimensions, e.g. a 210 x 160 x 3
            frame, which becomes 3 x 210 x 160.

    Returns:
        The result of ``np.rollaxis(img, 2, 0)``. Previously the rolled array
        was computed but never returned, so callers always received ``None``.
    """
    return np.rollaxis(img, 2, 0)



# Set up the environment and initialize the model.
env = gym.make(args.env_name)
env = MaxAndSkipEnv(env)  # wrapper's default skip is used
env = WarpFrame(env) #84x84 observation space from Mnih et al.
env.reset()
# NOTE(review): this rebinds the name ``model`` to the C51 instance, so
# whatever ``model`` referred to before (presumably a module) is no longer
# reachable under that name after this line.
model = model.C51(env.action_space.n, num_atoms)

# Training-loop state: step counter, last reward, last action.
total_steps = 0
r_t = 0
a_t = np.zeros(env.action_space.shape)
示例#19
0
def make_env(args, env_id, seed, rank, log_dir, add_timestep, train=True,
             natural=False, clip_rewards=True, loader=None):
    """Create the environment selected by ``env_id`` and wrap it.

    Args:
        args: Object providing ``max_steps`` and ``window`` (read only for
            image environments).
        env_id: Environment identifier; selects the backend (see body).
        seed: Base random seed.
        rank: Worker index; added to ``seed`` and used as the monitor
            sub-directory name.
        log_dir: When not ``None``, the environment is wrapped in
            ``bench.Monitor`` writing under ``log_dir/<rank>``.
        add_timestep: When true, ``AddTimestep`` is applied to environments
            with a 1-D observation space whose string form contains
            'TimeLimit'.
        train: Selects the training or test video path and is forwarded to
            the image / detection environments.
        natural: When true, Atari and MuJoCo environments get their
            background replaced using a random video source.
        clip_rewards: Forwarded to ``wrap_deepmind`` for Atari environments.
        loader: Not used by this function.

    Returns:
        The wrapped environment.
    """
    # def _thunk():
    # Video source directory; only read by the ``natural`` branches below.
    if train:
        vid_path = KINETICS_PATH
    else:
        vid_path = KINETICS_PATH_TEST
    # Pick the backend from the environment id.
    if env_id in IMG_ENVS:
        if env_id == 'mnist':
            channels = 2
        else:
            channels = 4
        env = ImgEnv(env_id, max_steps=args.max_steps, channels=channels,
                     window=args.window, train=train)
    elif env_id in ['cityscapes']:
        env = DetectionEnv(env_id, max_steps=200, train=train)
    elif env_id.startswith("dm"):
        # Expected form: "dm.<domain>.<task>".
        _, domain, task = env_id.split('.')
        env = dm_control2gym.make(domain_name=domain, task_name=task)
    else:
        env = gym.make(env_id)
    # Family detection is done once, on the freshly created instance.
    is_atari = hasattr(gym.envs, 'atari') and isinstance(
        env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
    is_mujoco = hasattr(gym.envs, 'mujoco') and isinstance(
        env.unwrapped, gym.envs.mujoco.MujocoEnv)
    if is_atari:
        # Atari environments are re-created through make_atari.
        env = make_atari(env_id)
    if natural and is_atari:
        env = ReplaceBackgroundEnv(
            env,
            BackgroundMattingWithColor((0, 0, 0)),
            #RandomColorSource(shape2d)
            #RandomImageSource(shape2d, glob.glob(COCO_PATH))
            RandomVideoSource(env.observation_space.shape[:2],
                              glob.glob(vid_path)))
    elif natural and is_mujoco:
        # The observation space is overwritten with a 128x128x3 uint8 box
        # before the background-replacing wrapper is applied.
        env.observation_space = Box(
            low=0, high=255, shape=(128, 128, 3), dtype=np.uint8)
        env = ReplaceMuJoCoBackgroundEnv(
            env,
            BackgroundMattingWithColor((0, 0, 0)),
            #RandomColorSource(shape2d)
            #RandomImageSource(shape2d, glob.glob(COCO_PATH))
            RandomVideoSource(env.observation_space.shape[:2],
                              glob.glob(vid_path)))
    elif is_mujoco:
        env.observation_space = Box(
            low=0, high=255, shape=(128, 128, 3), dtype=np.uint8)
        env = PixelMujoCoEnv(env)
    # Give each rank its own seed.
    env.seed(seed + rank)

    obs_shape = env.observation_space.shape
    if add_timestep and len(
            obs_shape) == 1 and str(env).find('TimeLimit') > -1:
        env = AddTimestep(env)

    if log_dir is not None:
        env = bench.Monitor(env, os.path.join(log_dir, str(rank)))

    if is_atari:
        env = wrap_deepmind(env, clip_rewards=clip_rewards)
    if is_mujoco:
        # Innermost first: frame skip, warp, reward clipping.
        env = ClipRewardEnv(WarpFrame(MaxAndSkipEnv(env, skip=4)))
    # If the input has shape (W,H,3), wrap for PyTorch convolutions
    # (the check also accepts a last dimension of 1).
    obs_shape = env.observation_space.shape
    if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
        env = WrapPyTorch(env)

    return env
示例#20
0
def atari_setup(env):
    """Wrap a NoFrameskip Atari ``env`` with no-op resets and frame skipping.

    Raises:
        AssertionError: If ``env.spec.id`` does not contain 'NoFrameskip'.
    """
    # Adapted from baselines.common.atari_wrappers.
    assert 'NoFrameskip' in env.spec.id
    return MaxAndSkipEnv(NoopResetEnv(env, noop_max=30), skip=4)
示例#21
0
def make_atari(env_id):
    """Create ``env_id`` with gym and add no-op resets and frame skipping.

    Returns:
        The environment wrapped in NoopResetEnv (noops=240) and
        MaxAndSkipEnv (skip=4).

    Raises:
        AssertionError: If the created environment's spec id does not contain
            'NoFrameskip'.
    """
    base = gym.make(env_id)
    assert 'NoFrameskip' in base.spec.id
    return MaxAndSkipEnv(NoopResetEnv(base, noops=240), skip=4)