def make_neyboy_environment(env_id, seed=0, rank=0, allow_early_resets=False, frame_skip=4, save_obs=False):
    env = gym.make(env_id)
    env = MaxAndSkipEnv(env, skip=frame_skip)
    env.seed(seed + rank)
    logdir = logger.get_dir() and os.path.join(logger.get_dir(), str(rank))
    # env = wrappers.Monitor(env, logdir, force=True)
    env = Monitor(env, logdir, allow_early_resets=allow_early_resets)
    return env
def wrap_env_dqn(env):
    env = ThresholdResizeFrame(env)
    env = ClipRewardEnv(env)
    env = MaxAndSkipEnv(env, skip=4)
    env = Monitor(env, logger.get_dir(), allow_early_resets=True)
    env = FrameStack(env, 4)
    return env
def atari_wrap(env, max_episode_steps=None):
    env = NoopResetEnv(env, noop_max=30)
    env = MaxAndSkipEnv(env, skip=4)
    if max_episode_steps is not None:
        env = TimeLimit(env, max_episode_steps=max_episode_steps)
        env = TimeLimitMask(env)
    return env
def wrap_deepmind_n64(env, reward_scale=1 / 100.0, frame_stack=1, grayscale=False):
    env = MaxAndSkipEnv(env, skip=4)
    env = WarpFrame(env, width=150, height=100, grayscale=grayscale)
    env = FrameStack(env, frame_stack)
    env = ScaledFloatFrame(env)
    env = RewardScaler(env, scale=reward_scale)  # scale rewards by the reward_scale argument
    return env
def wrap_deepmind_n64(env, reward_scale=1 / 100.0, frame_stack=1, normalize_observations=True):
    env = MaxAndSkipEnv(env, skip=4)
    env = WarpFrame(env, width=450, height=300, grayscale=False)
    env = ScaledFloatFrame(env)
    if normalize_observations:
        env = ImageNormalizer(env, mean=SSB64_IMAGE_MEAN)
    env = RewardScaler(env, scale=reward_scale)
    return env
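# Neither RewardScaler nor ImageNormalizer ships with baselines.common.atari_wrappers;
# the n64 wrappers above assume something like the following minimal sketch, where
# SSB64_IMAGE_MEAN is taken to be a per-channel (or per-pixel) mean image.
import gym
import numpy as np


class RewardScaler(gym.RewardWrapper):
    """Multiply every reward by a constant factor."""

    def __init__(self, env, scale=1 / 100.0):
        super().__init__(env)
        self.scale = scale

    def reward(self, reward):
        return reward * self.scale


class ImageNormalizer(gym.ObservationWrapper):
    """Subtract a fixed mean image from every observation."""

    def __init__(self, env, mean):
        super().__init__(env)
        self.mean = np.asarray(mean, dtype=np.float32)

    def observation(self, observation):
        return observation - self.mean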
def make_env(env_name='PongNoFrameskip-v4', size=42, skip=4, is_train=True):
    env = gym.make(env_name)
    env = NoopResetEnv(env, noop_max=300)
    if is_train:
        env = MaxAndSkipEnv(env, skip=skip)
    env = WarpFrame(env, width=size, height=size, grayscale=True)  # obs space is now (size, size, 1)
    env = ScaledFloatFrame(env)
    env = ChannelFirstFrameStack(env, 4)
    return env
def _wrap_deepmind_ram(env):
    """Applies various Atari-specific wrappers to make learning easier."""
    env = EpisodicLifeEnv(env)
    env = NoopResetEnv(env, noop_max=30)
    env = MaxAndSkipEnv(env, skip=4)
    if 'FIRE' in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    env = ClipRewardEnv(env)
    return env
def make_atari(env_id, frame_skip=4):
    spec = gym_registry.spec(env_id)  # not actually needed, but we feel safer
    spec.max_episode_steps = None
    spec.max_episode_time = None
    env = spec.make()
    assert 'NoFrameskip' in env.spec.id
    env = NoopResetEnv(env, noop_max=30)
    env = MaxAndSkipEnv(env, skip=frame_skip)
    return env
def wrap_env_ppo(env):
    env = ThresholdResizeFrame(env)
    # env = WarpFrame(env)
    env = ClipRewardEnv(env)
    # env = NoopResetEnv(env, noop_max=8)
    env = MaxAndSkipEnv(env, skip=4)
    env = Monitor(env, logger.get_dir())
    env = DummyVecEnv([lambda: env])
    env = VecFrameStack(env, 4)
    return env
def wrap_env(env, episode_life=False):
    if episode_life:
        env = EpisodicLifeEnv(env)
    env = NoopResetEnv(env, 30)
    env = MaxAndSkipEnv(env, 4)
    if env.unwrapped.get_action_meanings()[1] == 'FIRE':
        env = FireResetEnv(env)
    env = WarpFrame(env)  # , width=84, height=84)
    env = FrameStack(env, 4)
    env = ScaledFloatFrame(env)
    return env
def _thunk():
    env = gym.make(**env_base)
    env = NoopResetEnv(env, noop_max=30)
    env = MaxAndSkipEnv(env, skip=4)
    env = RewardCollector(env)
    env = EpisodicLifeEnv(env)
    env = ClipRewardEnv(env)
    env = WarpFrame(env)
    env = ScaledFloatFrame(env)
    env = TransposeImage(env)
    env = UnrealEnvBaseWrapper(env)
    return env
def create_env(self, env):
    env = gym.make(env)
    env = NoopResetEnv(env, noop_max=30)
    env = MaxAndSkipEnv(env, skip=4)
    env = RewardCollector(env)
    env = EpisodicLifeEnv(env)
    env = ClipRewardEnv(env)
    env = WarpFrame(env)
    env = FrameStack(env, 4)
    env = ConvertToNumpy(env)
    env = TransposeImage(env)
    env = ScaledFloatFrame(env)
    return env
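# TransposeImage is not part of baselines.common.atari_wrappers; the snippets above assume
# something like the following minimal sketch, which reorders HWC observations to CHW for
# PyTorch convolutions (the Box bounds are carried over from the wrapped space).
import gym
import numpy as np


class TransposeImage(gym.ObservationWrapper):
    def __init__(self, env, op=(2, 0, 1)):
        super().__init__(env)
        self.op = op
        shape = self.observation_space.shape
        self.observation_space = gym.spaces.Box(
            low=self.observation_space.low.min(),
            high=self.observation_space.high.max(),
            shape=(shape[op[0]], shape[op[1]], shape[op[2]]),
            dtype=self.observation_space.dtype)

    def observation(self, observation):
        return np.transpose(observation, self.op)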
def _thunk():
    if env_id.startswith("dm"):
        _, domain, task = env_id.split('.')
        env = dm_control2gym.make(domain_name=domain, task_name=task)
    elif env_id == "tetris_single":
        env = TetrisSingleEnv(obs_type=obs_type, mode=mode)
    else:
        env = gym.make(env_id)
    is_atari = hasattr(gym.envs, 'atari') and isinstance(
        env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
    if is_atari:
        env = make_atari(env_id)
    env.seed(seed + rank)
    obs_shape = env.observation_space.shape
    if str(env.__class__.__name__).find('TimeLimit') >= 0:
        env = TimeLimitMask(env)
    # if log_dir is not None:
    #     env = bench.Monitor(
    #         env,
    #         os.path.join(log_dir, str(rank)),
    #         allow_early_resets=allow_early_resets)
    if is_atari:
        if len(env.observation_space.shape) == 3:
            env = wrap_deepmind(env)
    elif env_id.startswith("tetris"):
        # env = wrap_deepmind(env, episode_life=False, clip_rewards=False, frame_stack=False, scale=False)
        # env = NoopResetEnv(env, noop_max=30)
        env = MaxAndSkipEnv(env, skip=skip_frames)
        # pass
        if obs_type == "image":
            env = WarpFrame(env, 224, 224)
    elif len(env.observation_space.shape) == 3:
        raise NotImplementedError(
            "CNN models work only for atari,\n"
            "please use a custom wrapper for a custom pixel input env.\n"
            "See wrap_deepmind for an example.")
    # If the input has shape (W,H,3), wrap for PyTorch convolutions
    obs_shape = env.observation_space.shape
    if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
        env = TransposeImage(env, op=[2, 0, 1])
    return env
def wrap_n64(env, reward_scale=1 / 100.0, frame_skip=4, width=150, height=100, grayscale=True, normalize_observations=True):
    env = MaxAndSkipEnv(env, skip=frame_skip)
    env = WarpFrame(env, width=width, height=height, grayscale=grayscale)
    env = ScaledFloatFrame(env)
    if normalize_observations:
        env = ImageNormalizer(env, mean=SSB64_IMAGE_MEAN)
    env = RewardScaler(env, scale=reward_scale)  # scale rewards by the reward_scale argument
    return env
def _thunk():
    env = gym.make(env_id)
    env.seed(seed + rank)
    env = NoopResetEnv(env, noop_max=30)
    env = MaxAndSkipEnv(env, skip=2)
    env = WarpFrame(env)
    # Janky fix to resize environments to be 50x50
    env.width = 50
    env.height = 50
    env = ScaledFloatFrame(env)
    if not eval:
        env = ClipRewardEnv(env)
        env = EpisodicLifeEnv(env)
    env = FrameStack(env, 3)
    env = TransposeOb(env)
    return env
def make_env(env_name='PongNoFrameskip-v4', size=84, skip=4, scale=True, is_train=True):
    env = gym.make(env_name)
    env = NoopResetEnv(env, noop_max=30)
    if is_train:
        env = MaxAndSkipEnv(env, skip=skip)
    if env.unwrapped.ale.lives() > 0:
        env = EpisodicLifeEnv(env)
    if 'FIRE' in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    env = WarpFrame(env, width=size, height=size, grayscale=True)  # obs space is now (84, 84, 1)
    if scale:
        env = ScaledFloatFrame(env)
    env = ChannelFirstFrameStack(env, 4)
    return env
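# ChannelFirstFrameStack is also not a baselines wrapper; a minimal sketch under the
# assumption that it stacks the k most recent grayscale frames along a leading channel
# axis, so a (84, 84, 1) observation becomes (k, 84, 84):
from collections import deque

import gym
import numpy as np


class ChannelFirstFrameStack(gym.Wrapper):
    def __init__(self, env, k):
        super().__init__(env)
        self.k = k
        self.frames = deque(maxlen=k)
        h, w, _ = env.observation_space.shape
        self.observation_space = gym.spaces.Box(
            low=0.0, high=1.0, shape=(k, h, w), dtype=np.float32)

    def reset(self, **kwargs):
        obs = self.env.reset(**kwargs)
        for _ in range(self.k):
            self.frames.append(obs[..., 0])
        return np.stack(self.frames, axis=0)

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        self.frames.append(obs[..., 0])
        return np.stack(self.frames, axis=0), reward, done, info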
def _thunk():
    episodic_life = True
    env = gym.make(env_id)
    env.seed(seed + rank)
    env.frameskip = 1
    if log_dir is not None:
        env = bench.Monitor(env, os.path.join(log_dir, str(rank)))
    if episodic_life:
        env = EpisodicLifeEnv(env)
    env = NoopResetEnv(env, noop_max=30)
    env = MaxAndSkipEnv(env, skip=1)
    if 'FIRE' in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    if grey_scale:
        env = WarpMiniPacmanFrameGreyScale(env)
    else:
        env = WarpMiniPacmanFrameRGB(env)
    return env
num_atoms = 51

parser = argparse.ArgumentParser(description='C51-DQN implementation using PyTorch')
parser.add_argument('env_name', type=str, help='gym id')
parser.add_argument('--no-cuda', action='store_true', help='disable CUDA even if it is available')
parser.add_argument('--minibatch-size', type=int, default=32, help='size of minibatch')
parser.add_argument('--total-steps', type=int, default=int(4e7), help='total steps taken during training')
parser.add_argument('--lr', type=float, default=0.0001, help='learning rate')
parser.add_argument('--render', action='store_true', help='render training environments')
parser.add_argument('--gamma', type=float, default=0.99, help='discount factor')
parser.add_argument('--initial_epsilon', type=float, default=1.0, help='initial probability of selecting a random action')
parser.add_argument('--final_epsilon', type=float, default=0.0001, help='final probability of selecting a random action')
args = parser.parse_args()


def preprocessImage(img):
    img = np.rollaxis(img, 2, 0)  # HWC (210 x 160 x 3) -> CHW (3 x 210 x 160)
    return img


# set up environment, initialize model
env = gym.make(args.env_name)
env = MaxAndSkipEnv(env)
env = WarpFrame(env)  # 84x84 observation space from Mnih et al.
env.reset()
model = model.C51(env.action_space.n, num_atoms)
total_steps = 0
r_t = 0
a_t = np.zeros(env.action_space.shape)
def make_env(args, env_id, seed, rank, log_dir, add_timestep, train=True,
             natural=False, clip_rewards=True, loader=None):
    # def _thunk():
    if train:
        vid_path = KINETICS_PATH
    else:
        vid_path = KINETICS_PATH_TEST
    if env_id in IMG_ENVS:
        if env_id == 'mnist':
            channels = 2
        else:
            channels = 4
        env = ImgEnv(env_id, max_steps=args.max_steps, channels=channels,
                     window=args.window, train=train)
    elif env_id in ['cityscapes']:
        env = DetectionEnv(env_id, max_steps=200, train=train)
    elif env_id.startswith("dm"):
        _, domain, task = env_id.split('.')
        env = dm_control2gym.make(domain_name=domain, task_name=task)
    else:
        env = gym.make(env_id)
    is_atari = hasattr(gym.envs, 'atari') and isinstance(
        env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
    is_mujoco = hasattr(gym.envs, 'mujoco') and isinstance(
        env.unwrapped, gym.envs.mujoco.MujocoEnv)
    if is_atari:
        env = make_atari(env_id)
    if natural and is_atari:
        env = ReplaceBackgroundEnv(
            env,
            BackgroundMattingWithColor((0, 0, 0)),
            # RandomColorSource(shape2d)
            # RandomImageSource(shape2d, glob.glob(COCO_PATH))
            RandomVideoSource(env.observation_space.shape[:2], glob.glob(vid_path)))
    elif natural and is_mujoco:
        env.observation_space = Box(
            low=0, high=255, shape=(128, 128, 3), dtype=np.uint8)
        env = ReplaceMuJoCoBackgroundEnv(
            env,
            BackgroundMattingWithColor((0, 0, 0)),
            # RandomColorSource(shape2d)
            # RandomImageSource(shape2d, glob.glob(COCO_PATH))
            RandomVideoSource(env.observation_space.shape[:2], glob.glob(vid_path)))
    elif is_mujoco:
        env.observation_space = Box(
            low=0, high=255, shape=(128, 128, 3), dtype=np.uint8)
        env = PixelMujoCoEnv(env)
    env.seed(seed + rank)
    obs_shape = env.observation_space.shape
    if add_timestep and len(obs_shape) == 1 and str(env).find('TimeLimit') > -1:
        env = AddTimestep(env)
    if log_dir is not None:
        env = bench.Monitor(env, os.path.join(log_dir, str(rank)))
    if is_atari:
        env = wrap_deepmind(env, clip_rewards=clip_rewards)
    if is_mujoco:
        env = ClipRewardEnv(WarpFrame(MaxAndSkipEnv(env, skip=4)))
    # If the input has shape (W,H,3), wrap for PyTorch convolutions
    obs_shape = env.observation_space.shape
    if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
        env = WrapPyTorch(env)
    return env
def atari_setup(env):
    # from baselines.common.atari_wrappers
    assert 'NoFrameskip' in env.spec.id
    env = NoopResetEnv(env, noop_max=30)
    env = MaxAndSkipEnv(env, skip=4)
    return env
def make_atari(env_id):
    env = gym.make(env_id)
    assert 'NoFrameskip' in env.spec.id
    env = NoopResetEnv(env, noop_max=240)  # up to 240 random no-ops at reset
    env = MaxAndSkipEnv(env, skip=4)
    return env
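# A hedged usage sketch (not part of the snippets above): factory functions like make_atari
# are typically wrapped in per-rank thunks and handed to a vectorized environment. The
# environment id, seed, and num_envs below are illustrative assumptions.
from baselines.common.atari_wrappers import wrap_deepmind
from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv


def make_env_thunk(env_id, seed, rank):
    def _thunk():
        env = make_atari(env_id)
        env.seed(seed + rank)
        return wrap_deepmind(env, frame_stack=True)
    return _thunk


if __name__ == '__main__':
    num_envs = 8
    venv = SubprocVecEnv([make_env_thunk('PongNoFrameskip-v4', seed=0, rank=i)
                          for i in range(num_envs)])
    obs = venv.reset()  # shape: (num_envs, 84, 84, 4)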