def test_frame_stack():
    """Check ``frame_stack_v1`` with a stack size of 4 on two kinds of observation.

    Part one uses ``Box`` observations of shape ``[2, 3]`` and checks that the
    stacked observation has shape ``(2, 3, 4)``, with the newest frame in the
    last slot and zeros in the older slots.

    Part two passes ``base_act_spaces`` as the observation spaces as well, with
    integer observations, and checks the stacked observation against a base-5
    positional encoding of the frame history.
    """
    # --- Box observations -------------------------------------------------
    base_obs_space = {
        "a{}".format(idx): Box(low=np.float32(0.0), high=np.float32(10.0), shape=[2, 3])
        for idx in range(2)
    }
    base_obs = {
        "a{}".format(idx): np.zeros([2, 3]) + np.arange(3) + idx for idx in range(2)
    }
    base_env = DummyEnv(base_obs, base_obs_space, base_act_spaces)
    env = frame_stack_v1(base_env, 4)
    env.reset()
    obs, _, _, _ = env.last()
    assert obs.shape == (2, 3, 4)

    env.step(2)
    first_obs, _, _, _ = env.last()
    # After one step the selected agent has seen a single real frame: it sits
    # in the last stack slot and every older slot is still zero.
    assert np.all(np.equal(first_obs[:, :, -1], base_obs["a1"]))
    assert np.all(np.equal(first_obs[:, :, :-1], 0))

    # --- Integer observations ---------------------------------------------
    # base_act_spaces is intentionally passed twice: it serves as both the
    # observation spaces and the action spaces of this dummy environment.
    base_obs = {"a{}".format(idx): idx + 3 for idx in range(2)}
    base_env = DummyEnv(base_obs, base_act_spaces, base_act_spaces)
    env = frame_stack_v1(base_env, 4)
    env.reset()
    env.last()  # call kept from the original test; its result is not needed
    assert env.observation_spaces[env.agent_selection].n == 5 ** 4

    env.step(2)
    first_obs, _, _, _ = env.last()
    assert first_obs == 4

    env.step(2)
    second_obs, _, _, _ = env.last()
    assert second_obs == 3 + 3 * 5

    # Eight more steps; only the observation after the final step is asserted.
    for _ in range(8):
        env.step(2)
        nth_obs, _, _, _ = env.last()
    assert nth_obs == ((3 * 5 + 3) * 5 + 3) * 5 + 3
def env_creator():
    """Create the game environment described by ``args`` and wrap it.

    Reads ``args`` from the enclosing scope (``game``, ``game_name``,
    ``atari_obs_type``, ``max_steps``, ``atari_frame_skip_num``,
    ``atari_frame_stack_num``, ``latent_para_dim``, ``use_dict_obs_space``).

    Returns:
        The environment wrapped in ``LatentGaussianAugmentedEnvWrapper``.
    """
    # NOTE(review): the source of this function was flattened onto one line;
    # the nesting below is the most direct reading of it - confirm.

    # Game-name prefixes whose environments are created with num_players=2.
    two_player_prefixes = ('foozpong', 'basketball_pong', 'volleyball_pong')

    if args.game.__package__.endswith('atari'):
        if args.game_name.startswith(two_player_prefixes):
            env = args.game.env(obs_type=args.atari_obs_type,
                                max_cycles=args.max_steps['atari'],
                                full_action_space=False,
                                num_players=2)
        else:
            env = args.game.env(obs_type=args.atari_obs_type,
                                full_action_space=False,
                                max_cycles=args.max_steps['atari'])
        env = frame_skip_v0(env, args.atari_frame_skip_num)
        env = frame_stack_v1(env, args.atari_frame_stack_num)
    else:
        env = args.game.env()

    if args.game_name.startswith('rps'):
        env = one_hot_obs_wrapper(env)

    env = dtype_v0(env, dtype=float32)
    env = pad_observations_v0(env)
    env = pad_action_space_v0(env)

    if args.game_name.startswith(('connect_four', 'tictactoe')):
        env = FlattenEnvWrapper(env)

    GAUSSIAN_STD = 1.0
    assert abs(GAUSSIAN_STD - 1.0) < 1e-5, "must be 1.0, otherwise simple ensemble implementation is wrong"
    # Pass the checked constant itself, so the assertion above guards the value
    # the wrapper actually receives rather than a separate literal.
    env = LatentGaussianAugmentedEnvWrapper(env,
                                            latent_parameter_dim=args.latent_para_dim,
                                            gaussian_std=GAUSSIAN_STD,
                                            use_dict_obs_space=args.use_dict_obs_space)
    return env
def atari_preprocessing(
    env: Union[ParallelEnvWrapper, SequentialEnvWrapper]
) -> Union[ParallelEnvWrapper, SequentialEnvWrapper]:
    """Apply the Atari preprocessing wrapper chain to ``env``.

    The wrappers are applied in this order: max over 2 observations, sticky
    actions, frame skip of 4, resize to 84x84, frame stack of 4, and a cast to
    ``np.float32``.

    Args:
        env: The environment to wrap.

    Returns:
        The environment after all six supersuit wrappers have been applied.
    """
    # (wrapper, extra positional arguments, keyword arguments), in order.
    stages = (
        (supersuit.max_observation_v0, (2,), {}),
        # 0.25 repeat probability introduces non-determinism into the system.
        (supersuit.sticky_actions_v0, (), {"repeat_action_probability": 0.25}),
        # Skip frames for faster processing and less control;
        # to be compatible with gym, use frame_skip(env, (2, 5)).
        (supersuit.frame_skip_v0, (4,), {}),
        # Downscale observations for faster processing.
        (supersuit.resize_v0, (84, 84), {}),
        # Let the agent see everything on screen despite Atari's flickering.
        (supersuit.frame_stack_v1, (4,), {}),
        (supersuit.dtype_v0, (np.float32,), {}),
    )
    for wrap, positional, keywords in stages:
        env = wrap(env, *positional, **keywords)
    return env
def run_parallel2(args):
    """Test parallel mode with supersuit env wrappers.

    Builds the parallel environment named by ``args.env``, wraps it, seeds it
    with 1, then steps it for 500 cycles with every agent taking action 1,
    rendering after each step.
    """
    # NOTE(review): eval() executes whatever expression args.env contains;
    # only safe while that value is trusted.
    raw_env = eval(args.env).parallel_env()

    # As per openai baseline's MaxAndSkip wrapper: max over the last 2 frames
    # to deal with frame flickering.
    maxed = supersuit.max_observation_v0(raw_env, 2)
    # 0.25 repeat probability introduces non-determinism into the system.
    sticky = supersuit.sticky_actions_v0(maxed, repeat_action_probability=0.25)
    # Skip frames for faster processing and less control;
    # to be compatible with gym, use frame_skip(env, (2, 5)).
    skipped = supersuit.frame_skip_v0(sticky, 4)
    # Downscale observations for faster processing.
    resized = supersuit.resize_v0(skipped, 84, 84)
    # Let the agent see everything on screen despite Atari's flickering.
    parallel_env = supersuit.frame_stack_v1(resized, 4)

    parallel_env.seed(1)
    parallel_env.reset()
    print(parallel_env.agents)

    max_cycles = 500
    for _ in range(max_cycles):
        actions = dict.fromkeys(parallel_env.agents, 1)
        parallel_env.step(actions)
        parallel_env.render()
def create_envs(self, n_envs: int, eval_env: bool = False, no_log: bool = False) -> VecEnv:
    """Build the vectorised pistonball environment.

    Args:
        n_envs: Number of environment copies passed to ``concat_vec_envs_v1``.
        eval_env: Forwarded to ``self._maybe_normalize``.
        no_log: Not read by this method.

    Returns:
        The monitored (and possibly normalised) vector environment, wrapped in
        ``VecTransposeImage`` when its observation space is an image space that
        is not channels-first.
    """
    raw = pistonball_v5.parallel_env()
    reduced = ss.color_reduction_v0(raw, mode="B")
    resized = ss.resize_v0(reduced, x_size=84, y_size=84, linear_interp=True)
    stacked = ss.frame_stack_v1(resized, 3)
    vec_env = ss.pettingzoo_env_to_vec_env_v1(stacked)
    print(n_envs)
    vec_env = ss.concat_vec_envs_v1(vec_env, n_envs, num_cpus=4,
                                    base_class="stable_baselines3")
    vec_env = self._maybe_normalize(VecMonitor(vec_env), eval_env)

    # Nothing more to do unless the observations are channels-last images.
    if not is_image_space(vec_env.observation_space) or is_image_space_channels_first(
            vec_env.observation_space):
        return vec_env

    if self.verbose > 0:
        print("Wrapping into a VecTransposeImage")
    return VecTransposeImage(vec_env)
def env_creator():
    """Create a continuous-action pistonball_v4 environment with preprocessing.

    Returns:
        The environment after colour reduction, a cast to ``'float32'``, a
        resize to 84x84, observation normalisation to ``[0, 1]`` and a frame
        stack of 3.
    """
    pistonball_options = dict(
        n_pistons=20,
        local_ratio=0,
        time_penalty=-0.1,
        continuous=True,
        random_drop=True,
        random_rotate=True,
        ball_mass=0.75,
        ball_friction=0.3,
        ball_elasticity=1.5,
        max_cycles=125,
    )
    base = pistonball_v4.env(**pistonball_options)

    reduced = ss.color_reduction_v0(base, mode='B')
    as_float = ss.dtype_v0(reduced, 'float32')
    resized = ss.resize_v0(as_float, x_size=84, y_size=84)
    normalized = ss.normalize_obs_v0(resized, env_min=0, env_max=1)
    return ss.frame_stack_v1(normalized, 3)
def env_fn():
    """Create a frame-stacked SpaceInvaders environment with transposed observations.

    Returns:
        ``SpaceInvadersNoFrameskip-v4`` wrapped in ``AtariWrapper`` (reward
        clipping off), a frame stack of 4, and an observation lambda that
        moves the last observation axis to the front.
    """

    def last_axis_first(obs):
        # Reorder axes (0, 1, 2) -> (2, 0, 1).
        return np.transpose(obs, axes=(2, 0, 1))

    atari = AtariWrapper(gym.make("SpaceInvadersNoFrameskip-v4"), clip_reward=False)
    stacked = supersuit.frame_stack_v1(atari, 4)
    transposed = supersuit.observation_lambda_v0(stacked, last_axis_first)
    # Disabled in the original and left disabled here:
    # env = supersuit.dtype_v0(env,np.float32)
    # env = supersuit.normalize_obs_v0(env)
    return transposed
def env_creator(args):
    """Create the grayscale ``game_env`` environment with preprocessing wrappers.

    Args:
        args: Not read by this function.

    Returns:
        The environment after sticky actions, a resize to 84x84, a frame skip
        of 4, a frame stack of 4 and an agent indicator.
    """
    base = game_env.env(obs_type='grayscale_image')
    # Disabled: clip_reward_v0(env, lower_bound=-1, upper_bound=1)
    sticky = sticky_actions_v0(base, repeat_action_probability=0.25)
    resized = resize_v0(sticky, 84, 84)
    # Disabled: color_reduction_v0(env, mode='full')
    skipped = frame_skip_v0(resized, 4)
    stacked = frame_stack_v1(skipped, 4)
    return agent_indicator_v0(stacked, type_only=False)
def get_env(config):
    """Import the PettingZoo Atari module for ``env_name`` and wrap its parallel env.

    ``env_name`` is read from the enclosing scope; dashes in it are replaced
    with underscores to form the module name.

    Args:
        config: Mapping providing ``'random_action'`` and
            ``'random_action_probability'``.

    Returns:
        A ``ParallelPettingZooEnv`` around the preprocessed environment.
    """
    module_name = env_name.replace('-', '_')
    # A non-empty fromlist makes __import__ return the submodule itself.
    atari_module = __import__(f'pettingzoo.atari.{module_name}', fromlist=[None])

    grayscale = atari_module.parallel_env(obs_type='grayscale_image')
    skipped = frame_skip_v0(grayscale, 4)
    resized = resize_v0(skipped, 84, 84)
    stacked = frame_stack_v1(resized, 4)
    indicated = agent_indicator_v0(stacked)

    return ParallelPettingZooEnv(
        indicated,
        random_action=config['random_action'],
        random_proba=config['random_action_probability'],
    )
def make_env(env_name='boxing_v1', seed=1, obs_type='rgb_image'):
    '''Create the environment named ``env_name``, wrap it and seed it.

    PettingZoo Atari reference: https://www.pettingzoo.ml/atari

    Args:
        env_name: ``'slimevolley_v0'`` selects the gym SlimeVolley environment;
            any other value is evaluated as the name of a PettingZoo module.
        seed: Value passed to ``env.seed``.
        obs_type: Passed to ``parallel_env``; ``'rgb_image'`` selects the image
            preprocessing chain, anything else only gets a frame skip.

    Returns:
        The wrapped, seeded environment.
    '''
    # NOTE(review): the source of this function was flattened onto one line;
    # the nesting below (everything up to the space assignments living under
    # the PettingZoo branch) is reconstructed - confirm against the original.
    if env_name == 'slimevolley_v0':
        env = SlimeVolleyWrapper(gym.make("SlimeVolley-v0"))
    else:  # PettingZoo envs
        # NOTE(review): eval() executes env_name as an expression; only safe
        # while callers pass trusted names.
        env = eval(env_name).parallel_env(obs_type=obs_type)
        if obs_type == 'rgb_image':
            # as per openai baseline's MaxAndSKip wrapper, maxes over the last 2 frames
            # to deal with frame flickering
            env = supersuit.max_observation_v0(env, 2)
            # repeat_action_probability is set to 0.25 to introduce non-determinism to the system
            env = supersuit.sticky_actions_v0(env, repeat_action_probability=0.25)
            # skip frames for faster processing and less control
            # to be compatible with gym, use frame_skip(env, (2,5))
            env = supersuit.frame_skip_v0(env, 4)
            # downscale observation for faster processing
            env = supersuit.resize_v0(env, 84, 84)
            # allow agent to see everything on the screen despite Atari's flickering screen problem
            env = supersuit.frame_stack_v1(env, 4)
        else:
            env = supersuit.frame_skip_v0(env, 4)
        # env = PettingZooWrapper(env)  # need to be put at the end
        if env_name in AtariEnvs:
            # normalize the observation of Atari for both image or RAM
            # need to transform uint8 to float first for normalizing observation:
            # https://github.com/PettingZoo-Team/SuperSuit
            env = supersuit.dtype_v0(env, 'float32')
            # normalize the observation to (0,1)
            env = supersuit.normalize_obs_v0(env, env_min=0, env_max=1)
        # assign observation and action spaces: expose the first agent's
        # spaces as the environment-level ones
        env.observation_space = list(env.observation_spaces.values())[0]
        env.action_space = list(env.action_spaces.values())[0]
    env.seed(seed)
    return env
def unwrapped_check(env):
    """Stack every applicable wrapper on ``env`` and check ``unwrapped`` still works.

    Wrappers are chosen from the environment's observation and action spaces,
    which are re-read after each group because earlier wrappers may change
    them.

    Args:
        env: A single-agent environment exposing ``observation_space`` and
            ``action_space``.

    Raises:
        AssertionError: If ``env.unwrapped`` of the fully wrapped environment
            is not a ``DummyEnv`` instance.
    """
    # image observations: a rank-3 Box with 3 channels and bounds 0..255.
    # The shape tests previously compared a shape tuple to 3 (always False)
    # and called len() on the integer shape[2], so this branch never ran.
    if isinstance(env.observation_space, spaces.Box):
        if (len(env.observation_space.low.shape) == 3
                and (env.observation_space.low == 0).all()
                and env.observation_space.shape[2] == 3
                and (env.observation_space.high == 255).all()):
            env = max_observation_v0(env, 2)
            env = color_reduction_v0(env, mode="full")
            # normalize_obs_v0 is documented as working on float Box spaces;
            # 0..255 image spaces are typically integer-typed, so cast first.
            env = dtype_v0(env, np.float32)
            env = normalize_obs_v0(env)

    # box action spaces
    if isinstance(env.action_space, spaces.Box):
        env = clip_actions_v0(env)
        env = scale_actions_v0(env, 0.5)

    # stackable observations
    if isinstance(env.observation_space, (spaces.Box, spaces.Discrete)):
        env = frame_stack_v1(env, 2)

    # not discrete and not multibinary observations
    if not isinstance(env.observation_space, (spaces.Discrete, spaces.MultiBinary)):
        env = dtype_v0(env, np.float16)
        env = flatten_v0(env)
        env = frame_skip_v0(env, 2)

    # everything else
    env = clip_reward_v0(env, lower_bound=-1, upper_bound=1)
    env = delay_observations_v0(env, 2)
    env = sticky_actions_v0(env, 0.5)
    env = nan_random_v0(env)
    env = nan_zeros_v0(env)

    assert env.unwrapped.__class__ == DummyEnv, f"Failed to unwrap {env}"
def wrap_env(env, obs_type='ram'):
    """Create and preprocess the parallel environment of a PettingZoo Atari module.

    Args:
        env: Object providing ``parallel_env(obs_type=...)``.
        obs_type: ``'rgb_image'`` selects the full image preprocessing chain;
            any other value only gets a frame skip of 4.

    Returns:
        The preprocessed environment wrapped in ``Dict2TupleWrapper``.
    """
    parallel = env.parallel_env(obs_type=obs_type)
    # Captured before wrapping and re-attached afterwards.
    agent_names = parallel.unwrapped.agents

    if obs_type != 'rgb_image':
        # RAM version also needs frame skip, essential for boxing-v1, etc.
        wrapped = supersuit.frame_skip_v0(parallel, 4)
    else:
        # As per openai baseline's MaxAndSkip wrapper: max over the last 2
        # frames to deal with frame flickering.
        wrapped = supersuit.max_observation_v0(parallel, 2)
        # 0.25 repeat probability introduces non-determinism into the system.
        wrapped = supersuit.sticky_actions_v0(wrapped, repeat_action_probability=0.25)
        # Skip frames for faster processing and less control;
        # to be compatible with gym, use frame_skip(env, (2, 5)).
        wrapped = supersuit.frame_skip_v0(wrapped, 4)
        # Downscale observations for faster processing.
        wrapped = supersuit.resize_v0(wrapped, 84, 84)
        # Let the agent see everything on screen despite Atari's flickering.
        wrapped = supersuit.frame_stack_v1(wrapped, 4)

    # Normalise observations for both image and RAM: cast uint8 to float
    # first (https://github.com/PettingZoo-Team/SuperSuit), then scale to (0, 1).
    wrapped = supersuit.dtype_v0(wrapped, 'float32')
    wrapped = supersuit.normalize_obs_v0(wrapped, env_min=0, env_max=1)

    # Expose the first agent's spaces as the environment-level spaces.
    per_agent_obs = list(wrapped.observation_spaces.values())
    wrapped.observation_space = per_agent_obs[0]
    per_agent_act = list(wrapped.action_spaces.values())
    wrapped.action_space = per_agent_act[0]
    wrapped.agents = agent_names

    return Dict2TupleWrapper(wrapped)
def unwrapped_check(env):
    """Stack every applicable wrapper on ``env`` and check ``unwrapped`` still works.

    The environment is reset first so that ``env.agents`` can be read; the
    predicate helpers then decide which wrapper groups apply.

    Raises:
        AssertionError: If ``env.unwrapped`` of the fully wrapped environment
            is not a ``DummyEnv`` instance.
    """
    # NOTE(review): the source of this function was flattened onto one line;
    # the extent of each ``if`` body below is reconstructed - confirm.
    env.reset()
    agents = env.agents

    # Image-observation wrappers.
    if image_observation(env, agents):
        env = max_observation_v0(env, 2)
        env = color_reduction_v0(env, mode="full")
        env = normalize_obs_v0(env)

    # Box-action wrappers.
    if box_action(env, agents):
        env = clip_actions_v0(env)
        env = scale_actions_v0(env, 0.5)

    # Wrappers applied when observations can be homogenised across agents.
    if observation_homogenizable(env, agents):
        env = pad_observations_v0(env)
        env = frame_stack_v1(env, 2)
        env = agent_indicator_v0(env)
        env = black_death_v3(env)

    # Wrappers for observations that are neither dict, discrete nor multibinary.
    if (not_dict_observation(env, agents)
            and not_discrete_observation(env, agents)
            and not_multibinary_observation(env, agents)):
        env = dtype_v0(env, np.float16)
        env = flatten_v0(env)
        env = frame_skip_v0(env, 2)

    if action_homogenizable(env, agents):
        env = pad_action_space_v0(env)

    # Wrappers applied unconditionally.
    env = clip_reward_v0(env, lower_bound=-1, upper_bound=1)
    env = delay_observations_v0(env, 2)
    env = sticky_actions_v0(env, 0.5)
    env = nan_random_v0(env)
    env = nan_zeros_v0(env)

    assert env.unwrapped.__class__ == DummyEnv, f"Failed to unwrap {env}"
return DummyEnv(base_obs, base_obs_space, base_act_spaces) def new_dummy(): return DummyEnv(base_obs, base_obs_space, base_act_spaces) wrappers = [ supersuit.color_reduction_v0(new_dummy(), "R"), supersuit.resize_v0(dtype_v0(new_dummy(), np.uint8), x_size=5, y_size=10), supersuit.resize_v0(dtype_v0(new_dummy(), np.uint8), x_size=5, y_size=10, linear_interp=True), supersuit.dtype_v0(new_dummy(), np.int32), supersuit.flatten_v0(new_dummy()), supersuit.reshape_v0(new_dummy(), (64, 3)), supersuit.normalize_obs_v0(new_dummy(), env_min=-1, env_max=5.0), supersuit.frame_stack_v1(new_dummy(), 8), supersuit.reward_lambda_v0(new_dummy(), lambda x: x / 10), supersuit.clip_reward_v0(new_dummy()), supersuit.clip_actions_v0(new_continuous_dummy()), supersuit.frame_skip_v0(new_dummy(), 4), supersuit.frame_skip_v0(new_dummy(), (4, 6)), supersuit.sticky_actions_v0(new_dummy(), 0.75), supersuit.delay_observations_v0(new_dummy(), 1), ] @pytest.mark.parametrize("env", wrappers) def test_basic_wrappers(env): env.seed(5) obs = env.reset() act_space = env.action_space
save_code=True) writer = SummaryWriter(f"/tmp/{experiment_name}") # TRY NOT TO MODIFY: seeding device = torch.device( 'cuda' if torch.cuda.is_available() and args.cuda else 'cpu') random.seed(args.seed) np.random.seed(args.seed) torch.manual_seed(args.seed) torch.backends.cudnn.deterministic = args.torch_deterministic # petting zoo env = pistonball_v4.parallel_env() env = ss.color_reduction_v0(env, mode='B') env = ss.resize_v0(env, x_size=84, y_size=84) env = ss.frame_stack_v1(env, 3) env = ss.pettingzoo_env_to_vec_env_v0(env) envs = ss.concat_vec_envs_v0(env, args.num_envs, num_cpus=0, base_class='stable_baselines3') envs = VecMonitor(envs) if args.capture_video: envs = VecVideoRecorder(envs, f'videos/{experiment_name}', record_video_trigger=lambda x: x % 150000 == 0, video_length=400) envs = VecPyTorch(envs, device) args.num_envs = envs.num_envs args.batch_size = int(args.num_envs * args.num_steps) args.minibatch_size = int(args.batch_size // args.n_minibatch)
generated_agents_env_v0, ) import supersuit from supersuit import dtype_v0 import pytest wrappers = [ supersuit.dtype_v0(generated_agents_parallel_v0.env(), np.int32), supersuit.flatten_v0(generated_agents_parallel_v0.env()), supersuit.normalize_obs_v0( dtype_v0(generated_agents_parallel_v0.env(), np.float32), env_min=-1, env_max=5.0, ), supersuit.frame_stack_v1(generated_agents_parallel_v0.env(), 8), supersuit.reward_lambda_v0(generated_agents_parallel_v0.env(), lambda x: x / 10), supersuit.clip_reward_v0(generated_agents_parallel_v0.env()), supersuit.nan_noop_v0(generated_agents_parallel_v0.env(), 0), supersuit.nan_zeros_v0(generated_agents_parallel_v0.env()), supersuit.nan_random_v0(generated_agents_parallel_v0.env()), supersuit.frame_skip_v0(generated_agents_parallel_v0.env(), 4), supersuit.sticky_actions_v0(generated_agents_parallel_v0.env(), 0.75), supersuit.delay_observations_v0(generated_agents_parallel_v0.env(), 3), supersuit.max_observation_v0(generated_agents_parallel_v0.env(), 3), ] @pytest.mark.parametrize("env", wrappers) def test_pettingzoo_aec_api_par_gen(env):
def test_pettinzoo_frame_stack():
    """Run the PettingZoo API test on a frame-stacked ``simple_push_v2`` env.

    ``frame_stack_v1`` is called without a stack size, so its default is used.
    """
    api_test.api_test(frame_stack_v1(simple_push_v2.env()))
print(params)


def image_transpose(env):
    """Wrap ``env`` in ``VecTransposeImage`` when its images are not channels-first.

    Environments whose observation space is not an image space, or is already
    channels-first, are returned unchanged.
    """
    if is_image_space(env.observation_space) and not is_image_space_channels_first(
        env.observation_space
    ):
        env = VecTransposeImage(env)
    return env


# Training environment: n_envs concatenated copies of the preprocessed
# pistonball parallel environment.
env = pistonball_v5.parallel_env()
env = ss.color_reduction_v0(env, mode="B")
env = ss.resize_v0(env, x_size=84, y_size=84)
env = ss.frame_stack_v1(env, 3)
env = ss.pettingzoo_env_to_vec_env_v1(env)
env = ss.concat_vec_envs_v1(env, n_envs, num_cpus=1, base_class="stable_baselines3")
env = VecMonitor(env)
env = image_transpose(env)

# Evaluation environment: same preprocessing chain, but a single copy.
eval_env = pistonball_v5.parallel_env()
eval_env = ss.color_reduction_v0(eval_env, mode="B")
eval_env = ss.resize_v0(eval_env, x_size=84, y_size=84)
eval_env = ss.frame_stack_v1(eval_env, 3)
eval_env = ss.pettingzoo_env_to_vec_env_v1(eval_env)
eval_env = ss.concat_vec_envs_v1(
    eval_env, 1, num_cpus=1, base_class="stable_baselines3"
)
eval_env = VecMonitor(eval_env)
eval_env = image_transpose(eval_env)
# Run sizing: these four values determine the evaluation frequency below.
n_evaluations = 20
n_agents = 2
n_envs = 4
n_timesteps = 8000000

# n agents, n timesteps, docs, make, PZ import in test file
# The main class SumoEnvironment inherits MultiAgentEnv from RLlib.
base_env = make_env(net_file='nets/4x4-Lucas/4x4.net.xml',
                    route_file='nets/4x4-Lucas/4x4c1c2c1c2.rou.xml',
                    out_csv_name='outputs/4x4grid/test',
                    use_gui=False,
                    num_seconds=80000)

# Training environment: a copy of the base env, frame-stacked and
# concatenated into n_envs vector environments.
env = base_env.copy().parallel_env()
env = ss.frame_stack_v1(env, 3)
env = ss.pettingzoo_env_to_vec_env_v0(env)
env = ss.concat_vec_envs_v0(env, n_envs, num_cpus=1, base_class='stable_baselines3')
env = VecMonitor(env)

# Evaluation environment: same chain on a separate copy, single vector env.
eval_env = base_env.copy().parallel_env()
eval_env = ss.frame_stack_v1(eval_env, 3)
eval_env = ss.pettingzoo_env_to_vec_env_v0(eval_env)
eval_env = ss.concat_vec_envs_v0(eval_env, 1, num_cpus=1, base_class='stable_baselines3')
eval_env = VecMonitor(eval_env)

# Spread n_evaluations evenly over n_timesteps, then divide by
# n_envs * n_agents (presumably because each vectorised step advances that
# many agent transitions - confirm); never go below 1.
eval_freq = int(n_timesteps / n_evaluations)
eval_freq = max(eval_freq // (n_envs*n_agents), 1)

model = PPO("MlpPolicy", env, verbose=3, gamma=0.95, n_steps=256,
            ent_coef=0.0905168, learning_rate=0.00062211, vf_coef=0.042202,
            max_grad_norm=0.9, gae_lambda=0.99, n_epochs=5, clip_range=0.3,
            batch_size=256)
eval_callback = EvalCallback(eval_env, best_model_save_path='./logs/',
                             log_path='./logs/', eval_freq=eval_freq,
                             deterministic=True, render=False)
supersuit.resize_v1( dtype_v0(knights_archers_zombies_v10.env(vector_state=False), np.uint8), x_size=5, y_size=10, linear_interp=True, ), supersuit.dtype_v0(knights_archers_zombies_v10.env(), np.int32), supersuit.flatten_v0(knights_archers_zombies_v10.env()), supersuit.reshape_v0(knights_archers_zombies_v10.env(vector_state=False), (512 * 512, 3)), supersuit.normalize_obs_v0(dtype_v0(knights_archers_zombies_v10.env(), np.float32), env_min=-1, env_max=5.0), supersuit.frame_stack_v1(combined_arms_v6.env(), 8), supersuit.pad_observations_v0(simple_world_comm_v2.env()), supersuit.pad_action_space_v0(simple_world_comm_v2.env()), supersuit.black_death_v3(combined_arms_v6.env()), supersuit.agent_indicator_v0(knights_archers_zombies_v10.env(), True), supersuit.agent_indicator_v0(knights_archers_zombies_v10.env(), False), supersuit.reward_lambda_v0(knights_archers_zombies_v10.env(), lambda x: x / 10), supersuit.clip_reward_v0(combined_arms_v6.env()), supersuit.nan_noop_v0(knights_archers_zombies_v10.env(), 0), supersuit.nan_zeros_v0(knights_archers_zombies_v10.env()), supersuit.nan_random_v0(chess_v5.env()), supersuit.nan_random_v0(knights_archers_zombies_v10.env()), supersuit.frame_skip_v0(combined_arms_v6.env(), 4), supersuit.sticky_actions_v0(combined_arms_v6.env(), 0.75), supersuit.delay_observations_v0(combined_arms_v6.env(), 3),
supersuit.color_reduction_v0(knights_archers_zombies_v4.env(), "R"), supersuit.resize_v0(dtype_v0(knights_archers_zombies_v4.env(), np.uint8), x_size=5, y_size=10), supersuit.resize_v0(dtype_v0(knights_archers_zombies_v4.env(), np.uint8), x_size=5, y_size=10, linear_interp=True), supersuit.dtype_v0(knights_archers_zombies_v4.env(), np.int32), supersuit.flatten_v0(knights_archers_zombies_v4.env()), supersuit.reshape_v0(knights_archers_zombies_v4.env(), (512 * 512, 3)), supersuit.normalize_obs_v0(dtype_v0(knights_archers_zombies_v4.env(), np.float32), env_min=-1, env_max=5.0), supersuit.frame_stack_v1(knights_archers_zombies_v4.env(), 8), supersuit.pad_observations_v0(knights_archers_zombies_v4.env()), supersuit.pad_action_space_v0(knights_archers_zombies_v4.env()), supersuit.black_death_v0(knights_archers_zombies_v4.env()), supersuit.agent_indicator_v0(knights_archers_zombies_v4.env(), True), supersuit.agent_indicator_v0(knights_archers_zombies_v4.env(), False), supersuit.reward_lambda_v0(knights_archers_zombies_v4.env(), lambda x: x / 10), supersuit.clip_reward_v0(knights_archers_zombies_v4.env()), supersuit.clip_actions_v0(prison_v2.env(continuous=True)), supersuit.frame_skip_v0(knights_archers_zombies_v4.env(), 4), supersuit.sticky_actions_v0(knights_archers_zombies_v4.env(), 0.75), supersuit.delay_observations_v0(knights_archers_zombies_v4.env(), 3), ]
def create_single_env(args):
    """Create, wrap and seed the single environment named by ``args.env``.

    The branch is chosen from the environment name: SlimeVolley (gym),
    PettingZoo Atari (names in ``AtariEnvs``), PettingZoo Classic (names in
    ``ClassicEnvs``), LaserTag, or a plain gym environment as the fallback.

    Args:
        args: Options object; this function reads ``env``, ``num_envs``,
            ``against_baseline``, ``ram`` and ``seed`` from it.

    Returns:
        The wrapped environment, after ``env.seed(args.seed)`` was called.

    Raises:
        Exception: In the plain-gym fallback, whatever ``gym.make`` raises is
            re-raised after the error message has been printed.
    """
    # NOTE(review): the source of this function was flattened onto a few
    # lines; block nesting below is reconstructed - confirm, in particular
    # which wrappers sit under the 'SlimeVolleyPixel-v0' check.
    env_name = args.env
    # keep_info True to maintain dict type for parallel envs
    # (otherwise cannot pass VectorEnv wrapper)
    keep_info = args.num_envs > 1

    # PettingZoo Atari reference: https://www.pettingzoo.ml/atari
    if "slimevolley" in env_name or "SlimeVolley" in env_name:
        print(f'Load SlimeVolley env: {env_name}')
        env = gym.make(env_name)
        if env_name in [
                'SlimeVolleySurvivalNoFrameskip-v0', 'SlimeVolleyNoFrameskip-v0',
                'SlimeVolleyPixel-v0'
        ]:
            # For image-based envs, apply following wrappers (from gym atari) to achieve pettingzoo style env,
            # or use supersuit (requires input env to be either pettingzoo or gym env).
            # same as: https://github.com/hardmaru/slimevolleygym/blob/master/training_scripts/train_ppo_pixel.py
            # TODO Note: this cannot handle the two observations in above SlimeVolley envs,
            # since the wrappers are for single agent.
            if env_name != 'SlimeVolleyPixel-v0':
                env = NoopResetEnv(env, noop_max=30)
            env = MaxAndSkipEnv(env, skip=4)
            env = WarpFrame(env)
            # #env = ClipRewardEnv(env)
            env = FrameStack(env, 4)
        # slimevolley to pettingzoo style
        env = SlimeVolleyWrapper(env, args.against_baseline)
        # pettingzoo to nfsp style, keep_info True to maintain dict type for parallel envs
        env = NFSPPettingZooWrapper(env, keep_info=keep_info)

    elif env_name in AtariEnvs:  # PettingZoo Atari envs
        print(f'Load PettingZoo Atari env: {env_name}')
        obs_type = 'ram' if args.ram else 'rgb_image'

        # NOTE(review): eval() executes env_name as an expression; it is only
        # reached for names contained in AtariEnvs.
        env = eval(env_name).parallel_env(obs_type=obs_type)
        # this cannot go through supersuit wrapper, so get it first and reassign it
        env_agents = env.unwrapped.agents

        if obs_type == 'rgb_image':
            # as per openai baseline's MaxAndSKip wrapper, maxes over the last 2 frames
            # to deal with frame flickering
            env = supersuit.max_observation_v0(env, 2)
            # repeat_action_probability is set to 0.25 to introduce non-determinism to the system
            env = supersuit.sticky_actions_v0(env, repeat_action_probability=0.25)
            # skip frames for faster processing and less control
            # to be compatible with gym, use frame_skip(env, (2,5))
            env = supersuit.frame_skip_v0(env, 4)
            # downscale observation for faster processing
            env = supersuit.resize_v0(env, 84, 84)
            # allow agent to see everything on the screen despite Atari's flickering screen problem
            env = supersuit.frame_stack_v1(env, 4)
        else:
            # RAM version also need frame skip, essential for boxing-v1, etc
            env = supersuit.frame_skip_v0(env, 4)

        # env = PettingZooWrapper(env)  # need to be put at the end
        # normalize the observation of Atari for both image or RAM
        # need to transform uint8 to float first for normalizing observation:
        # https://github.com/PettingZoo-Team/SuperSuit
        env = supersuit.dtype_v0(env, 'float32')
        # normalize the observation to (0,1)
        env = supersuit.normalize_obs_v0(env, env_min=0, env_max=1)

        # assign observation and action spaces
        env.observation_space = list(env.observation_spaces.values())[0]
        env.action_space = list(env.action_spaces.values())[0]
        env.agents = env_agents
        # pettingzoo to nfsp style, keep_info True to maintain dict type for parallel envs
        env = NFSPPettingZooWrapper(env, keep_info=keep_info)

    elif env_name in ClassicEnvs:  # PettingZoo Classic envs
        print(f'Load PettingZoo Classic env: {env_name}')
        if env_name in ['rps_v1', 'rpsls_v1']:
            # NOTE(review): eval() on env_name, see the Atari branch.
            env = eval(env_name).parallel_env()
            env = PettingzooClassicWrapper(env, observation_mask=1.)
        else:  # only rps_v1 can use parallel_env at present
            env = eval(env_name).env()
            # since Classic games do not support Parallel API yet
            env = PettingzooClassic_Iterate2Parallel(env, observation_mask=None)
        env = NFSPPettingZooWrapper(env, keep_info=keep_info)

    elif "LaserTag" in env_name:  # LaserTag: https://github.com/younggyoseo/pytorch-nfsp
        print(f'Load LaserTag env: {env_name}')
        env = gym.make(env_name)
        env = wrap_pytorch(env)

    else:  # gym env
        print(f'Load Gym env: {env_name}')
        try:
            env = gym.make(env_name)
        except Exception:
            print(f"Error: No such env: {env_name}!")
            # Re-raise: continuing would only fail on the next line with an
            # unbound ``env`` and hide the real cause.
            raise
        # may need more wrappers here, e.g. Pong-ram-v0 need scaled observation!
        # Ref: https://towardsdatascience.com/deep-q-network-dqn-i-bce08bdf2af
        env = NFSPAtariWrapper(env, keep_info=keep_info)

    env.seed(args.seed)
    return env