示例#1
0
    def create_envs(self,
                    n_envs: int,
                    eval_env: bool = False,
                    no_log: bool = False) -> VecEnv:
        """Build the vectorized Pistonball environment.

        The PettingZoo parallel environment is preprocessed with SuperSuit
        (color reduction with mode "B", resize to 84x84 with linear
        interpolation, stack of 3 frames), converted to a vector
        environment, concatenated ``n_envs`` times and wrapped in
        ``VecMonitor``.

        :param n_envs: number of environment copies passed to
            ``ss.concat_vec_envs_v1``.
        :param eval_env: forwarded to ``self._maybe_normalize``.
        :param no_log: accepted for interface compatibility; not used by
            this implementation.
        :return: the wrapped vector environment, transposed with
            ``VecTransposeImage`` when its observation space is an image
            space that is not channels-first.
        """
        env = pistonball_v5.parallel_env()
        env = ss.color_reduction_v0(env, mode="B")
        env = ss.resize_v0(env, x_size=84, y_size=84, linear_interp=True)
        env = ss.frame_stack_v1(env, 3)
        env = ss.pettingzoo_env_to_vec_env_v1(env)

        # Diagnostic output follows the same verbosity gate as the
        # VecTransposeImage message below instead of printing unconditionally.
        if self.verbose > 0:
            print(f"Concatenating {n_envs} vectorized environments")
        env = ss.concat_vec_envs_v1(env,
                                    n_envs,
                                    num_cpus=4,
                                    base_class="stable_baselines3")
        env = VecMonitor(env)

        env = self._maybe_normalize(env, eval_env)

        # Only image observations that are not already channels-first need
        # the transpose wrapper.
        needs_transpose = (
            is_image_space(env.observation_space)
            and not is_image_space_channels_first(env.observation_space))
        if needs_transpose:
            if self.verbose > 0:
                print("Wrapping into a VecTransposeImage")
            env = VecTransposeImage(env)

        return env
示例#2
0
def env_creator():
    """Create the Pistonball environment with its preprocessing wrappers.

    The environment is built with a fixed set of physics and episode
    settings, then passed through, in order: color reduction (mode 'B'),
    conversion to float32, resize to 84x84, observation normalization to
    the range [0, 1] and a stack of 3 frames.
    """
    pistonball_settings = dict(
        n_pistons=20,
        local_ratio=0,
        time_penalty=-0.1,
        continuous=True,
        random_drop=True,
        random_rotate=True,
        ball_mass=0.75,
        ball_friction=0.3,
        ball_elasticity=1.5,
        max_cycles=125,
    )
    wrapped = pistonball_v4.env(**pistonball_settings)
    wrapped = ss.color_reduction_v0(wrapped, mode='B')
    wrapped = ss.dtype_v0(wrapped, 'float32')
    wrapped = ss.resize_v0(wrapped, x_size=84, y_size=84)
    wrapped = ss.normalize_obs_v0(wrapped, env_min=0, env_max=1)
    return ss.frame_stack_v1(wrapped, 3)
 def env_creator(args):
     """Create the preprocessed environment from ``env_constr``.

     ``env_constr`` and ``model`` are read from the enclosing scope.
     Observations are resized to 84x84 when ``model`` is ``None`` and to
     32x32 otherwise, then color-reduced, padded (action space and
     observations), cast to float32 and normalized. When ``model`` is
     ``"MLPModelV2"`` the observations are additionally flattened.

     :param args: unused by this implementation; kept for the caller's
         expected signature.
     :return: the wrapped environment as a ``PettingZooEnv``.
     """
     # Constructor defaults are used; overrides such as
     # killable_knights=False, killable_archers=False are not passed.
     env = env_constr.env()
     # Identity comparison is the correct test for None.
     resize_size = 84 if model is None else 32
     env = supersuit.resize_v0(env,
                               resize_size,
                               resize_size,
                               linear_interp=True)
     env = supersuit.color_reduction_v0(env)
     env = supersuit.pad_action_space_v0(env)
     env = supersuit.pad_observations_v0(env)
     # Frame stacking (frame_stack_v0 with 2 frames) is currently disabled.
     env = supersuit.dtype_v0(env, np.float32)
     env = supersuit.normalize_obs_v0(env)
     if model == "MLPModelV2":
         env = supersuit.flatten_v0(env)
     env = PettingZooEnv(env)
     return env
示例#4
0
def unwrapped_check(env):
    """Stack every applicable wrapper on ``env`` and verify ``unwrapped``.

    Wrappers are chosen from the (re-read) observation and action spaces
    after each stage, because earlier wrappers may change those spaces.
    After all wrappers are applied, ``env.unwrapped`` must be an instance
    whose class is exactly ``DummyEnv``.

    :param env: the base environment to wrap.
    :raises AssertionError: if unwrapping does not reach ``DummyEnv``.
    """
    # image observations: a rank-3 Box with 3 channels on the last axis and
    # bounds [0, 255].
    if isinstance(env.observation_space, spaces.Box):
        obs_space = env.observation_space
        # The rank must be compared via len(shape) and the channel count via
        # shape[2] directly; comparing the shape tuple to 3 is always False
        # and len() of an int raises TypeError.
        # NOTE(review): this branch was unreachable before the fix, so
        # confirm the wrapper chain below accepts the observation dtype of
        # the tested environments.
        if (len(obs_space.shape) == 3
                and obs_space.shape[2] == 3
                and (obs_space.low == 0).all()
                and (obs_space.high == 255).all()):
            env = max_observation_v0(env, 2)
            env = color_reduction_v0(env, mode="full")
            env = normalize_obs_v0(env)

    # box action spaces
    if isinstance(env.action_space, spaces.Box):
        env = clip_actions_v0(env)
        env = scale_actions_v0(env, 0.5)

    # stackable observations
    if isinstance(env.observation_space, (spaces.Box, spaces.Discrete)):
        env = frame_stack_v1(env, 2)

    # not discrete and not multibinary observations
    if not isinstance(env.observation_space,
                      (spaces.Discrete, spaces.MultiBinary)):
        env = dtype_v0(env, np.float16)
        env = flatten_v0(env)
        env = frame_skip_v0(env, 2)

    # everything else
    env = clip_reward_v0(env, lower_bound=-1, upper_bound=1)
    env = delay_observations_v0(env, 2)
    env = sticky_actions_v0(env, 0.5)
    env = nan_random_v0(env)
    env = nan_zeros_v0(env)

    assert env.unwrapped.__class__ == DummyEnv, f"Failed to unwrap {env}"
示例#5
0
def unwrapped_check(env):
    """Stack every applicable wrapper on ``env`` and verify ``unwrapped``.

    The environment is reset first and its agent list is captured once;
    that same list is passed to every predicate helper, while the helpers
    always receive the environment as wrapped so far. After all wrappers
    are applied, ``unwrapped`` must have exactly the class ``DummyEnv``.

    :param env: the base multi-agent environment to wrap.
    :raises AssertionError: if unwrapping does not reach ``DummyEnv``.
    """
    env.reset()
    agent_ids = env.agents
    wrapped = env

    # Image-specific observation wrappers.
    if image_observation(wrapped, agent_ids):
        wrapped = max_observation_v0(wrapped, 2)
        wrapped = color_reduction_v0(wrapped, mode="full")
        wrapped = normalize_obs_v0(wrapped)

    # Wrappers for box action spaces.
    if box_action(wrapped, agent_ids):
        wrapped = scale_actions_v0(clip_actions_v0(wrapped), 0.5)

    # Wrappers applied when observations can be homogenized across agents.
    if observation_homogenizable(wrapped, agent_ids):
        wrapped = pad_observations_v0(wrapped)
        wrapped = frame_stack_v1(wrapped, 2)
        wrapped = agent_indicator_v0(wrapped)
        wrapped = black_death_v3(wrapped)

    # The predicates are evaluated lazily and in this order, so a failing
    # one short-circuits the rest.
    array_like_checks = (
        not_dict_observation,
        not_discrete_observation,
        not_multibinary_observation,
    )
    if all(check(wrapped, agent_ids) for check in array_like_checks):
        wrapped = dtype_v0(wrapped, np.float16)
        wrapped = flatten_v0(wrapped)
        wrapped = frame_skip_v0(wrapped, 2)

    if action_homogenizable(wrapped, agent_ids):
        wrapped = pad_action_space_v0(wrapped)

    # Wrappers that are applied unconditionally.
    wrapped = clip_reward_v0(wrapped, lower_bound=-1, upper_bound=1)
    wrapped = delay_observations_v0(wrapped, 2)
    wrapped = sticky_actions_v0(wrapped, 0.5)
    wrapped = nan_random_v0(wrapped)
    wrapped = nan_zeros_v0(wrapped)

    assert wrapped.unwrapped.__class__ == DummyEnv, f"Failed to unwrap {wrapped}"
示例#6
0
               name=experiment_name,
               monitor_gym=True,
               save_code=True)
    writer = SummaryWriter(f"/tmp/{experiment_name}")

# TRY NOT TO MODIFY: seeding
# Use CUDA only when it is both available and requested through args.cuda;
# otherwise fall back to the CPU.
device = torch.device(
    'cuda' if torch.cuda.is_available() and args.cuda else 'cpu')
# Seed the Python, NumPy and PyTorch random generators with the same seed.
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
torch.backends.cudnn.deterministic = args.torch_deterministic

# PettingZoo environment: Pistonball (parallel API) preprocessed with
# SuperSuit: color reduction (mode 'B'), resize to 84x84, stack of 3 frames.
env = pistonball_v4.parallel_env()
env = ss.color_reduction_v0(env, mode='B')
env = ss.resize_v0(env, x_size=84, y_size=84)
env = ss.frame_stack_v1(env, 3)
# Convert to a vector environment and concatenate args.num_envs copies.
# NOTE(review): num_cpus=0 presumably keeps all copies in the current
# process - confirm against the SuperSuit documentation.
env = ss.pettingzoo_env_to_vec_env_v0(env)
envs = ss.concat_vec_envs_v0(env,
                             args.num_envs,
                             num_cpus=0,
                             base_class='stable_baselines3')
envs = VecMonitor(envs)
# Optional video capture: the trigger fires whenever the step counter is a
# multiple of 150000, and each recording is 400 steps long.
if args.capture_video:
    envs = VecVideoRecorder(envs,
                            f'videos/{experiment_name}',
                            record_video_trigger=lambda x: x % 150000 == 0,
                            video_length=400)
envs = VecPyTorch(envs, device)
# Overwrite the requested count with the count reported by the wrapped
# vector environment, which is what the rest of the script must use.
args.num_envs = envs.num_envs
示例#7
0
    assert obs.shape == (64, 3)
    first_obs, _, _, _ = env.step(5)
    assert np.all(np.equal(first_obs, base_obs.reshape([64, 3])))


def new_continuous_dummy():
    """Return a DummyEnv whose action space is a float32 Box of shape [3] in [0, 10].

    The observation and observation space are the module-level
    ``base_obs`` and ``base_obs_space``.
    """
    return DummyEnv(
        base_obs,
        base_obs_space,
        Box(low=np.float32(0.0), high=np.float32(10.0), shape=[3]),
    )


def new_dummy():
    """Return a new DummyEnv built from the module-level base observation and spaces."""
    dummy_args = (base_obs, base_obs_space, base_act_spaces)
    return DummyEnv(*dummy_args)


# One wrapped environment per SuperSuit wrapper under test. Every entry is
# built around its own freshly constructed dummy environment, so the
# entries do not share a base environment.
wrappers = [
    supersuit.color_reduction_v0(new_dummy(), "R"),
    # resize is applied to a dummy first converted to uint8, once with the
    # default interpolation and once with linear interpolation.
    supersuit.resize_v0(dtype_v0(new_dummy(), np.uint8), x_size=5, y_size=10),
    supersuit.resize_v0(dtype_v0(new_dummy(), np.uint8), x_size=5, y_size=10, linear_interp=True),
    supersuit.dtype_v0(new_dummy(), np.int32),
    supersuit.flatten_v0(new_dummy()),
    supersuit.reshape_v0(new_dummy(), (64, 3)),
    supersuit.normalize_obs_v0(new_dummy(), env_min=-1, env_max=5.0),
    supersuit.frame_stack_v1(new_dummy(), 8),
    supersuit.reward_lambda_v0(new_dummy(), lambda x: x / 10),
    supersuit.clip_reward_v0(new_dummy()),
    # clip_actions is the only entry built on the continuous-action dummy.
    supersuit.clip_actions_v0(new_continuous_dummy()),
    # frame_skip is covered with both an int and a 2-tuple argument.
    supersuit.frame_skip_v0(new_dummy(), 4),
    supersuit.frame_skip_v0(new_dummy(), (4, 6)),
    supersuit.sticky_actions_v0(new_dummy(), 0.75),
    supersuit.delay_observations_v0(new_dummy(), 1),
]
示例#8
0
def env_creator(config):
    """Create the Pistonball environment with float32, color-reduced, normalized observations.

    :param config: unused by this implementation; kept for the caller's
        expected signature.
    :return: the wrapped environment.
    """
    base = pistonball_v6.env()
    as_float = dtype_v0(base, dtype=np.float32)
    reduced = color_reduction_v0(as_float, mode="R")
    return normalize_obs_v0(reduced)
示例#9
0
 def env_creator(config):
     """Create the Pistonball environment configured from ``config``.

     ``local_ratio`` is read from ``config`` and defaults to 0.2 when the
     key is absent. Observations are cast to ``float32``, color-reduced
     with mode "R" and normalized.

     :param config: mapping that may contain a ``"local_ratio"`` entry.
     :return: the wrapped environment.
     """
     ratio = config.get("local_ratio", 0.2)
     base = pistonball_v4.env(local_ratio=ratio)
     as_float = dtype_v0(base, dtype=float32)
     return normalize_obs_v0(color_reduction_v0(as_float, mode="R"))
示例#10
0
def test_pettinzoo_pad_action_space():
    """Run the API test on a padded-action env and the seed test on a sticky-action env."""
    padded_env = pad_action_space_v0(simple_world_comm_v2.env())
    api_test.api_test(padded_env)

    def make_sticky_env():
        # The seed test receives a factory so it can build its own
        # environments.
        return sticky_actions_v0(simple_world_comm_v2.env(), 0.5)

    seed_test.seed_test(make_sticky_env, 100)


def test_pettingzoo_parallel_env():
    """Run the parallel play test on a parallel env with a padded action space."""
    parallel_test.parallel_play_test(
        pad_action_space_v0(simple_world_comm_v2.parallel_env()))


wrappers = [
    supersuit.color_reduction_v0(knights_archers_zombies_v4.env(), "R"),
    supersuit.resize_v0(dtype_v0(knights_archers_zombies_v4.env(), np.uint8),
                        x_size=5,
                        y_size=10),
    supersuit.resize_v0(dtype_v0(knights_archers_zombies_v4.env(), np.uint8),
                        x_size=5,
                        y_size=10,
                        linear_interp=True),
    supersuit.dtype_v0(knights_archers_zombies_v4.env(), np.int32),
    supersuit.flatten_v0(knights_archers_zombies_v4.env()),
    supersuit.reshape_v0(knights_archers_zombies_v4.env(), (512 * 512, 3)),
    supersuit.normalize_obs_v0(dtype_v0(knights_archers_zombies_v4.env(),
                                        np.float32),
                               env_min=-1,
                               env_max=5.0),
    supersuit.frame_stack_v1(knights_archers_zombies_v4.env(), 8),
示例#11
0
# Evaluation interval: first spread n_evaluations over the training budget,
# then divide by the number of (env, agent) pairs, with a floor of 1.
eval_freq = int(n_timesteps / n_evaluations)
eval_freq = max(eval_freq // (n_envs*n_agents), 1)

# Train PPO with an MLP policy and fixed hyperparameters; the callback
# evaluates on eval_env every eval_freq calls and writes its logs and best
# model under ./logs/.
model = PPO("MlpPolicy", env, verbose=3, gamma=0.95, n_steps=256, ent_coef=0.0905168, learning_rate=0.00062211, vf_coef=0.042202, max_grad_norm=0.9, gae_lambda=0.99, n_epochs=5, clip_range=0.3, batch_size=256)
eval_callback = EvalCallback(eval_env, best_model_save_path='./logs/', log_path='./logs/', eval_freq=eval_freq, deterministic=True, render=False)
model.learn(total_timesteps=n_timesteps, callback=eval_callback)

# Replace the in-memory model with the one stored at ./logs/best_model
# (presumably the checkpoint written by eval_callback - verify).
model = PPO.load("./logs/best_model")

# Final evaluation over 10 episodes.
mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10)

print(mean_reward)
print(std_reward)

# Separate environment for rendering, with the preprocessing steps color
# reduction (mode 'B'), resize to 84x84 and a stack of 3 frames.
render_env = base_env.copy().parallel_env()
render_env = ss.color_reduction_v0(render_env, mode='B')
render_env = ss.resize_v0(render_env, x_size=84, y_size=84)
render_env = ss.frame_stack_v1(render_env, 3)

obs_list = []
i = 0
render_env.reset()


# NOTE(review): no exit condition is visible for this loop and obs_list is
# never filled here - the snippet looks truncated; confirm against the
# full script.
# NOTE(review): agent_iter()/last() look like the per-agent (AEC) API,
# while render_env was created with parallel_env() - confirm this is the
# intended environment type.
while True:
    for agent in render_env.agent_iter():
        observation, _, done, _ = render_env.last()
        # A finished agent must be stepped with None instead of a predicted
        # action.
        action = model.predict(observation, deterministic=True)[0] if not done else None

        render_env.step(action)
        i += 1
# Load the hyperparameter set selected by `num` from
# ./hyperparameter_jsons/hyperparameters_<num>.json. `num` is concatenated
# directly, so it must already be a string.
with open("./hyperparameter_jsons/" + "hyperparameters_" + num + ".json") as f:
    params = json.load(f)

# Echo the loaded hyperparameters.
print(params)


def image_transpose(env):
    """Wrap ``env`` in ``VecTransposeImage`` when its images are not channels-first.

    :param env: environment exposing an ``observation_space`` attribute.
    :return: ``env`` unchanged when its observation space is not an image
        space or is already channels-first; otherwise the transposing
        wrapper around it.
    """
    if not is_image_space(env.observation_space):
        return env
    if is_image_space_channels_first(env.observation_space):
        return env
    return VecTransposeImage(env)


# Training environment: Pistonball (parallel API) with color reduction
# (mode "B"), resize to 84x84 and a stack of 3 frames, converted to a
# vector environment, concatenated n_envs times, monitored, and transposed
# by image_transpose when required.
env = pistonball_v5.parallel_env()
env = ss.color_reduction_v0(env, mode="B")
env = ss.resize_v0(env, x_size=84, y_size=84)
env = ss.frame_stack_v1(env, 3)
env = ss.pettingzoo_env_to_vec_env_v1(env)
env = ss.concat_vec_envs_v1(env, n_envs, num_cpus=1, base_class="stable_baselines3")
env = VecMonitor(env)
env = image_transpose(env)

# Evaluation environment: same preprocessing, but a single concatenated
# copy.
# NOTE(review): unlike the training env, no VecMonitor / image_transpose
# is applied in the lines visible here - confirm whether that follows
# later in the script.
eval_env = pistonball_v5.parallel_env()
eval_env = ss.color_reduction_v0(eval_env, mode="B")
eval_env = ss.resize_v0(eval_env, x_size=84, y_size=84)
eval_env = ss.frame_stack_v1(eval_env, 3)
eval_env = ss.pettingzoo_env_to_vec_env_v1(eval_env)
eval_env = ss.concat_vec_envs_v1(
    eval_env, 1, num_cpus=1, base_class="stable_baselines3"
)
示例#13
0
 def env_creator(config):
     """Create the ``zoo_yaniv`` environment with float32, color-reduced, normalized observations.

     :param config: passed through to ``zoo_yaniv.env`` as its ``config``
         keyword argument.
     :return: the wrapped environment.
     """
     base = zoo_yaniv.env(config=config)
     as_float = dtype_v0(base, dtype=float32)
     reduced = color_reduction_v0(as_float, mode="R")
     return normalize_obs_v0(reduced)
def test_pettingzoo_pad_action_space():
    """Run the API test on a padded-action env and the seed test on a sticky-action env."""
    api_test(pad_action_space_v0(simple_world_comm_v2.env()))

    def sticky_env_factory():
        # A new environment is built on every call, as the seed test
        # expects.
        return sticky_actions_v0(simple_world_comm_v2.env(), 0.5)

    seed_test(sticky_env_factory, 100)


def test_pettingzoo_parallel_env():
    """Run the parallel API test on a parallel env with a padded action space."""
    padded_parallel_env = pad_action_space_v0(
        simple_world_comm_v2.parallel_env())
    parallel_test.parallel_api_test(padded_parallel_env)


wrappers = [
    supersuit.color_reduction_v0(
        knights_archers_zombies_v10.env(vector_state=False), "R"),
    supersuit.resize_v1(
        dtype_v0(knights_archers_zombies_v10.env(vector_state=False),
                 np.uint8),
        x_size=5,
        y_size=10,
    ),
    supersuit.resize_v1(
        dtype_v0(knights_archers_zombies_v10.env(vector_state=False),
                 np.uint8),
        x_size=5,
        y_size=10,
        linear_interp=True,
    ),
    supersuit.dtype_v0(knights_archers_zombies_v10.env(), np.int32),
    supersuit.flatten_v0(knights_archers_zombies_v10.env()),