Exemplo n.º 1
0
def env_creator():
    env = pistonball_v6.env(n_pistons=20,
                            time_penalty=-0.1,
                            continuous=True,
                            random_drop=True,
                            random_rotate=True,
                            ball_mass=0.75,
                            ball_friction=0.3,
                            ball_elasticity=1.5,
                            max_cycles=125)
    env = ss.color_reduction_v0(env, mode='B')
    env = ss.dtype_v0(env, 'float32')
    env = ss.resize_v0(env, x_size=84, y_size=84)
    env = ss.normalize_obs_v0(env, env_min=0, env_max=1)
    env = ss.frame_stack_v1(env, 3)
    return env
Exemplo n.º 2
0
def main():
    env = pistonball_v6.env(n_pistons=20, time_penalty=-0.1, continuous=True, random_drop=True, random_rotate=True, ball_mass=0.75, ball_friction=0.3, ball_elasticity=1.5, max_cycles=125)
    total_reward = 0
    obs_list = []
    NUM_RESETS = 1
    i = 0
    for i in range(NUM_RESETS):
        env.reset()
        for agent in env.agent_iter():
            obs, rew, done, info = env.last()
            act = policy(obs) if not done else None
            env.step(act)
            total_reward += rew
            i += 1
            if i % (len(env.possible_agents)+1) == 0:
                obs_list.append(np.transpose(env.render(mode='rgb_array'), axes=(1, 0, 2)))

    env.close()
    print("average total reward: ", total_reward/NUM_RESETS)
    write_gif(obs_list, 'pistonball_ben.gif', fps=15)
Exemplo n.º 3
0
def env_creator(config):
    env = pistonball_v6.env()
    env = dtype_v0(env, dtype=np.float32)
    env = color_reduction_v0(env, mode="R")
    env = normalize_obs_v0(env)
    return env
Exemplo n.º 4
0
def get_env(args: argparse.Namespace = get_args()):
    return PettingZooEnv(
        pistonball_v6.env(continuous=False, n_pistons=args.n_pistons))
Exemplo n.º 5
0
    verbose=3,
    gamma=0.95,
    n_steps=256,
    ent_coef=0.0905168,
    learning_rate=0.00062211,
    vf_coef=0.042202,
    max_grad_norm=0.9,
    gae_lambda=0.99,
    n_epochs=5,
    clip_range=0.3,
    batch_size=256,
)
model.learn(total_timesteps=2000000)
model.save("policy")

# Rendering

env = pistonball_v6.env()
env = ss.color_reduction_v0(env, mode="B")
env = ss.resize_v1(env, x_size=84, y_size=84)
env = ss.frame_stack_v1(env, 3)

model = PPO.load("policy")

env.reset()
for agent in env.agent_iter():
    obs, reward, done, info = env.last()
    act = model.predict(obs, deterministic=True)[0] if not done else None
    env.step(act)
    env.render()