示例#1
0
def test_compute_action_mask(action_mask: ActionMaskEnv):
    """Check ``compute_action_mask`` against two patched legal-action lists.

    ``state_cache.getLegalActions`` is replaced with a mock in each scenario
    so the test dictates exactly what the environment sees:

    * an empty list is expected to yield ``[1, 1, 1, 1]``;
    * ``["East"]`` is expected to yield ``[0, 0, 1, 0]``.
    """
    scenarios = (
        ([], [1, 1, 1, 1]),
        (["East"], [0, 0, 1, 0]),
    )
    for legal_actions, expected_mask in scenarios:
        # A fresh patch per scenario, undone as soon as the `with` exits.
        patcher = mock.patch.object(
            action_mask.state_cache,
            "getLegalActions",
            return_value=legal_actions,
        )
        with patcher:
            assert action_mask.compute_action_mask() == expected_mask
示例#2
0
def test_step(action_mask: ActionMaskEnv):
    """Step the environment three times and check each step's results.

    The first step (action ``2``) checks the observation shape, the reward
    and the done flag.  The two following steps (actions ``0`` and ``3``)
    check the reward, the direction reported by
    ``state_cache.getPacmanDirection()`` and the info dict returned as the
    fourth element of ``step``.
    """
    first_obs, first_reward, first_done, _ = action_mask.step(2)
    assert first_obs.shape == (47, 15, 6)
    assert first_reward == 10
    assert not first_done

    # (action, expected reward, expected direction, expected action mask)
    follow_ups = (
        (0, 10, 'East', [0, 1, 1, 1]),
        (3, 0, 'West', [0, 0, 1, 1]),
    )
    for action, want_reward, want_direction, want_mask in follow_ups:
        _, got_reward, _, info = action_mask.step(action)
        assert got_reward == want_reward
        # Read state_cache after the step, exactly as each check requires.
        assert action_mask.state_cache.getPacmanDirection() == want_direction
        assert info == {'action_mask': want_mask}
# Output locations for TensorBoard logs and saved model files.
tensorboard_folder = './tensorboard/Pacman/action_mask/'
model_folder = './models/Pacman/action_mask/'
# exist_ok=True creates each directory only when it is missing, without the
# check-then-create race of a separate os.path.isdir() test.
os.makedirs(tensorboard_folder, exist_ok=True)
os.makedirs(model_folder, exist_ok=True)

# Optional first command-line argument: kept verbatim in `policy` and, with a
# leading underscore, in `model_tag` (appended to log and model names below).
policy = ''
model_tag = ''
if len(sys.argv) > 1:
    policy = sys.argv[1]
    model_tag = '_' + sys.argv[1]

# Script entry point: build the vectorised environment, train an ACKTR model,
# save it, reload it, and initialise the variables used by the loop below.
if __name__ == '__main__':
    # Four ActionMaskEnv factories handed to SubprocVecEnv, then wrapped by
    # VecFrameStack with argument 3 (presumably the number of stacked
    # frames -- confirm against the stable-baselines docs).
    env = SubprocVecEnv([lambda: ActionMaskEnv() for i in range(4)])
    env = VecFrameStack(env, 3)

    # The policy comes from get_policy(policy), where `policy` is the optional
    # first command-line argument ('' when none was given).
    model = ACKTR(get_policy(policy), env, n_steps=100, verbose=0,vf_fisher_coef=0.5 , tensorboard_log=tensorboard_folder, kfac_update=10, n_cpu_tf_sess=2, async_eigen_decomp=False)
    # Train for 100,000,000 timesteps; the TensorBoard run name carries model_tag.
    model.learn(total_timesteps=100000000, tb_log_name='ACKTR_A2C' + model_tag)

    # Save the trained model, drop the in-memory instance, and load it back
    # from the same path.
    model.save(model_folder + "ACKTR_A2C" + model_tag)
    del model
    model = ACKTR.load(model_folder + "ACKTR_A2C" + model_tag)

    # Initial values for the loop that follows.
    # NOTE(review): `done` is a scalar here while the env is vectorised over
    # four sub-environments -- confirm the loop condition handles what
    # env.step() returns for `done`.
    done = False
    states = None
    action_masks = []
    obs = env.reset()

    while not done:
def action_mask():
    """Return a new ``ActionMaskEnv`` constructed with the argument ``'test_map'``."""
    env = ActionMaskEnv('test_map')
    return env
# Output location for TensorBoard logs and the folder saved models are read from.
tensorboard_folder = './tensorboard/Pacman/action_mask/'
model_folder = './models/Pacman/base/'
# exist_ok=True creates each directory only when it is missing, without the
# check-then-create race of a separate os.path.isdir() test.
os.makedirs(tensorboard_folder, exist_ok=True)
os.makedirs(model_folder, exist_ok=True)

# Optional first command-line argument: kept verbatim in `policy` and, with a
# leading underscore, in `model_tag` (appended to the model file name below).
policy = ''
model_tag = ''
if len(sys.argv) > 1:
    policy = sys.argv[1]
    model_tag = '_' + sys.argv[1]

if __name__ == '__main__':
    env = DummyVecEnv([lambda: ActionMaskEnv() for i in range(4)])
    env = VecFrameStack(env, 3)

    model = PPO2.load(model_folder + "PPO2" + model_tag)

    done = [False, False, False, False]
    states = None
    action_masks = []
    obs = env.reset()

    while not done[0]:
        action, states = model.predict(obs, states, action_mask=action_masks)
        obs, _, done, infos = env.step(action)
        env.render()
        action_masks.clear()
        for info in infos: