Example #1
 def test_spot_to_action(self):
     self.assertEqual(
         GameWrapper.convert_spot_to_action(GameBoard.Spot(0, 0)), 0)
     self.assertEqual(
         GameWrapper.convert_spot_to_action(GameBoard.Spot(1, 0)), 8)
     self.assertEqual(
         GameWrapper.convert_spot_to_action(GameBoard.Spot(1, 2)), 10)
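
The assertions above imply a row-major flattening onto an 8-wide board (action = row * 8 + column). A minimal sketch, not taken from the source, of what convert_spot_to_action could look like; the Spot attribute names row and col are assumptions:

    @staticmethod
    def convert_spot_to_action(spot):
        # Row-major index on an 8x8 board (assumed width); Spot(1, 2) -> 1 * 8 + 2 = 10.
        return spot.row * 8 + spot.col
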
Example #2
    def test_execute_move_legal(self):
        # current player is p1
        game = GameWrapper(1)

        # place a piece at (3, 5), flipping (3, 4)
        env_observation, reward, done, game, is_valid = game.execute_move(
            8 * 3 + 5)
        # expectation:
        #   player switched,
        #   game not done,
        #   new board observation,
        #   reward 0.01 (+1 new piece)
        self.assertTrue(is_valid)
        self.assertEqual(game.current_player, GameBoard.PLAYER_2)
        self.assertFalse(done)
        self.assertEqual(reward, 0.01)
        np.testing.assert_array_equal(
            env_observation,
            np.asarray([
                0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                0.0, 0.0, 0.0, -1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0,
                -1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                0.0, 0.0, 0.0, 0.0
            ]))
Example #3
    def test_execute_move_pass(self):
        game = GameWrapper(1)
        old_observation = game.reset()

        env_observation, reward, done, game, is_valid = game.execute_move(
            game.PASS_TURN_ACTION)
        self.assertTrue(is_valid)
        np.testing.assert_array_equal(env_observation, old_observation * -1)
        self.assertEqual(reward, -0.01)
        self.assertFalse(done)
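
The pass test relies on the observation being reported from the current player's point of view: passing flips the sign of every cell. A minimal sketch of how such an observation could be produced, assuming the board stores PLAYER_1 pieces as +1.0 and PLAYER_2 pieces as -1.0; the method name _observe is hypothetical, not from the source:

    def _observe(self):
        # Flatten the board and flip signs so the current player's pieces
        # always appear as +1.0 in the observation.
        flat = np.asarray(self.game_board.board, dtype=np.float32).flatten()
        return flat if self.current_player == GameBoard.PLAYER_1 else flat * -1.0
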
Example #4
 def test_reset(self):
     env = GameWrapper(1)
     observation = env.reset()
     np.testing.assert_array_equal(
         observation,
         np.asarray([
             0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
             0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
             0.0, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -1.0,
             1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
             0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
             0.0, 0.0, 0.0, 0.0
         ]))
Example #5
 def test_execute_move_illegal(self):
     game = GameWrapper(1)
     old_player = game.current_player
     # place at the corner. Not a valid move.
     env_observation, reward, done, game, is_valid = game.execute_move(0)
     # expecting: player not switched, same board observation, reward -0.01
     self.assertFalse(is_valid)
     self.assertEqual(old_player, game.current_player)
     self.assertEqual(reward, -0.01)
     self.assertFalse(done)
     np.testing.assert_array_equal(
         env_observation,
         np.asarray([
             0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
             0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
             0.0, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -1.0,
             1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
             0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
             0.0, 0.0, 0.0, 0.0
         ]))
Example #6
    def test_action_to_spot(self):
        self.assertEqual(GameWrapper.convert_action_to_spot(0),
                         GameBoard.Spot(0, 0))
        self.assertEqual(GameWrapper.convert_action_to_spot(8),
                         GameBoard.Spot(1, 0))
        self.assertEqual(GameWrapper.convert_action_to_spot(10),
                         GameBoard.Spot(1, 2))

        with self.assertRaises(Exception) as context:
            GameWrapper.convert_action_to_spot(64)

        with self.assertRaises(Exception) as context:
            GameWrapper.convert_action_to_spot(-1)
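
    # Sketch (not from the source): read together with the out-of-range cases
    # above, these assertions suggest convert_action_to_spot is the inverse
    # row-major mapping with a bounds check, roughly:
    #
    #     @staticmethod
    #     def convert_action_to_spot(action):
    #         if action < 0 or action > 63:
    #             raise ValueError('action out of range: %d' % action)
    #         return GameBoard.Spot(action // 8, action % 8)
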
        # policy gradient loss, weighted by advantages; only computed on the actions taken.
        actions = tf.cast(actions, tf.int32)
        policy_loss = weighted_sparse_ce(actions,
                                         logits,
                                         sample_weight=advantages)
        # entropy loss, computed as the cross-entropy of the policy with itself
        probs = tf.nn.softmax(logits)
        entropy_loss = tf.keras.losses.categorical_crossentropy(probs, probs)
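        # Side note (not from the source): the cross-entropy of a distribution
        # with itself equals its entropy, -sum(p * log p). For example, with
        # probs = [[0.7, 0.2, 0.1]],
        # tf.keras.losses.categorical_crossentropy(probs, probs) and
        # -np.sum(probs * np.log(probs)) both give roughly 0.802.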
        # total loss: minimize the policy loss while maximizing entropy;
        # the entropy term is subtracted because the optimizer minimizes.
        return policy_loss - self.ent_coef * entropy_loss


if __name__ == '__main__':
    logging.basicConfig(
        format='%(levelname)s:%(message)s',
        # level=logging.DEBUG)
        level=logging.INFO)

    # create the Game
    board_size = 6
    game = GameWrapper(1, board_size=board_size)

    # create the agent
    agent_nn = A2CAgentNN(action_size=game.get_action_size(),
                          input_size=game.get_vision_shape())
    agent = A2CAgentV1(agent_nn)

    # Train
    agent.train(game, 5, 10000)
Example #8
 def test_execute_move_tie_p2(self):
     game_env = GameWrapper(1)
     game_env.current_player = GameBoard.PLAYER_2
     game_env.game_board.board = \
         list(map(lambda r: list(map(lambda i: i * -1.0, r)),
                  [[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, ],
                   [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, ],
                   [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, ],
                   [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, ],
                   [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ],
                   [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ],
                   [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ],
                   [1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 0.0]]))
     # place at the last spot to end the game in a tie
     env_observation, reward, done, game_env, is_valid = game_env.execute_move(
         63)
     # expectation:
     #   current player remains p2,
     #   game is done,
     #   new board observation,
     #   reward 0 for the tie
     self.assertTrue(is_valid)
     self.assertEqual(game_env.current_player, GameBoard.PLAYER_2)
     self.assertTrue(done)
     self.assertEqual(reward, 0)
     np.testing.assert_array_equal(
         env_observation,
         np.asarray([
              -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0,
              -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0,
              -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0,
              -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
              1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
              1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0
          ]))
Example #9
 def test_execute_move_win(self):
     game_env = GameWrapper(1)
     game_env.game_board.board = [
         [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
         [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
         [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
         [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
         [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
         [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
         [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
         [1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 0.0]]
     # place at the last spot to win
     env_observation, reward, done, game_env, is_valid = game_env.execute_move(
         63)
     # expectation:
     #   current player remains p1,
     #   game is done,
     #   new board observation,
     #   reward 64 (p1 holds all 64 pieces after the winning move)
     self.assertTrue(is_valid)
     self.assertEqual(game_env.current_player, GameBoard.PLAYER_1)
     self.assertTrue(done)
     self.assertEqual(reward, 64)
     np.testing.assert_array_equal(
         env_observation,
         np.asarray([
              1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
              1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
              1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
              1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
              1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
              1.0, 1.0, 1.0, 1.0
          ]))
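
Taken together, the tests above pin down the reward scheme: -0.01 for an illegal move or a pass, +0.01 for an ordinary legal placement, 0 when the game ends in a tie, and 64 when the mover finishes with all 64 pieces. A minimal sketch consistent with those cases; the helper name is hypothetical, and the piece-differential reading of the terminal reward is an assumption (the tests cannot distinguish a piece count from a piece differential, since the winner holds every square):

    def _reward_for(self, is_valid, is_pass, done, own_pieces, opponent_pieces):
        # Hypothetical helper, inferred only from the assertions in the tests above.
        if not is_valid or is_pass:
            return -0.01                         # illegal move or pass
        if done:
            return own_pieces - opponent_pieces  # 0 on a 32-32 tie, 64 on a sweep
        return 0.01                              # ordinary legal placement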