Exemplos de solve_zero_sum_matrix_game em Python, exemplos de open_spiel.python.algorithms.lp_solver.solve_zero_sum_matrix_game em Python

Exemplo n.º 1

0

Exibir arquivo

def nash_strategy(solver, return_joint=False):
    """Returns nash distribution on meta game matrix.

    This method only works for two player zero-sum games.

    Args:
      solver: GenPSROSolver instance.
      return_joint: If False (the default), only the marginals are returned.
        If True, the marginals are returned together with the joint
        probabilities computed from them.

    Returns:
      A list with one renormalized, flattened strategy distribution per
      player. When `return_joint` is True, a `(marginals, joint)` tuple is
      returned instead.

    Raises:
      NotImplementedError: If the meta game does not have exactly two payoff
        matrices.
    """
    meta_games = solver.get_meta_game()
    # A single matrix is taken as player 0's payoffs; player 1 receives the
    # negation (zero-sum).
    if not isinstance(meta_games, list):
        meta_games = [meta_games, -meta_games]
    meta_games = [x.tolist() for x in meta_games]
    if len(meta_games) != 2:
        # Note the trailing space: the two literals are concatenated.
        raise NotImplementedError(
            "nash_strategy solver works only for 2p zero-sum "
            "games, but was invoked for a {} player game".format(
                len(meta_games)))
    nash_prob_1, nash_prob_2, _, _ = (lp_solver.solve_zero_sum_matrix_game(
        pyspiel.create_matrix_game(*meta_games)))
    result = [
        renormalize(np.array(nash_prob_1).reshape(-1)),
        renormalize(np.array(nash_prob_2).reshape(-1))
    ]

    if not return_joint:
        return result
    joint_strategies = get_joint_strategy_from_marginals(result)
    return result, joint_strategies

Exemplo n.º 2

0

Exibir arquivo

Arquivo: value_it_vs_human.py Projeto: Tubbz-alt/goofspiel

    def _matrix_game(self, state):
        """Builds the stage matrix game at `state`, solves it, returns a policy.

        Each cell of the matrix is the expected value, under `self._values`,
        of the successor states reached by the joint action
        (player 0 action, player 1 action). Player 1's payoffs are the
        negation of player 0's.

        Args:
          state: The state whose joint-action matrix game is solved.

        Returns:
          A tuple `(actions, probs)`: player 0's legal actions at `state` and
          the first element of the LP solver's solution.
        """
        state_key = str(state)
        transitions = {}
        value_iteration._initialize_maps({state_key: state}, self._values,
                                         transitions)

        row_payoffs = []  # player 0, one inner list per player 0 action
        for row_action in state.legal_actions(0):
            q_row = []
            for col_action in state.legal_actions(1):
                successors = transitions[(state_key, row_action, col_action)]
                q_row.append(
                    sum(prob * self._values[successor]
                        for successor, prob in successors))
            row_payoffs.append(q_row)
        # Zero-sum: the column player receives the negated payoff.
        col_payoffs = [[-q for q in q_row] for q_row in row_payoffs]

        stage_game = pyspiel.create_matrix_game(row_payoffs, col_payoffs)
        probs = lp_solver.solve_zero_sum_matrix_game(stage_game)[0]
        # NOTE(review): the order of `actions` is assumed to match the order
        # of `probs` -- double check.
        actions = state.legal_actions(0)
        return actions, probs

Exemplo n.º 3

0

Exibir arquivo

Arquivo: matrix_game_utils_test.py Projeto: DailyActie/AI_RL_APP-open_spiel

 def test_extensive_to_matrix_game(self):
   """Solves Kuhn poker converted to a matrix game and checks both values."""
   matrix_game = pyspiel.extensive_to_matrix_game(
       pyspiel.load_game("kuhn_poker"))
   solution = lp_solver.solve_zero_sum_matrix_game(matrix_game)
   _, _, row_value, col_value = solution
   # value from Kuhn 1950 or https://en.wikipedia.org/wiki/Kuhn_poker
   self.assertAlmostEqual(row_value, -1 / 18)
   self.assertAlmostEqual(col_value, +1 / 18)

Exemplo n.º 4

0

Exibir arquivo

Arquivo: lp_solver_test.py Projeto: DailyActie/AI_RL_APP-open_spiel

 def test_solve_blotto(self):
     """Solves the "blotto" matrix game; checks policy sizes and values."""
     game = pyspiel.load_matrix_game("blotto")
     solution = lp_solver.solve_zero_sum_matrix_game(game)
     row_policy, col_policy, row_value, col_value = solution
     self.assertEqual(len(row_policy), game.num_rows())
     self.assertEqual(len(col_policy), game.num_cols())
     # Symmetric game, must be zero
     self.assertAlmostEqual(row_value, 0.0)
     self.assertAlmostEqual(col_value, 0.0)

Exemplo n.º 5

0

Exibir arquivo

Arquivo: lp_solver_test.py Projeto: DailyActie/AI_RL_APP-open_spiel

 def test_rock_paper_scissors(self):
     """Checks the solver returns the uniform policy and value 0 for RPS."""
     row_payoffs = [[0.0, -1.0, 1.0], [1.0, 0.0, -1.0], [-1.0, 1.0, 0.0]]
     col_payoffs = [[0.0, 1.0, -1.0], [-1.0, 0.0, 1.0], [1.0, -1.0, 0.0]]
     game = pyspiel.create_matrix_game(row_payoffs, col_payoffs)
     row_policy, col_policy, row_value, col_value = (
         lp_solver.solve_zero_sum_matrix_game(game))
     self.assertEqual(len(row_policy), 3)
     self.assertEqual(len(col_policy), 3)
     for action in range(3):
         self.assertAlmostEqual(row_policy[action], 1.0 / 3.0)
         self.assertAlmostEqual(col_policy[action], 1.0 / 3.0)
     self.assertAlmostEqual(row_value, 0.0)
     self.assertAlmostEqual(col_value, 0.0)

Exemplo n.º 6

0

Exibir arquivo

def main(_):
    """Solves a 3x3 zero-sum matrix game and runs one dominance query.

    Prints each player's value and policy, the second entry of player 0's
    policy, and the result of a weak-dominance check with
    `return_mixture=True`.
    """
    row_payoffs = [[0.0, -0.25, 0.5], [0.25, 0.0, -0.05], [-0.5, 0.05, 0.0]]
    col_payoffs = [[0.0, 0.25, -0.5], [-0.25, 0.0, 0.05], [0.5, -0.05, 0.0]]
    game = pyspiel.create_matrix_game(row_payoffs, col_payoffs)
    solution = lp_solver.solve_zero_sum_matrix_game(game)
    p0_sol, p1_sol, p0_sol_val, p1_sol_val = solution
    print("p0 val = {}, policy = {}".format(p0_sol_val, p0_sol))
    print("p1 val = {}, policy = {}".format(p1_sol_val, p1_sol))
    print(p0_sol[1])

    dominance_payoffs = [[1., 1., 1.], [2., 0., 1.], [0., 2., 2.]]
    mixture = lp_solver.is_dominated(
        0, dominance_payoffs, 0, lp_solver.DOMINANCE_WEAK,
        return_mixture=True)
    print(mixture)

Exemplo n.º 7

0

Exibir arquivo

 def test_asymmetric_pure_nonzero_val(self):
   """Checks a 2x3 game with a pure equilibrium and a nonzero value."""
   #        c0      c1       c2
   # r0 | 2, -2 |  1, -1 |  5, -5
   # r1 |-3,  3 | -4,  4 | -2,  2
   #
   # Pure eq (r0,c1) for a value of (1, -1)
   # 2nd row is dominated, and then second player chooses 2nd col.
   row_payoffs = [[2.0, 1.0, 5.0], [-3.0, -4.0, -2.0]]
   col_payoffs = [[-2.0, -1.0, -5.0], [3.0, 4.0, 2.0]]
   game = pyspiel.create_matrix_game(row_payoffs, col_payoffs)
   row_policy, col_policy, row_value, col_value = (
       lp_solver.solve_zero_sum_matrix_game(game))
   self.assertLen(row_policy, 2)
   self.assertLen(col_policy, 3)
   self.assertAlmostEqual(row_policy[0], 1.0)
   self.assertAlmostEqual(row_policy[1], 0.0)
   self.assertAlmostEqual(col_policy[0], 0.0)
   self.assertAlmostEqual(col_policy[1], 1.0)
   self.assertAlmostEqual(row_value, 1.0)
   self.assertAlmostEqual(col_value, -1.0)

Exemplo n.º 8

0

Exibir arquivo

 def test_biased_rock_paper_scissors(self):
   """Checks the solver on biased rock-paper-scissors."""
   # See sec 6.2 of Bosansky et al. 2016. Algorithms for Computing Strategies
   # in Two-Player Simultaneous Move Games
   # http://mlanctot.info/files/papers/aij-2psimmove.pdf
   row_payoffs = [[0.0, -0.25, 0.5], [0.25, 0.0, -0.05], [-0.5, 0.05, 0.0]]
   col_payoffs = [[0.0, 0.25, -0.5], [-0.25, 0.0, 0.05], [0.5, -0.05, 0.0]]
   game = pyspiel.create_matrix_game(row_payoffs, col_payoffs)
   row_policy, col_policy, row_value, col_value = (
       lp_solver.solve_zero_sum_matrix_game(game))
   self.assertLen(row_policy, 3)
   self.assertLen(col_policy, 3)
   # Both players are expected to play the same mixture.
   expected_policy = (1.0 / 16.0, 10.0 / 16.0, 5.0 / 16.0)
   for action, expected in enumerate(expected_policy):
     self.assertAlmostEqual(row_policy[action], expected, places=4)
     self.assertAlmostEqual(col_policy[action], expected, places=4)
   self.assertAlmostEqual(row_value, 0.0)
   self.assertAlmostEqual(col_value, 0.0)

Exemplo n.º 9

0

Exibir arquivo

def nash_strategy(solver):
    """Returns nash distribution on meta game matrix.

    This method only works for two player zero-sum games.

    Args:
      solver: GenPSROSolver instance. Its `get_meta_game` may be either a
        property holding the meta game or a zero-argument method returning it.

    Returns:
      Nash distribution on strategies: a list with one renormalized,
      flattened distribution per player.
    """
    meta_games = solver.get_meta_game
    # If `get_meta_game` is a method, the bare attribute is a bound method and
    # negating it below would raise TypeError, so call it first. Non-callable
    # values (the property case) pass through unchanged.
    if callable(meta_games):
        meta_games = meta_games()
    # A single matrix is taken as player 0's payoffs; player 1 receives the
    # negation (zero-sum).
    if not isinstance(meta_games, list):
        meta_games = [meta_games, -meta_games]
    meta_games = [x.tolist() for x in meta_games]
    nash_prob_1, nash_prob_2, _, _ = (lp_solver.solve_zero_sum_matrix_game(
        pyspiel.create_matrix_game(*meta_games)))
    return [
        renormalize(np.array(nash_prob_1).reshape(-1)),
        renormalize(np.array(nash_prob_2).reshape(-1))
    ]

Exemplo n.º 10

0

Exibir arquivo

def solve_subgame(subgame_payoffs):
    """Solves the subgame with OpenSpiel's LP solver.

    Args:
      subgame_payoffs: Payoff arguments unpacked into
        `pyspiel.create_matrix_game`.

    Returns:
      A two-element list holding each player's solution as an array divided
      by its own sum.
    """
    game = pyspiel.create_matrix_game(*subgame_payoffs)
    row_policy, col_policy, _, _ = lp_solver.solve_zero_sum_matrix_game(game)
    policies = (np.asarray(row_policy), np.asarray(col_policy))
    return [policy / policy.sum() for policy in policies]

Exemplo n.º 11

0

Exibir arquivo

Arquivo: matrix_nash.py Projeto: ngrupen/open_spiel

 def gen():
     """Yields one (row policy, column policy) pair from the LP solver.

     The game solved is built from the payoff differences of the enclosing
     scope's `row_payoffs` and `col_payoffs`.
     """
     row_advantage = row_payoffs - col_payoffs
     col_advantage = col_payoffs - row_payoffs
     game = pyspiel.create_matrix_game(row_advantage, col_advantage)
     row_policy, col_policy, _, _ = lp_solver.solve_zero_sum_matrix_game(game)
     # Drop axis 1 from each solution before yielding.
     yield (np.squeeze(row_policy, 1), np.squeeze(col_policy, 1))

Exemplo n.º 12

0

Exibir arquivo

def value_iteration(game, depth_limit, threshold, cyclic_game=False):
    """Solves for the optimal value function of a game.

    For small games only! Runs value iteration until the largest change in
    any state value during a sweep is no greater than `threshold`. Works for
    sequential 1-player games or 2-player zero-sum games, with or without
    chance nodes.

    Arguments:
      game: The game to analyze, as returned by `load_game`.
      depth_limit: How deeply to analyze the game tree. Negative means no
        limit, 0 means root-only, etc.
      threshold: Maximum error for state values.
      cyclic_game: Set to True if the game has cycles (from state A we can get
        to state B, and from state B we can get back to state A).

    Returns:
      A `dict` with string keys and float values, mapping string encoding of
      states to the values of those states.
    """
    assert game.num_players() in (1, 2), (
        "Game must be a 1-player or 2-player game")
    if game.num_players() == 2:
        utility_kind = game.get_type().utility
        assert utility_kind == pyspiel.GameType.Utility.ZERO_SUM, (
            "2-player games must be zero sum games")

    # Must be perfect information or one-shot (not imperfect information).
    information_kind = game.get_type().information
    assert (information_kind == pyspiel.GameType.Information.ONE_SHOT
            or information_kind
            == pyspiel.GameType.Information.PERFECT_INFORMATION)

    # We expect Value Iteration to be used with perfect information games, in
    # which `str` is assumed to display the state of the game.
    states = get_all_states.get_all_states(game,
                                           depth_limit,
                                           True,
                                           False,
                                           to_string=str,
                                           stop_if_encountered=cyclic_game)
    values = {}
    transitions = {}
    _initialize_maps(states, values, transitions)

    lowest_utility = game.min_utility()
    largest_change = threshold + 1  # Forces at least one sweep.
    while largest_change > threshold:
        largest_change = 0
        for key, state in states.items():
            if state.is_terminal():
                continue

            if state.is_simultaneous_node():
                # Simultaneous node: assemble a matrix game from the child
                # utilities and solve it with the matrix game solver.
                row_payoffs = []  # player 0, one inner list per row action
                for row_action in state.legal_actions(0):
                    row_payoffs.append([
                        sum(prob * values[successor]
                            for successor, prob in transitions[(
                                key, row_action, col_action)])
                        for col_action in state.legal_actions(1)
                    ])
                # Zero-sum: the column player receives the negated payoff.
                col_payoffs = [[-q for q in q_row] for q_row in row_payoffs]
                stage_game = pyspiel.create_matrix_game(
                    row_payoffs, col_payoffs)
                new_value = lp_solver.solve_zero_sum_matrix_game(
                    stage_game)[2]
            else:
                # Regular decision node: player 0 maximizes, any other
                # player minimizes, each starting from its worst-case bound.
                player = state.current_player()
                q_values = [
                    sum(prob * values[successor]
                        for successor, prob in transitions[(key, action)])
                    for action in state.legal_actions()
                ]
                if player == 0:
                    new_value = max([lowest_utility] + q_values)
                else:
                    new_value = min([-lowest_utility] + q_values)

            largest_change = max(abs(values[key] - new_value),
                                 largest_change)
            values[key] = new_value

    return values

Exemplo n.º 13

0

Exibir arquivo

def nash_solver(meta_games,
                solver="gambit",
                mode="one",
                gambit_path=None,
                lrsnash_path=None):
    """
    Solver for NE.
    :param meta_games: meta-games in PSRO.
    :param solver: options "gambit", "nashpy", "linear", "lrsnash", "replicator".
    :param mode: options "all", "one", "pure"
    :param gambit_path: forwarded to the gambit solver.
    :param lrsnash_path: path to lrsnash solver.
    :return: a list of NE for mode "all", the first NE found for mode "one";
        the "gambit", "replicator", "linear" and "pure" paths return their
        own solver's result directly.
    WARNING:
    opening up a subprocess in every iteration eventually
    leads the os to block the subprocess. Not usable.
    """
    num_players = len(meta_games)
    if solver == "gambit":
        return gambit_solve(meta_games, mode, gambit_path=gambit_path)
    if solver == "replicator":
        return [replicator_dynamics(meta_games)]

    # Every remaining solver handles two-player games only.
    assert num_players == 2

    num_rows, num_cols = np.shape(meta_games[0])
    row_payoffs, col_payoffs = meta_games[0], meta_games[1]
    single_line_game = num_rows == 1 or num_cols == 1

    if not single_line_game:
        if mode == 'pure':
            return pure_ne_solve(meta_games)
        if solver == "linear":
            payoff_lists = [x.tolist() for x in meta_games]
            nash_prob_1, nash_prob_2, _, _ = (
                lp_solver.solve_zero_sum_matrix_game(
                    pyspiel.create_matrix_game(*payoff_lists)))
            return [
                renormalize(np.array(nash_prob_1).reshape(-1)),
                renormalize(np.array(nash_prob_2).reshape(-1))
            ]

    if single_line_game:
        # With a single row or column, enumerate every pure profile.
        equilibria = itertools.product(np.eye(num_rows), np.eye(num_cols))
    elif solver == "lrsnash":
        logging.info("Using lrsnash solver.")
        equilibria = lrs_solve(row_payoffs, col_payoffs, lrsnash_path)
    elif solver == "nashpy":
        if mode == "all":
            logging.info("Using nashpy vertex enumeration.")
            equilibria = nashpy.Game(row_payoffs,
                                     col_payoffs).vertex_enumeration()
        else:
            logging.info("Using nashpy Lemke-Howson solver.")
            equilibria = lemke_howson_solve(row_payoffs, col_payoffs)
    else:
        raise ValueError("Please choose a valid NE solver.")

    # Pull the first equilibrium to check that there is at least one, then
    # put it back at the front of the stream.
    equilibria = iter(equilibria)
    try:
        first = next(equilibria)
    except StopIteration:
        logging.warning("degenerate game!")
        # Degenerate game: fall back to support enumeration.
        equilibria = nashpy.Game(row_payoffs,
                                 col_payoffs).support_enumeration()
        try:
            fallback_first = next(equilibria)
        except StopIteration:
            logging.warning("no equilibrium!")
        else:
            equilibria = itertools.chain([fallback_first], equilibria)
    else:
        equilibria = itertools.chain([first], equilibria)

    equilibria = list(equilibria)
    if mode == 'all':
        return equilibria
    if mode == 'one':
        return equilibria[0]
    raise ValueError("Please choose a valid mode.")