Python create_matrix_game示例，pyspiel.create_matrix_game Python示例

示例#1

0

显示文件

文件： value_it_vs_human.py 项目： Tubbz-alt/goofspiel

    def _matrix_game(self, state):
        """Build and solve the zero-sum stage game at a simultaneous state.

        Each matrix cell holds the expected value, under `self._values`, of
        the successor states reached when player 0 plays the row action and
        player 1 plays the column action.

        Args:
          state: state whose `legal_actions(0)` / `legal_actions(1)` define
            the rows / columns of the stage game.

        Returns:
          A pair `(actions, probs)`: the legal actions of player 0 and the
          first element of the LP solution (presumably player 0's mixed
          strategy over those actions -- confirm against `lp_solver`).
        """
        state_key = str(state)
        transitions = {}
        # Fills `transitions` for this single state; the helper also receives
        # `self._values` and may update it.
        value_iteration._initialize_maps({state_key: state}, self._values,
                                         transitions)

        row_player_utils = []  # player 0
        col_player_utils = []  # player 1
        for row_action in state.legal_actions(0):
            row_values = []
            for col_action in state.legal_actions(1):
                outcomes = transitions[(state_key, row_action, col_action)]
                expected = sum(prob * self._values[successor]
                               for successor, prob in outcomes)
                row_values.append(expected)
            row_player_utils.append(row_values)
            # Zero-sum: the column player receives the negated payoff.
            col_player_utils.append([-value for value in row_values])

        stage_game = pyspiel.create_matrix_game(row_player_utils,
                                                col_player_utils)
        probs = lp_solver.solve_zero_sum_matrix_game(stage_game)[0]
        # NOTE(review): `probs` is assumed to be ordered like
        # `state.legal_actions(0)`; double check against the solver.
        return state.legal_actions(0), probs

示例#2

0

显示文件

def nash_strategy(solver, return_joint=False):
    """Returns nash distribution on meta game matrix.

    This method only works for two player zero-sum games.

    Args:
      solver: GenPSROSolver instance. Its `get_meta_game()` result is either
        a list of per-player payoff arrays or a single array, in which case
        the second player's payoffs are taken to be its negation.
      return_joint: If False (the default), only returns marginals.
        Otherwise returns marginals as well as joint probabilities.

    Returns:
      A list with the two players' renormalized Nash marginals, or a tuple
      `(marginals, joint_strategies)` when `return_joint` is true.

    Raises:
      NotImplementedError: if the meta game does not have exactly two
        payoff tables.
    """
    meta_games = solver.get_meta_game()
    if not isinstance(meta_games, list):
        meta_games = [meta_games, -meta_games]
    meta_games = [x.tolist() for x in meta_games]
    if len(meta_games) != 2:
        # The trailing space keeps the two literals from fusing into
        # "zero-sumgames".
        raise NotImplementedError(
            "nash_strategy solver works only for 2p zero-sum "
            "games, but was invoked for a {} player game".format(
                len(meta_games)))
    nash_prob_1, nash_prob_2, _, _ = (lp_solver.solve_zero_sum_matrix_game(
        pyspiel.create_matrix_game(*meta_games)))
    result = [
        renormalize(np.array(nash_prob_1).reshape(-1)),
        renormalize(np.array(nash_prob_2).reshape(-1))
    ]

    if not return_joint:
        return result
    joint_strategies = get_joint_strategy_from_marginals(result)
    return result, joint_strategies

示例#3

0

显示文件

文件： utils_matrix.py 项目： YannouRavoet/ml_project

def _battle_of_the_sexes_easy():  # COORDINATION
    """Create the 2x2 Battle of the Sexes matrix game.

    Payoffs as (row player, column player):

                 Ballet   Movies
        Ballet    2,1      0,0
        Movies    0,0      1,2
    """
    row_actions = ["Ballet", "Movies"]
    col_actions = ["Ballet", "Movies"]
    row_payoffs = [[2, 0], [0, 1]]
    col_payoffs = [[1, 0], [0, 2]]
    return pyspiel.create_matrix_game("battle_of_the_sexes",
                                      "Battle of the Sexes", row_actions,
                                      col_actions, row_payoffs, col_payoffs)

示例#4

0

显示文件

文件： utils_matrix.py 项目： YannouRavoet/ml_project

def _staghunt_easy():  # COORDINATION
    """Create the 2x2 Stag Hunt matrix game.

    Payoffs as (row player, column player):

                Stag      Hare
        Stag    1,1       0,2/3
        Hare    2/3,0     2/3,2/3
    """
    hare = 2 / 3  # payoff for hunting hare, whatever the other player does
    row_actions = ["Stag", "Hare"]
    col_actions = ["Stag", "Hare"]
    row_payoffs = [[1, 0], [hare, hare]]
    col_payoffs = [[1, hare], [0, hare]]
    return pyspiel.create_matrix_game("staghunt", "StagHunt", row_actions,
                                      col_actions, row_payoffs, col_payoffs)

示例#5

0

显示文件

文件： utils_matrix.py 项目： YannouRavoet/ml_project

def _prisonners_dilemma_easy():  # NON ZERO-SUM
    """Create the 2x2 game registered as "prisonners_dilemma".

    Payoffs actually passed, as (row player, column player):

                  Talk    Silent
        Talk      3,5     3,0
        Silent    0,1     5,1

    NOTE(review): the previous inline comment advertised the table
    (-6,-6 / 0,-12 / -12,0 / -3,-3), which these matrices do not encode, and
    the values above do not have the usual Prisoner's Dilemma structure. The
    two matrices look like per-cell (row, col) pairs rather than per-player
    tables. The numbers are kept unchanged here; confirm the intended game.
    """
    row_actions = ["Talk", "Silent"]
    col_actions = ["Talk", "Silent"]
    row_payoffs = [[3, 3], [0, 5]]
    col_payoffs = [[5, 0], [1, 1]]
    return pyspiel.create_matrix_game("prisonners_dilemma",
                                      "Prisoners Dilemma", row_actions,
                                      col_actions, row_payoffs, col_payoffs)

示例#6

0

显示文件

文件： utils_matrix.py 项目： YannouRavoet/ml_project

def _matching_pennies_easy():  # ZERO-SUM
    """Create the 2x2 Matching Pennies matrix game.

    Payoffs as (row player, column player):

                 Heads    Tails
        Heads    -1,1     1,-1
        Tails    1,-1     -1,1
    """
    row_actions = ["Heads", "Tails"]
    col_actions = ["Heads", "Tails"]
    row_payoffs = [[-1, 1], [1, -1]]
    col_payoffs = [[1, -1], [-1, 1]]
    return pyspiel.create_matrix_game("matching_pennies", "Matching Pennies",
                                      row_actions, col_actions, row_payoffs,
                                      col_payoffs)

示例#7

0

显示文件

文件： utils_matrix.py 项目： YannouRavoet/ml_project

def _biased_rock_paper_scissors_easy():  # ZERO-SUM
    """Create the 3x3 biased Rock-Paper-Scissors matrix game.

    Payoffs as (row player, column player):

                    Rock     Paper    Scissors
        Rock        0,0      -3,3     1,-1
        Paper       3,-3     0,0      -2,2
        Scissors    -1,1     2,-2     0,0
    """
    row_actions = ["Rock", "Paper", "Scissors"]
    col_actions = ["Rock", "Paper", "Scissors"]
    row_payoffs = [[0, -3, 1], [3, 0, -2], [-1, 2, 0]]
    # Zero-sum: the column player's table is the negated row table.
    col_payoffs = [[-payoff for payoff in row] for row in row_payoffs]
    return pyspiel.create_matrix_game("biased_rock_paper_scissors",
                                      "Biased Rock Paper Scissors",
                                      row_actions, col_actions, row_payoffs,
                                      col_payoffs)

示例#8

0

显示文件

def matrix_rps_biased_phaseplot(size=None, fig=None):
    """Draw a replicator-dynamics phase plot for a biased RPS game.

    Args:
      size: subplot position passed to `fig.add_subplot`; defaults to 111.
      fig: figure to draw into; a new 10x10 figure is created when omitted.

    Returns:
      A pair `(subplot, game)`: the subplot that was drawn on and the matrix
      game created from the same payoff tensor.
    """
    if fig is None:
        fig = plt.figure(figsize=(10, 10))
    if size is None:
        size = 111
    assert isinstance(fig, plt.Figure)

    row_payoffs = np.array([[0, -1, 2], [1, 0, -1], [-2, 1, 0]])
    # Zero-sum: the second player's table is the negated first table.
    payoff_tensor = np.array([row_payoffs, -row_payoffs])

    population_dynamics = dynamics.SinglePopulationDynamics(
        payoff_tensor, dynamics.replicator)
    subplot = fig.add_subplot(size, projection="3x3")
    subplot.quiver(population_dynamics)
    subplot.set_title("Phaseplot Rock Paper Scissors")

    game = pyspiel.create_matrix_game(payoff_tensor[0], payoff_tensor[1])
    return subplot, game

示例#9

0

显示文件

文件： lp_solver_test.py 项目： DailyActie/AI_RL_APP-open_spiel

 def test_rock_paper_scissors(self):
     """Standard RPS: both solutions are uniform and both values are 0."""
     row_payoffs = [[0.0, -1.0, 1.0], [1.0, 0.0, -1.0], [-1.0, 1.0, 0.0]]
     col_payoffs = [[0.0, 1.0, -1.0], [-1.0, 0.0, 1.0], [1.0, -1.0, 0.0]]
     game = pyspiel.create_matrix_game(row_payoffs, col_payoffs)
     solution = lp_solver.solve_zero_sum_matrix_game(game)
     p0_sol, p1_sol, p0_sol_val, p1_sol_val = solution

     self.assertEqual(len(p0_sol), 3)
     self.assertEqual(len(p1_sol), 3)
     uniform = 1.0 / 3.0
     for action in range(3):
         self.assertAlmostEqual(p0_sol[action], uniform)
         self.assertAlmostEqual(p1_sol[action], uniform)
     self.assertAlmostEqual(p0_sol_val, 0.0)
     self.assertAlmostEqual(p1_sol_val, 0.0)

示例#10

0

显示文件

def lp_solve(meta_games, checkpoint_dir=None):
    """Solve a two-player zero-sum meta game with the LP solver.

    Args:
      meta_games: sequence of exactly two payoff arrays (one per player);
        each element must provide `tolist()`.
      checkpoint_dir: unused here; kept so the signature stays compatible
        with callers that pass it.

    Returns:
      A two-element list with the renormalized, flattened Nash strategies of
      player 1 and player 2.

    Raises:
      NotImplementedError: if `meta_games` does not contain exactly two
        payoff tables.
    """
    meta_games = [x.tolist() for x in meta_games]
    if len(meta_games) != 2:
        # The trailing space keeps the two literals from fusing into
        # "zero-sumgames".
        raise NotImplementedError(
            "nash_strategy solver works only for 2p zero-sum "
            "games, but was invoked for a {} player game".format(
                len(meta_games)))
    nash_prob_1, nash_prob_2, _, _ = (solve_zero_sum_matrix_game(
        pyspiel.create_matrix_game(*meta_games)))
    result = [
        renormalize(np.array(nash_prob_1).reshape(-1)),
        renormalize(np.array(nash_prob_2).reshape(-1))
    ]
    return result

示例#11

0

显示文件

def main(_):
    """Solve a biased RPS game with the LP solver and run a dominance check.

    Prints both players' values and policies, the second entry of player 0's
    policy, and the result of `lp_solver.is_dominated`.
    """
    # Built-in alternatives that could be solved instead:
    #   pyspiel.load_matrix_game("matrix_mp")
    #   pyspiel.load_matrix_game("matrix_rps")
    row_payoffs = [[0.0, -0.25, 0.5], [0.25, 0.0, -0.05], [-0.5, 0.05, 0.0]]
    col_payoffs = [[0.0, 0.25, -0.5], [-0.25, 0.0, 0.05], [0.5, -0.05, 0.0]]
    game = pyspiel.create_matrix_game(row_payoffs, col_payoffs)
    p0_sol, p1_sol, p0_sol_val, p1_sol_val = (
        lp_solver.solve_zero_sum_matrix_game(game))
    print("p0 val = {}, policy = {}".format(p0_sol_val, p0_sol))
    print("p1 val = {}, policy = {}".format(p1_sol_val, p1_sol))
    print(p0_sol[1])

    dominance_payoffs = [[1., 1., 1.], [2., 0., 1.], [0., 2., 2.]]
    # Positional arguments are forwarded exactly as before; see
    # `lp_solver.is_dominated` for their meaning.
    mixture = lp_solver.is_dominated(0,
                                     dominance_payoffs,
                                     0,
                                     lp_solver.DOMINANCE_WEAK,
                                     return_mixture=True)
    print(mixture)

示例#12

0

显示文件

 def test_asymmetric_pure_nonzero_val(self):
   """A 2x3 game with a pure equilibrium and non-zero value.

          c0      c1       c2
   r0 | 2, -2 |  1, -1 |  5, -5
   r1 |-3,  3 | -4,  4 | -2,  2

   Pure equilibrium (r0, c1) with value (1, -1): the second row is
   dominated, after which the second player picks the second column.
   """
   row_payoffs = [[2.0, 1.0, 5.0], [-3.0, -4.0, -2.0]]
   col_payoffs = [[-2.0, -1.0, -5.0], [3.0, 4.0, 2.0]]
   game = pyspiel.create_matrix_game(row_payoffs, col_payoffs)
   solution = lp_solver.solve_zero_sum_matrix_game(game)
   p0_sol, p1_sol, p0_sol_val, p1_sol_val = solution

   self.assertLen(p0_sol, 2)
   self.assertLen(p1_sol, 3)
   # Row player commits to r0.
   self.assertAlmostEqual(p0_sol[0], 1.0)
   self.assertAlmostEqual(p0_sol[1], 0.0)
   # Column player commits to c1.
   self.assertAlmostEqual(p1_sol[0], 0.0)
   self.assertAlmostEqual(p1_sol[1], 1.0)
   self.assertAlmostEqual(p0_sol_val, 1.0)
   self.assertAlmostEqual(p1_sol_val, -1.0)

示例#13

0

显示文件

 def test_biased_rock_paper_scissors(self):
   """Biased RPS: both players mix (1/16, 10/16, 5/16) and the value is 0.

   See sec 6.2 of Bosansky et al. 2016. Algorithms for Computing Strategies
   in Two-Player Simultaneous Move Games
   http://mlanctot.info/files/papers/aij-2psimmove.pdf
   """
   row_payoffs = [[0.0, -0.25, 0.5], [0.25, 0.0, -0.05], [-0.5, 0.05, 0.0]]
   col_payoffs = [[0.0, 0.25, -0.5], [-0.25, 0.0, 0.05], [0.5, -0.05, 0.0]]
   game = pyspiel.create_matrix_game(row_payoffs, col_payoffs)
   solution = lp_solver.solve_zero_sum_matrix_game(game)
   p0_sol, p1_sol, p0_sol_val, p1_sol_val = solution

   self.assertLen(p0_sol, 3)
   self.assertLen(p1_sol, 3)
   expected_policy = (1.0 / 16.0, 10.0 / 16.0, 5.0 / 16.0)
   for action, expected in enumerate(expected_policy):
     self.assertAlmostEqual(p0_sol[action], expected, places=4)
     self.assertAlmostEqual(p1_sol[action], expected, places=4)
   self.assertAlmostEqual(p0_sol_val, 0.0)
   self.assertAlmostEqual(p1_sol_val, 0.0)

示例#14

0

显示文件

def nash_strategy(solver):
    """Returns nash distribution on meta game matrix.

    This method only works for two player zero-sum games.

    Args:
      solver: GenPSROSolver instance.

    Returns:
      A two-element list with the renormalized, flattened Nash strategies of
      both players.
    """
    # NOTE(review): `get_meta_game` is read as an attribute, not called;
    # presumably it is a property on the solver -- confirm.
    payoffs = solver.get_meta_game
    if not isinstance(payoffs, list):
        # A single table: the opponent's payoffs are its negation.
        payoffs = [payoffs, -payoffs]
    payoff_lists = [table.tolist() for table in payoffs]

    game = pyspiel.create_matrix_game(*payoff_lists)
    nash_prob_1, nash_prob_2, _, _ = lp_solver.solve_zero_sum_matrix_game(
        game)
    return [
        renormalize(np.array(probs).reshape(-1))
        for probs in (nash_prob_1, nash_prob_2)
    ]

示例#15

0

显示文件

def nash_solver(meta_games,
                solver="gambit",
                mode="one",
                gambit_path=None,
                lrsnash_path=None):
    """Compute Nash equilibria of a meta-game with the selected backend.

    :param meta_games: meta-games in PSRO, one payoff table per player.
    :param solver: options "gambit", "nashpy", "linear", "lrsnash",
        "replicator".
    :param mode: options "all", "one", "pure".
    :param gambit_path: forwarded to `gambit_solve` for the "gambit" solver.
    :param lrsnash_path: path to lrsnash solver.
    :return: for "gambit" and "replicator", whatever the respective helper
        produces (wrapped in a list for "replicator"); for "linear", a list
        with both players' renormalized strategies; otherwise the list of
        equilibria (mode "all") or its first element (mode "one").
    :raises ValueError: on an unknown solver or mode.

    WARNING (carried over from the original author):
    opening up a subprocess in every iteration eventually
    leads the os to block the subprocess. Not usable.
    """
    num_players = len(meta_games)
    if solver == "gambit":
        return gambit_solve(meta_games, mode, gambit_path=gambit_path)
    if solver == "replicator":
        return [replicator_dynamics(meta_games)]

    # All remaining backends are two-player only.
    assert num_players == 2

    row_payoffs, col_payoffs = meta_games[0], meta_games[1]
    num_rows, num_cols = np.shape(row_payoffs)

    if num_rows == 1 or num_cols == 1:
        # One player has a single strategy: enumerate every pure profile.
        candidates = itertools.product(np.eye(num_rows), np.eye(num_cols))
    elif mode == 'pure':
        return pure_ne_solve(meta_games)
    elif solver == "linear":
        payoff_lists = [table.tolist() for table in meta_games]
        game = pyspiel.create_matrix_game(*payoff_lists)
        nash_prob_1, nash_prob_2, _, _ = (
            lp_solver.solve_zero_sum_matrix_game(game))
        return [
            renormalize(np.array(nash_prob_1).reshape(-1)),
            renormalize(np.array(nash_prob_2).reshape(-1))
        ]
    elif solver == "lrsnash":
        logging.info("Using lrsnash solver.")
        candidates = lrs_solve(row_payoffs, col_payoffs, lrsnash_path)
    elif solver == "nashpy":
        if mode == "all":
            logging.info("Using nashpy vertex enumeration.")
            candidates = nashpy.Game(row_payoffs,
                                     col_payoffs).vertex_enumeration()
        else:
            logging.info("Using nashpy Lemke-Howson solver.")
            candidates = lemke_howson_solve(row_payoffs, col_payoffs)
    else:
        raise ValueError("Please choose a valid NE solver.")

    # Peek at the first equilibrium so an empty result can be detected
    # without losing that element.
    candidates = iter(candidates)
    try:
        candidates = itertools.chain([next(candidates)], candidates)
    except StopIteration:
        logging.warning("degenerate game!")
        # Fall back to support enumeration when nothing was produced.
        candidates = nashpy.Game(row_payoffs,
                                 col_payoffs).support_enumeration()
        try:
            candidates = itertools.chain([next(candidates)], candidates)
        except StopIteration:
            logging.warning("no equilibrium!")

    found = list(candidates)
    if mode == 'all':
        return found
    if mode == 'one':
        return found[0]
    raise ValueError("Please choose a valid mode.")

示例#16

0

显示文件

文件： matrix_nash.py 项目： ngrupen/open_spiel

 def gen():
     """Yield one LP solution of the zero-sum game of payoff differences.

     Reads `row_payoffs` and `col_payoffs` from the enclosing scope.
     """
     row_minus_col = row_payoffs - col_payoffs
     col_minus_row = col_payoffs - row_payoffs
     game = pyspiel.create_matrix_game(row_minus_col, col_minus_row)
     p0_sol, p1_sol, _, _ = lp_solver.solve_zero_sum_matrix_game(game)
     # Drop axis 1 of each solution (presumably column vectors -- confirm).
     yield (np.squeeze(p0_sol, 1), np.squeeze(p1_sol, 1))

示例#17

0

显示文件

def solve_subgame(subgame_payoffs):
    """Solves the subgame using OpenSpiel's LP solver.

    Args:
      subgame_payoffs: payoff tables unpacked as positional arguments to
        `pyspiel.create_matrix_game`.

    Returns:
      A two-element list with each player's solution converted to an array
      and divided by its sum.
    """
    game = pyspiel.create_matrix_game(*subgame_payoffs)
    p0_raw, p1_raw, _, _ = lp_solver.solve_zero_sum_matrix_game(game)
    p0_probs = np.asarray(p0_raw)
    p1_probs = np.asarray(p1_raw)
    # Re-normalize so each strategy sums to one.
    return [probs / probs.sum() for probs in (p0_probs, p1_probs)]

示例#18

0

显示文件

def value_iteration(game, depth_limit, threshold, cyclic_game=False):
    """Solves for the optimal value function of a game.

    For small games only! Solves the game using value iteration,
    with the maximum error for the value function less than threshold.
    This algorithm works for sequential 1-player games or 2-player zero-sum
    games, with or without chance nodes.

    Arguments:
      game: The game to analyze, as returned by `load_game`.
      depth_limit: How deeply to analyze the game tree. Negative means no
        limit, 0 means root-only, etc.
      threshold: Maximum error for state values.
      cyclic_game: set to True if the game has cycles (from state A we can
        get to state B, and from state B we can get back to state A).

    Returns:
      A `dict` with string keys and float values, mapping string encoding of
      states to the values of those states.
    """
    num_players = game.num_players()
    game_type = game.get_type()
    assert num_players in (1, 2), (
        "Game must be a 1-player or 2-player game")
    if num_players == 2:
        assert game_type.utility == pyspiel.GameType.Utility.ZERO_SUM, (
            "2-player games must be zero sum games")

    # Must be perfect information or one-shot (not imperfect information).
    assert (game_type.information == pyspiel.GameType.Information.ONE_SHOT
            or game_type.information
            == pyspiel.GameType.Information.PERFECT_INFORMATION)

    # We expect Value Iteration to be used with perfect information games, in
    # which `str` is assumed to display the state of the game.
    states = get_all_states.get_all_states(game,
                                           depth_limit,
                                           True,
                                           False,
                                           to_string=str,
                                           stop_if_encountered=cyclic_game)
    values = {}
    transitions = {}
    _initialize_maps(states, values, transitions)

    min_utility = game.min_utility()
    error = threshold + 1  # Forces at least one sweep over the states.
    while error > threshold:
        error = 0
        for key, state in states.items():
            if state.is_terminal():
                continue

            if state.is_simultaneous_node():
                # Simultaneous node: assemble a matrix game from the expected
                # child values and solve it with the matrix game solver.
                row_player_utils = []
                col_player_utils = []
                for row_action in state.legal_actions(0):
                    row_values = []
                    for col_action in state.legal_actions(1):
                        outcomes = transitions[(key, row_action, col_action)]
                        row_values.append(
                            sum(prob * values[successor]
                                for successor, prob in outcomes))
                    row_player_utils.append(row_values)
                    # Zero-sum: the column player gets the negated payoff.
                    col_player_utils.append([-v for v in row_values])
                stage_game = pyspiel.create_matrix_game(
                    row_player_utils, col_player_utils)
                # Index 2 of the solution is used as the state's value.
                value = lp_solver.solve_zero_sum_matrix_game(stage_game)[2]
            else:
                # Regular decision node: player 0 maximizes, anyone else
                # minimizes, starting from the corresponding extreme bound.
                maximizing = state.current_player() == 0
                value = min_utility if maximizing else -min_utility
                for action in state.legal_actions():
                    outcomes = transitions[(key, action)]
                    q_value = sum(prob * values[successor]
                                  for successor, prob in outcomes)
                    if maximizing:
                        value = max(value, q_value)
                    else:
                        value = min(value, q_value)

            # Track the largest change seen during this sweep.
            error = max(abs(values[key] - value), error)
            values[key] = value

    return values

示例#19

0

显示文件

def _even_easier_create_game():
    """Create a 2x2 zero-sum game from payoff tables only (no names)."""
    row_payoffs = [[-1, 1], [1, -1]]
    col_payoffs = [[1, -1], [-1, 1]]
    return pyspiel.create_matrix_game(row_payoffs, col_payoffs)

示例#20

0

显示文件

def _easy_create_game():
    """Create Matching Pennies via the helper, with names and action labels."""
    row_actions = ["Heads", "Tails"]
    col_actions = ["Heads", "Tails"]
    row_payoffs = [[-1, 1], [1, -1]]
    col_payoffs = [[1, -1], [-1, 1]]
    return pyspiel.create_matrix_game("matching_pennies", "Matching Pennies",
                                      row_actions, col_actions, row_payoffs,
                                      col_payoffs)

示例#21

0

显示文件

def get_game(game_name):
    """Resolve `game_name` to a game, building "matrix_bots" on demand.

    Args:
      game_name: a `pyspiel.MatrixGame`, or any other value.

    Returns:
      `game_name` unchanged when it is a `pyspiel.MatrixGame` or differs
      from "matrix_bots"; otherwise a new 2x2 matrix game with row payoffs
      [[3, 0], [0, 2]] and column payoffs [[2, 0], [0, 3]].
    """
    passthrough = (isinstance(game_name, pyspiel.MatrixGame)
                   or game_name != "matrix_bots")
    if not passthrough:
        row_payoffs = [[3, 0], [0, 2]]
        col_payoffs = [[2, 0], [0, 3]]
        return pyspiel.create_matrix_game(row_payoffs, col_payoffs)
    return game_name

示例#22

0

显示文件

文件： matrix_game_example.py 项目： julianhartmann1/HCII

def _import_data_create_game():
    """Creates a game via imported payoff data.

    Loads the soccer payoff table, rescales it with `x * 2 - 1`, and uses the
    result as player 0's payoffs and its transpose as player 1's payoffs.
    """
    data_path = file_utils.find_file(
        "open_spiel/data/paper_data/response_graph_ucb/soccer.txt", 2)
    raw_table = np.loadtxt(data_path)
    # Presumably maps values in [0, 1] onto [-1, 1] -- confirm against the
    # data file.
    payoffs = raw_table * 2 - 1
    return pyspiel.create_matrix_game(payoffs, payoffs.T)