def nash_strategy(solver, return_joint=False):
    """Returns nash distribution on meta game matrix.

    This method only works for two player zero-sum games.

    Args:
      solver: GenPSROSolver instance. Its `get_meta_game()` result is either a
        list of per-player payoff matrices or a single matrix; a single matrix
        `M` is expanded to the zero-sum pair `[M, -M]`.
      return_joint: If False (the default), only the marginals are returned.
        If True, the marginals are returned together with the joint
        probabilities.

    Returns:
      A list with one renormalized, flattened strategy per player. When
      `return_joint` is True, a `(marginals, joint_strategies)` tuple instead.

    Raises:
      NotImplementedError: If the meta game does not have exactly two payoff
        matrices.
    """
    meta_games = solver.get_meta_game()
    if not isinstance(meta_games, list):
        meta_games = [meta_games, -meta_games]
    meta_games = [x.tolist() for x in meta_games]
    if len(meta_games) != 2:
        # The trailing space after "zero-sum" matters: the two literals are
        # concatenated by the parser.
        raise NotImplementedError(
            "nash_strategy solver works only for 2p zero-sum "
            "games, but was invoked for a {} player game".format(
                len(meta_games)))
    nash_prob_1, nash_prob_2, _, _ = lp_solver.solve_zero_sum_matrix_game(
        pyspiel.create_matrix_game(*meta_games))
    result = [
        renormalize(np.array(nash_prob_1).reshape(-1)),
        renormalize(np.array(nash_prob_2).reshape(-1)),
    ]

    if not return_joint:
        return result
    joint_strategies = get_joint_strategy_from_marginals(result)
    return result, joint_strategies
def _matrix_game(self, state):
    """Builds the zero-sum stage game at `state`, solves it, returns a policy.

    The row player is player 0 and the column player is player 1. Each cell
    holds the expected value of the successor states (read from
    `self._values`) for one joint action; player 1 receives the negation.

    Args:
      state: The state whose joint actions form the matrix game.

    Returns:
      A `(actions, probs)` pair: player 0's legal actions and the first
      element of the LP solution (player 0's strategy).
    """
    state_key = str(state)
    transitions = {}
    # Fills `transitions` for this single state; the lookups below rely on it.
    value_iteration._initialize_maps({state_key: state}, self._values,
                                     transitions)

    row_player_utils = []
    col_player_utils = []
    for row_action in state.legal_actions(0):
        q_row = []
        for col_action in state.legal_actions(1):
            outcomes = transitions[(state_key, row_action, col_action)]
            q_row.append(
                sum(p * self._values[next_state] for next_state, p in outcomes))
        row_player_utils.append(q_row)
        col_player_utils.append([-q for q in q_row])

    solution = lp_solver.solve_zero_sum_matrix_game(
        pyspiel.create_matrix_game(row_player_utils, col_player_utils))
    # NOTE: double check that the action order is consistent with the probs.
    return state.legal_actions(0), solution[0]
def test_extensive_to_matrix_game(self):
    """Solves Kuhn poker as a matrix game and checks both players' values."""
    matrix_game = pyspiel.extensive_to_matrix_game(
        pyspiel.load_game("kuhn_poker"))
    solution = lp_solver.solve_zero_sum_matrix_game(matrix_game)
    # Only the game values are checked; the two strategies are ignored.
    _, _, p0_value, p1_value = solution
    # value from Kuhn 1950 or https://en.wikipedia.org/wiki/Kuhn_poker
    self.assertAlmostEqual(p0_value, -1 / 18)
    self.assertAlmostEqual(p1_value, +1 / 18)
def test_solve_blotto(self):
    """Checks strategy sizes and the zero game value for Blotto."""
    game = pyspiel.load_matrix_game("blotto")
    solution = lp_solver.solve_zero_sum_matrix_game(game)
    row_policy, col_policy, row_value, col_value = solution
    self.assertEqual(len(row_policy), game.num_rows())
    self.assertEqual(len(col_policy), game.num_cols())
    # Symmetric game, must be zero
    for game_value in (row_value, col_value):
        self.assertAlmostEqual(game_value, 0.0)
def test_rock_paper_scissors(self):
    """Rock-paper-scissors: uniform strategies and a game value of zero."""
    row_payoffs = [[0.0, -1.0, 1.0], [1.0, 0.0, -1.0], [-1.0, 1.0, 0.0]]
    col_payoffs = [[0.0, 1.0, -1.0], [-1.0, 0.0, 1.0], [1.0, -1.0, 0.0]]
    game = pyspiel.create_matrix_game(row_payoffs, col_payoffs)
    row_policy, col_policy, row_value, col_value = (
        lp_solver.solve_zero_sum_matrix_game(game))

    self.assertEqual(len(row_policy), 3)
    self.assertEqual(len(col_policy), 3)
    uniform = 1.0 / 3.0
    for action in range(3):
        self.assertAlmostEqual(row_policy[action], uniform)
        self.assertAlmostEqual(col_policy[action], uniform)
    self.assertAlmostEqual(row_value, 0.0)
    self.assertAlmostEqual(col_value, 0.0)
def main(_):
    """Solves a biased rock-paper-scissors game and runs a dominance check.

    Prints each player's value and policy, player 0's probability for its
    second action, and the mixture returned by `lp_solver.is_dominated`.
    """
    # Other games that can be solved the same way:
    # lp_solver.solve_zero_sum_matrix_game(pyspiel.load_matrix_game("matrix_mp"))
    # lp_solver.solve_zero_sum_matrix_game(pyspiel.load_matrix_game("matrix_rps"))
    row_payoffs = [[0.0, -0.25, 0.5], [0.25, 0.0, -0.05], [-0.5, 0.05, 0.0]]
    col_payoffs = [[0.0, 0.25, -0.5], [-0.25, 0.0, 0.05], [0.5, -0.05, 0.0]]
    biased_rps = pyspiel.create_matrix_game(row_payoffs, col_payoffs)
    solution = lp_solver.solve_zero_sum_matrix_game(biased_rps)
    row_policy, col_policy, row_value, col_value = solution

    print("p0 val = {}, policy = {}".format(row_value, row_policy))
    print("p1 val = {}, policy = {}".format(col_value, col_policy))
    print(row_policy[1])

    dominance_payoffs = [[1., 1., 1.], [2., 0., 1.], [0., 2., 2.]]
    mixture = lp_solver.is_dominated(
        0, dominance_payoffs, 0, lp_solver.DOMINANCE_WEAK,
        return_mixture=True)
    print(mixture)
def test_asymmetric_pure_nonzero_val(self):
    """A 2x3 game with a pure equilibrium and a non-zero value."""
    #        c0      c1       c2
    # r0 | 2, -2 |  1, -1 | 5, -5
    # r1 |-3,  3 | -4,  4 | -2, 2
    #
    # Pure eq (r0,c1) for a value of (1, -1)
    # 2nd row is dominated, and then second player chooses 2nd col.
    row_payoffs = [[2.0, 1.0, 5.0], [-3.0, -4.0, -2.0]]
    col_payoffs = [[-2.0, -1.0, -5.0], [3.0, 4.0, 2.0]]
    game = pyspiel.create_matrix_game(row_payoffs, col_payoffs)
    row_policy, col_policy, row_value, col_value = (
        lp_solver.solve_zero_sum_matrix_game(game))

    self.assertLen(row_policy, 2)
    self.assertLen(col_policy, 3)
    # Row player commits to r0.
    self.assertAlmostEqual(row_policy[0], 1.0)
    self.assertAlmostEqual(row_policy[1], 0.0)
    # Column player commits to c1.
    self.assertAlmostEqual(col_policy[0], 0.0)
    self.assertAlmostEqual(col_policy[1], 1.0)
    self.assertAlmostEqual(row_value, 1.0)
    self.assertAlmostEqual(col_value, -1.0)
def test_biased_rock_paper_scissors(self):
    """Biased rock-paper-scissors: checks the (1, 10, 5) / 16 equilibrium."""
    # See sec 6.2 of Bosansky et al. 2016. Algorithms for Computing Strategies
    # in Two-Player Simultaneous Move Games
    # http://mlanctot.info/files/papers/aij-2psimmove.pdf
    row_payoffs = [[0.0, -0.25, 0.5], [0.25, 0.0, -0.05], [-0.5, 0.05, 0.0]]
    col_payoffs = [[0.0, 0.25, -0.5], [-0.25, 0.0, 0.05], [0.5, -0.05, 0.0]]
    game = pyspiel.create_matrix_game(row_payoffs, col_payoffs)
    row_policy, col_policy, row_value, col_value = (
        lp_solver.solve_zero_sum_matrix_game(game))

    self.assertLen(row_policy, 3)
    self.assertLen(col_policy, 3)
    expected_probs = (1.0 / 16.0, 10.0 / 16.0, 5.0 / 16.0)
    for action, expected in enumerate(expected_probs):
        self.assertAlmostEqual(row_policy[action], expected, places=4)
        self.assertAlmostEqual(col_policy[action], expected, places=4)
    self.assertAlmostEqual(row_value, 0.0)
    self.assertAlmostEqual(col_value, 0.0)
def nash_strategy(solver):
    """Returns nash distribution on meta game matrix.

    This method only works for two player zero-sum games.

    Args:
      solver: GenPSROSolver instance. `solver.get_meta_game` may be either a
        property holding the meta game or a method returning it; both forms
        are accepted. The meta game is either a list of per-player payoff
        matrices or a single matrix `M`, which is expanded to `[M, -M]`.

    Returns:
      Nash distribution on strategies: a list with one renormalized, flattened
      strategy per player.

    Raises:
      NotImplementedError: If the meta game does not have exactly two payoff
        matrices.
    """
    meta_games = solver.get_meta_game
    # A bound method is not a payoff matrix: call it to obtain the meta game.
    # Without this, a method-style accessor would reach `-meta_games` below
    # and fail with a TypeError.
    if callable(meta_games):
        meta_games = meta_games()
    if not isinstance(meta_games, list):
        meta_games = [meta_games, -meta_games]
    meta_games = [x.tolist() for x in meta_games]
    if len(meta_games) != 2:
        raise NotImplementedError(
            "nash_strategy solver works only for 2p zero-sum "
            "games, but was invoked for a {} player game".format(
                len(meta_games)))
    nash_prob_1, nash_prob_2, _, _ = lp_solver.solve_zero_sum_matrix_game(
        pyspiel.create_matrix_game(*meta_games))
    return [
        renormalize(np.array(nash_prob_1).reshape(-1)),
        renormalize(np.array(nash_prob_2).reshape(-1)),
    ]
def solve_subgame(subgame_payoffs):
    """Solves the subgame using OpenSpiel's LP solver.

    Args:
      subgame_payoffs: Payoff matrices, unpacked as the positional arguments
        of `pyspiel.create_matrix_game`.

    Returns:
      A two-element list holding each player's strategy as an array, each
      divided by its own sum.
    """
    matrix_game = pyspiel.create_matrix_game(*subgame_payoffs)
    row_sol, col_sol, _, _ = lp_solver.solve_zero_sum_matrix_game(matrix_game)
    strategies = (np.asarray(row_sol), np.asarray(col_sol))
    return [strategy / strategy.sum() for strategy in strategies]
def gen():
    """Yields a single `(p0_strategy, p1_strategy)` pair for the LP solution.

    Reads `row_payoffs` and `col_payoffs` from the enclosing scope. The game
    that is solved uses the payoff differences (`row - col` for the row
    player and `col - row` for the column player). Axis 1 is squeezed out of
    each returned strategy.
    """
    zero_sum_game = pyspiel.create_matrix_game(
        row_payoffs - col_payoffs,
        col_payoffs - row_payoffs,
    )
    row_sol, col_sol, _, _ = lp_solver.solve_zero_sum_matrix_game(
        zero_sum_game)
    yield (np.squeeze(row_sol, 1), np.squeeze(col_sol, 1))
def value_iteration(game, depth_limit, threshold, cyclic_game=False):
    """Solves for the optimal value function of a game.

    For small games only! Solves the game using value iteration, with the
    maximum error for the value function less than threshold.

    This algorithm works for sequential 1-player games or 2-player zero-sum
    games, with or without chance nodes.

    Arguments:
      game: The game to analyze, as returned by `load_game`.
      depth_limit: How deeply to analyze the game tree. Negative means no
        limit, 0 means root-only, etc.
      threshold: Maximum error for state values.
      cyclic_game: set to True if the game has cycles (from state A we can get
        to state B, and from state B we can get back to state A).

    Returns:
      A `dict` with string keys and float values, mapping string encoding of
      states to the values of those states.
    """
    num_players = game.num_players()
    assert num_players in (1, 2), "Game must be a 1-player or 2-player game"

    game_type = game.get_type()
    if num_players == 2:
        assert game_type.utility == pyspiel.GameType.Utility.ZERO_SUM, (
            "2-player games must be zero sum games")

    # Must be perfect information or one-shot (not imperfect information).
    assert game_type.information in (
        pyspiel.GameType.Information.ONE_SHOT,
        pyspiel.GameType.Information.PERFECT_INFORMATION,
    )

    # We expect Value Iteration to be used with perfect information games, in
    # which `str` is assumed to display the state of the game.
    states = get_all_states.get_all_states(
        game, depth_limit, True, False,
        to_string=str, stop_if_encountered=cyclic_game)
    values = {}
    transitions = {}
    _initialize_maps(states, values, transitions)

    min_utility = game.min_utility()
    error = threshold + 1  # Guarantees that the first sweep runs.
    while error > threshold:
        error = 0
        for key, state in states.items():
            if state.is_terminal():
                continue

            if state.is_simultaneous_node():
                # Simultaneous node: assemble a matrix game from the expected
                # child values (rows are player 0's actions, columns are
                # player 1's) and take player 0's value from its LP solution.
                row_utils = [
                    [
                        sum(p * values[next_state]
                            for next_state, p
                            in transitions[(key, row_action, col_action)])
                        for col_action in state.legal_actions(1)
                    ]
                    for row_action in state.legal_actions(0)
                ]
                col_utils = [[-q for q in q_row] for q_row in row_utils]
                stage_game = pyspiel.create_matrix_game(row_utils, col_utils)
                value = lp_solver.solve_zero_sum_matrix_game(stage_game)[2]
            else:
                # Regular decision node: player 0 maximizes, anyone else
                # minimizes, starting from the matching worst-case bound.
                player = state.current_player()
                if player == 0:
                    pick_best, value = max, min_utility
                else:
                    pick_best, value = min, -min_utility
                for action in state.legal_actions():
                    q_value = sum(
                        p * values[next_state]
                        for next_state, p in transitions[(key, action)])
                    value = pick_best(value, q_value)

            error = max(abs(values[key] - value), error)
            values[key] = value

    return values
def nash_solver(meta_games, solver="gambit", mode="one", gambit_path=None,
                lrsnash_path=None):
    """
    Solver for NE.
    :param meta_games: meta-games in PSRO.
    :param solver: options "gambit", "nashpy", "linear", "lrsnash",
        "replicator".
    :param mode: options "all", "one", "pure"
    :param gambit_path: forwarded to gambit_solve when solver is "gambit".
    :param lrsnash_path: path to lrsnash solver.
    :return: a list of NE.
    WARNING:
    opening up a subprocess in every iteration eventually
    leads the os to block the subprocess. Not usable.
    """
    num_players = len(meta_games)

    # These two solvers take the full list of meta-games and return directly.
    if solver == "gambit":
        return gambit_solve(meta_games, mode, gambit_path=gambit_path)
    if solver == "replicator":
        return [replicator_dynamics(meta_games)]

    # Everything below is restricted to two-player games.
    assert num_players == 2
    num_rows, num_cols = np.shape(meta_games[0])
    row_payoffs, col_payoffs = meta_games[0], meta_games[1]

    if num_rows == 1 or num_cols == 1:
        # A player with a single strategy: enumerate every pairing of pure
        # strategies (rows of the identity matrices).
        candidates = itertools.product(np.eye(num_rows), np.eye(num_cols))
    elif mode == 'pure':
        return pure_ne_solve(meta_games)
    elif solver == "linear":
        payoff_lists = [x.tolist() for x in meta_games]
        nash_prob_1, nash_prob_2, _, _ = (
            lp_solver.solve_zero_sum_matrix_game(
                pyspiel.create_matrix_game(*payoff_lists)))
        return [
            renormalize(np.array(nash_prob_1).reshape(-1)),
            renormalize(np.array(nash_prob_2).reshape(-1)),
        ]
    elif solver == "lrsnash":
        logging.info("Using lrsnash solver.")
        candidates = lrs_solve(row_payoffs, col_payoffs, lrsnash_path)
    elif solver == "nashpy":
        if mode == "all":
            logging.info("Using nashpy vertex enumeration.")
            candidates = nashpy.Game(
                row_payoffs, col_payoffs).vertex_enumeration()
        else:
            logging.info("Using nashpy Lemke-Howson solver.")
            candidates = lemke_howson_solve(row_payoffs, col_payoffs)
    else:
        raise ValueError("Please choose a valid NE solver.")

    # Check that there's at least one equilibrium; for a degenerate game fall
    # back to support enumeration.
    found = list(candidates)
    if not found:
        logging.warning("degenerate game!")
        found = list(
            nashpy.Game(row_payoffs, col_payoffs).support_enumeration())
        if not found:
            logging.warning("no equilibrium!")

    if mode == 'all':
        return found
    if mode == 'one':
        return found[0]
    raise ValueError("Please choose a valid mode.")