def test_expected_value_exact(self):
    """Check ``game.expected_value_exact`` against a hand-computed value.

    Both players use fixed mixed strategies over 'R', 'P' and 'S'. The
    value expected for player 1 is written out term by term, and the value
    expected for player 2 is its negation.
    """
    game, _, _ = create_neural_rock_paper_scissors()

    # Strategy for the information set containing the root node.
    strategy1 = extensive_game.Strategy({
        game.info_set_ids[game.get_node(())]:
            extensive_game.ActionFloat({'R': 0.5, 'P': 0.2, 'S': 0.3})
    })
    # Strategy for the information set containing the node reached by 'R'.
    strategy2 = extensive_game.Strategy({
        game.info_set_ids[game.get_node(('R', ))]:
            extensive_game.ActionFloat({'R': 0.2, 'P': 0.3, 'S': 0.5})
    })

    computed1, computed2 = game.expected_value_exact(
        strategy1=strategy1, strategy2=strategy2)

    # One outer term per action of strategy1, one inner term per action of
    # strategy2 (in the order R, P, S).
    expected1 = (
        0.5 * (0.2 * 0 + 0.3 * -1 + 0.5 * 1) +
        0.2 * (0.2 * 1 + 0.3 * 0 + 0.5 * -1) +
        0.3 * (0.2 * -1 + 0.3 * 1 + 0.5 * 0))

    # The expected value is built from binary floating-point arithmetic, so
    # an implementation that combines the same terms in a different order
    # may differ in the last bits. Compare with a tolerance instead of
    # requiring bit-for-bit equality.
    self.assertAlmostEqual(computed1, expected1)
    self.assertAlmostEqual(computed2, -expected1)
def test_is_strategy_complete(self):
    """Exercise ``game.is_strategy_complete`` on three candidate strategies.

    Each case is a plain ``{info_set: {action: probability}}`` description
    paired with the result the game is expected to report for it.
    """
    game, _, _ = create_neural_rock_paper_scissors()

    cases = [
        # Incomplete: an information set is missing.
        ({
            (): {'R': 0.4, 'P': 0.5, 'S': 0.1},
        }, False),
        # Incomplete: an action is missing from the second entry.
        ({
            (): {'R': 0.4, 'P': 0.5, 'S': 0.1},
            (-1, ): {'R': 0.4, 'P': 0.5},
        }, False),
        # Complete: both entries list all three actions.
        ({
            (): {'R': 0.4, 'P': 0.5, 'S': 0.1},
            (-1, ): {'R': 0.4, 'P': 0.3, 'S': 0.3},
        }, True),
    ]

    for description, want in cases:
        candidate = extensive_game.Strategy({
            info_set: extensive_game.ActionFloat(probabilities)
            for info_set, probabilities in description.items()
        })
        verdict = game.is_strategy_complete(candidate)
        self.assertEqual(verdict, want)
def test_copy_strategy(self):
    """Check that ``Strategy.copy`` yields an equal but independent strategy.

    After copying, replacing an entry in the original must leave the
    corresponding entry of the copy untouched.
    """
    original = extensive_game.Strategy({
        'info1': extensive_game.ActionFloat({'a1': 0.4, 'a2': 0.6}),
        'info2': extensive_game.ActionFloat({'a2': 0.3, 'a4': 0.7}),
    })
    duplicate = original.copy()

    # Straight after copying, every entry compares equal.
    for info_set in ('info1', 'info2'):
        self.assertEqual(original[info_set], duplicate[info_set])

    # Overwrite one entry of the original only.
    replacement = extensive_game.ActionFloat({'a1': 0.2, 'a2': 0.8})
    original['info1'] = replacement

    # The entry that was not overwritten still matches ...
    self.assertEqual(original['info2'], duplicate['info2'])
    # ... and the copy still holds the values it was created with.
    self.assertEqual(
        duplicate['info1'],
        extensive_game.ActionFloat({'a1': 0.4, 'a2': 0.6}))
def test_equals(self):
    """Check equality and inequality between ``Strategy`` objects."""

    def build(info1_probabilities):
        # All three strategies share the same 'info2' entry; only the
        # 'info1' probabilities vary between them.
        return extensive_game.Strategy({
            'info1': extensive_game.ActionFloat(info1_probabilities),
            'info2': extensive_game.ActionFloat({'a2': 0.3, 'a4': 0.7}),
        })

    baseline = build({'a1': 0.4, 'a2': 0.6})
    # Built separately with the same contents as the baseline.
    twin = build({'a1': 0.4, 'a2': 0.6})
    # Differs from the baseline in the 'info1' probabilities.
    variant = build({'a1': 0.3, 'a2': 0.7})

    self.assertEqual(baseline, twin)
    self.assertNotEqual(baseline, variant)
def test_rock_paper_scissors(self):
    """Check ``cfr_metrics.compute_expected_utility`` on rock-paper-scissors.

    Fixed mixed strategies are given to both players. The utilities
    returned for the root node (first result) and for each node reached
    after player 1's action (second result) are compared with values
    written out by hand.
    """
    game = rock_paper_scissors.create_rock_paper_scissors()

    info_set_1 = game.info_set_ids[game.root]
    info_set_2 = game.info_set_ids[game.root.children['R']]

    # Player 1 plays (R, P, S) with probabilities (0.5, 0.3, 0.2), respectively.
    sigma_1 = extensive_game.Strategy({
        info_set_1: extensive_game.ActionFloat({'R': 0.5, 'P': 0.3, 'S': 0.2})
    })

    # Player 2 plays (R, P, S) with probabilities (0.3, 0.3, 0.4), respectively.
    sigma_2 = extensive_game.Strategy({
        info_set_2: extensive_game.ActionFloat({'R': 0.3, 'P': 0.3, 'S': 0.4}),
    })

    expected_utility_1, expected_utility_2 = cfr_metrics.compute_expected_utility(
        game, sigma_1, sigma_2)

    # The expected values below are built from binary floating-point
    # arithmetic, so they are compared with a tolerance: an implementation
    # that combines the same terms in a different order may differ in the
    # last bits.
    utility_root = (
        0.5 * (0 * 0.3 + -1 * 0.3 + 1 * 0.4) +  # RR, RP, RS
        0.3 * (1 * 0.3 + 0 * 0.3 + -1 * 0.4) +  # PR, PP, PS
        0.2 * (-1 * 0.3 + 1 * 0.3 + 0 * 0.4))  # SR, SP, SS
    self.assertAlmostEqual(expected_utility_1[game.get_node(())], utility_root)

    utility_R = 0 * 0.3 + 1 * 0.3 + -1 * 0.4  # RR, RP, RS
    self.assertAlmostEqual(expected_utility_2[game.get_node(('R', ))], utility_R)

    utility_P = -1 * 0.3 + 0 * 0.3 + 1 * 0.4  # PR, PP, PS
    self.assertAlmostEqual(expected_utility_2[game.get_node(('P', ))], utility_P)

    utility_S = 1 * 0.3 + -1 * 0.3 + 0 * 0.4  # SR, SP, SS
    self.assertAlmostEqual(expected_utility_2[game.get_node(('S', ))], utility_S)
def compute_strategy(self):
    """Return the average strategy derived from ``self.alpha``.

    Every key of ``self.alpha`` is treated as an information set, and its
    value is passed through ``extensive_game.ActionFloat.normalise``.

    Returns:
        extensive_game.Strategy: built from a dict mapping each information
        set in ``self.alpha`` to its normalised value.
    """
    normalised = {
        info_set: extensive_game.ActionFloat.normalise(self.alpha[info_set])
        for info_set in self.alpha
    }
    return extensive_game.Strategy(normalised)
def test_compute_weighted_strategy(self):
    """Check ``extensive_game.compute_weighted_strategy`` on two info sets.

    The input maps each information set to a list of ``(weight, ActionFloat)``
    pairs. The expected result holds, per action, the weighted sum of the
    probabilities divided by the sum of the weights.
    """
    weighted_inputs = {
        'info1': [
            (1.0, extensive_game.ActionFloat({'a1': 0.4, 'a2': 0.6})),
            (2.0, extensive_game.ActionFloat({'a1': 0.5, 'a2': 0.5})),
        ],
        'info2': [
            (3.0, extensive_game.ActionFloat({'a1': 0.6, 'a2': 0.4})),
            (2.0, extensive_game.ActionFloat({'a1': 0.3, 'a2': 0.7})),
            (1.0, extensive_game.ActionFloat({'a1': 0.0, 'a2': 1.0})),
        ],
    }

    # Total weight per information set, used as the normalising divisor.
    info1_weight = 1.0 + 2.0
    info2_weight = 3.0 + 2.0 + 1.0

    want = extensive_game.Strategy({
        'info1': extensive_game.ActionFloat({
            'a1': (1.0 * 0.4 + 2.0 * 0.5) / info1_weight,
            'a2': (1.0 * 0.6 + 2.0 * 0.5) / info1_weight,
        }),
        'info2': extensive_game.ActionFloat({
            'a1': (3.0 * 0.6 + 2.0 * 0.3 + 1.0 * 0.0) / info2_weight,
            'a2': (3.0 * 0.4 + 2.0 * 0.7 + 1.0 * 1.0) / info2_weight,
        }),
    })

    got = extensive_game.compute_weighted_strategy(weighted_inputs)
    self.assertEqual(got, want)
def test_rock_paper_scissors_recursive(self):
    """Check ``cfr_metrics.compute_expected_utility_recursive`` node by node.

    Three groups of nodes are checked with fixed mixed strategies for both
    players: all nine terminal nodes, the node reached after 'R', and the
    root node.
    """
    game = rock_paper_scissors.create_rock_paper_scissors()

    info_set_1 = game.info_set_ids[game.root]
    info_set_2 = game.info_set_ids[game.root.children['R']]

    # Player 1 plays (R, P, S) with probabilities (0.5, 0.3, 0.2), respectively.
    sigma_1 = extensive_game.Strategy({
        info_set_1: extensive_game.ActionFloat({'R': 0.5, 'P': 0.3, 'S': 0.2})
    })

    # Player 2 plays (R, P, S) with probabilities (0.3, 0.3, 0.4), respectively.
    sigma_2 = extensive_game.Strategy({
        info_set_2: extensive_game.ActionFloat({'R': 0.3, 'P': 0.3, 'S': 0.4}),
    })

    # Check that terminal nodes have value equal to their utility to the
    # player. No arithmetic is written out here, so the comparison stays exact.
    terminal_nodes = [
        game.get_node((a1, a2))
        for a1 in ['R', 'P', 'S']
        for a2 in ['R', 'P', 'S']
    ]
    for node in terminal_nodes:
        v1, v2 = cfr_metrics.compute_expected_utility_recursive(
            game, node, sigma_1, sigma_2,
            collections.defaultdict(float), collections.defaultdict(float),
            1.0, 1.0, 1.0)
        expected_v1 = node.utility[1]
        expected_v2 = node.utility[2]
        self.assertEqual(v1, expected_v1)
        self.assertEqual(v2, expected_v2)

    # The remaining expectations are built from binary floating-point
    # arithmetic, so they are compared with a tolerance: an implementation
    # that combines the same terms in a different order may differ in the
    # last bits.

    # Check the values of the player 2 nodes.
    v1, v2 = cfr_metrics.compute_expected_utility_recursive(
        game, game.get_node(('R', )), sigma_1, sigma_2,
        collections.defaultdict(float), collections.defaultdict(float),
        0.5, 1.0, 1.0)
    self.assertAlmostEqual(v1, 0 * 0.3 + -1 * 0.3 + 1 * 0.4)
    self.assertAlmostEqual(v2, 0 * 0.3 + 1 * 0.3 + -1 * 0.4)

    # Check the values of the (only) player 1 node.
    # NOTE(review): this call passes the same trailing arguments
    # (0.5, 1.0, 1.0) as the ('R',) check above; confirm that 0.5 is
    # intended for the root node.
    v1, v2 = cfr_metrics.compute_expected_utility_recursive(
        game, game.get_node(()), sigma_1, sigma_2,
        collections.defaultdict(float), collections.defaultdict(float),
        0.5, 1.0, 1.0)
    self.assertAlmostEqual(
        v1,
        (
            0.5 * (0 * 0.3 + -1 * 0.3 + 1 * 0.4) +  # RR, RP, RS
            0.3 * (1 * 0.3 + 0 * 0.3 + -1 * 0.4) +  # PR, PP, PS
            0.2 * (-1 * 0.3 + 1 * 0.3 + 0 * 0.4)))  # SR, SP, SS
    self.assertAlmostEqual(
        v2,
        (
            0.5 * (0 * 0.3 + 1 * 0.3 + -1 * 0.4) +  # RR, RP, RS
            0.3 * (-1 * 0.3 + 0 * 0.3 + 1 * 0.4) +  # PR, PP, PS
            0.2 * (1 * 0.3 + -1 * 0.3 + 0 * 0.4)))  # SR, SP, SS