def exploitability(self, params, payoff_matrices):
    """Return the tsallis entropy regularized exploitability of `params`.

    Thin wrapper that forwards its arguments, together with this object's
    `p` attribute, to `exp.ate_exploitability`.

    Args:
        params: tuple of params (dist, y), see ate.gradients
        payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint
            action

    Returns:
        float, exploitability of current dist
    """
    compute_exploitability = exp.ate_exploitability
    return compute_exploitability(params, payoff_matrices, self.p)
def test_ate_exploitability_of_rand(self, payoff_tensor, p, seed=None):
    """Check that every randomly sampled strategy is exploitable.

    Draws 100 random distributions over the last axis of `payoff_tensor`,
    evaluates `exploitability.ate_exploitability` on each one, and asserts
    that all of them have strictly positive exploitability.

    Args:
        payoff_tensor: array whose last axis size gives the number of
            strategies; passed through to `ate_exploitability`.
        p: passed through unchanged to `ate_exploitability`.
        seed: seed for the `np.random.RandomState` used for sampling.
    """
    trials = 100
    rng = np.random.RandomState(seed)
    num_strategies = payoff_tensor.shape[-1]

    # Sample positive weights and normalize each row to sum to one.
    samples = rng.rand(trials, num_strategies)
    samples = samples / samples.sum(axis=1, keepdims=True)

    is_exploitable = [
        exploitability.ate_exploitability(sample, payoff_tensor, p) > 0.
        for sample in samples
    ]

    perc = 100 * np.mean(is_exploitable)
    logging.info('rand strat exploitable rate out of %d is %f', trials, perc)
    self.assertEqual(perc, 100., 'found rand strat that was nash')
def test_ate_exploitability_of_non_nash(self, payoff_tensor, p, dist, exp):
    """Check that `dist` yields the expected exploitability `exp`.

    Note: assumes symmetric games.

    Args:
        payoff_tensor: payoffs passed through to `ate_exploitability`.
        p: passed through unchanged to `ate_exploitability`.
        dist: strategy distribution under test.
        exp: exploitability value that `dist` is expected to have.
    """
    failure_msg = 'dist should have the given exploitability'
    predicted = exploitability.ate_exploitability(dist, payoff_tensor, p)
    self.assertAlmostEqual(predicted, exp, msg=failure_msg)
def test_ate_exploitability_of_nash(self, payoff_tensor, nash, p):
    """Check that the given Nash strategy has no positive exploitability.

    Note: assumes symmetric games.

    Args:
        payoff_tensor: payoffs passed through to `ate_exploitability`.
        nash: strategy distribution expected to be a Nash equilibrium.
        p: passed through unchanged to `ate_exploitability`.
    """
    failure_msg = 'uniform nash should have zero exploitability'
    nash_exploitability = exploitability.ate_exploitability(
        nash, payoff_tensor, p)
    # Asserts 0 >= exploitability, i.e. the value must not be positive.
    self.assertGreaterEqual(0., nash_exploitability, failure_msg)