示例#1
0
    def exploitability(self, params, payoff_matrices):
        """Return the Tsallis-entropy regularized exploitability of `params`.

        Delegates to `exp.ate_exploitability`, forwarding this object's `p`
        attribute as the third argument.

        Args:
          params: tuple of params (dist, y), see ate.gradients
          payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint
            action

        Returns:
          float, exploitability of current dist
        """
        score = exp.ate_exploitability(params, payoff_matrices, self.p)
        return score
 def test_ate_exploitability_of_rand(self, payoff_tensor, p, seed=None):
     """Check that every randomly drawn strategy is strictly exploitable.

     Draws 100 random distributions over the strategies (the size of the
     last axis of `payoff_tensor`), evaluates
     `exploitability.ate_exploitability` on each one, logs the percentage
     with exploitability > 0 and asserts that this percentage is 100.

     Args:
       payoff_tensor: array-like with a `.shape`; its last dimension gives
         the number of strategies.
       p: forwarded unchanged to `ate_exploitability`.
       seed: optional seed for `np.random.RandomState`.
     """
     trials = 100
     rng = np.random.RandomState(seed)
     num_strategies = payoff_tensor.shape[-1]
     # One row per trial; dividing by the row sum makes each row sum to one.
     weights = rng.rand(trials, num_strategies)
     dists = weights / np.sum(weights, axis=1, keepdims=True)
     exploitable = [
         exploitability.ate_exploitability(dist, payoff_tensor, p) > 0.
         for dist in dists
     ]
     perc = 100 * np.mean(exploitable)
     logging.info('rand strat exploitable rate out of %d is %f', trials,
                  perc)
     self.assertEqual(perc, 100., 'found rand strat that was nash')
 def test_ate_exploitability_of_non_nash(self, payoff_tensor, p, dist, exp):
     """Check that `dist` has the expected exploitability `exp`.

     Computes `exploitability.ate_exploitability(dist, payoff_tensor, p)`
     and asserts it is almost equal to `exp` (default unittest tolerance).
     """
     # assumes symmetric games
     predicted = exploitability.ate_exploitability(dist, payoff_tensor, p)
     self.assertAlmostEqual(
         predicted, exp, msg='dist should have the given exploitability')
 def test_ate_exploitability_of_nash(self, payoff_tensor, nash, p):
     """Check that the given Nash strategy is not exploitable.

     Computes `exploitability.ate_exploitability(nash, payoff_tensor, p)`
     and asserts that 0 is greater than or equal to the result.
     """
     # assumes symmetric games
     nash_exploitability = exploitability.ate_exploitability(
         nash, payoff_tensor, p)
     self.assertGreaterEqual(
         0., nash_exploitability,
         msg='uniform nash should have zero exploitability')