Exemplo n.º 1
0
 def test_qre_exploitability_of_non_nash(self, payoff_tensor, temperature,
                                         dist, exp):
     """Check qre_exploitability of `dist` against the expected value `exp`.

     Args:
       payoff_tensor: payoffs forwarded unchanged to
         exploitability.qre_exploitability.
       temperature: temperature forwarded unchanged to
         exploitability.qre_exploitability.
       dist: strategy profile whose exploitability is evaluated.
       exp: expected exploitability, compared with np.allclose.
     """

     def identity(value):
         # Aggregator that leaves the helper's result untouched.
         return value

     predicted = exploitability.qre_exploitability(
         dist, payoff_tensor, temperature, identity)
     template = 'exploitability mismatch: pred={}, true={}'
     self.assertTrue(np.allclose(predicted, exp),
                     msg=template.format(predicted, exp))
Exemplo n.º 2
0
  def exploitability(self, params, payoff_matrices):
    """Return the entropy-regularized exploitability of the current params.

    Delegates to `exp.qre_exploitability`, using this object's `temperature`.

    NOTE(review): the previous docstring described this as "tsallis entropy
    regularized", yet the helper called is `qre_exploitability` -- confirm
    which regularizer is actually intended.

    Args:
      params: tuple of params (dist, y), see ate.gradients. It is passed
          through as-is as the first argument of the helper.
      payoff_matrices: dictionary with keys as tuples of agents (i, j) and
          values of (2 x A x A) np.arrays, payoffs for each joint action. keys
          are sorted and arrays should be indexed in the same order
    Returns:
      float, exploitability of current dist
    """
    temperature = self.temperature
    return exp.qre_exploitability(params, payoff_matrices, temperature)
Exemplo n.º 3
0
 def test_qre_exploitability_of_rand(self,
                                     payoff_tensor,
                                     temperature,
                                     seed=None):
     """Assert that every randomly drawn strategy profile is exploitable.

     Draws 100 random vectors, splits each into one chunk per player,
     normalizes every chunk to sum to one, and checks that
     exploitability.qre_exploitability (aggregated with np.max) is strictly
     positive for all of them.

     Args:
       payoff_tensor: payoffs; `payoff_tensor[0].shape` is read as the
         number of strategies available to each player.
       temperature: temperature forwarded to qre_exploitability.
       seed: optional seed for the np.random.RandomState used for sampling.
     """
     trials = 100
     rng = np.random.RandomState(seed)
     strategies_per_player = payoff_tensor[0].shape
     # Boundaries between consecutive players' chunks in a flat sample; they
     # are the same for every trial, so compute them once.
     split_points = np.cumsum(strategies_per_player)[:-1]
     samples = rng.rand(trials, sum(strategies_per_player))

     def is_exploitable(sample):
         # Split the flat sample per player and normalize each chunk.
         chunks = np.split(sample, split_points)
         profile = [chunk / chunk.sum() for chunk in chunks]
         value = exploitability.qre_exploitability(
             profile, payoff_tensor, temperature, np.max)
         return value > 0.

     exploitable = [is_exploitable(sample) for sample in samples]
     perc = 100 * np.mean(exploitable)
     logging.info('rand strat exploitable rate out of %d is %f', trials,
                  perc)
     self.assertEqual(perc, 100., 'found rand strat that was nash')
Exemplo n.º 4
0
 def test_qre_exploitability_of_nash(self, payoff_tensor, nash,
                                     temperature):
     """Assert that the supplied `nash` profile has ~zero exploitability.

     Args:
       payoff_tensor: payoffs forwarded to
         exploitability.qre_exploitability.
       nash: strategy profile to evaluate.
       temperature: temperature forwarded to qre_exploitability.
     """
     tolerance = 1e-10
     failure_msg = 'uniform nash should have zero exploitability'
     nash_exp = exploitability.qre_exploitability(
         nash, payoff_tensor, temperature, np.max)
     # Passes when tolerance >= nash_exp, i.e. exploitability is within
     # numerical noise of zero.
     self.assertGreaterEqual(tolerance, nash_exp, failure_msg)