Exemplos de qre_exploitability em Python, exemplos de open_spiel.python.algorithms.adidas_utils.helpers.symmetric.exploitability.qre_exploitability em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: exploitability_test.py Projeto: deepmind/open_spiel

 def test_qre_exploitability_of_nash(self, payoff_tensor, nash,
                                     temperature):
     """Check that the supplied Nash distribution is (numerically) unexploitable.

     Evaluates `exploitability.qre_exploitability` on `nash` and requires the
     result to be no larger than 1e-10.

     Args:
       payoff_tensor: payoff tensor forwarded to qre_exploitability; the
         game is assumed to be symmetric.
       nash: candidate distribution, passed as the first argument.
       temperature: temperature forwarded to qre_exploitability.
     """
     tolerance = 1e-10
     nash_exp = exploitability.qre_exploitability(
         nash, payoff_tensor, temperature)
     # Passes when tolerance >= nash_exp, i.e. exploitability is ~zero.
     self.assertGreaterEqual(
         tolerance,
         nash_exp,
         msg='uniform nash should have zero exploitability')

Exemplo n.º 2

0

Exibir arquivo

Arquivo: exploitability_test.py Projeto: deepmind/open_spiel

 def test_qre_exploitability_of_non_nash(self, payoff_tensor, temperature,
                                         dist, exp):
     """Check that `dist` has the expected exploitability.

     Evaluates `exploitability.qre_exploitability` on `dist` and compares the
     result to `exp` with `assertAlmostEqual`.

     Args:
       payoff_tensor: payoff tensor forwarded to qre_exploitability; the
         game is assumed to be symmetric.
       temperature: temperature forwarded to qre_exploitability.
       dist: distribution under test, passed as the first argument.
       exp: expected exploitability value for `dist`.
     """
     predicted = exploitability.qre_exploitability(
         dist, payoff_tensor, temperature)
     self.assertAlmostEqual(
         predicted, exp, msg='dist should have the given exploitability')

Exemplo n.º 3

0

Exibir arquivo

  def exploitability(self, params, payoff_matrices):
    """Return the Shannon-entropy-regularized exploitability.

    Delegates to `exp.qre_exploitability`, supplying this object's
    `temperature` as the regularization temperature.

    Args:
      params: tuple of params (dist, y), see qre.gradients
      payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action
    Returns:
      float, exploitability of current dist
    """
    # NOTE(review): params is forwarded as-is (the tuple is not unpacked) --
    # confirm that qre_exploitability expects it in this form.
    regularized_exp = exp.qre_exploitability(
        params, payoff_matrices, self.temperature)
    return regularized_exp

Exemplo n.º 4

0

Exibir arquivo

Arquivo: exploitability_test.py Projeto: deepmind/open_spiel

 def test_qre_exploitability_of_rand(self,
                                     payoff_tensor,
                                     temperature,
                                     seed=None):
     """Check that randomly drawn distributions are all exploitable.

     Draws 100 random distributions over the strategies, evaluates
     `exploitability.qre_exploitability` on each one, logs the percentage
     with strictly positive exploitability and requires it to be 100.

     Args:
       payoff_tensor: payoff tensor; the size of its last axis is used as the
         number of strategies.
       temperature: temperature forwarded to qre_exploitability.
       seed: seed for np.random.RandomState (None lets numpy choose).
     """
     trials = 100
     rng = np.random.RandomState(seed)
     num_strategies = payoff_tensor.shape[-1]
     # Normalize each row so that it sums to one.
     samples = rng.rand(trials, num_strategies)
     dists = samples / np.sum(samples, axis=1, keepdims=True)
     exploitable = [
         exploitability.qre_exploitability(candidate, payoff_tensor,
                                           temperature) > 0.
         for candidate in dists
     ]
     perc = 100 * np.mean(exploitable)
     logging.info('rand strat exploitable rate out of %d is %f', trials,
                  perc)
     self.assertEqual(perc, 100., 'found rand strat that was nash')