def prisoners_dilemma():
  """Build the prisoners dilemma grid game taken from the examples.

  The fixture is used to test complex behaviours such as transitions and the
  event information reported by the environment.

  The board has players ``A`` and ``B`` on the middle row, item ``a`` at the
  left end of that row, item ``b`` at the right end, and item ``d`` in the
  top row. A ``*_moves`` event is rewarded with -1 for the mover, each
  registered ``*_collects_*`` event is rewarded with 100 for the collector,
  and every one of those collection events is also registered through
  ``add_terminaison``.

  Returns:
    The configured ``game.Game`` instance, created with ``tabular=True``.
  """
  layout = [
      '####d####',
      'a  A B  b',
      '#########',
  ]

  # Items first, then players, so objects are created in a fixed order.
  collectibles = {
      'a': game.Item(color=(0, 254, 254)),
      'b': game.Item(color=(254, 254, 0)),
      'd': game.Item(color=(0, 254, 254)),
  }
  agents = {
      'A': game.Player(color=(0, 100, 254)),
      'B': game.Player(color=(254, 100, 0)),
  }

  env = game.Game(layout, collectibles, agents, tabular=True)

  # (event name, per-player reward) pairs, registered in this exact order.
  reward_table = (
      ('A_moves', {'A': -1}),
      ('B_moves', {'B': -1}),
      ('A_collects_a', {'A': 100}),
      ('B_collects_b', {'B': 100}),
      ('A_collects_d', {'A': 100}),
      ('B_collects_d', {'B': 100}),
  )
  for event_name, payoff in reward_table:
    env.add_reward(event_name, payoff)

  # Collection events that are registered as terminations.
  terminal_events = (
      'A_collects_d',
      'B_collects_d',
      'A_collects_a',
      'B_collects_b',
  )
  for event_name in terminal_events:
    env.add_terminaison(event_name)

  return env
示例#2
0
def create_game():
    """Create the asymmetric game wrapped in a frame-by-frame visualizer.

    Players ``A`` and ``B`` start side by side on a single corridor. Item
    ``c`` sits right next to ``B``, while items ``b`` and ``a`` lie further
    away on the left. Only ``A`` has rewards registered for ``a`` and ``c``,
    and only ``B`` for ``b``. Each move event is rewarded with -10 and each
    registered collection event with 100; those collection events are also
    registered through ``add_terminaison``.

    The freshly built game is displayed once via ``env.display()`` before
    rewards are registered.

    Returns:
        A ``visualizer.Visualizer`` wrapping the game (``fps=2``,
        ``by_episode=False``).
    """
    layout = [
        '############',
        '#a b   ABc #',
        '############',
    ]

    collectibles = {
        'a': game.Item(color=(0, 254, 254)),
        'b': game.Item(color=(254, 254, 0)),
        'c': game.Item(color=(0, 254, 254)),
    }
    agents = {
        'A': game.Player(color=(0, 100, 254)),
        'B': game.Player(color=(254, 100, 0)),
    }

    base_env = game.Game(layout, collectibles, agents, tabular=True)
    base_env.display()

    # Moving is penalised for both players.
    for mover in ('A', 'B'):
        base_env.add_reward(mover + '_moves', {mover: -10})

    # (collector, item) pairs that are rewarded and registered as
    # terminations.
    pickups = (('A', 'a'), ('A', 'c'), ('B', 'b'))
    pickup_events = [
        (who + '_collects_' + what, who) for who, what in pickups
    ]
    for event_name, who in pickup_events:
        base_env.add_reward(event_name, {who: 100})
    for event_name, _ in pickup_events:
        base_env.add_terminaison(event_name)

    # Frame-by-frame visualization. For fast visualization, use
    # fps=1000 and by_episode=True instead.
    return visualizer.Visualizer(base_env, fps=2, by_episode=False)
示例#3
0
def create_game():
    """Create the temptation game wrapped in a frame-by-frame visualizer.

    The board has ten rows labelled ``a`` to ``j``, each with the matching
    item on both its left and right edge. Players ``A`` and ``B`` start side
    by side on row ``a``. Collecting item number ``k`` (``a`` is 1, ``j`` is
    10) is rewarded with ``k * 10`` for the collector, each move event is
    rewarded with -1, and every collection event is registered through
    ``add_terminaison``.

    The freshly built game is displayed once via ``env.display()`` before
    rewards are registered.

    Returns:
        A ``visualizer.Visualizer`` wrapping the game (``fps=2``,
        ``by_episode=False``).
    """
    letters = 'abcdefghij'

    layout = [
        '########',
        'a  AB  a',
        'b      b',
        'c      c',
        'd      d',
        'e      e',
        'f      f',
        'g      g',
        'h      h',
        'i      i',
        'j      j',
        '########',
    ]

    # Red channel goes 60, 80, ..., 220 for 'a'..'i'; 'j' uses 254.
    red_levels = list(range(60, 221, 20)) + [254]
    collectibles = {
        letter: game.Item(color=(red, 254, 254))
        for letter, red in zip(letters, red_levels)
    }

    agents = {
        'A': game.Player(color=(0, 100, 254)),
        'B': game.Player(color=(254, 100, 0)),
    }

    base_env = game.Game(layout, collectibles, agents, tabular=True)
    base_env.display()

    for mover in ('A', 'B'):
        base_env.add_reward(mover + '_moves', {mover: -1})

    for rank, letter in enumerate(letters, start=1):
        payoff = rank * 10
        a_event = 'A_collects_' + letter
        b_event = 'B_collects_' + letter
        base_env.add_reward(a_event, {'A': payoff})
        base_env.add_reward(b_event, {'B': payoff})
        base_env.add_terminaison(a_event)
        base_env.add_terminaison(b_event)

    # Frame-by-frame visualization. For fast visualization, use
    # fps=1000 and by_episode=True instead.
    return visualizer.Visualizer(base_env, fps=2, by_episode=False)
示例#4
0
def small_grid():
    """Build a small 3x3 grid game, used to test state generation.

    The grid contains no items, only players ``A`` and ``B``; every other
    cell is ``#``.

    Returns:
        The ``game.Game`` instance, created with ``tabular=True`` and
        ``max_steps=50``.
    """
    layout = [
        '###',
        'A##',
        '##B',
    ]
    no_items = {}
    agents = {
        'A': game.Player(color=(0, 100, 254)),
        'B': game.Player(color=(254, 100, 0)),
    }
    return game.Game(layout, no_items, agents, tabular=True, max_steps=50)
示例#5
0
    def test_transition(self):
        """Check events and rewards reported by ``env.step`` on a small grid.

        Builds the prisoners dilemma grid inline, then runs several
        independent scenarios, resetting the environment after each one:
        A walking into a wall, A being blocked by B, A and B targeting the
        same cell, A and B blocking each other, and A collecting item ``a``.
        Each scenario asserts on ``infos['event_list']`` and on the rewards
        returned by the final step.
        """

        # (this is the prisoners dilemma grid game from examples)
        art = ['####d####', 'a  A B  b', '#########']

        item_a = game.Item(color=(0, 254, 254))
        item_b = game.Item(color=(254, 254, 0))
        item_d = game.Item(color=(254, 254, 254))

        items = {'a': item_a, 'b': item_b, 'd': item_d}

        player_a = game.Player(color=(0, 100, 254))
        player_b = game.Player(color=(254, 100, 0))

        players = {'A': player_a, 'B': player_b}

        env = game.Game(art, items, players, tabular=True, max_steps=50)

        # Moving costs 1; each registered collection event pays 100.
        env.add_reward('A_moves', {'A': -1})
        env.add_reward('B_moves', {'B': -1})
        env.add_reward('A_collects_a', {'A': 100})
        env.add_reward('B_collects_b', {'B': 100})
        env.add_reward('A_collects_d', {'A': 100})
        env.add_reward('B_collects_d', {'B': 100})

        # Every rewarded collection event is also registered as a terminaison.
        env.add_terminaison('A_collects_d')
        env.add_terminaison('B_collects_d')
        env.add_terminaison('A_collects_a')
        env.add_terminaison('B_collects_b')

        _ = env.reset()

        # NOTE(review): action lists appear to hold one code per player in
        # [A, B] order, and rewards[0] / rewards[1] appear to be A's / B's
        # rewards -- confirm against game.Game.step.

        # A goes in wall
        actions = [1, 0]
        _, rewards, _, infos = env.step(actions)
        self.assertIn('A_moves', infos['event_list'])
        self.assertIn('A_goes_in_walls', infos['event_list'])
        self.assertEqual(rewards[0], -1)
        _ = env.reset()

        # A goes in B
        # (the first step is a setup move whose result is ignored; only the
        # second step is asserted on)
        actions = [4, 0]
        _ = env.step(actions)
        actions = [4, 0]
        _, rewards, _, infos = env.step(actions)
        self.assertIn('A_moves', infos['event_list'])
        self.assertIn('A_blocked_by_B', infos['event_list'])
        self.assertEqual(rewards[0], -1)
        _ = env.reset()

        # A and B reach same cell
        # (either player may lose the draw, so both outcomes are accepted)
        actions = [4, 3]
        _, rewards, _, infos = env.step(actions)
        self.assertIn('A_moves', infos['event_list'])
        self.assertIn('B_moves', infos['event_list'])
        self.assertTrue(('A_lost_the_drawn' in infos['event_list'])
                        or ('B_lost_the_drawn' in infos['event_list']))
        self.assertEqual(rewards[0], -1)
        self.assertEqual(rewards[1], -1)
        _ = env.reset()

        # A and B block each other
        # (the first step is a setup move; the second step is asserted on)
        actions = [4, 0]
        _ = env.step(actions)
        actions = [4, 3]
        _, rewards, _, infos = env.step(actions)
        self.assertIn('A_moves', infos['event_list'])
        self.assertIn('B_moves', infos['event_list'])
        self.assertIn('A_blocked_by_B', infos['event_list'])
        self.assertIn('B_blocked_by_A', infos['event_list'])
        self.assertEqual(rewards[0], -1)
        self.assertEqual(rewards[1], -1)
        _ = env.reset()

        # A reaches reward
        # (three identical steps; only the last one is asserted on)
        actions = [3, 0]
        _ = env.step(actions)
        actions = [3, 0]
        _ = env.step(actions)
        actions = [3, 0]
        _, rewards, _, infos = env.step(actions)
        self.assertIn('A_moves', infos['event_list'])
        self.assertIn('A_collects_a', infos['event_list'])
        # Collection reward (100) plus the move cost (-1) on the same step.
        self.assertEqual(rewards[0], 100 - 1)
        _ = env.reset()