Example #1
    def __init__(self, game_name, agent_num, action_num=12):
        Serializable.quick_init(self, locals())
        self.game = game_name
        self.agent_num = agent_num
        self.action_num = action_num
        self.action_spaces = MADiscrete([action_num] * self.agent_num)
        self.observation_spaces = MADiscrete([1] * self.agent_num)
        self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
        self.t = 0
        self.numplots = 0
        self.payoff = {}

        if self.game == 'lemonade':
            assert self.agent_num == 3

            def get_distance(a_n, i):
                # Swap agent i's action to the front, then sum its absolute
                # distances to the other two agents' actions.
                assert len(a_n) == 3
                a_n_i = np.copy(a_n)
                a_n_i[0], a_n_i[i] = a_n_i[i], a_n_i[0]
                return np.abs(a_n_i[0] - a_n_i[1]) + np.abs(a_n_i[0] - a_n_i[2])

            self.payoff = get_distance
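
A minimal standalone check of the lemonade payoff above, using plain NumPy and a hypothetical joint action (no Serializable/MAEnvSpec machinery needed):

import numpy as np

def lemonade_payoff(a_n, i):
    # Same computation as get_distance above: swap agent i's action to the
    # front, then sum its absolute distances to the other two actions.
    a_n_i = np.copy(a_n)
    a_n_i[0], a_n_i[i] = a_n_i[i], a_n_i[0]
    return np.abs(a_n_i[0] - a_n_i[1]) + np.abs(a_n_i[0] - a_n_i[2])

actions = np.array([0, 4, 8])  # one action index per agent
print([int(lemonade_payoff(actions, i)) for i in range(3)])  # [12, 8, 12]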
Example #2
    def __init__(self,
                 agent_num,
                 mus=(0., 400.),
                 sigmas=(100., 200.),
                 action_low=0,
                 action_high=10):
        Serializable.quick_init(self, locals())
        self.game_name = 'gaussian_squeeze'
        self.mus = np.array(mus)
        self.sigmas = np.array(sigmas)
        self.agent_num = agent_num
        self.action_range = [action_low, action_high]
        # One-dimensional continuous action per agent, bounded by the range above.
        lows = np.array(
            [np.array([action_low]) for _ in range(self.agent_num)])
        highs = np.array(
            [np.array([action_high]) for _ in range(self.agent_num)])
        self.action_spaces = MABox(lows=lows, highs=highs)
        # Stateless game: each agent observes a single dummy state.
        self.observation_spaces = MADiscrete([1] * self.agent_num)
        self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
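
The reward function is not part of this snippet; in the standard Gaussian squeeze domain the joint reward depends on the summed allocation x = sum_i a_i through x * exp(-((x - mu) / sigma)**2), and the two (mu, sigma) pairs above suggest a multi-modal variant that adds one such term per mode. A sketch under that assumption:

import numpy as np

def gaussian_squeeze_reward(actions, mus, sigmas):
    # Assumed reward form: one Gaussian mode per (mu, sigma) pair,
    # evaluated at the agents' summed allocation.
    x = np.sum(actions)
    return float(np.sum(x * np.exp(-((x - mus) / sigmas) ** 2)))

mus, sigmas = np.array([0., 400.]), np.array([100., 200.])
print(gaussian_squeeze_reward(np.full(100, 4.0), mus, sigmas))  # ~400.0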
Example #3
    def __init__(self,
                 agent_num,
                 game_name='pbeauty',
                 p=0.67,
                 reward_type='abs',
                 action_low=-1.,
                 action_high=1.):
        Serializable.quick_init(self, locals())
        self.agent_num = agent_num
        self.p = p
        self.game_name = game_name
        self.reward_type = reward_type
        self.action_range = [action_low, action_high]
        lows = np.array(
            [np.array([action_low]) for _ in range(self.agent_num)])
        highs = np.array(
            [np.array([action_high]) for _ in range(self.agent_num)])
        self.action_spaces = MABox(lows=lows, highs=highs)
        self.observation_spaces = MADiscrete([1] * self.agent_num)
        self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
        self.t = 0
        self.rewards = np.zeros((self.agent_num, ))
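
reward_type='abs' suggests the usual p-beauty contest scoring: each agent is penalized by its absolute distance to p times the mean action. The step() logic is not shown here, so this is an assumption-based sketch:

import numpy as np

def pbeauty_rewards(actions, p=0.67):
    # Assumed 'abs' reward: negative distance to the target p * mean(actions).
    target = p * np.mean(actions)
    return -np.abs(actions - target)

print(pbeauty_rewards(np.array([0.1, -0.2, 0.4])))  # ~[-0.033 -0.267 -0.333]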
Example #4
    def __init__(self, game_name, agent_num, action_low=-10, action_high=10):
        Serializable.quick_init(self, locals())
        self.game = game_name
        self.agent_num = agent_num
        # self.action_num = action_num
        self.action_range = [action_low, action_high]
        lows = np.array(
            [np.array([action_low]) for _ in range(self.agent_num)])
        highs = np.array(
            [np.array([action_high]) for _ in range(self.agent_num)])
        self.action_spaces = MABox(lows=lows, highs=highs)
        self.observation_spaces = MADiscrete([1] * self.agent_num)
        self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
        self.t = 0
        self.numplots = 0
        self.payoff = {}

        if self.game == 'zero_sum':
            assert self.agent_num == 2
            self.payoff[0] = lambda a1, a2: a1 * a2
            self.payoff[1] = lambda a1, a2: -a1 * a2
        elif self.game == 'trigonometric':
            assert self.agent_num == 2
            self.payoff[0] = lambda a1, a2: np.cos(a2) * a1
            self.payoff[1] = lambda a1, a2: np.sin(a1) * a2
        elif self.game == 'matching_pennies':
            assert self.agent_num == 2
            # Zero-sum: agent 1's payoff is the negative of agent 0's.
            self.payoff[0] = lambda a1, a2: (a1 - 0.5) * (a2 - 0.5)
            self.payoff[1] = lambda a1, a2: -(a1 - 0.5) * (a2 - 0.5)
        elif self.game == 'rotational':
            assert self.agent_num == 2
            self.payoff[0] = lambda a1, a2: 0.5 * a1 * a1 + 10 * a1 * a2
            self.payoff[1] = lambda a1, a2: 0.5 * a2 * a2 - 10 * a1 * a2
        elif self.game == 'wolf':
            assert self.agent_num == 2

            def V(alpha, beta, payoff):
                # Expected payoff of the 2x2 matrix game when the players
                # choose action 0 with probabilities alpha and beta
                # (bilinear interpolation of the payoff table).
                u = (payoff[(0, 0)] - payoff[(0, 1)]
                     - payoff[(1, 0)] + payoff[(1, 1)])
                return (alpha * beta * u
                        + alpha * (payoff[(0, 1)] - payoff[(1, 1)])
                        + beta * (payoff[(1, 0)] - payoff[(1, 1)])
                        + payoff[(1, 1)])

            payoff_0 = np.array([[0, 3], [1, 2]])
            payoff_1 = np.array([[3, 2], [0, 1]])

            self.payoff[0] = lambda a1, a2: V(a1, a2, payoff_0)
            self.payoff[1] = lambda a1, a2: V(a1, a2, payoff_1)

        elif self.game == 'ma_softq':
            assert self.agent_num == 2
            # Reward surface with two Gaussian modes: a local optimum of
            # height 0 at (x1, y1) = (-5, -5) and a global optimum of
            # height c = 10 at (x2, y2) = (5, 5).
            h1 = 0.8
            h2 = 1.
            s1 = 3.
            s2 = 1.
            x1 = -5.
            x2 = 5.
            y1 = -5.
            y2 = 5.
            c = 10.

            def max_f(a1, a2):
                f1 = h1 * (-(np.square(a1 - x1) / s1) -
                           (np.square(a2 - y1) / s1))
                f2 = h2 * (-(np.square(a1 - x2) / s2) -
                           (np.square(a2 - y2) / s2)) + c
                return max(f1, f2)

            # Fully cooperative: both agents receive the same payoff.
            self.payoff[0] = max_f
            self.payoff[1] = max_f
        self.rewards = np.zeros((self.agent_num, ))
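
A quick standalone sanity check of V above: since it is the bilinear interpolation of the 2x2 table, pure strategies (alpha, beta in {0, 1}) must reproduce the matrix entries exactly:

import numpy as np

def V(alpha, beta, payoff):
    u = payoff[0, 0] - payoff[0, 1] - payoff[1, 0] + payoff[1, 1]
    return (alpha * beta * u + alpha * (payoff[0, 1] - payoff[1, 1])
            + beta * (payoff[1, 0] - payoff[1, 1]) + payoff[1, 1])

payoff_0 = np.array([[0, 3], [1, 2]])
assert V(1, 1, payoff_0) == payoff_0[0, 0]  # both play action 0
assert V(1, 0, payoff_0) == payoff_0[0, 1]
assert V(0, 1, payoff_0) == payoff_0[1, 0]
assert V(0, 0, payoff_0) == payoff_0[1, 1]
print(V(0.5, 0.5, payoff_0))  # 1.5, the value under uniform mixing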
Example #5
    def __init__(self,
                 game,
                 agent_num,
                 action_num,
                 payoff=None,
                 repeated=False,
                 max_step=25,
                 memory=0,
                 discrete_action=True,
                 tuple_obs=True):
        self.game = game
        self.agent_num = agent_num
        self.action_num = action_num
        self.discrete_action = discrete_action
        self.tuple_obs = tuple_obs
        # self.action_range
        # self.action_space = np.array([range(action_num)] * self.agent_num)
        # self.state_space = np.array([range(1)] * self.agent_num)
        if self.discrete_action:
            self.action_spaces = MADiscrete([action_num] * self.agent_num)
            if memory == 0:
                self.observation_spaces = MADiscrete([1] * self.agent_num)
            elif memory == 1:
                self.observation_spaces = MADiscrete([5] * self.agent_num)
        else:
            self.action_range = [-1., 1.]
            lows = np.array([np.array([-1.]) for _ in range(self.agent_num)])
            highs = np.array([np.array([1.]) for _ in range(self.agent_num)])
            self.action_spaces = MABox(lows=lows, highs=highs)
            if memory == 0:
                self.observation_spaces = MADiscrete([1] * self.agent_num)
            elif memory == 1:
                lows = np.array(
                    [np.array([-1., -1.]) for _ in range(self.agent_num)])
                highs = np.array(
                    [np.array([1., 1.]) for _ in range(self.agent_num)])
                self.observation_spaces = MABox(lows=lows, highs=highs)

        self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)

        self.t = 0
        self.repeated = repeated
        self.max_step = max_step
        self.memory = memory
        self.previous_action = 0
        self.previous_actions = []
        self.ep_rewards = np.zeros(2)

        if payoff is not None:
            payoff = np.array(payoff)
            assert payoff.shape == tuple([agent_num] +
                                         [action_num] * agent_num)
            self.payoff = payoff
        else:
            self.payoff = np.zeros(
                tuple([agent_num] + [action_num] * agent_num))

        if game == 'coordination_0_0':
            assert self.agent_num == 2
            assert self.action_num == 2
            self.payoff[0] = [[1, -1], [-1, -1]]
            self.payoff[1] = [[1, -1], [-1, -1]]

        elif game == 'coordination_same_action_with_preference':
            assert self.agent_num == 2
            assert self.action_num == 2
            self.payoff[0] = [[2, 0], [0, 1]]
            self.payoff[1] = [[1, 0], [0, 2]]

        # Payoff table of a zero-sum game; Nash equilibrium:
        # (Agent 1's action = 0, Agent 2's action = 1).
        elif game == 'zero_sum_nash_0_1':
            assert self.agent_num == 2
            assert self.action_num == 2
            self.payoff[0] = [[5, 2], [-1, 6]]
            self.payoff[1] = [[-5, -2], [1, -6]]


        # Payoff table of a zero-sum game: matching pennies.
        elif game == 'matching_pennies':
            assert self.agent_num == 2
            assert self.action_num == 2
            self.payoff[0] = [[1, -1], [-1, 1]]
            self.payoff[1] = [[-1, 1], [1, -1]]

        # elif game == 'matching_pennies_3':
        #     assert self.agent_num == 3
        #     assert self.action_num == 2
        #     self.payoff[0]=[
        #                     [ [1,-1],
        #                       [-1,1] ],
        #                     [ [1, -1],
        #                      [-1, 1]]
        #                     ]
        #     self.payoff[1]=[
        #                     [ [1,-1],
        #                       [1,-1] ],
        #                     [[-1, 1],
        #                      [-1, 1]]
        #                     ]
        #     self.payoff[2] = [
        #                     [[-1, -1],
        #                      [1, 1]],
        #                     [[1, 1],
        #                      [-1, -1]]
        #                     ]

        elif game == 'prison_lola':
            assert self.agent_num == 2
            assert self.action_num == 2
            self.payoff[0] = [[-1, -3], [0, -2]]
            self.payoff[1] = [[-1, 0], [-3, -2]]

        elif game == 'prison':
            assert self.agent_num == 2
            assert self.action_num == 2
            self.payoff[0] = [[3, 1], [4, 2]]
            self.payoff[1] = [[3, 4], [1, 2]]

        elif game == 'stag_hunt':
            assert self.agent_num == 2
            assert self.action_num == 2
            self.payoff[0] = [[4, 1], [3, 2]]
            self.payoff[1] = [[4, 3], [1, 2]]

        elif game == 'chicken':  # snowdrift
            assert self.agent_num == 2
            assert self.action_num == 2
            self.payoff[0] = [[3, 2], [4, 1]]
            self.payoff[1] = [[3, 4], [2, 1]]

        elif game == 'harmony':
            assert self.agent_num == 2
            assert self.action_num == 2
            self.payoff[0] = [[4, 3], [2, 1]]
            self.payoff[1] = [[4, 2], [3, 1]]

        elif game == 'wolf_05_05':
            assert self.agent_num == 2
            assert self.action_num == 2
            self.payoff[0] = [[0, 3], [1, 2]]
            self.payoff[1] = [[3, 2], [0, 1]]
            # alpha, beta = 0, 0.9; the Nash equilibrium is (0.5, 0.5).
            # Given fixed Q tables, maintain a best response to learn a
            # Nash equilibrium.

        elif game == 'climbing':
            assert self.agent_num == 2
            assert self.action_num == 3
            self.payoff[0] = [[11, -30, 0], [-30, 7, 6], [0, 0, 5]]
            self.payoff[1] = [[11, -30, 0], [-30, 7, 6], [0, 0, 5]]
        elif game == 'penalty':
            assert self.agent_num == 2
            assert self.action_num == 3
            self.payoff[0] = [[10, 0, 0], [0, 2, 0], [0, 0, 10]]
            self.payoff[1] = [[10, 0, 0], [0, 2, 0], [0, 0, 10]]
        # elif game == 'rock_paper_scissors':
        #     assert self.agent_num == 2
        #     assert self.action_num == 3
        #     self.payoff[0] = [[0, -1, 1],
        #                       [1, 0, -1],
        #                       [-1, 1, 0]
        #                       ]
        #     self.payoff[1] = [[0, 1, -1],
        #                       [-1, 0, 1],
        #                       [1, -1, 0]
        #                       ]

        self.rewards = np.zeros((self.agent_num, ))
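
The step logic is not included in this snippet, but given the (agent_num, action_num, ..., action_num) shape asserted for self.payoff, the per-agent rewards for a joint action are presumably looked up by indexing each agent's table with the joint action. A hypothetical helper illustrating the lookup:

import numpy as np

payoff = np.zeros((2, 2, 2))
payoff[0] = [[1, -1], [-1, 1]]   # matching_pennies, agent 0
payoff[1] = [[-1, 1], [1, -1]]   # matching_pennies, agent 1

def rewards_for(joint_action, payoff):
    # Index agent i's payoff table with the joint action tuple.
    return np.array([payoff[i][tuple(joint_action)]
                     for i in range(payoff.shape[0])])

print(rewards_for((0, 1), payoff))  # [-1.  1.]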