Example #1
    def __init__(self,
                 agent_num,
                 mus=[0., 400.],
                 sigmas=[100., 200.],
                 action_low=0,
                 action_high=10):
        Serializable.quick_init(self, locals())
        self.game_name = 'gaussian_squeeze'
        self.mus = np.array(mus)
        self.sigmas = np.array(sigmas)
        self.agent_num = agent_num
        self.action_range = [action_low, action_high]
        lows = np.array(
            [np.array([action_low]) for _ in range(self.agent_num)])
        highs = np.array(
            [np.array([action_high]) for _ in range(self.agent_num)])
        self.action_spaces = MABox(lows=lows, highs=highs)
        self.observation_spaces = MADiscrete([1] * self.agent_num)
        self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
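
The constructor above only sets up the spaces; the reward rule is not shown. As a rough, hedged sketch, the standard Gaussian squeeze reward is x * exp(-((x - mu) / sigma)^2) over the summed actions, with one term per (mu, sigma) mode; the formula below is an assumption about the unshown step(), not taken from the snippet.

import numpy as np

def gaussian_squeeze_reward(actions, mus, sigmas):
    # Assumed reward (not from the snippet): sum the agents' actions, then
    # score the total against each Gaussian mode defined by mus/sigmas.
    x = np.sum(actions)
    return np.sum(x * np.exp(-np.square((x - mus) / sigmas)))

mus, sigmas = np.array([0., 400.]), np.array([100., 200.])
print(gaussian_squeeze_reward(np.full(100, 4.0), mus, sigmas))  # ~400, sitting on the second mode
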
Example #2
    def __init__(self, game_name, agent_num, action_num=12):
        Serializable.quick_init(self, locals())
        self.game = game_name
        self.agent_num = agent_num
        self.action_num = action_num
        self.action_spaces = MADiscrete([action_num] * self.agent_num)
        self.observation_spaces = MADiscrete([1] * self.agent_num)
        self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
        self.t = 0
        self.numplots = 0
        self.payoff = {}

        if self.game == 'lemonade':
            assert self.agent_num == 3
            def get_distance(a_n, i):
                assert len(a_n) == 3
                a_n_i = np.copy(a_n)
                a_n_i[0], a_n_i[i] = a_n_i[i], a_n_i[0]
                return np.abs(a_n_i[0] - a_n_i[1]) + np.abs(a_n_i[0] - a_n_i[2])
            self.payoff = lambda a_n, i: get_distance(a_n, i)
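
The lemonade payoff can be checked on its own; a minimal standalone run of the distance rule defined above (plain numpy, no environment needed):

import numpy as np

def get_distance(a_n, i):
    # Swap agent i into slot 0, then sum its absolute distances to the other two agents.
    a_n_i = np.copy(a_n)
    a_n_i[0], a_n_i[i] = a_n_i[i], a_n_i[0]
    return np.abs(a_n_i[0] - a_n_i[1]) + np.abs(a_n_i[0] - a_n_i[2])

actions = np.array([2, 7, 11])
print([int(get_distance(actions, i)) for i in range(3)])  # [14, 9, 13]
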
Example #3
    def __init__(self,
                 agent_num,
                 game_name='pbeauty',
                 p=0.67,
                 reward_type='abs',
                 action_low=-1.,
                 action_high=1.):
        Serializable.quick_init(self, locals())
        self.agent_num = agent_num
        self.p = p
        self.game_name = game_name
        self.reward_type = reward_type
        self.action_range = [action_low, action_high]
        lows = np.array(
            [np.array([action_low]) for _ in range(self.agent_num)])
        highs = np.array(
            [np.array([action_high]) for _ in range(self.agent_num)])
        self.action_spaces = MABox(lows=lows, highs=highs)
        self.observation_spaces = MADiscrete([1] * self.agent_num)
        self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
        self.t = 0
        self.rewards = np.zeros((self.agent_num, ))
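
As with the Gaussian squeeze example, the reward is computed elsewhere. For reference, in a p-beauty contest the target is p times the mean of the submitted actions, and reward_type='abs' suggests an absolute-distance penalty; a sketch of that reading (an assumption about the unshown step(), not confirmed by the snippet):

import numpy as np

def pbeauty_rewards(actions, p=0.67):
    # Assumed reward (not from the snippet): negative distance to p * mean(actions).
    target = p * np.mean(actions)
    return -np.abs(actions - target)

print(pbeauty_rewards(np.array([0.1, -0.4, 0.6])))
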
Example #4
    def __init__(self, game_name, agent_num, action_low=-10, action_high=10):
        Serializable.quick_init(self, locals())
        self.game = game_name
        self.agent_num = agent_num
        # self.action_num = action_num
        self.action_range = [action_low, action_high]
        lows = np.array(
            [np.array([action_low]) for _ in range(self.agent_num)])
        highs = np.array(
            [np.array([action_high]) for _ in range(self.agent_num)])
        self.action_spaces = MABox(lows=lows, highs=highs)
        self.observation_spaces = MADiscrete([1] * self.agent_num)
        self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
        self.t = 0
        self.numplots = 0
        self.payoff = {}

        if self.game == 'zero_sum':
            assert self.agent_num == 2
            self.payoff[0] = lambda a1, a2: a1 * a2
            self.payoff[1] = lambda a1, a2: -a1 * a2
        elif self.game == 'trigonometric':
            assert self.agent_num == 2
            self.payoff[0] = lambda a1, a2: np.cos(a2) * a1
            self.payoff[1] = lambda a1, a2: np.sin(a1) * a2
        elif self.game == 'mataching_pennies':
            assert self.agent_num == 2
            self.payoff[0] = lambda a1, a2: (a1 - 0.5) * (a2 - 0.5)
            self.payoff[1] = lambda a1, a2: (a1 - 0.5) * (a2 - 0.5)
        elif self.game == 'rotational':
            assert self.agent_num == 2
            self.payoff[0] = lambda a1, a2: 0.5 * a1 * a1 + 10 * a1 * a2
            self.payoff[1] = lambda a1, a2: 0.5 * a2 * a2 - 10 * a1 * a2
        elif self.game == 'wolf':
            assert self.agent_num == 2

            def V(alpha, beta, payoff):
                u = (payoff[(0, 0)] - payoff[(0, 1)]
                     - payoff[(1, 0)] + payoff[(1, 1)])
                return (alpha * beta * u
                        + alpha * (payoff[(0, 1)] - payoff[(1, 1)])
                        + beta * (payoff[(1, 0)] - payoff[(1, 1)])
                        + payoff[(1, 1)])

            payoff_0 = np.array([[0, 3], [1, 2]])
            payoff_1 = np.array([[3, 2], [0, 1]])

            self.payoff[0] = lambda a1, a2: V(a1, a2, payoff_0)
            self.payoff[1] = lambda a1, a2: V(a1, a2, payoff_1)

        elif self.game == 'ma_softq':
            assert self.agent_num == 2
            h1 = 0.8
            h2 = 1.
            s1 = 3.
            s2 = 1.
            x1 = -5.
            x2 = 5.
            y1 = -5.
            y2 = 5.
            c = 10.

            def max_f(a1, a2):
                f1 = h1 * (-(np.square(a1 - x1) / s1) -
                           (np.square(a2 - y1) / s1))
                f2 = h2 * (-(np.square(a1 - x2) / s2) -
                           (np.square(a2 - y2) / s2)) + c
                return max(f1, f2)

            self.payoff[0] = lambda a1, a2: max_f(a1, a2)
            self.payoff[1] = lambda a1, a2: max_f(a1, a2)
        self.rewards = np.zeros((self.agent_num, ))
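
In the 'wolf' branch, V(alpha, beta, payoff) is algebraically the expected payoff of the 2x2 game when action 0 is played with probability alpha by the first agent and beta by the second; a quick standalone check against the direct expectation:

import numpy as np

def V(alpha, beta, payoff):
    # Same formula as in the 'wolf' branch above, reformatted for readability.
    u = payoff[(0, 0)] - payoff[(0, 1)] - payoff[(1, 0)] + payoff[(1, 1)]
    return (alpha * beta * u
            + alpha * (payoff[(0, 1)] - payoff[(1, 1)])
            + beta * (payoff[(1, 0)] - payoff[(1, 1)])
            + payoff[(1, 1)])

payoff_0 = np.array([[0, 3], [1, 2]])
alpha, beta = 0.5, 0.5
# Probability of each pure outcome under the mixed strategies (alpha, beta).
probs = np.array([[alpha * beta, alpha * (1 - beta)],
                  [(1 - alpha) * beta, (1 - alpha) * (1 - beta)]])
assert np.isclose(V(alpha, beta, payoff_0), np.sum(probs * payoff_0))  # both equal 1.5
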
Example #5
    def __init__(self,
                 world,
                 reset_callback=None,
                 reward_callback=None,
                 observation_callback=None,
                 info_callback=None,
                 done_callback=None,
                 shared_viewer=True):

        self.world = world
        self.agents = self.world.policy_agents
        # set required vectorized gym env property
        self.n = len(world.policy_agents)
        # scenario callbacks
        self.reset_callback = reset_callback
        self.reward_callback = reward_callback
        self.observation_callback = observation_callback
        self.info_callback = info_callback
        self.done_callback = done_callback
        # environment parameters
        self.discrete_action_space = True
        # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector
        self.discrete_action_input = False
        # if true, even if the action is continuous, it will be performed discretely
        self.force_discrete_action = world.discrete_action if hasattr(
            world, 'discrete_action') else False
        # if true, every agent has the same reward
        self.shared_reward = world.collaborative if hasattr(
            world, 'collaborative') else False
        self.time = 0

        self.agent_num = self.n
        # self.action_num = action_num
        # self.action_range = [action_low, action_high]
        # lows = np.array([np.array([action_low]) for _ in range(self.agent_num)])
        # highs = np.array([np.array([action_high]) for _ in range(self.agent_num)])

        obs_shapes = []

        # configure spaces
        self.action_space = []
        self.observation_space = []
        for agent in self.agents:
            total_action_space = []
            # physical action space
            if self.discrete_action_space:
                u_action_space = spaces.Discrete(world.dim_p * 2 + 1)
            else:
                u_action_space = spaces.Box(low=-agent.u_range,
                                            high=+agent.u_range,
                                            shape=(world.dim_p, ),
                                            dtype=np.float32)
            if agent.movable:
                total_action_space.append(u_action_space)

            # communication action space
            if self.discrete_action_space:
                c_action_space = spaces.Discrete(world.dim_c)
            else:
                c_action_space = spaces.Box(low=0.0,
                                            high=1.0,
                                            shape=(world.dim_c, ),
                                            dtype=np.float32)
            if not agent.silent:
                total_action_space.append(c_action_space)
            # total action space
            if len(total_action_space) > 1:
                # all action spaces are discrete, so simplify to MultiDiscrete action space
                if all([
                        isinstance(act_space, spaces.Discrete)
                        for act_space in total_action_space
                ]):
                    act_space = MultiDiscrete(
                        [[0, act_space.n - 1]
                         for act_space in total_action_space])
                else:
                    act_space = spaces.Tuple(total_action_space)
                self.action_space.append(act_space)
            else:
                self.action_space.append(total_action_space[0])
            # observation space
            obs_dim = len(observation_callback(agent, self.world))
            obs_shapes.append((obs_dim, ))
            self.observation_space.append(
                spaces.Box(low=-np.inf,
                           high=+np.inf,
                           shape=(obs_dim, ),
                           dtype=np.float32))
            agent.action.c = np.zeros(self.world.dim_c)

        # simplified for a non-communication game
        self.action_spaces = MABox(lows=[0] * self.agent_num,
                                   highs=[1] * self.agent_num,
                                   shapes=[(world.dim_p * 2 + 1, )] *
                                   self.agent_num)
        self.observation_spaces = MABox(lows=[-np.inf] * self.agent_num,
                                        highs=[+np.inf] * self.agent_num,
                                        shapes=obs_shapes)

        self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
        self.action_range = [-10, 10]

        # rendering
        self.shared_viewer = shared_viewer
        if self.shared_viewer:
            self.viewers = [None]
        else:
            self.viewers = [None] * self.n
        self._reset_render()
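
A small standalone illustration of the action-space merging rule above, using gym.spaces.MultiDiscrete (note the snippet's own MultiDiscrete helper takes [low, high] ranges, whereas gym's takes per-dimension sizes; this sketch only mirrors the branching logic):

from gym import spaces

# Two discrete sub-spaces (movement + communication) collapse into one MultiDiscrete;
# a mix of discrete and continuous sub-spaces would fall back to a Tuple instead.
total_action_space = [spaces.Discrete(5), spaces.Discrete(3)]
if all(isinstance(s, spaces.Discrete) for s in total_action_space):
    act_space = spaces.MultiDiscrete([s.n for s in total_action_space])
else:
    act_space = spaces.Tuple(total_action_space)
print(act_space)  # MultiDiscrete([5 3])
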
Example #6
    def __init__(self,
                 game,
                 agent_num,
                 action_num,
                 payoff=None,
                 repeated=False,
                 max_step=25,
                 memory=0,
                 discrete_action=True,
                 tuple_obs=True):
        self.game = game
        self.agent_num = agent_num
        self.action_num = action_num
        self.discrete_action = discrete_action
        self.tuple_obs = tuple_obs
        # self.action_range
        # self.action_space = np.array([range(action_num)] * self.agent_num)
        # self.state_space = np.array([range(1)] * self.agent_num)
        if self.discrete_action:
            self.action_spaces = MADiscrete([action_num] * self.agent_num)
            if memory == 0:
                self.observation_spaces = MADiscrete([1] * self.agent_num)
            elif memory == 1:
                self.observation_spaces = MADiscrete([5] * self.agent_num)
        else:
            self.action_range = [-1., 1.]
            lows = np.array([np.array([-1.]) for _ in range(self.agent_num)])
            highs = np.array([np.array([1.]) for _ in range(self.agent_num)])
            self.action_spaces = MABox(lows=lows, highs=highs)
            if memory == 0:
                self.observation_spaces = MADiscrete([1] * self.agent_num)
            elif memory == 1:
                lows = np.array(
                    [np.array([-1., -1.]) for _ in range(self.agent_num)])
                highs = np.array(
                    [np.array([1., 1.]) for _ in range(self.agent_num)])
                self.observation_spaces = MABox(lows=lows, highs=highs)

        self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)

        self.t = 0
        self.repeated = repeated
        self.max_step = max_step
        self.memory = memory
        self.previous_action = 0
        self.previous_actions = []
        self.ep_rewards = np.zeros(2)

        if payoff is not None:
            payoff = np.array(payoff)
            assert payoff.shape == tuple([agent_num] +
                                         [action_num] * agent_num)
            self.payoff = payoff
        if payoff is None:
            self.payoff = np.zeros(
                tuple([agent_num] + [action_num] * agent_num))

        if game == 'coordination_0_0':
            assert self.agent_num == 2
            assert self.action_num == 2
            self.payoff[0] = [[1, -1], [-1, -1]]
            self.payoff[1] = [[1, -1], [-1, -1]]

        if game == 'coordination_same_action_with_preference':
            assert self.agent_num == 2
            assert self.action_num == 2
            self.payoff[0] = [[2, 0], [0, 1]]
            self.payoff[1] = [[1, 0], [0, 2]]

        # payoff table for the zero-sum game scenario; Nash equilibrium: (Agent 1's action = 0, Agent 2's action = 1)
        elif game == 'zero_sum_nash_0_1':
            assert self.agent_num == 2
            assert self.action_num == 2
            self.payoff[0] = [[5, 2], [-1, 6]]
            self.payoff[1] = [[-5, -2], [1, -6]]


        # payoff table for the zero-sum matching pennies game
        elif game == 'matching_pennies':
            assert self.agent_num == 2
            assert self.action_num == 2
            self.payoff[0] = [[1, -1], [-1, 1]]
            self.payoff[1] = [[-1, 1], [1, -1]]

        # elif game == 'matching_pennies_3':
        #     assert self.agent_num == 3
        #     assert self.action_num == 2
        #     self.payoff[0]=[
        #                     [ [1,-1],
        #                       [-1,1] ],
        #                     [ [1, -1],
        #                      [-1, 1]]
        #                     ]
        #     self.payoff[1]=[
        #                     [ [1,-1],
        #                       [1,-1] ],
        #                     [[-1, 1],
        #                      [-1, 1]]
        #                     ]
        #     self.payoff[2] = [
        #                     [[-1, -1],
        #                      [1, 1]],
        #                     [[1, 1],
        #                      [-1, -1]]
        #                     ]

        elif game == 'prison_lola':
            assert self.agent_num == 2
            assert self.action_num == 2
            self.payoff[0] = [[-1, -3], [0, -2]]
            self.payoff[1] = [[-1, 0], [-3, -2]]

        elif game == 'prison':
            assert self.agent_num == 2
            assert self.action_num == 2
            self.payoff[0] = [[3, 1], [4, 2]]
            self.payoff[1] = [[3, 4], [1, 2]]

        elif game == 'stag_hunt':
            assert self.agent_num == 2
            assert self.action_num == 2
            self.payoff[0] = [[4, 1], [3, 2]]
            self.payoff[1] = [[4, 3], [1, 2]]

        elif game == 'chicken':  # snowdrift
            assert self.agent_num == 2
            assert self.action_num == 2
            self.payoff[0] = [[3, 2], [4, 1]]
            self.payoff[1] = [[3, 4], [2, 1]]

        elif game == 'harmony':
            assert self.agent_num == 2
            assert self.action_num == 2
            self.payoff[0] = [[4, 3], [2, 1]]
            self.payoff[1] = [[4, 2], [3, 1]]

        elif game == 'wolf_05_05':
            assert self.agent_num == 2
            assert self.action_num == 2
            self.payoff[0] = [[0, 3], [1, 2]]
            self.payoff[1] = [[3, 2], [0, 1]]
            # alpha, beta = 0, 0.9; the Nash equilibrium is (0.5, 0.5)
            # Q tables given, maintain the best response, learn a Nash equilibrium.

        elif game == 'climbing':
            assert self.agent_num == 2
            assert self.action_num == 3
            self.payoff[0] = [[11, -30, 0], [-30, 7, 6], [0, 0, 5]]
            self.payoff[1] = [[11, -30, 0], [-30, 7, 6], [0, 0, 5]]
        elif game == 'penalty':
            assert self.agent_num == 2
            assert self.action_num == 3
            self.payoff[0] = [[10, 0, 0], [0, 2, 0], [0, 0, 10]]
            self.payoff[1] = [[10, 0, 0], [0, 2, 0], [0, 0, 10]]
        # elif game == 'rock_paper_scissors':
        #     assert self.agent_num == 2
        #     assert self.action_num == 3
        #     self.payoff[0] = [[0, -1, 1],
        #                       [1, 0, -1],
        #                       [-1, 1, 0]
        #                       ]
        #     self.payoff[1] = [[0, 1, -1],
        #                       [-1, 0, 1],
        #                       [1, -1, 0]
        #                       ]

        self.rewards = np.zeros((self.agent_num, ))
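
A custom payoff passed to this constructor must satisfy the shape assertion above: one leading axis for the agent index, then one axis per agent's action. A minimal check reusing the 'prison_lola' numbers (the payoff[agent][a0][a1] indexing order is inferred from that assertion, not spelled out in the snippet):

import numpy as np

agent_num, action_num = 2, 2
payoff = np.array([[[-1, -3], [0, -2]],   # agent 0's payoff matrix
                   [[-1, 0], [-3, -2]]])  # agent 1's payoff matrix
assert payoff.shape == tuple([agent_num] + [action_num] * agent_num)  # (2, 2, 2)
print(payoff[:, 0, 1])  # both agents' rewards when the joint action is (0, 1): [-3  0]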