Example #1
    def __init__(
        self,
        agent_num,
        game_name="pbeauty",
        p=0.67,
        reward_type="abs",
        action_range=(-1.0, 1.0),
    ):
        self.agent_num = agent_num
        self.p = p
        self.game_name = game_name
        self.reward_type = reward_type
        self.action_range = action_range
        self.action_spaces = MASpace(
            tuple(Box(low=-1.0, high=1.0, shape=(1,)) for _ in range(self.agent_num))
        )
        self.observation_spaces = MASpace(
            tuple(Discrete(1) for _ in range(self.agent_num))
        )
        self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
        self.t = 0
        self.rewards = np.zeros((self.agent_num,))

        if self.game_name not in PBeautyGame.get_game_list():
            raise EnvironmentNotFound(f"The game {self.game_name} doesn't exist")

        if self.game_name == "pbeauty":
            if (
                self.reward_type
                not in PBeautyGame.get_game_list()[self.game_name]["reward_type"]
            ):
                raise RewardTypeNotFound(
                    f"The reward type {self.reward_type} doesn't exist"
                )
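
A minimal usage sketch for the constructor above (a sketch only: it assumes PBeautyGame, MASpace, and MAEnvSpec are importable from the surrounding malib-style package, and the argument values are purely illustrative):

    # Hedged sketch, not taken from the source.
    from malib.environments import PBeautyGame  # assumed import path

    env = PBeautyGame(agent_num=10, game_name="pbeauty", p=0.67, reward_type="abs")
    print(env.agent_num)      # 10
    print(env.action_spaces)  # MASpace of one Box(-1, 1, (1,)) per agent
    print(env.env_specs)      # MAEnvSpec pairing observation and action spaces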
Example #2
    def __init__(self):
        self.env = gym.make("LunarLanderContinuous-v2")

        self.agent_num = 2
        self.observation_spaces = MASpace(
            tuple(self.env.observation_space for _ in range(self.agent_num))
        )
        self.action_spaces = MASpace(
            tuple(Box(low=-1.0, high=1.0, shape=(1,)) for _ in range(self.agent_num))
        )
        self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
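
The wrapper above gives each of the two agents a Box(-1, 1, (1,)) action while LunarLanderContinuous-v2 itself expects a single 2-dimensional action; a hedged sketch of how the per-agent actions would presumably be combined (this step is not shown in the snippet):

    import numpy as np

    # Hedged sketch: one 1-D action per agent, concatenated into the joint
    # 2-D action that LunarLanderContinuous-v2 expects.
    actions = [np.array([0.5]), np.array([-0.2])]  # one entry per agent
    joint_action = np.concatenate(actions)         # shape (2,)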
Example #3
    def __init__(self, agent_num, height):
        self.agent_num = agent_num
        self.height = height
        obs_dim = height * 4 + 1
        self.action_range = [0, 1, 2]
        self.action_spaces = MASpace(
            tuple(
                Box(low=0, high=2, shape=(1, ), dtype=np.int32)
                for _ in range(self.agent_num)))
        self.observation_spaces = MASpace(
            tuple(
                Box(low=0, high=1, shape=(obs_dim, ), dtype=np.int32)
                for _ in range(self.agent_num)))
        self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
        self.grid = []
        self.busy_n = {}
Example #4
    def __init__(self,
                 env,
                 agent_num=None,
                 action_space=None,
                 observation_space=None):
        self.env = env
        self.action_space = action_space
        self.observation_space = observation_space
        self.agent_num = agent_num
        if self.agent_num is None:
            # Each check below overwrites the previous value, so when several of
            # these attributes exist, n_agents takes priority over n, which takes
            # priority over agent_num.
            if hasattr(self.env, "agent_num"):
                self.agent_num = self.env.agent_num
            if hasattr(self.env, "n"):
                self.agent_num = self.env.n
            if hasattr(self.env, "n_agents"):
                self.agent_num = self.env.n_agents
        # print('malib', self.agent_num, env.get_obs_size(), env, env.action_space, env.observation_sapce)
        self.action_space = self.action_spaces = MASpace(
            tuple(self.action_space for _ in range(self.agent_num)))
        if self.observation_space is None:
            obs_dim = self.env.get_obs_size()
            self.observation_spaces = MASpace(
                tuple(
                    gym.spaces.Box(low=-np.inf,
                                   high=+np.inf,
                                   shape=(obs_dim, ),
                                   dtype=np.float32)
                    for _ in range(self.agent_num)))
        else:
            self.observation_spaces = MASpace(
                tuple(self.observation_space for _ in range(self.agent_num)))
        # if hasattr(self.env, "action_space") and hasattr(self.env, "observation_sapce"):

        #     self.observation_spaces = MASpace(tuple(gym.spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim,), dtype=np.float32) for _ in range(self.agent_num)))
        # elif hasattr(self.env, "action_spaces") and hasattr(self.env, "observation_spaces"):
        #     self.action_spaces = self.env.action_spaces
        #     self.observation_spaces = self.env.observation_spaces

        self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
Example #5
    def __init__(self):
        self.agent_num = 1
        self.x = 0
        self.y = 50
        self.theta = 0
        self.v = 2
        self.w = 0
        self.stoch = 0
        self.t = 0

        obs_lows = np.array([0, 0, -1. * np.pi / 3., 2., -1.])
        obs_highs = np.array([50, 100, np.pi / 3., 5., 1.])
        self.observation_spaces = MASpace(
            tuple([Box(low=obs_lows, high=obs_highs)]))
        self.action_spaces = MASpace(
            tuple([Box(low=-1., high=1., shape=(2, ))]))

        self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
        self.avs = [0] * 10
        self.aws = [0] * 10

        # A list that records rewards obtained in the current episode, in sequential order.
        self.rewards = list()
Example #6
    def __init__(
        self,
        game_name,
        agent_num,
        action_num,
        payoff=None,
        repeated=False,
        max_step=25,
        memory=0,
        discrete_action=True,
        tuple_obs=True,
    ):
        self.game_name = game_name
        self.agent_num = agent_num
        self.action_num = action_num
        self.discrete_action = discrete_action
        self.tuple_obs = tuple_obs

        game_list = MatrixGame.get_game_list()

        if self.game_name not in game_list:
            raise EnvironmentNotFound(
                f"The game {self.game_name} doesn't exist")

        expt_num_agent = game_list[self.game_name]["agent_num"]
        expt_num_action = game_list[self.game_name]["action_num"]

        if expt_num_agent != self.agent_num:
            raise WrongNumberOfAgent(
                f"The number of agents required for {self.game_name} is {expt_num_agent}")

        if expt_num_action != self.action_num:
            raise WrongNumberOfAction(
                f"The number of actions required for {self.game_name} is {expt_num_action}")

        self.action_spaces = MASpace(
            tuple(
                Box(low=-1.0, high=1.0, shape=(1, ))
                for _ in range(self.agent_num)))
        self.observation_spaces = MASpace(
            tuple(Discrete(1) for _ in range(self.agent_num)))

        if self.discrete_action:
            self.action_spaces = MASpace(
                tuple(Discrete(action_num) for _ in range(self.agent_num)))
            if memory == 0:
                self.observation_spaces = MASpace(
                    tuple(Discrete(1) for _ in range(self.agent_num)))
            elif memory == 1:
                self.observation_spaces = MASpace(
                    tuple(Discrete(5) for _ in range(self.agent_num)))
        else:
            self.action_range = [-1.0, 1.0]
            self.action_spaces = MASpace(
                tuple(
                    Box(low=-1.0, high=1.0, shape=(1, ))
                    for _ in range(self.agent_num)))
            if memory == 0:
                self.observation_spaces = MASpace(
                    tuple(Discrete(1) for _ in range(self.agent_num)))
            elif memory == 1:
                self.observation_spaces = MASpace(
                    tuple(
                        Box(low=-1.0, high=1.0, shape=(12, ))
                        for _ in range(self.agent_num)))

        self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)

        self.t = 0
        self.repeated = repeated
        self.max_step = max_step
        self.memory = memory
        self.previous_action = 0
        self.previous_actions = []
        self.ep_rewards = np.zeros(2)

        if payoff is not None:
            payoff = np.array(payoff)
            assert payoff.shape == tuple([agent_num] +
                                         [action_num] * agent_num)
            self.payoff = payoff
        if payoff is None:
            self.payoff = np.zeros(
                tuple([agent_num] + [action_num] * agent_num))

        if self.game_name == "coordination_0_0":
            self.payoff[0] = [[1, -1], [-1, -1]]
            self.payoff[1] = [[1, -1], [-1, -1]]
        elif self.game_name == "coordination_same_action_with_preference":
            self.payoff[0] = [[2, 0], [0, 1]]
            self.payoff[1] = [[1, 0], [0, 2]]
        elif self.game_name == "zero_sum_nash_0_1":
            # payoff table of the zero-sum game scenario. Nash equilibrium: (Agent 1's action=0, Agent 2's action=1)
            self.payoff[0] = [[5, 2], [-1, 6]]
            self.payoff[1] = [[-5, -2], [1, -6]]
        elif self.game_name == "matching_pennies":
            # payoff table of the zero-sum game scenario: matching pennies
            self.payoff[0] = [[1, -1], [-1, 1]]
            self.payoff[1] = [[-1, 1], [1, -1]]
        elif self.game_name == "matching_pennies_3":
            self.payoff[0] = [[[1, -1], [-1, 1]], [[1, -1], [-1, 1]]]
            self.payoff[1] = [[[1, -1], [1, -1]], [[-1, 1], [-1, 1]]]
            self.payoff[2] = [[[-1, -1], [1, 1]], [[1, 1], [-1, -1]]]
        elif self.game_name == "prison_lola":
            self.payoff[0] = [[-1, -3], [0, -2]]
            self.payoff[1] = [[-1, 0], [-3, -2]]
        elif self.game_name == "prison":
            self.payoff[0] = [[3, 1], [4, 2]]
            self.payoff[1] = [[3, 4], [1, 2]]
        elif self.game_name == "stag_hunt":
            self.payoff[0] = [[4, 1], [3, 2]]
            self.payoff[1] = [[4, 3], [1, 2]]
        elif self.game_name == "chicken":  # snowdrift
            self.payoff[0] = [[3, 2], [4, 1]]
            self.payoff[1] = [[3, 4], [2, 1]]
        elif self.game_name == "harmony":
            self.payoff[0] = [[4, 3], [2, 1]]
            self.payoff[1] = [[4, 2], [3, 1]]
        elif self.game_name == "wolf_05_05":
            self.payoff[0] = [[0, 3], [1, 2]]
            self.payoff[1] = [[3, 2], [0, 1]]
            # \alpha, \beta = 0, 0.9, Nash is 0.5 0.5
            # Q tables given, maintain best response, learn a Nash equilibrium.
        elif self.game_name == "climbing":
            self.payoff[0] = [[11, -30, 0], [-30, 7, 6], [0, 0, 5]]
            self.payoff[1] = [[11, -30, 0], [-30, 7, 6], [0, 0, 5]]
        elif self.game_name == "penalty":
            self.payoff[0] = [[10, 0, 0], [0, 2, 0], [0, 0, 10]]
            self.payoff[1] = [[10, 0, 0], [0, 2, 0], [0, 0, 10]]
        elif self.game_name == "rock_paper_scissors":
            self.payoff[0] = [[0, -1, 1], [1, 0, -1], [-1, 1, 0]]
            self.payoff[1] = [[0, 1, -1], [-1, 0, 1], [1, -1, 0]]

        self.rewards = np.zeros((self.agent_num, ))
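
As the assert above implies, the payoff tensor has shape (agent_num,) + (action_num,) * agent_num; a small self-contained sketch of how per-agent rewards are read out of it for a joint discrete action, using the matching_pennies values from above:

    import numpy as np

    # Hedged sketch: indexing the payoff tensor of a 2-agent, 2-action game.
    payoff = np.zeros((2, 2, 2))       # (agent_num, action_num, action_num)
    payoff[0] = [[1, -1], [-1, 1]]     # matching_pennies, agent 0
    payoff[1] = [[-1, 1], [1, -1]]     # matching_pennies, agent 1

    joint_action = (0, 1)              # agent 0 plays 0, agent 1 plays 1
    rewards = np.array([payoff[i][joint_action] for i in range(2)])
    print(rewards)                     # [-1.  1.]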
Example #7
    def __init__(self,
                 game_name,
                 agent_num,
                 action_num,
                 state_num,
                 payoff=None,
                 transition=None):
        self.game_name = game_name
        self.agent_num = agent_num
        self.action_num = action_num
        self.state_num = state_num

        game_list = StochasticMatrixGame.get_game_list()

        if self.game_name not in game_list:
            raise EnvironmentNotFound(
                f"The game {self.game_name} doesn't exist")

        expt_num_agent = game_list[self.game_name]['agent_num']
        if expt_num_agent != self.agent_num:
            raise WrongNumberOfAgent(
                f"The number of agents required for {self.game_name} is {expt_num_agent}")

        expt_num_action = game_list[self.game_name]['action_num']
        if expt_num_action != self.action_num:
            raise WrongNumberOfAction(
                f"The number of actions required for {self.game_name} is {expt_num_action}")

        expt_num_state = game_list[self.game_name]['state_num']
        if expt_num_state != self.state_num:
            raise WrongNumberOfState(
                f"The number of states required for {self.game_name} is {expt_num_state}")

        self.action_spaces = MASpace(
            tuple(
                Box(low=-1., high=1., shape=(1, ))
                for _ in range(self.agent_num)))
        self.observation_spaces = MASpace(
            tuple(Discrete(1) for _ in range(self.agent_num)))
        self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)

        self.t = 0
        if payoff is not None:
            payoff = np.array(payoff)
            assert payoff.shape == tuple([state_num, agent_num] +
                                         [action_num] * agent_num)
            self.payoff = payoff
        if payoff is None:
            self.payoff = np.zeros(
                tuple([state_num, agent_num] + [action_num] * agent_num))

        if transition is None:
            self.transition = np.zeros(
                tuple([state_num] + [action_num] * agent_num + [state_num]))
        else:
            self.transition = np.array(transition)

        if self.game_name == 'PollutionTax':
            self.payoff[0][0] = [[4., 3.], [7., 6.]]
            self.payoff[0][1] = [[5., 8.], [4., 7.]]
            self.payoff[1][0] = [[1., 0.], [4., 3.]]
            self.payoff[1][1] = [[2., 5.], [1., 4.]]
            self.transition[0] = [[[1., 0.], [0., 1.]], [[0., 1.], [0., 1.]]]
            self.transition[1] = [[[1., 0.], [0., 1.]], [[0., 1.], [0., 1.]]]
        elif self.game_name == 'three_matrix_games':
            self.g1 = [[0., 3.], [2., -1.]]
            self.g2 = [[0., 1.], [4., 3.]]
            self.g = [['g1', 4.], [5., 'g2']]

        self.rewards = np.zeros((self.agent_num, ))
        self.state = 0
Example #8
    def __init__(
        self,
        world,
        reset_callback=None,
        reward_callback=None,
        observation_callback=None,
        info_callback=None,
        done_callback=None,
        shared_viewer=True,
    ):

        self.world = world
        self.agents = self.world.policy_agents
        # set required vectorized gym env property
        self.n = len(world.policy_agents)
        # scenario callbacks
        self.reset_callback = reset_callback
        self.reward_callback = reward_callback
        self.observation_callback = observation_callback
        self.info_callback = info_callback
        self.done_callback = done_callback
        # environment parameters
        self.discrete_action_space = True
        # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector
        self.discrete_action_input = False
        # if true, even the action is continuous, action will be performed discretely
        self.force_discrete_action = (world.discrete_action if hasattr(
            world, "discrete_action") else False)
        # if true, every agent has the same reward
        self.shared_reward = (world.collaborative if hasattr(
            world, "collaborative") else False)
        self.time = 0

        # configure spaces
        self.action_space = []
        self.observation_space = []
        obs_shapes = []
        self.agent_num = len(self.agents)
        for agent in self.agents:
            total_action_space = []
            # physical action space
            if self.discrete_action_space:
                u_action_space = spaces.Discrete(world.dim_p * 2 + 1)
            else:
                u_action_space = spaces.Box(
                    low=-agent.u_range,
                    high=+agent.u_range,
                    shape=(world.dim_p, ),
                    dtype=np.float32,
                )
            if agent.movable:
                total_action_space.append(u_action_space)
            # communication action space
            if self.discrete_action_space:
                c_action_space = spaces.Discrete(world.dim_c)
            else:
                c_action_space = spaces.Box(low=0.0,
                                            high=1.0,
                                            shape=(world.dim_c, ),
                                            dtype=np.float32)
            if not agent.silent:
                total_action_space.append(c_action_space)
            # total action space
            if len(total_action_space) > 1:
                # all action spaces are discrete, so simplify to MultiDiscrete action space
                if all([
                        isinstance(act_space, spaces.Discrete)
                        for act_space in total_action_space
                ]):
                    act_space = MultiDiscrete(
                        [[0, act_space.n - 1]
                         for act_space in total_action_space])
                else:
                    act_space = spaces.Tuple(total_action_space)
                self.action_space.append(act_space)
            else:
                self.action_space.append(total_action_space[0])
            # observation space
            obs_dim = len(observation_callback(agent, self.world))
            obs_shapes.append((obs_dim, ))
            self.observation_space.append(
                spaces.Box(low=-np.inf,
                           high=+np.inf,
                           shape=(obs_dim, ),
                           dtype=np.float32))
            agent.action.c = np.zeros(self.world.dim_c)
        # simplified for non-comm game

        # self.action_spaces = MASpace(tuple(Box(low=-1., high=1., shape=(1,)) for _ in range(self.agent_num)))
        # self.observation_spaces = MASpace(tuple(Discrete(1) for _ in range(self.agent_num)))

        self.action_spaces = MASpace(
            tuple(
                Box(low=0.0, high=1.0, shape=(world.dim_p * 2 + 1, ))
                for _ in range(self.agent_num)))
        # print(obs_shapes)
        self.observation_spaces = MASpace(
            tuple(
                Box(low=-np.inf, high=+np.inf, shape=obs_shape)
                for obs_shape in obs_shapes))

        self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
        self.action_range = [0.0, 1.0]
        # rendering
        self.shared_viewer = shared_viewer
        if self.shared_viewer:
            self.viewers = [None]
        else:
            self.viewers = [None] * self.n
        self._reset_render()
Example #9
    def __init__(self, game_name, agent_num, action_range=(-10, 10)):
        self.game_name = game_name
        self.agent_num = agent_num
        self.action_range = action_range

        game_list = DifferentialGame.get_game_list()

        if self.game_name not in game_list:
            raise EnvironmentNotFound(
                f"The game {self.game_name} doesn't exist")

        expt_num_agent = game_list[self.game_name]["agent_num"]
        if expt_num_agent != self.agent_num:
            raise WrongNumberOfAgent(
                f"The number of agents required for {self.game_name} is {expt_num_agent}")

        self.action_spaces = MASpace(
            tuple(
                Box(low=-1.0, high=1.0, shape=(1, ))
                for _ in range(self.agent_num)))
        self.observation_spaces = MASpace(
            tuple(
                Box(low=-1.0, high=1.0, shape=(1, ))
                for _ in range(self.agent_num)))
        self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
        self.t = 0
        self.payoff = {}

        if self.game_name == "zero_sum":
            self.payoff[0] = lambda a1, a2: a1 * a2
            self.payoff[1] = lambda a1, a2: -a1 * a2
        elif self.game_name == "trigonometric":
            self.payoff[0] = lambda a1, a2: np.cos(a2) * a1
            self.payoff[1] = lambda a1, a2: np.sin(a1) * a2
        elif self.game_name == "mataching_pennies":
            self.payoff[0] = lambda a1, a2: (a1 - 0.5) * (a2 - 0.5)
            self.payoff[1] = lambda a1, a2: (a1 - 0.5) * (a2 - 0.5)
        elif self.game_name == "rotational":
            self.payoff[0] = lambda a1, a2: 0.5 * a1 * a1 + 10 * a1 * a2
            self.payoff[1] = lambda a1, a2: 0.5 * a2 * a2 - 10 * a1 * a2
        elif self.game_name == "wolf":

            def V(alpha, beta, payoff):
                u = payoff[(0, 0)] - payoff[(0, 1)] - payoff[(1, 0)] + payoff[
                    (1, 1)]
                return (alpha * beta * u + alpha *
                        (payoff[(0, 1)] - payoff[(1, 1)]) + beta *
                        (payoff[(1, 0)] - payoff[(1, 1)]) + payoff[(1, 1)])

            payoff_0 = np.array([[0, 3], [1, 2]])
            payoff_1 = np.array([[3, 2], [0, 1]])

            self.payoff[0] = lambda a1, a2: V(a1, a2, payoff_0)
            self.payoff[1] = lambda a1, a2: V(a1, a2, payoff_1)
        elif self.game_name == "ma_softq":
            h1 = 0.8
            h2 = 1.0
            s1 = 3.0
            s2 = 1.0
            x1 = -5.0
            x2 = 5.0
            y1 = -5.0
            y2 = 5.0
            c = 10.0

            def max_f(a1, a2):
                f1 = h1 * (-(np.square(a1 - x1) / s1) -
                           (np.square(a2 - y1) / s1))
                f2 = h2 * (-(np.square(a1 - x2) / s2) -
                           (np.square(a2 - y2) / s2)) + c
                return max(f1, f2)

            self.payoff[0] = lambda a1, a2: max_f(a1, a2)
            self.payoff[1] = lambda a1, a2: max_f(a1, a2)
        else:
            raise EnvironmentNotFound(
                f"The game {self.game_name} doesn't exist")

        self.rewards = np.zeros((self.agent_num, ))
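
Each payoff entry above is a plain callable of the joint action, so per-agent rewards can be evaluated directly; a short sketch using the zero_sum definitions from above:

    import numpy as np

    # Hedged sketch: evaluating the payoff callables for a joint continuous action.
    payoff = {
        0: lambda a1, a2: a1 * a2,    # "zero_sum", agent 0
        1: lambda a1, a2: -a1 * a2,   # "zero_sum", agent 1
    }
    a1, a2 = 0.3, -0.5
    rewards = np.array([payoff[i](a1, a2) for i in range(2)])
    print(rewards)                    # [-0.15  0.15]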
Example #10
    def __init__(self,
                 n_pursuers,
                 n_evaders,
                 n_coop=2,
                 n_poison=10,
                 radius=0.015,
                 obstacle_radius=0.2,
                 obstacle_loc=np.array([0.5, 0.5]),
                 ev_speed=0.01,
                 poison_speed=0.01,
                 n_sensors=30,
                 sensor_range=0.2,
                 action_scale=0.01,
                 poison_reward=-1.,
                 food_reward=1.,
                 encounter_reward=.05,
                 control_penalty=-.5,
                 reward_mech='global',
                 addid=True,
                 speed_features=True,
                 **kwargs):
        self.n_pursuers = n_pursuers
        self.n_evaders = n_evaders
        self.n_coop = n_coop
        self.n_poison = n_poison
        self.obstacle_radius = obstacle_radius
        self.obstacle_loc = obstacle_loc
        self.poison_speed = poison_speed
        self.radius = radius
        self.ev_speed = ev_speed
        self.n_sensors = n_sensors
        self.sensor_range = np.ones(self.n_pursuers) * sensor_range
        self.action_scale = action_scale
        self.poison_reward = poison_reward
        self.food_reward = food_reward
        self.control_penalty = control_penalty
        self.encounter_reward = encounter_reward

        self.n_obstacles = 1
        self._reward_mech = reward_mech
        self._addid = addid
        self._speed_features = speed_features
        self.seed()
        self._pursuers = [
            Archea(npu + 1,
                   self.radius * 3 / 4,
                   self.n_sensors,
                   self.sensor_range[npu],
                   addid=self._addid,
                   speed_features=self._speed_features)
            for npu in range(self.n_pursuers)
        ]
        self._evaders = [
            Archea(nev + 1, self.radius * 3 / 4, self.n_pursuers,
                   self.sensor_range.mean() / 2)
            for nev in range(self.n_evaders)
        ]
        self._poisons = [
            Archea(npo + 1, self.radius * 3 / 4, self.n_poison, 0)
            for npo in range(self.n_poison)
        ]

        self.observation_spaces = MASpace(
            tuple(pursuer.observation_space for pursuer in self._pursuers))
        self.action_spaces = MASpace(
            tuple(pursuer.action_space for pursuer in self._pursuers))
        self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
Example #11
    def __init__(self, world, reset_callback=None, reward_callback=None,
                 observation_callback=None, info_callback=None,
                 done_callback=None, shared_viewer=True):

        self.world = world
        self.world.discrete_action = True
        self.agents = self.world.policy_agents
        # set required vectorized gym env property
        self.n = len(world.policy_agents)
        # scenario callbacks
        self.reset_callback = reset_callback
        self.reward_callback = reward_callback
        self.observation_callback = observation_callback
        self.info_callback = info_callback
        self.done_callback = done_callback
        # environment parameters
        self.discrete_action_space = True
        # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector
        self.discrete_action_input = False
        # if true, even the action is continuous, action will be performed discretely
        self.force_discrete_action = world.discrete_action if hasattr(self.world, 'discrete_action') else False
        # if true, every agent has the same reward
        self.shared_reward = world.collaborative if hasattr(self.world, 'collaborative') else False
        self.time = 0

        # configure spaces
        self.action_space = []
        self.observation_space = []
        obs_shapes = []
        self.agent_num = len(self.agents)
        ## I think everything inside this loop is useless, self.action_spaces is used later and not self.action_space
        for agent in self.agents:
            total_action_space = []
            # physical action space
            if self.discrete_action_space:
                u_action_space = spaces.Discrete((world.dim_p-1) * 2 + 3)   ##
            else:
                u_action_space = spaces.Box(low=-agent.u_range, high=+agent.u_range, shape=(world.dim_p,), dtype=np.float32)
            if agent.movable:
                total_action_space.append(u_action_space)
            # communication action space
            if self.discrete_action_space:
                c_action_space = spaces.Discrete(world.dim_c)
            else:
                c_action_space = spaces.Box(low=0.0, high=1.0, shape=(world.dim_c,), dtype=np.float32)
            if not agent.silent:
                total_action_space.append(c_action_space)
            # total action space
            if len(total_action_space) > 1:
                # all action spaces are discrete, so simplify to MultiDiscrete action space
                if all([isinstance(act_space, spaces.Discrete) for act_space in total_action_space]):
                    act_space = MultiDiscrete([[0, act_space.n - 1] for act_space in total_action_space])
                else:
                    act_space = spaces.Tuple(total_action_space)
                self.action_space.append(act_space)
            else:
                self.action_space.append(total_action_space[0])
            # observation space
            obs_dim = len(observation_callback(agent, self.world))
            obs_shapes.append((obs_dim,))
            self.observation_space.append(spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim,), dtype=np.float32))
            agent.action.c = np.zeros(self.world.dim_c)
        # simplified for non-comm game

        # self.action_spaces = MASpace(tuple(Box(low=-1., high=1., shape=(1,)) for _ in range(self.agent_num)))
        # self.observation_spaces = MASpace(tuple(Discrete(1) for _ in range(self.agent_num)))

        # action originally had 5 values - accel, +forcex, -forcex, +forcey, -forcey
        # I added 2 extra components: change in rotation angle and shoot
        self.action_spaces = MASpace(tuple(Box(low=0., high=1., shape=((world.dim_p-1) * 2 + 3,)) for _ in range(self.agent_num)))  ##
        self.observation_spaces = MASpace(tuple(Box(low=-np.inf, high=+np.inf, shape=obs_shape) for obs_shape in obs_shapes))

        self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
        self.action_range = [0., 1.]
        # rendering
        self.shared_viewer = shared_viewer
        if self.shared_viewer:
            self.viewers = [None]
        else:
            self.viewers = [None] * self.n
        mixer.init()
        soundFiles = gym_fortattack.__file__[:-11]+'envs/Game/'
        # bulletFile = os.path.realpath(__file__)[:-13]+'Game/bullet.mp3'
        mixer.music.load(soundFiles+'bullet.mp3')
        # print(gym_fortattack.__file__)
        # time.sleep(5)
        self.prevShot, self.shot = False, False     # used for rendering
        self._reset_render()