def __init__(
    self,
    observation_space,
    my_action_space,
    other_action_space,
    index=None,
    mas=None,
    exploration="EpsGreedy",
    gamma=0.99,
    lr=0.1,
    target_update_freq=None,
    name="MinimaxQAgent",
):
    """Set up the agent on top of a two-player Q-table.

    A ``MultiQTable`` is built from the observation dimension and the pair
    of action dimensions (this agent's first, the other agent's second),
    each obtained through ``gymSpace2dim``. The table is handed to
    ``QAgent.__init__`` as ``qmodel``; ``MATrainable.__init__`` is then
    called with ``mas`` and ``index``.

    Args:
        observation_space: Space converted to the table's observation dim.
        my_action_space: This agent's action space; also passed to
            ``QAgent.__init__`` as ``action_space``.
        other_action_space: The other agent's action space; only used to
            size the Q-table.
        index: Forwarded to ``MATrainable.__init__``.
        mas: Forwarded to ``MATrainable.__init__``.
        exploration: Forwarded to ``QAgent.__init__``.
        gamma: Forwarded to ``QAgent.__init__``.
        lr: Forwarded to ``QAgent.__init__``.
        target_update_freq: Forwarded to ``QAgent.__init__``.
        name: Forwarded to ``QAgent.__init__``.
    """
    obs_dim = gymSpace2dim(observation_space)
    # Order matters for the table layout: own actions first, then the other's.
    joint_action_dims = [
        gymSpace2dim(my_action_space),
        gymSpace2dim(other_action_space),
    ]
    q_table = MultiQTable(obs_dim, joint_action_dims)

    # Experience and batch size are fixed here rather than exposed to callers.
    QAgent.__init__(
        self,
        qmodel=q_table,
        observation_space=observation_space,
        action_space=my_action_space,
        experience="ReplayMemory-1",
        exploration=exploration,
        gamma=gamma,
        lr=lr,
        batch_size=1,
        target_update_freq=target_update_freq,
        name=name,
    )
    MATrainable.__init__(self, mas, index)
def __init__(self, model, observation_space=None, action_space=None):
    """Record the spaces and build the underlying model.

    Args:
        model: Model specification handed to ``marl.model.make``.
        observation_space: Stored on the instance; its ``gymSpace2dim``
            value is passed to ``make`` as ``obs_sp``.
        action_space: Stored on the instance; its ``gymSpace2dim`` value
            is passed to ``make`` as ``act_sp``.
    """
    self.observation_space = observation_space
    self.action_space = action_space

    # Dimensions are derived from the attributes just stored on the instance.
    obs_dim = gymSpace2dim(self.observation_space)
    act_dim = gymSpace2dim(self.action_space)
    self.model = marl.model.make(model, obs_sp=obs_dim, act_sp=act_dim)
def __init__(self, model, observation_space=None, action_space=None):
    """Initialise the parent class, keep the spaces and create the model.

    Args:
        model: Model specification handed to ``marl.model.make``.
        observation_space: Stored on the instance and converted with
            ``gymSpace2dim`` to give the model's ``obs_sp``.
        action_space: Stored on the instance and converted with
            ``gymSpace2dim`` to give the model's ``act_sp``.
    """
    super(StochasticPolicy, self).__init__()

    self.observation_space = observation_space
    self.action_space = action_space

    self.model = marl.model.make(
        model,
        obs_sp=gymSpace2dim(self.observation_space),
        act_sp=gymSpace2dim(self.action_space),
    )
def __init__(self, model, observation_space=None, action_space=None):
    """Initialise the parent class, keep the spaces, bounds and model.

    ``self.low`` and ``self.high`` take the first entry of the action
    space's ``low`` / ``high`` arrays when the action space is a
    ``gym.spaces.Box``; for any other space they are set to 0 and 1.

    Args:
        model: Model specification handed to ``marl.model.make``.
        observation_space: Stored on the instance and converted with
            ``gymSpace2dim`` to give the model's ``obs_sp``.
        action_space: Stored on the instance, used to derive the bounds,
            and converted with ``gymSpace2dim`` to give ``act_sp``.
    """
    super(DeterministicPolicy, self).__init__()

    self.observation_space = observation_space
    self.action_space = action_space

    if isinstance(self.action_space, gym.spaces.Box):
        # Only the first component of each bound array is used.
        self.low = self.action_space.low[0]
        self.high = self.action_space.high[0]
    else:
        self.low = 0
        self.high = 1

    self.model = marl.model.make(
        model,
        obs_sp=gymSpace2dim(self.observation_space),
        act_sp=gymSpace2dim(self.action_space),
    )