示例#1
0
 def __init__(self, config, return_function):
     """Read the buffer settings from ``config`` and allocate the storage.

     Every setting in the table is fetched with
     ``check_attribute_else_default(config, name, default)``.

     Parameters:
     Name:               Type:           Default:            Description: (Omitted when self-explanatory)
     buff_sz             int             10                  buffer size
     batch_sz            int             1
     frame_stack         int             4                   number of frames to stack, see Mnih et al. (2015)
     env_state_dims      list            [2,2]               dimensions of the observations to be stored in the buffer
     num_actions         int             2                   number of actions available to the agent
     obs_dtype           np.type         np.uint8            the data type of the observations
     reward_clipping     bool            False               clipping the reward, see Mnih et al. (2015)
     sigma               float           0.5                 Sigma parameter, see De Asis et al. (2018)
     sigma_decay         float           1.0                 decay rate of sigma

     Args:
         config: object carrying the settings listed above.
         return_function: must be an OnPolicyQSigmaReturnFunction; its
             ``n`` attribute is copied to ``self.n``.
     """
     self.config = config
     self.buff_sz = check_attribute_else_default(config, 'buff_sz', 10)
     self.batch_sz = check_attribute_else_default(config, 'batch_sz', 1)
     self.frame_stack = check_attribute_else_default(
         config, 'frame_stack', 4)
     self.env_state_dims = list(
         check_attribute_else_default(config, 'env_state_dims', [2, 2]))
     self.num_actions = check_attribute_else_default(
         config, 'num_actions', 2)
     self.obs_dtype = check_attribute_else_default(config, 'obs_dtype',
                                                   np.uint8)
     self.reward_clipping = check_attribute_else_default(
         config, 'reward_clipping', False)
     # NOTE(review): self.sigma is re-bound to a CircularBuffer at the end of
     # this method, so the scalar read here does not survive __init__. The
     # name is left untouched because code outside this method may rely on
     # self.sigma being the buffer -- confirm before renaming either one.
     self.sigma = check_attribute_else_default(config, 'sigma', 0.5)
     self.sigma_decay = check_attribute_else_default(
         config, 'sigma_decay', 1.0)

     # Parameters for Return Function
     assert isinstance(return_function, OnPolicyQSigmaReturnFunction)
     self.return_function = return_function
     self.n = return_function.n

     # Parameters to keep track of the current state of the buffer
     self.current_index = 0
     self.full_buffer = False

     # Circular Buffers
     # The builtin ``bool`` is used for the flag buffer: the ``np.bool``
     # alias was removed in NumPy 1.24 and was only ever the builtin.
     self.state = CircularBuffer(self.buff_sz,
                                 shape=tuple(self.env_state_dims),
                                 dtype=self.obs_dtype)
     self.action = CircularBuffer(self.buff_sz, shape=(), dtype=np.uint8)
     self.reward = CircularBuffer(self.buff_sz, shape=(), dtype=np.int32)
     self.terminate = CircularBuffer(self.buff_sz, shape=(), dtype=bool)
     self.sigma = CircularBuffer(self.buff_sz, shape=(), dtype=np.float64)
示例#2
0
 def __init__(self, config, return_function):
     """Read the buffer settings from ``config`` and allocate the storage.

     Every setting in the table is fetched with
     ``check_attribute_else_default(self.config, name, default)``.

     Parameters:
     Name:               Type:           Default:            Description: (Omitted when self-explanatory)
     buff_sz             int             10                  buffer size
     batch_sz            int             1
     env_state_dims      list            [2,2]               dimensions of the observations to be stored in the buffer
     num_actions         int             2                   number of actions available to the agent
     obs_dtype           np.type         np.uint8            the data type of the observations
     initial_rand_steps  int             0                   number of random steps before decaying sigma
     rand_steps_count    int             0                   number of random steps taken so far
     store_return        bool            True                save the computed return so that it can be reused

     Args:
         config: must be a Config instance carrying the settings above.
         return_function: must be an nStep_Retrace_ReturnFunction; its
             ``n`` attribute is copied to ``self.n``.
     """
     assert isinstance(config, Config)
     self.config = config
     self.buff_sz = check_attribute_else_default(self.config, 'buff_sz', 10)
     self.batch_sz = check_attribute_else_default(self.config, 'batch_sz',
                                                  1)
     self.env_state_dims = list(
         check_attribute_else_default(self.config, 'env_state_dims',
                                      [2, 2]))
     self.num_actions = check_attribute_else_default(
         self.config, 'num_actions', 2)
     self.obs_dtype = check_attribute_else_default(self.config, 'obs_dtype',
                                                   np.uint8)
     self.initial_rand_steps = check_attribute_else_default(
         self.config, 'initial_rand_steps', 0)
     # The result is deliberately not stored on self. Presumably the call
     # only makes sure config carries 'rand_steps_count' -- TODO confirm
     # against check_attribute_else_default.
     check_attribute_else_default(self.config, 'rand_steps_count', 0)
     self.store_return = check_attribute_else_default(
         self.config, 'store_return', True)

     # Parameters for Return Function
     assert isinstance(return_function, nStep_Retrace_ReturnFunction)
     self.return_function = return_function
     self.n = return_function.n

     # Termination or Timeout Count for Applying the Decay on Sigma
     self.episodes_since_last_decay = 0

     # Parameters to keep track of the current state of the buffer
     self.current_index = 0
     self.full_buffer = False

     # Circular Buffers
     # The builtin ``bool`` is used for the flag buffers: the ``np.bool``
     # alias was removed in NumPy 1.24 and was only ever the builtin.
     self.state = CircularBuffer(self.buff_sz,
                                 shape=tuple(self.env_state_dims),
                                 dtype=self.obs_dtype)
     self.action = CircularBuffer(self.buff_sz, shape=(), dtype=np.uint8)
     self.reward = CircularBuffer(self.buff_sz, shape=(), dtype=np.int32)
     self.terminate = CircularBuffer(self.buff_sz, shape=(), dtype=bool)
     self.timeout = CircularBuffer(self.buff_sz, shape=(), dtype=bool)
     # One row of num_actions probabilities per stored step.
     self.bprobabilities = CircularBuffer(self.buff_sz,
                                          shape=(self.num_actions, ),
                                          dtype=np.float64)
     self.estimated_return = CircularBuffer(self.buff_sz,
                                            shape=(),
                                            dtype=np.float64)
     self.up_to_date = CircularBuffer(self.buff_sz, shape=(), dtype=bool)
示例#3
0
 def __init__(self, config, return_function):
     """Read the buffer settings from ``config`` and allocate the storage.

     Every setting in the table is fetched with
     ``check_attribute_else_default(self.config, name, default)``.

     Parameters:
     Name:               Type:           Default:            Description: (Omitted when self-explanatory)
     buff_sz             int             10                  buffer size
     batch_sz            int             1
     frame_stack         int             4                   number of frames to stack, see Mnih et al. (2015)
     env_state_dims      list            [2,2]               dimensions of the observations to be stored in the buffer
     num_actions         int             2                   number of actions available to the agent
     obs_dtype           np.type         np.uint8            the data type of the observations
     reward_clipping     bool            False               clipping the reward, see Mnih et al. (2015)
     sigma               float           0.5                 Sigma parameter, see De Asis et al. (2018)
     sigma_decay         float           1.0                 decay rate of sigma
     store_bprobs        bool            False               whether to store and use the behaviour policy probabilities
                                                             for the return function
     store_sigma         bool            False               whether to store sigma at every time step and use
                                                             the stored sigmas to compute the return. True = use the
                                                             sigma from the buffer, False = use the current sigma
     initial_rand_steps  int             0                   number of random steps before decaying sigma
     rand_steps_count    int             0                   number of random steps taken so far
     store_return        bool            True                save the computed return so that it can be reused

     Args:
         config: must be a Config instance carrying the settings above.
         return_function: must be a QSigmaReturnFunction; its ``n``
             attribute is copied to ``self.n``.
     """
     assert isinstance(config, Config)
     self.config = config
     self.buff_sz = check_attribute_else_default(self.config, 'buff_sz', 10)
     self.batch_sz = check_attribute_else_default(self.config, 'batch_sz',
                                                  1)
     self.frame_stack = check_attribute_else_default(
         self.config, 'frame_stack', 4)
     self.env_state_dims = list(
         check_attribute_else_default(self.config, 'env_state_dims',
                                      [2, 2]))
     self.num_actions = check_attribute_else_default(
         self.config, 'num_actions', 2)
     self.obs_dtype = check_attribute_else_default(self.config, 'obs_dtype',
                                                   np.uint8)
     self.reward_clipping = check_attribute_else_default(
         self.config, 'reward_clipping', False)
     self.sigma = check_attribute_else_default(self.config, 'sigma', 0.5)
     self.sigma_decay = check_attribute_else_default(
         self.config, 'sigma_decay', 1.0)
     self.store_bprobs = check_attribute_else_default(
         self.config, 'store_bprobs', False)
     self.store_sigma = check_attribute_else_default(
         self.config, 'store_sigma', False)
     self.initial_rand_steps = check_attribute_else_default(
         self.config, 'initial_rand_steps', 0)
     # The result is deliberately not stored on self. Presumably the call
     # only makes sure config carries 'rand_steps_count' -- TODO confirm
     # against check_attribute_else_default.
     check_attribute_else_default(self.config, 'rand_steps_count', 0)
     self.store_return = check_attribute_else_default(
         self.config, 'store_return', True)

     # Parameters for Return Function
     assert isinstance(return_function, QSigmaReturnFunction)
     self.return_function = return_function
     self.n = return_function.n

     # Parameters to keep track of the current state of the buffer
     self.current_index = 0
     self.full_buffer = False

     # Circular Buffers
     # The builtin ``bool`` is used for the flag buffers: the ``np.bool``
     # alias was removed in NumPy 1.24 and was only ever the builtin.
     self.state = CircularBuffer(self.buff_sz,
                                 shape=tuple(self.env_state_dims),
                                 dtype=self.obs_dtype)
     self.action = CircularBuffer(self.buff_sz, shape=(), dtype=np.uint8)
     self.reward = CircularBuffer(self.buff_sz, shape=(), dtype=np.int32)
     self.terminate = CircularBuffer(self.buff_sz, shape=(), dtype=bool)
     # The two buffers below exist only when the matching flag is set, so
     # self.bprobabilities / self.sigma_buffer may be missing attributes.
     if self.store_bprobs:
         self.bprobabilities = CircularBuffer(self.buff_sz,
                                              shape=(self.num_actions, ),
                                              dtype=np.float64)
     if self.store_sigma:
         self.sigma_buffer = CircularBuffer(self.buff_sz,
                                            shape=(),
                                            dtype=np.float64)
     self.estimated_return = CircularBuffer(self.buff_sz,
                                            shape=(),
                                            dtype=np.float64)
     self.up_to_date = CircularBuffer(self.buff_sz, shape=(), dtype=bool)
示例#4
0
 def __init__(self, config, return_function):
     """Read the buffer settings from ``config`` and allocate the storage.

     Every setting in the table is fetched with
     ``check_attribute_else_default(self.config, name, default)``.

     Parameters:
     Name:               Type:           Default:            Description: (Omitted when self-explanatory)
     buff_sz             int             10                  buffer size
     batch_sz            int             1
     env_state_dims      list            [2,2]               dimensions of the observations to be stored in the buffer
     num_actions         int             2                   number of actions available to the agent
     obs_dtype           np.type         np.uint8            the data type of the observations
     sigma               float           0.5                 Sigma parameter, see De Asis et al. (2018)
     sigma_decay         float           1.0                 decay rate of sigma
     decay_type          string          exp                 decay type of sigma. Options: exp and lin
     decay_freq          int             1                   how often to decay sigma, e.g. a decay frequency of
                                                             10 would apply the decay once every 10 episodes
     sigma_min           float           0                   the lowest value sigma can attain when decaying
     store_bprobs        bool            False               whether to store and use the behaviour policy probabilities
                                                             for the return function
     store_sigma         bool            False               whether to store sigma at every time step and use
                                                             the stored sigmas to compute the return. True = use the
                                                             sigma from the buffer, False = use the current sigma
     initial_rand_steps  int             0                   number of random steps before decaying sigma
     rand_steps_count    int             0                   number of random steps taken so far
     store_return        bool            True                save the computed return so that it can be reused

     Args:
         config: must be a Config instance carrying the settings above.
         return_function: must be a QSigmaReturnFunction; its ``n``
             attribute is copied to ``self.n``.
     """
     assert isinstance(config, Config)
     self.config = config
     self.buff_sz = check_attribute_else_default(self.config, 'buff_sz', 10)
     self.batch_sz = check_attribute_else_default(self.config, 'batch_sz',
                                                  1)
     self.env_state_dims = list(
         check_attribute_else_default(self.config, 'env_state_dims',
                                      [2, 2]))
     self.num_actions = check_attribute_else_default(
         self.config, 'num_actions', 2)
     self.obs_dtype = check_attribute_else_default(self.config, 'obs_dtype',
                                                   np.uint8)
     self.sigma = check_attribute_else_default(self.config, 'sigma', 0.5)
     self.sigma_decay = check_attribute_else_default(
         self.config, 'sigma_decay', 1.0)
     self.decay_type = check_attribute_else_default(self.config,
                                                    'decay_type', 'exp')
     self.decay_freq = check_attribute_else_default(self.config,
                                                    'decay_freq', 1)
     self.sigma_min = check_attribute_else_default(self.config, 'sigma_min',
                                                   0.0)
     self.store_bprobs = check_attribute_else_default(
         self.config, 'store_bprobs', False)
     self.store_sigma = check_attribute_else_default(
         self.config, 'store_sigma', False)
     self.initial_rand_steps = check_attribute_else_default(
         self.config, 'initial_rand_steps', 0)
     # The result is deliberately not stored on self. Presumably the call
     # only makes sure config carries 'rand_steps_count' -- TODO confirm
     # against check_attribute_else_default.
     check_attribute_else_default(self.config, 'rand_steps_count', 0)
     self.store_return = check_attribute_else_default(
         self.config, 'store_return', True)

     # Parameters for Return Function
     assert isinstance(return_function, QSigmaReturnFunction)
     self.return_function = return_function
     self.n = return_function.n

     # Termination or Timeout Count for Applying the Decay on Sigma
     self.episodes_since_last_decay = 0

     # Parameters to keep track of the current state of the buffer
     self.current_index = 0
     self.full_buffer = False

     # Circular Buffers
     # The builtin ``bool`` is used for the flag buffers: the ``np.bool``
     # alias was removed in NumPy 1.24 and was only ever the builtin.
     self.state = CircularBuffer(self.buff_sz,
                                 shape=tuple(self.env_state_dims),
                                 dtype=self.obs_dtype)
     self.action = CircularBuffer(self.buff_sz, shape=(), dtype=np.uint8)
     self.reward = CircularBuffer(self.buff_sz, shape=(), dtype=np.int32)
     self.terminate = CircularBuffer(self.buff_sz, shape=(), dtype=bool)
     self.timeout = CircularBuffer(self.buff_sz, shape=(), dtype=bool)
     # The two buffers below exist only when the matching flag is set, so
     # self.bprobabilities / self.sigma_buffer may be missing attributes.
     if self.store_bprobs:
         self.bprobabilities = CircularBuffer(self.buff_sz,
                                              shape=(self.num_actions, ),
                                              dtype=np.float64)
     if self.store_sigma:
         self.sigma_buffer = CircularBuffer(self.buff_sz,
                                            shape=(),
                                            dtype=np.float64)
     self.estimated_return = CircularBuffer(self.buff_sz,
                                            shape=(),
                                            dtype=np.float64)
     self.up_to_date = CircularBuffer(self.buff_sz, shape=(), dtype=bool)