Example #1
 def init_nets(self):
     '''Initialize nets with multi-task dimensions, and set net params'''
     self.state_dims = [
         body.state_dim for body in self.agent.nanflat_body_a]
     self.action_dims = [
         body.action_dim for body in self.agent.nanflat_body_a]
     self.total_state_dim = sum(self.state_dims)
     self.total_action_dim = sum(self.action_dims)
     net_spec = self.agent.spec['net']
     net_kwargs = util.compact_dict(dict(
         hid_layers_activation=_.get(net_spec, 'hid_layers_activation'),
         optim_param=_.get(net_spec, 'optim'),
         loss_param=_.get(net_spec, 'loss'),
         clamp_grad=_.get(net_spec, 'clamp_grad'),
         clamp_grad_val=_.get(net_spec, 'clamp_grad_val'),
     ))
     self.net = getattr(net, net_spec['type'])(
         self.total_state_dim, net_spec['hid_layers'], self.total_action_dim, **net_kwargs)
     self.target_net = getattr(net, net_spec['type'])(
         self.total_state_dim, net_spec['hid_layers'], self.total_action_dim, **net_kwargs)
     self.online_net = self.target_net
     self.eval_net = self.target_net
     util.set_attr(self, _.pick(net_spec, [
         'batch_size', 'update_type', 'update_frequency', 'polyak_weight',
     ]))
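For orientation, a minimal 'net' spec that this example could consume might look like the sketch below. The key names mirror the lookups in the code above; all values, and the nested shape of the 'optim' and 'loss' entries, are illustrative assumptions rather than a real spec file.

# Hypothetical net spec; key names mirror the lookups in init_nets above,
# values and the nested optim/loss structure are illustrative assumptions.
net_spec = {
    'type': 'MLPNet',
    'hid_layers': [64, 32],
    'hid_layers_activation': 'relu',
    'optim': {'name': 'Adam', 'lr': 0.01},
    'loss': {'name': 'MSELoss'},
    'clamp_grad': False,
    'clamp_grad_val': 1.0,
    'batch_size': 32,
    'update_type': 'replace',
    'update_frequency': 100,
    'polyak_weight': 0.9,
}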
Example #2
 def init_nets(self):
     '''Initialize nets with multi-task dimensions, and set net params'''
     # NOTE: Separate init from MultitaskDQN despite similarities so that this implementation can support arbitrary sized state and action heads (e.g. multiple layers)
     net_spec = self.agent.spec['net']
     if len(net_spec['hid_layers']) > 0:
         state_head_out_d = int(net_spec['hid_layers'][0] / 4)
     else:
         state_head_out_d = 16
     self.state_dims = [
         [body.state_dim, state_head_out_d] for body in self.agent.nanflat_body_a]
     self.action_dims = [
         [body.action_dim] for body in self.agent.nanflat_body_a]
     self.total_state_dim = sum([s[0] for s in self.state_dims])
     self.total_action_dim = sum([a[0] for a in self.action_dims])
     logger.debug(
         f'State dims: {self.state_dims}, total: {self.total_state_dim}')
     logger.debug(
         f'Action dims: {self.action_dims}, total: {self.total_action_dim}')
     net_kwargs = util.compact_dict(dict(
         hid_layers_activation=_.get(net_spec, 'hid_layers_activation'),
         optim_param=_.get(net_spec, 'optim'),
         loss_param=_.get(net_spec, 'loss'),
         clamp_grad=_.get(net_spec, 'clamp_grad'),
         clamp_grad_val=_.get(net_spec, 'clamp_grad_val'),
     ))
     self.net = getattr(net, net_spec['type'])(
         self.state_dims, net_spec['hid_layers'], self.action_dims, **net_kwargs)
     self.target_net = getattr(net, net_spec['type'])(
         self.state_dims, net_spec['hid_layers'], self.action_dims, **net_kwargs)
     self.online_net = self.target_net
     self.eval_net = self.target_net
     util.set_attr(self, _.pick(net_spec, [
         'batch_size', 'update_type', 'update_frequency', 'polyak_weight',
     ]))
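A quick worked example of the head sizing above: with hid_layers = [64, 32], the per-body state head outputs int(64 / 4) = 16 units, and with an empty hid_layers it falls back to the default of 16. For two hypothetical bodies with state dims 4 and 6 and action dims 2 and 3, the resulting dimensions are:

# Illustration only: two hypothetical bodies, net_spec['hid_layers'] == [64, 32].
state_head_out_d = int(64 / 4)       # -> 16
state_dims = [[4, 16], [6, 16]]      # per-body [state_dim, state_head_out_d]
action_dims = [[2], [3]]             # per-body [action_dim]
total_state_dim = 4 + 6              # -> 10
total_action_dim = 2 + 3             # -> 5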
Example #3
 def init_nets(self):
     '''Initialize networks'''
     body = self.agent.nanflat_body_a[0]  # single-body algo
     state_dim = body.state_dim
     action_dim = body.action_dim
     net_spec = self.agent.spec['net']
     net_kwargs = util.compact_dict(dict(
         hid_layers_activation=_.get(net_spec, 'hid_layers_activation'),
         optim_param=_.get(net_spec, 'optim'),
         loss_param=_.get(net_spec, 'loss'),
         clamp_grad=_.get(net_spec, 'clamp_grad'),
         clamp_grad_val=_.get(net_spec, 'clamp_grad_val'),
     ))
     self.net = getattr(net, net_spec['type'])(
         state_dim, net_spec['hid_layers'], action_dim, **net_kwargs)
     self.target_net = getattr(net, net_spec['type'])(
         state_dim, net_spec['hid_layers'], action_dim, **net_kwargs)
     self.online_net = self.target_net
     self.eval_net = self.target_net
     util.set_attr(self, _.pick(net_spec, [
         'batch_size',
     ]))
     # Default network update params for base
     self.update_type = 'replace'
     self.update_frequency = 1
     self.polyak_weight = 0.0
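The defaults set here amount to a hard target-network update: with update_type = 'replace' the target net is simply overwritten with the online net every update_frequency training steps, and a polyak_weight of 0.0 makes a soft (Polyak) update collapse to the same full copy. A minimal sketch of the two update rules over plain PyTorch modules, shown as an illustration of the parameters rather than the library's own update code:

import torch

def update_target(net, target_net, update_type='replace', polyak_weight=0.0):
    '''Sketch of hard vs. Polyak target updates; illustrative, not the library code'''
    with torch.no_grad():
        for param, target_param in zip(net.parameters(), target_net.parameters()):
            if update_type == 'replace':
                target_param.copy_(param)  # hard copy of the online weights
            else:  # assumed 'polyak': keep polyak_weight of the old target weights
                target_param.copy_(
                    polyak_weight * target_param + (1.0 - polyak_weight) * param)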
Example #4
 def init_nets(self):
     '''Initialize the neural network used to learn the Q function from the spec'''
     body = self.agent.nanflat_body_a[0]  # singleton algo
     state_dim = body.state_dim
     action_dim = body.action_dim
     self.is_discrete = body.is_discrete
     net_spec = self.agent.spec['net']
     net_kwargs = util.compact_dict(
         dict(
             hid_layers_activation=_.get(net_spec, 'hid_layers_activation'),
             optim_param=_.get(net_spec, 'optim'),
             loss_param=_.get(net_spec, 'loss'),
             clamp_grad=_.get(net_spec, 'clamp_grad'),
             clamp_grad_val=_.get(net_spec, 'clamp_grad_val'),
         ))
     # Below we automatically select an appropriate net for a discrete or continuous action space if the setting is of the form 'MLPdefault'. Otherwise the correct type of network is assumed to be specified in the spec.
     # Networks for continuous action spaces have two heads and return two values, the first is a tensor containing the mean of the action policy, the second is a tensor containing the std deviation of the action policy. The distribution is assumed to be a Gaussian (Normal) distribution.
     # Networks for discrete action spaces have a single head and return the logits for a categorical probability distribution over the discrete actions
     if net_spec['type'] == 'MLPdefault':
         if self.is_discrete:
             self.net = getattr(net,
                                'MLPNet')(state_dim, net_spec['hid_layers'],
                                          action_dim, **net_kwargs)
         else:
             self.net = getattr(net, 'MLPHeterogenousHeads')(
                 state_dim, net_spec['hid_layers'],
                 [action_dim, action_dim], **net_kwargs)
     else:
         self.net = getattr(net, net_spec['type'])(state_dim,
                                                   net_spec['hid_layers'],
                                                   action_dim, **net_kwargs)
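To make the head layouts from the comments above concrete, here is a hedged sketch of how the two kinds of output could be turned into action distributions, using stand-in tensors instead of real network outputs (illustrative only, not the library's action-policy code):

import torch
from torch import distributions

# Discrete case: a single head returns logits for a categorical distribution.
logits = torch.zeros(1, 2)                        # stand-in for self.net(state), action_dim == 2
discrete_dist = distributions.Categorical(logits=logits)

# Continuous case: two heads return [mean, std] for a Gaussian (Normal) policy.
mean, std = torch.zeros(1, 2), torch.ones(1, 2)   # stand-ins for the two heads
continuous_dist = distributions.Normal(mean, std)
action = continuous_dist.sample()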
Example #5
 def init_nets(self):
     '''Initialize the neural network used to learn the Q function from the spec'''
     body = self.agent.nanflat_body_a[0]  # single-body algo
     state_dim = body.state_dim  # dimension of the environment state, e.g. 4
     action_dim = body.action_dim  # dimension of the environment actions, e.g. 2
     net_spec = self.agent.spec['net']
     net_kwargs = util.compact_dict(
         dict(
             hid_layers_activation=_.get(net_spec, 'hid_layers_activation'),
             optim_param=_.get(net_spec, 'optim'),
             loss_param=_.get(net_spec, 'loss'),
             clamp_grad=_.get(net_spec, 'clamp_grad'),
             clamp_grad_val=_.get(net_spec, 'clamp_grad_val'),
         ))
     self.net = getattr(net,
                        net_spec['type'])(state_dim, net_spec['hid_layers'],
                                          action_dim, **net_kwargs)
     util.set_attr(
         self,
         _.pick(
             net_spec,
             [
                 # how many examples to learn per training iteration
                 'batch_size',
                 'decay_lr',
                 'decay_lr_frequency',
                 'decay_lr_min_timestep',
             ]))
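The extra keys picked here configure a learning-rate decay schedule. A hypothetical fragment of net_spec containing just those keys could look as follows; the values and the exact semantics of each key are assumptions for illustration:

# Hypothetical net_spec fragment; only the keys picked above, values illustrative.
net_spec_fragment = {
    'batch_size': 32,
    'decay_lr': 0.9,                 # decay setting (assumed semantics)
    'decay_lr_frequency': 1000,      # decay every N timesteps (assumed semantics)
    'decay_lr_min_timestep': 5000,   # no decay before this timestep (assumed semantics)
}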
Example #6
 def init_nets(self):
     '''Initialize the neural network used to learn the Q function from the spec'''
     body = self.agent.nanflat_body_a[0]  # single-body algo
     self.state_dim = body.state_dim  # dimension of the environment state, e.g. 4
     self.action_dim = body.action_dim  # dimension of the environment actions, e.g. 2
     net_spec = self.agent.spec['net']
     mem_spec = self.agent.spec['memory']
     net_kwargs = util.compact_dict(dict(
         hid_layers_activation=_.get(net_spec, 'hid_layers_activation'),
         optim_param=_.get(net_spec, 'optim'),
         loss_param=_.get(net_spec, 'loss'),
         clamp_grad=_.get(net_spec, 'clamp_grad'),
         clamp_grad_val=_.get(net_spec, 'clamp_grad_val'),
         gpu=_.get(net_spec, 'gpu'),
     ))
     if net_spec['type'].find('Recurrent') != -1:
         self.net = getattr(net, net_spec['type'])(
             self.state_dim, net_spec['hid_layers'], self.action_dim, mem_spec['length_history'], **net_kwargs)
     else:
         self.net = getattr(net, net_spec['type'])(
             self.state_dim, net_spec['hid_layers'], self.action_dim, **net_kwargs)
     self.set_net_attributes()
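The Recurrent branch above passes one extra positional argument: the sequence length taken from the memory spec. A pair of hypothetical spec fragments (keys as referenced in the code, values illustrative) shows which entries each branch needs:

# Hypothetical spec fragments; keys mirror the lookups above, values are illustrative.
net_spec = {
    'type': 'RecurrentNet',    # contains 'Recurrent', so the first branch is taken
    'hid_layers': [64],
    'hid_layers_activation': 'relu',
    'gpu': False,
}
mem_spec = {
    'length_history': 4,       # number of past states fed to the recurrent net
}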
Example #7
def test_compact_dict(d, res_d):
    assert util.compact_dict(d) == res_d
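The test above is presumably driven by a (d, res_d) parametrization elsewhere in the test module. Judging by how util.compact_dict is used throughout the init_nets examples, it appears to drop empty entries (such as None values from missing spec keys) so that absent options are not passed on as keyword arguments. Below is a minimal sketch of that assumed behaviour with a hypothetical parametrization; this is not the library's actual implementation:

import pytest

def compact_dict(d):
    '''Sketch: drop None-valued entries (assumed behaviour of util.compact_dict)'''
    return {k: v for k, v in d.items() if v is not None}

@pytest.mark.parametrize('d,res_d', [
    ({'a': 1, 'b': None}, {'a': 1}),
    ({}, {}),
])
def test_compact_dict(d, res_d):
    assert compact_dict(d) == res_d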
Example #8
 def init_nets(self):
     '''Initialize the neural networks used to learn the actor and critic from the spec'''
     body = self.agent.nanflat_body_a[0]  # singleton algo
     state_dim = body.state_dim
     action_dim = body.action_dim
     self.is_discrete = body.is_discrete
     net_spec = self.agent.spec['net']
     mem_spec = self.agent.spec['memory']
     net_type = self.agent.spec['net']['type']
     actor_kwargs = util.compact_dict(
         dict(
             hid_layers_activation=_.get(net_spec, 'hid_layers_activation'),
             optim_param=_.get(net_spec, 'optim_actor'),
             loss_param=_.get(net_spec,
                              'loss'),  # Note: Not used for training actor
             clamp_grad=_.get(net_spec, 'clamp_grad'),
             clamp_grad_val=_.get(net_spec, 'clamp_grad_val'),
             gpu=_.get(net_spec, 'gpu'),
         ))
     if self.agent.spec['net']['use_same_optim']:
         logger.info('Using same optimizer for actor and critic')
         critic_kwargs = actor_kwargs
     else:
         logger.info('Using different optimizer for actor and critic')
         critic_kwargs = util.compact_dict(
             dict(
                 hid_layers_activation=_.get(net_spec,
                                             'hid_layers_activation'),
                 optim_param=_.get(net_spec, 'optim_critic'),
                 loss_param=_.get(net_spec, 'loss'),
                 clamp_grad=_.get(net_spec, 'clamp_grad'),
                 clamp_grad_val=_.get(net_spec, 'clamp_grad_val'),
                 gpu=_.get(net_spec, 'gpu'),
             ))
     '''
     Below we automatically select an appropriate net based on three conditions:
       1. Whether the action space is discrete or continuous
            - Networks for continuous action spaces have two heads and return two values: the first is a tensor containing the mean of the action policy, the second is a tensor containing the std deviation of the action policy. The distribution is assumed to be a Gaussian (Normal) distribution.
            - Networks for discrete action spaces have a single head and return the logits for a categorical probability distribution over the discrete actions.
       2. Whether the actor and critic are separate networks or share weights
            - If the networks share weights then the single network returns a list.
                 - Continuous action spaces: the returned list contains 3 elements: the first is the mean output of the actor (policy), the second is the std dev of the policy, and the third is the state-value estimated by the network.
                 - Discrete action spaces: the returned list contains 2 elements: the first is a tensor containing the logits for a categorical probability distribution over the actions, the second is the state-value estimated by the network.
       3. Whether the network type is feedforward, convolutional, or recurrent
            - Feedforward and convolutional networks take a single state as input and require an OnPolicyReplay or OnPolicyBatchReplay memory.
            - Recurrent networks take n states as input and require an OnPolicyNStepReplay or OnPolicyNStepBatchReplay memory.
     '''
     if net_type == 'MLPseparate':
         self.is_shared_architecture = False
         self.is_recurrent = False
         if self.is_discrete:
             self.actor = getattr(net, 'MLPNet')(state_dim,
                                                 net_spec['hid_layers'],
                                                 action_dim, **actor_kwargs)
             logger.info(
                 "Feedforward net, discrete action space, actor and critic are separate networks"
             )
         else:
             self.actor = getattr(net, 'MLPHeterogenousHeads')(
                 state_dim, net_spec['hid_layers'],
                 [action_dim, action_dim], **actor_kwargs)
             logger.info(
                 "Feedforward net, continuous action space, actor and critic are separate networks"
             )
         self.critic = getattr(net,
                               'MLPNet')(state_dim, net_spec['hid_layers'],
                                         1, **critic_kwargs)
     elif net_type == 'MLPshared':
         self.is_shared_architecture = True
         self.is_recurrent = False
         if self.is_discrete:
             self.actorcritic = getattr(net, 'MLPHeterogenousHeads')(
                 state_dim, net_spec['hid_layers'], [action_dim, 1],
                 **actor_kwargs)
             logger.info(
                 "Feedforward net, discrete action space, actor and critic combined into single network, sharing params"
             )
         else:
             self.actorcritic = getattr(net, 'MLPHeterogenousHeads')(
                 state_dim, net_spec['hid_layers'],
                 [action_dim, action_dim, 1], **actor_kwargs)
             logger.info(
                 "Feedforward net, continuous action space, actor and critic combined into single network, sharing params"
             )
     elif net_type == 'Convseparate':
         self.is_shared_architecture = False
         self.is_recurrent = False
         if self.is_discrete:
             self.actor = getattr(net,
                                  'ConvNet')(state_dim,
                                             net_spec['hid_layers'],
                                             action_dim, **actor_kwargs)
             logger.info(
                 "Convolutional net, discrete action space, actor and critic are separate networks"
             )
         else:
             self.actor = getattr(net, 'ConvNet')(state_dim,
                                                  net_spec['hid_layers'],
                                                  [action_dim, action_dim],
                                                  **actor_kwargs)
             logger.info(
                 "Convolutional net, continuous action space, actor and critic are separate networks"
             )
         self.critic = getattr(net,
                               'ConvNet')(state_dim, net_spec['hid_layers'],
                                          1, **critic_kwargs)
     elif net_type == 'Convshared':
         self.is_shared_architecture = True
         self.is_recurrent = False
         if self.is_discrete:
             self.actorcritic = getattr(net,
                                        'ConvNet')(state_dim,
                                                   net_spec['hid_layers'],
                                                   [action_dim, 1],
                                                   **actor_kwargs)
             logger.info(
                 "Convolutional net, discrete action space, actor and critic combined into single network, sharing params"
             )
         else:
             self.actorcritic = getattr(net, 'ConvNet')(
                 state_dim, net_spec['hid_layers'],
                 [action_dim, action_dim, 1], **actor_kwargs)
             logger.info(
                 "Convolutional net, continuous action space, actor and critic combined into single network, sharing params"
             )
     elif net_type == 'Recurrentseparate':
         self.is_shared_architecture = False
         self.is_recurrent = True
         if self.is_discrete:
             self.actor = getattr(net, 'RecurrentNet')(
                 state_dim, net_spec['hid_layers'], action_dim,
                 mem_spec['length_history'], **actor_kwargs)
             logger.info(
                 "Recurrent net, discrete action space, actor and critic are separate networks"
             )
         else:
             self.actor = getattr(net, 'RecurrentNet')(
                 state_dim, net_spec['hid_layers'],
                 [action_dim, action_dim], mem_spec['length_history'],
                 **actor_kwargs)
             logger.info(
                 "Recurrent net, continuous action space, actor and critic are separate networks"
             )
         self.critic = getattr(net,
                               'RecurrentNet')(state_dim,
                                               net_spec['hid_layers'], 1,
                                               mem_spec['length_history'],
                                               **critic_kwargs)
     elif net_type == 'Recurrentshared':
         self.is_shared_architecture = True
         self.is_recurrent = True
         if self.is_discrete:
             self.actorcritic = getattr(net, 'RecurrentNet')(
                 state_dim, net_spec['hid_layers'], [action_dim, 1],
                 mem_spec['length_history'], **actor_kwargs)
             logger.info(
                 "Recurrent net, discrete action space, actor and critic combined into single network, sharing params"
             )
         else:
             self.actorcritic = getattr(net, 'RecurrentNet')(
                 state_dim, net_spec['hid_layers'],
                 [action_dim, action_dim, 1], mem_spec['length_history'],
                 **actor_kwargs)
             logger.info(
                 "Recurrent net, continuous action space, actor and critic combined into single network, sharing params"
             )
     else:
         logger.warn(
             "Incorrect network type. Please use 'MLPshared', 'MLPseparate', 'Convshared', 'Convseparate', 'Recurrentshared', or 'Recurrentseparate'."
         )
         raise NotImplementedError
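For the shared-weight cases, the docstring above states that the single network returns a list. A hedged sketch of how a discrete MLPshared output might be unpacked downstream, using stand-in tensors instead of a real forward pass (illustrative only, not the library's training code):

import torch
from torch import distributions

# Stand-ins for the [logits, state_value] list a shared discrete actor-critic returns.
logits = torch.zeros(1, 2)         # categorical logits over action_dim == 2 actions
state_value = torch.zeros(1, 1)    # state-value estimate from the critic head
dist = distributions.Categorical(logits=logits)
action = dist.sample()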