def init_nets(self, global_nets=None):
    '''
    Build the actor and critic networks, plus their optimizers and lr schedulers, from the net spec.

    The network that gets built depends on three things:
    1. Discrete vs. continuous action space
        - Continuous: the policy net has two heads and returns two tensors, the mean and the
          std deviation of the action policy, which is assumed to be Gaussian (Normal).
        - Discrete: the policy net has a single head and returns the logits of a categorical
          distribution over the discrete actions.
    2. Shared vs. separate actor and critic
        - Shared: the single network returns a list.
            - Continuous: 3 elements - policy mean, policy std dev, state-value.
            - Discrete: 2 elements - action logits, state-value.
        - Separate: an extra `critic_net` with output dim 1 is created with its own
          optimizer and lr scheduler.
    3. Feedforward, convolutional or recurrent
        - Feedforward and convolutional nets take a single state as input and require an
          OnPolicyReplay or OnPolicyBatchReplay memory.
        - Recurrent nets take n states as input and require the env spec
          "frame_op": "concat", "frame_op_len": seq_len

    Spec keys containing 'actor_' or 'critic_' are moved into the matching network's spec
    with that marker stripped, and removed from the other network's spec.

    @param global_nets: forwarded unchanged to net_util.set_global_nets
    '''
    assert 'shared' in self.net_spec, 'Specify "shared" for ActorCritic network in net_spec'
    self.shared = self.net_spec['shared']

    # split the common spec into an actor view and a critic view
    actor_spec = self.net_spec.copy()
    critic_spec = self.net_spec.copy()
    for key in self.net_spec:
        if 'actor_' in key:
            actor_spec[key.replace('actor_', '')] = actor_spec.pop(key)
            critic_spec.pop(key)
        if 'critic_' in key:
            critic_spec[key.replace('critic_', '')] = critic_spec.pop(key)
            actor_spec.pop(key)
    if critic_spec['use_same_optim']:
        # the critic reuses the actor spec wholesale
        critic_spec = actor_spec

    state_dim = self.body.state_dim
    # out_dim is computed with add_critic=self.shared
    actor_out_dim = net_util.get_out_dim(self.body, add_critic=self.shared)

    # main actor network
    ActorNetClass = getattr(net, actor_spec['type'])
    self.net = ActorNetClass(actor_spec, state_dim, actor_out_dim)
    self.net_names = ['net']
    if not self.shared:
        # separate critic network with a single output
        CriticNetClass = getattr(net, critic_spec['type'])
        self.critic_net = CriticNetClass(critic_spec, state_dim, 1)
        self.net_names.append('critic_net')

    # optimizer and lr scheduler for the actor (or shared) network
    self.optim = net_util.get_optim(self.net, self.net.optim_spec)
    self.lr_scheduler = net_util.get_lr_scheduler(self.optim, self.net.lr_scheduler_spec)
    if not self.shared:
        # and the same pair for the separate critic
        self.critic_optim = net_util.get_optim(self.critic_net, self.critic_net.optim_spec)
        self.critic_lr_scheduler = net_util.get_lr_scheduler(
            self.critic_optim, self.critic_net.lr_scheduler_spec)

    net_util.set_global_nets(self, global_nets)
    self.end_init_nets()
def init_nets(self, global_nets=None):
    '''
    Networks: net(actor/policy), q1_net, target_q1_net, q2_net, target_q2_net
    All networks are separate, and have the same hidden layer architectures and optim specs,
    so tuning is minimal.

    Also creates the learnable temperature `log_alpha` (with `alpha` as its detached exp),
    the target entropy, and one optimizer + lr scheduler each for the actor, both Q-networks
    and the temperature.

    @param global_nets: forwarded unchanged to net_util.set_global_nets
    '''
    self.shared = False  # SAC does not share networks
    NetClass = getattr(net, self.net_spec['type'])
    # main actor network
    self.net = NetClass(self.net_spec, self.body.state_dim, net_util.get_out_dim(self.body))
    self.net_names = ['net']
    # two critic Q-networks to mitigate positive bias in q_loss and speed up training,
    # uses q_net.py with prefix Q
    QNetClass = getattr(net, 'Q' + self.net_spec['type'])
    q_in_dim = [self.body.state_dim, self.body.action_dim]
    self.q1_net = QNetClass(self.net_spec, q_in_dim, 1)
    self.target_q1_net = QNetClass(self.net_spec, q_in_dim, 1)
    self.q2_net = QNetClass(self.net_spec, q_in_dim, 1)
    self.target_q2_net = QNetClass(self.net_spec, q_in_dim, 1)
    self.net_names += ['q1_net', 'target_q1_net', 'q2_net', 'target_q2_net']
    # start each target network from its online counterpart
    net_util.copy(self.q1_net, self.target_q1_net)
    net_util.copy(self.q2_net, self.target_q2_net)

    # temperature variable to be learned, and its target entropy
    self.log_alpha = torch.zeros(1, requires_grad=True, device=self.net.device)
    self.alpha = self.log_alpha.detach().exp()
    if self.body.is_discrete:
        self.target_entropy = -self.body.action_space.n
    else:
        # np.prod instead of np.product: the latter is a deprecated alias removed in NumPy 2.0
        self.target_entropy = -np.prod(self.body.action_space.shape)

    # init net optimizer and its lr scheduler
    self.optim = net_util.get_optim(self.net, self.net.optim_spec)
    self.lr_scheduler = net_util.get_lr_scheduler(self.optim, self.net.lr_scheduler_spec)
    self.q1_optim = net_util.get_optim(self.q1_net, self.q1_net.optim_spec)
    self.q1_lr_scheduler = net_util.get_lr_scheduler(self.q1_optim, self.q1_net.lr_scheduler_spec)
    self.q2_optim = net_util.get_optim(self.q2_net, self.q2_net.optim_spec)
    self.q2_lr_scheduler = net_util.get_lr_scheduler(self.q2_optim, self.q2_net.lr_scheduler_spec)
    # the temperature is optimized with the actor's optim and lr scheduler specs
    self.alpha_optim = net_util.get_optim(self.log_alpha, self.net.optim_spec)
    self.alpha_lr_scheduler = net_util.get_lr_scheduler(self.alpha_optim, self.net.lr_scheduler_spec)
    net_util.set_global_nets(self, global_nets)
    self.post_init_nets()
def update_lr(self):
    '''
    Multiply the learning rate stored in self.optim_param by 0.9, log the change at debug
    level, and rebuild self.optim from the updated params.
    Asserts that 'lr' is present in self.optim_param.
    '''
    assert 'lr' in self.optim_param
    params = self.optim_param
    old_lr = params['lr']
    params['lr'] = old_lr * 0.9
    message = f'Learning rate decayed from {old_lr} to {self.optim_param["lr"]}'
    logger.debug(message)
    # the optimizer is recreated so that it picks up the new lr
    self.optim = net_util.get_optim(self, self.optim_param)
def __init__(self, net_spec, in_dim, out_dim):
    '''
    net_spec:
    hid_layers: list containing dimensions of the hidden layers
    hid_layers_activation: activation function for the hidden layers
    init_fn: weight initialization function
    clip_grad_val: clip gradient norm if value is not None
    loss_spec: measure of error between model predictions and correct outputs
    optim_spec: parameters for initializing the optimizer
    lr_scheduler_spec: Pytorch optim.lr_scheduler
    update_type: method to update network weights: 'replace' or 'polyak'
    update_frequency: how many total timesteps per update
    polyak_coef: ratio of polyak weight update
    gpu: whether to train using a GPU. Note this will only work if a GPU is available,
        otherwise setting gpu=True does nothing

    An integer out_dim yields a single `model_tail`; otherwise one tail per entry of
    out_dim is stored in `model_tails`.
    '''
    nn.Module.__init__(self)
    super(MLPNet, self).__init__(net_spec, in_dim, out_dim)

    # defaults first, then whatever the spec provides on top of them
    defaults = {
        'init_fn': None,
        'clip_grad_val': None,
        'loss_spec': {'name': 'MSELoss'},
        'optim_spec': {'name': 'Adam'},
        'lr_scheduler_spec': None,
        'update_type': 'replace',
        'update_frequency': 1,
        'polyak_coef': 0.0,
        'gpu': False,
    }
    util.set_attr(self, defaults)
    spec_keys = [
        'shared',
        'hid_layers',
        'hid_layers_activation',
        'init_fn',
        'clip_grad_val',
        'loss_spec',
        'optim_spec',
        'lr_scheduler_spec',
        'update_type',
        'update_frequency',
        'polyak_coef',
        'gpu',
    ]
    util.set_attr(self, self.net_spec, spec_keys)

    layer_dims = [self.in_dim] + self.hid_layers
    self.model = net_util.build_fc_model(layer_dims, self.hid_layers_activation)
    # final layer(s) carry no activation; a single tail is kept out of a list for compute speed
    tail_in_dim = layer_dims[-1]
    if ps.is_integer(self.out_dim):
        self.model_tail = nn.Linear(tail_in_dim, self.out_dim)
    else:
        tails = [nn.Linear(tail_in_dim, tail_out_dim) for tail_out_dim in self.out_dim]
        self.model_tails = nn.ModuleList(tails)

    net_util.init_layers(self, self.init_fn)
    for submodule in self.modules():
        submodule.to(self.device)
    self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
    self.optim = net_util.get_optim(self, self.optim_spec)
    self.lr_scheduler = net_util.get_lr_scheduler(self, self.lr_scheduler_spec)
def update_lr(self):
    '''
    Ask self.lr_decay for a new learning rate and, if it differs from the current one,
    store it in self.optim_spec, log the change and rebuild self.optim.
    Asserts that 'lr' is present in self.optim_spec.
    '''
    assert 'lr' in self.optim_spec
    old_lr = self.optim_spec['lr']
    new_lr = self.lr_decay(self)
    unchanged = new_lr == old_lr
    if not unchanged:
        self.optim_spec['lr'] = new_lr
        logger.info(f'Learning rate decayed from {old_lr:.6f} to {self.optim_spec["lr"]:.6f}')
        # the optimizer is recreated so that it picks up the new lr
        self.optim = net_util.get_optim(self, self.optim_spec)
def update_lr(self):
    '''
    Refresh the learning rate from self.lr_decay.
    When the decayed value equals the current 'lr' in self.optim_spec nothing happens;
    otherwise the spec is updated, the change is logged and self.optim is rebuilt.
    Asserts that 'lr' is present in self.optim_spec.
    '''
    assert 'lr' in self.optim_spec
    spec = self.optim_spec
    old_lr = spec['lr']
    new_lr = self.lr_decay(self)
    if new_lr == old_lr:
        # no decay this time, keep the current optimizer
        return
    spec['lr'] = new_lr
    message = f'Learning rate decayed from {old_lr:.6f} to {self.optim_spec["lr"]:.6f}'
    logger.info(message)
    self.optim = net_util.get_optim(self, self.optim_spec)
def __init__(self, in_dim, hid_dim, out_dim, hid_layers_activation=None, optim_param=None, loss_param=None, clamp_grad=False, clamp_grad_val=1.0, gpu=False):
    '''
    in_dim: dimension of the inputs
    hid_dim: list containing dimensions of the hidden layers
    out_dim: dimension of the outputs
    hid_layers_activation: activation function for the hidden layers
    optim_param: parameters for initializing the optimizer
    loss_param: measure of error between model predictions and correct outputs
    clamp_grad: whether to clamp the gradient
    clamp_grad_val: stored on the net alongside clamp_grad
    gpu: whether to train using a GPU. Note this will only work if a GPU is available,
        otherwise setting gpu=True does nothing
    @example:
    net = MLPNet(
            1000,
            [512, 256, 128],
            10,
            hid_layers_activation='relu',
            optim_param={'name': 'Adam'},
            loss_param={'name': 'mse_loss'},
            clamp_grad=True,
            clamp_grad_val=2.0,
            gpu=True)
    '''
    super(MLPNet, self).__init__()
    self.in_dim = in_dim
    self.out_dim = out_dim

    # one Linear + activation pair per hidden layer, chained input -> hidden sizes
    sizes = [in_dim, *hid_dim]
    layers = []
    for fan_in, fan_out in zip(sizes[:-1], sizes[1:]):
        layers.append(nn.Linear(fan_in, fan_out))
        layers.append(net_util.get_activation_fn(hid_layers_activation))
    # output layer has no activation; with no hidden layers it connects straight to the input
    layers.append(nn.Linear(sizes[-1], out_dim))
    self.layers = layers
    self.model = nn.Sequential(*self.layers)
    self.init_params()
    if torch.cuda.is_available() and gpu:
        self.model.cuda()

    # remaining training-related attributes
    self.params = list(self.model.parameters())
    self.optim_param = optim_param
    self.optim = net_util.get_optim(self, self.optim_param)
    self.loss_fn = net_util.get_loss_fn(self, loss_param)
    self.clamp_grad = clamp_grad
    self.clamp_grad_val = clamp_grad_val
def init_nets(self, global_nets=None):
    '''
    Networks: net(actor/policy), critic_net (value), target_critic_net, q1_net, q2_net
    All networks are separate, and have the same hidden layer architectures and optim specs,
    so tuning is minimal.

    An optimizer and lr scheduler are created for net, critic_net, q1_net and q2_net.

    @param global_nets: forwarded unchanged to net_util.set_global_nets
    '''
    self.shared = False  # SAC does not share networks
    state_dim = self.body.state_dim
    policy_out_dim = net_util.get_out_dim(self.body)
    NetClass = getattr(net, self.net_spec['type'])
    spec = self.net_spec

    # main actor network
    self.net = NetClass(spec, state_dim, policy_out_dim)
    self.net_names = ['net']

    # critic network and its target network, both with a single output
    value_dim = 1
    self.critic_net = NetClass(spec, state_dim, value_dim)
    self.target_critic_net = NetClass(spec, state_dim, value_dim)
    self.net_names += ['critic_net', 'target_critic_net']

    # two Q-networks to mitigate positive bias in q_loss and speed up training
    # NOTE the Q input is state and action concatenated, for now
    state_action_dim = state_dim + self.body.action_dim
    self.q1_net = NetClass(spec, state_action_dim, value_dim)
    self.q2_net = NetClass(spec, state_action_dim, value_dim)
    self.net_names += ['q1_net', 'q2_net']

    # optimizer and lr scheduler per trained network
    self.optim = net_util.get_optim(self.net, self.net.optim_spec)
    self.lr_scheduler = net_util.get_lr_scheduler(self.optim, self.net.lr_scheduler_spec)
    self.critic_optim = net_util.get_optim(self.critic_net, self.critic_net.optim_spec)
    self.critic_lr_scheduler = net_util.get_lr_scheduler(
        self.critic_optim, self.critic_net.lr_scheduler_spec)
    self.q1_optim = net_util.get_optim(self.q1_net, self.q1_net.optim_spec)
    self.q1_lr_scheduler = net_util.get_lr_scheduler(self.q1_optim, self.q1_net.lr_scheduler_spec)
    self.q2_optim = net_util.get_optim(self.q2_net, self.q2_net.optim_spec)
    self.q2_lr_scheduler = net_util.get_lr_scheduler(self.q2_optim, self.q2_net.lr_scheduler_spec)

    net_util.set_global_nets(self, global_nets)
    self.post_init_nets()
def __init__(self, net_spec, algorithm, in_dim, out_dim):
    '''
    Build an MLP body followed by two linear output layers: `v` (state value, 1 output)
    and `adv` (action dependent raw advantage, out_dim outputs).
    Asserts that net_util.is_q_learning(algorithm) holds.
    '''
    nn.Module.__init__(self)
    Net.__init__(self, net_spec, algorithm, in_dim, out_dim)

    # defaults first, then whatever the spec provides on top of them
    defaults = {
        'clip_grad': False,
        'clip_grad_val': 1.0,
        'loss_spec': {'name': 'MSELoss'},
        'optim_spec': {'name': 'Adam'},
        'lr_decay': 'no_decay',
        'update_type': 'replace',
        'update_frequency': 1,
        'polyak_coef': 0.0,
        'gpu': False,
    }
    util.set_attr(self, defaults)
    spec_keys = [
        'hid_layers',
        'hid_layers_activation',
        'clip_grad',
        'clip_grad_val',
        'loss_spec',
        'optim_spec',
        'lr_decay',
        'lr_decay_frequency',
        'lr_decay_min_timestep',
        'lr_anneal_timestep',
        'update_type',
        'update_frequency',
        'polyak_coef',
        'gpu',
    ]
    util.set_attr(self, self.net_spec, spec_keys)

    # Guard against inappropriate algorithms and environments
    assert net_util.is_q_learning(algorithm)

    # model body
    layer_dims = [self.in_dim] + self.hid_layers
    self.model_body = net_util.build_sequential(layer_dims, self.hid_layers_activation)
    # output layers
    body_out_dim = layer_dims[-1]
    self.v = nn.Linear(body_out_dim, 1)  # state value
    self.adv = nn.Linear(body_out_dim, out_dim)  # action dependent raw advantage

    net_util.init_layers(self.modules())
    if torch.cuda.is_available() and self.gpu:
        for submodule in self.modules():
            submodule.cuda()
    self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
    self.optim = net_util.get_optim(self, self.optim_spec)
    # swap the decay name from the spec for the net_util function of that name
    self.lr_decay = getattr(net_util, self.lr_decay)
def init_nets(self, global_nets=None):
    '''
    Initialize the neural network used to learn the Q function from the spec,
    together with its optimizer and lr scheduler.

    @param global_nets: forwarded unchanged to net_util.set_global_nets
    '''
    spec = self.net_spec
    if 'Recurrent' in spec['type']:
        # NOTE(review): this writes seq_len back onto itself, so its only visible effect is a
        # KeyError when a recurrent spec lacks 'seq_len' - confirm the intended source value
        spec.update(seq_len=spec['seq_len'])

    NetClass = getattr(net, self.net_spec['type'])
    state_dim = self.body.state_dim
    action_out_dim = net_util.get_out_dim(self.body)
    self.net = NetClass(self.net_spec, state_dim, action_out_dim)
    self.net_names = ['net']

    # init net optimizer and its lr scheduler
    self.optim = net_util.get_optim(self.net, self.net.optim_spec)
    self.lr_scheduler = net_util.get_lr_scheduler(self.optim, self.net.lr_scheduler_spec)
    net_util.set_global_nets(self, global_nets)
    self.end_init_nets()
def __init__(self, net_spec, algorithm, in_dim, out_dim):
    '''
    Build an MLP body followed by one linear tail per entry of out_dim
    (multi-tail output layer, e.g. mean and std).
    '''
    nn.Module.__init__(self)
    Net.__init__(self, net_spec, algorithm, in_dim, out_dim)

    # defaults first, then whatever the spec provides on top of them
    defaults = {
        'clip_grad': False,
        'clip_grad_val': 1.0,
        'loss_spec': {'name': 'MSELoss'},
        'optim_spec': {'name': 'Adam'},
        'lr_decay': 'no_decay',
        'update_type': 'replace',
        'update_frequency': 1,
        'polyak_coef': 0.0,
        'gpu': False,
    }
    util.set_attr(self, defaults)
    spec_keys = [
        'hid_layers',
        'hid_layers_activation',
        'clip_grad',
        'clip_grad_val',
        'loss_spec',
        'optim_spec',
        'lr_decay',
        'lr_decay_frequency',
        'lr_decay_min_timestep',
        'lr_anneal_timestep',
        'update_type',
        'update_frequency',
        'polyak_coef',
        'gpu',
    ]
    util.set_attr(self, self.net_spec, spec_keys)

    layer_dims = [self.in_dim] + self.hid_layers
    self.model_body = net_util.build_sequential(layer_dims, self.hid_layers_activation)
    # multi-tail output layer with mean and std
    body_out_dim = layer_dims[-1]
    tails = [nn.Linear(body_out_dim, tail_out_dim) for tail_out_dim in out_dim]
    self.model_tails = nn.ModuleList(tails)

    net_util.init_layers(self.modules())
    if torch.cuda.is_available() and self.gpu:
        for submodule in self.modules():
            submodule.cuda()
    self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
    self.optim = net_util.get_optim(self, self.optim_spec)
    # swap the decay name from the spec for the net_util function of that name
    self.lr_decay = getattr(net_util, self.lr_decay)
def __init__(self, net_spec, in_dim, out_dim):
    '''
    Build an MLP body followed by two linear output layers: `v` (state value, 1 output)
    and `adv` (action dependent raw advantage, out_dim outputs), then set up the
    loss function, optimizer and lr scheduler from the spec.
    '''
    nn.Module.__init__(self)
    Net.__init__(self, net_spec, in_dim, out_dim)

    # defaults first, then whatever the spec provides on top of them
    defaults = {
        'init_fn': None,
        'clip_grad_val': None,
        'loss_spec': {'name': 'MSELoss'},
        'optim_spec': {'name': 'Adam'},
        'lr_scheduler_spec': None,
        'update_type': 'replace',
        'update_frequency': 1,
        'polyak_coef': 0.0,
        'gpu': False,
    }
    util.set_attr(self, defaults)
    spec_keys = [
        'shared',
        'hid_layers',
        'hid_layers_activation',
        'init_fn',
        'clip_grad_val',
        'loss_spec',
        'optim_spec',
        'lr_scheduler_spec',
        'update_type',
        'update_frequency',
        'polyak_coef',
        'gpu',
    ]
    util.set_attr(self, self.net_spec, spec_keys)

    # model body
    layer_dims = [self.in_dim] + self.hid_layers
    self.model_body = net_util.build_sequential(layer_dims, self.hid_layers_activation)
    # output layers
    body_out_dim = layer_dims[-1]
    self.v = nn.Linear(body_out_dim, 1)  # state value
    self.adv = nn.Linear(body_out_dim, out_dim)  # action dependent raw advantage

    net_util.init_layers(self, self.init_fn)
    for submodule in self.modules():
        submodule.to(self.device)
    self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
    self.optim = net_util.get_optim(self, self.optim_spec)
    self.lr_scheduler = net_util.get_lr_scheduler(self, self.lr_scheduler_spec)
def init_nets(self, global_nets=None):
    '''
    Initialize the neural network used to learn the policy function from the spec,
    together with its optimizer and lr scheduler.

    An appropriate net for a discrete or continuous action space is selected automatically
    if the setting is of the form 'MLPNet'. Otherwise the correct type of network is assumed
    to be specified in the spec.
    - Continuous action spaces: the net has two heads and returns two tensors, the mean and
      the std deviation of the action policy, assumed to be a Gaussian (Normal) distribution.
    - Discrete action spaces: the net has a single head and returns the logits for a
      categorical probability distribution over the discrete actions.

    @param global_nets: forwarded unchanged to net_util.set_global_nets
    '''
    NetClass = getattr(net, self.net_spec['type'])
    state_dim = self.body.state_dim
    policy_out_dim = net_util.get_out_dim(self.body)
    self.net = NetClass(self.net_spec, state_dim, policy_out_dim)
    self.net_names = ['net']

    # init net optimizer and its lr scheduler
    self.optim = net_util.get_optim(self.net, self.net.optim_spec)
    self.lr_scheduler = net_util.get_lr_scheduler(self.optim, self.net.lr_scheduler_spec)
    net_util.set_global_nets(self, global_nets)
    self.end_init_nets()
def init_nets(self, global_nets=None):
    '''
    Initialize the neural network used to learn the Q function from the spec,
    together with its optimizer and lr scheduler.
    For the 'VanillaDQN' algorithm the net spec must not contain any of the network update
    keys ('update_type', 'update_frequency', 'polyak_coef').

    @param global_nets: forwarded unchanged to net_util.set_global_nets
    '''
    if self.algorithm_spec['name'] == 'VanillaDQN':
        update_keys = ['update_type', 'update_frequency', 'polyak_coef']
        no_update_keys = all(key not in self.net_spec for key in update_keys)
        assert no_update_keys, 'Network update not available for VanillaDQN; use DQN.'

    NetClass = getattr(net, self.net_spec['type'])
    state_dim = self.body.state_dim
    action_out_dim = net_util.get_out_dim(self.body)
    self.net = NetClass(self.net_spec, state_dim, action_out_dim)
    self.net_names = ['net']

    # init net optimizer and its lr scheduler
    self.optim = net_util.get_optim(self.net, self.net.optim_spec)
    self.lr_scheduler = net_util.get_lr_scheduler(self.optim, self.net.lr_scheduler_spec)
    net_util.set_global_nets(self, global_nets)
    self.post_init_nets()
def init_nets(self, global_nets=None):
    '''
    Initialize networks: `net` and `target_net` of the same class and dimensions, plus the
    optimizer and lr scheduler for `net`. After post-init, both `online_net` and `eval_net`
    are set to `target_net`.
    For the 'DQNBase' algorithm the net spec must not contain any of the network update
    keys ('update_type', 'update_frequency', 'polyak_coef').

    @param global_nets: forwarded unchanged to net_util.set_global_nets
    '''
    if self.algorithm_spec['name'] == 'DQNBase':
        update_keys = ['update_type', 'update_frequency', 'polyak_coef']
        no_update_keys = all(key not in self.net_spec for key in update_keys)
        assert no_update_keys, 'Network update not available for DQNBase; use DQN.'

    NetClass = getattr(net, self.net_spec['type'])
    state_dim = self.body.state_dim
    action_out_dim = net_util.get_out_dim(self.body)
    self.net = NetClass(self.net_spec, state_dim, action_out_dim)
    self.target_net = NetClass(self.net_spec, state_dim, action_out_dim)
    self.net_names = ['net', 'target_net']

    # init net optimizer and its lr scheduler
    self.optim = net_util.get_optim(self.net, self.net.optim_spec)
    self.lr_scheduler = net_util.get_lr_scheduler(self.optim, self.net.lr_scheduler_spec)
    net_util.set_global_nets(self, global_nets)
    self.post_init_nets()

    # both aliases point at the target network
    self.online_net = self.target_net
    self.eval_net = self.target_net
def __init__(self, net_spec, in_dim, out_dim):
    '''
    net_spec:
    hid_layers: list with tuple consisting of two elements. (conv_hid, flat_hid)
                Note: tuple must contain two elements, use empty list if no such layers.
        1. conv_hid: list containing dimensions of the convolutional hidden layers.
            Assumed to all come before the flat layers.
            Note: a convolutional layer should specify the in_channel, out_channels, kernel_size,
            stride (of kernel steps), padding, and dilation (spacing between kernel points)
            E.g. [3, 16, (5, 5), 1, 0, (2, 2)]
            For more details, see http://pytorch.org/docs/master/nn.html#conv2d and
            https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
        2. flat_hid: list of dense layers following the convolutional layers
    hid_layers_activation: activation function for the hidden layers
    init_fn: weight initialization function
    batch_norm: whether to add batch normalization after each convolutional layer,
        excluding the input layer.
    clip_grad: whether to clip the gradient
    clip_grad_val: the clip value
    loss_spec: measure of error between model predictions and correct outputs
    optim_spec: parameters for initializing the optimizer
    lr_decay: function to decay learning rate
    lr_decay_frequency: how many total timesteps per decay
    lr_decay_min_timestep: minimum amount of total timesteps before starting decay
    lr_anneal_timestep: timestep to anneal lr decay
    update_type: method to update network weights: 'replace' or 'polyak'
    update_frequency: how many total timesteps per update
    polyak_coef: ratio of polyak weight update
    gpu: whether to train using a GPU. Note this will only work if a GPU is available,
        otherwise setting gpu=True does nothing
    '''
    # OpenAI gym provides images as W x H x C, pyTorch expects C x W x H
    in_dim = np.roll(in_dim, 1)
    # use generic multi-output for Convnet
    out_dim = np.reshape(out_dim, -1).tolist()
    nn.Module.__init__(self)
    super(ConvNet, self).__init__(net_spec, in_dim, out_dim)

    # defaults first, then whatever the spec provides on top of them
    defaults = {
        'init_fn': 'xavier_uniform_',
        'batch_norm': True,
        'clip_grad': False,
        'clip_grad_val': 1.0,
        'loss_spec': {'name': 'MSELoss'},
        'optim_spec': {'name': 'Adam'},
        'lr_decay': 'no_decay',
        'update_type': 'replace',
        'update_frequency': 1,
        'polyak_coef': 0.0,
        'gpu': False,
    }
    util.set_attr(self, defaults)
    spec_keys = [
        'hid_layers',
        'hid_layers_activation',
        'init_fn',
        'batch_norm',
        'clip_grad',
        'clip_grad_val',
        'loss_spec',
        'optim_spec',
        'lr_decay',
        'lr_decay_frequency',
        'lr_decay_min_timestep',
        'lr_anneal_timestep',
        'update_type',
        'update_frequency',
        'polyak_coef',
        'gpu',
    ]
    util.set_attr(self, self.net_spec, spec_keys)

    self.conv_hid_layers = self.hid_layers[0]
    self.dense_hid_layers = self.hid_layers[1]
    # conv layer
    self.conv_model = self.build_conv_layers(self.conv_hid_layers)
    # fc layer from flattened conv
    self.dense_model = self.build_dense_layers(self.dense_hid_layers)
    # tails are fed by the last dense layer, or directly by the conv output if there is none
    if len(self.dense_hid_layers) > 0:
        tail_in_dim = self.dense_hid_layers[-1]
    else:
        tail_in_dim = self.conv_out_dim
    tails = [nn.Linear(tail_in_dim, tail_out_dim) for tail_out_dim in self.out_dim]
    self.model_tails = nn.ModuleList(tails)

    net_util.init_layers(self, self.init_fn)
    for submodule in self.modules():
        submodule.to(self.device)
    self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
    self.optim = net_util.get_optim(self, self.optim_spec)
    # swap the decay name from the spec for the net_util function of that name
    self.lr_decay = getattr(net_util, self.lr_decay)
def __init__(self, net_spec, algorithm, in_dim, out_dim):
    '''
    Multi state processing heads, single shared body, and multi action tails.
    There is one state and action head per body/environment
    Example:

         env 1 state       env 2 state
        _______|______    _______|______
       |    head 1    |  |    head 2    |
       |______________|  |______________|
               |                  |
               |__________________|
        ________________|_______________
       |          Shared body           |
       |________________________________|
                        |
                ________|_______
               |                |
        _______|______    ______|_______
       |    tail 1    |  |    tail 2    |
       |______________|  |______________|
               |                |
          env 1 action     env 2 action

    hid_layers must have exactly 3 parts, [*heads], [body], [*tails], and both in_dim and
    out_dim must be lists. A single head (or tail) spec is repeated once per entry of
    in_dim (or out_dim).
    '''
    nn.Module.__init__(self)
    super(HydraMLPNet, self).__init__(net_spec, algorithm, in_dim, out_dim)

    # defaults first, then whatever the spec provides on top of them
    defaults = {
        'clip_grad': False,
        'clip_grad_val': 1.0,
        'loss_spec': {'name': 'MSELoss'},
        'optim_spec': {'name': 'Adam'},
        'lr_decay': 'no_decay',
        'update_type': 'replace',
        'update_frequency': 1,
        'polyak_coef': 0.0,
        'gpu': False,
    }
    util.set_attr(self, defaults)
    spec_keys = [
        'hid_layers',
        'hid_layers_activation',
        'clip_grad',
        'clip_grad_val',
        'loss_spec',
        'optim_spec',
        'lr_decay',
        'lr_decay_frequency',
        'lr_decay_min_timestep',
        'update_type',
        'update_frequency',
        'polyak_coef',
        'gpu',
    ]
    util.set_attr(self, self.net_spec, spec_keys)

    assert len(self.hid_layers) == 3, (
        'Your hidden layers must specify [*heads], [body], [*tails]. '
        'If not, use MLPHeterogenousTails')
    assert isinstance(self.in_dim, list), 'Hydra network needs in_dim as list'
    assert isinstance(self.out_dim, list), 'Hydra network needs out_dim as list'

    self.head_hid_layers = self.hid_layers[0]
    self.body_hid_layers = self.hid_layers[1]
    self.tail_hid_layers = self.hid_layers[2]
    # a lone head/tail spec is replicated for every input/output
    if len(self.head_hid_layers) == 1:
        self.head_hid_layers = self.head_hid_layers * len(self.in_dim)
    if len(self.tail_hid_layers) == 1:
        self.tail_hid_layers = self.tail_hid_layers * len(self.out_dim)

    self.model_heads = self.build_model_heads(in_dim)
    # the body input is the sum of the last layer sizes of all heads
    head_last_dims = [head_layers[-1] for head_layers in self.head_hid_layers]
    heads_out_dim = np.sum(head_last_dims)
    body_dims = [heads_out_dim] + self.body_hid_layers
    self.model_body = net_util.build_sequential(body_dims, self.hid_layers_activation)
    self.model_tails = self.build_model_tails(out_dim)

    net_util.init_layers(self.modules())
    if torch.cuda.is_available() and self.gpu:
        for submodule in self.modules():
            submodule.cuda()
    self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
    self.optim = net_util.get_optim(self, self.optim_spec)
    # swap the decay name from the spec for the net_util function of that name
    self.lr_decay = getattr(net_util, self.lr_decay)
def __init__(self, net_spec, algorithm, in_dim, out_dim):
    '''
    net_spec:
    hid_layers: list containing dimensions of the hidden layers
    hid_layers_activation: activation function for the hidden layers
    clip_grad: whether to clip the gradient
    clip_grad_val: the clip value
    loss_spec: measure of error between model predictions and correct outputs
    optim_spec: parameters for initializing the optimizer
    lr_decay: function to decay learning rate
    lr_decay_frequency: how many total timesteps per decay
    lr_decay_min_timestep: minimum amount of total timesteps before starting decay
    update_type: method to update network weights: 'replace' or 'polyak'
    update_frequency: how many total timesteps per update
    polyak_coef: ratio of polyak weight update
    gpu: whether to train using a GPU. Note this will only work if a GPU is available,
        otherwise setting gpu=True does nothing

    e.g. net_spec
    "net": {
        "type": "MLPNet",
        "hid_layers": [32],
        "hid_layers_activation": "relu",
        "clip_grad": false,
        "clip_grad_val": 1.0,
        "loss_spec": {
            "name": "MSELoss"
        },
        "optim_spec": {
            "name": "Adam",
            "lr": 0.02
        },
        "lr_decay": "rate_decay",
        "lr_decay_frequency": 500,
        "lr_decay_min_timestep": 1000,
        "update_type": "replace",
        "update_frequency": 1,
        "polyak_coef": 0.9,
        "gpu": true
    }
    '''
    nn.Module.__init__(self)
    super(MLPNet, self).__init__(net_spec, algorithm, in_dim, out_dim)

    # defaults first, then whatever the spec provides on top of them
    defaults = {
        'clip_grad': False,
        'clip_grad_val': 1.0,
        'loss_spec': {'name': 'MSELoss'},
        'optim_spec': {'name': 'Adam'},
        'lr_decay': 'no_decay',
        'update_type': 'replace',
        'update_frequency': 1,
        'polyak_coef': 0.0,
        'gpu': False,
    }
    util.set_attr(self, defaults)
    spec_keys = [
        'hid_layers',
        'hid_layers_activation',
        'clip_grad',
        'clip_grad_val',
        'loss_spec',
        'optim_spec',
        'lr_decay',
        'lr_decay_frequency',
        'lr_decay_min_timestep',
        'update_type',
        'update_frequency',
        'polyak_coef',
        'gpu',
    ]
    util.set_attr(self, self.net_spec, spec_keys)

    layer_dims = [self.in_dim] + self.hid_layers
    self.model = net_util.build_sequential(layer_dims, self.hid_layers_activation)
    # add last layer with no activation, named by its position in the sequence
    output_layer = nn.Linear(layer_dims[-1], self.out_dim)
    self.model.add_module(str(len(self.model)), output_layer)

    net_util.init_layers(self.modules())
    if torch.cuda.is_available() and self.gpu:
        for submodule in self.modules():
            submodule.cuda()
    self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
    self.optim = net_util.get_optim(self, self.optim_spec)
    # swap the decay name from the spec for the net_util function of that name
    self.lr_decay = getattr(net_util, self.lr_decay)
def __init__(self, net_spec, algorithm, in_dim, out_dim):
    '''
    net_spec:
    hid_layers: list with tuple consisting of two elements. (conv_hid, flat_hid)
                Note: tuple must contain two elements, use empty list if no such layers.
        1. conv_hid: list containing dimensions of the convolutional hidden layers.
            Assumed to all come before the flat layers.
            Note: a convolutional layer should specify the in_channel, out_channels, kernel_size,
            stride (of kernel steps), padding, and dilation (spacing between kernel points)
            E.g. [3, 16, (5, 5), 1, 0, (2, 2)]
            For more details, see http://pytorch.org/docs/master/nn.html#conv2d and
            https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
        2. flat_hid: list of dense layers following the convolutional layers
    hid_layers_activation: activation function for the hidden layers
    batch_norm: whether to add batch normalization after each convolutional layer,
        excluding the input layer.
    clip_grad: whether to clip the gradient
    clip_grad_val: the clip value
    loss_spec: measure of error between model predictions and correct outputs
    optim_spec: parameters for initializing the optimizer
    lr_decay: function to decay learning rate
    lr_decay_frequency: how many total timesteps per decay
    lr_decay_min_timestep: minimum amount of total timesteps before starting decay
    lr_anneal_timestep: timestep to anneal lr decay
    update_type: method to update network weights: 'replace' or 'polyak'
    update_frequency: how many total timesteps per update
    polyak_coef: ratio of polyak weight update
    gpu: whether to train using a GPU. Note this will only work if a GPU is available,
        otherwise setting gpu=True does nothing
    '''
    # OpenAI gym provides images as W x H x C, pyTorch expects C x W x H
    in_dim = np.roll(in_dim, 1)
    # use generic multi-output for Convnet
    out_dim = np.reshape(out_dim, -1).tolist()
    nn.Module.__init__(self)
    super(ConvNet, self).__init__(net_spec, algorithm, in_dim, out_dim)

    # defaults first, then whatever the spec provides on top of them
    defaults = {
        'batch_norm': True,
        'clip_grad': False,
        'clip_grad_val': 1.0,
        'loss_spec': {'name': 'MSELoss'},
        'optim_spec': {'name': 'Adam'},
        'lr_decay': 'no_decay',
        'update_type': 'replace',
        'update_frequency': 1,
        'polyak_coef': 0.0,
        'gpu': False,
    }
    util.set_attr(self, defaults)
    spec_keys = [
        'hid_layers',
        'hid_layers_activation',
        'batch_norm',
        'clip_grad',
        'clip_grad_val',
        'loss_spec',
        'optim_spec',
        'lr_decay',
        'lr_decay_frequency',
        'lr_decay_min_timestep',
        'lr_anneal_timestep',
        'update_type',
        'update_frequency',
        'polyak_coef',
        'gpu',
    ]
    util.set_attr(self, self.net_spec, spec_keys)

    self.conv_hid_layers = self.hid_layers[0]
    self.dense_hid_layers = self.hid_layers[1]
    # conv layer
    self.conv_model = self.build_conv_layers(self.conv_hid_layers)
    # fc layer from flattened conv
    self.dense_model = self.build_dense_layers(self.dense_hid_layers)
    # tails are fed by the last dense layer, or directly by the conv output if there is none
    if len(self.dense_hid_layers) > 0:
        tail_in_dim = self.dense_hid_layers[-1]
    else:
        tail_in_dim = self.conv_out_dim
    tails = [nn.Linear(tail_in_dim, tail_out_dim) for tail_out_dim in self.out_dim]
    self.model_tails = nn.ModuleList(tails)

    net_util.init_layers(self.modules())
    if torch.cuda.is_available() and self.gpu:
        for submodule in self.modules():
            submodule.cuda()
    self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
    self.optim = net_util.get_optim(self, self.optim_spec)
    # swap the decay name from the spec for the net_util function of that name
    self.lr_decay = getattr(net_util, self.lr_decay)
def __init__(self, net_spec, in_dim, out_dim):
    '''
    net_spec:
    hid_layers: list containing dimensions of the hidden layers
    hid_layers_activation: activation function for the hidden layers
    init_fn: weight initialization function
    clip_grad: whether to clip the gradient
    clip_grad_val: the clip value
    loss_spec: measure of error between model predictions and correct outputs
    optim_spec: parameters for initializing the optimizer
    lr_decay: function to decay learning rate
    lr_decay_frequency: how many total timesteps per decay
    lr_decay_min_timestep: minimum amount of total timesteps before starting decay
    lr_anneal_timestep: timestep to anneal lr decay
    update_type: method to update network weights: 'replace' or 'polyak'
    update_frequency: how many total timesteps per update
    polyak_coef: ratio of polyak weight update
    gpu: whether to train using a GPU. Note this will only work if a GPU is available,
        otherwise setting gpu=True does nothing

    An integer out_dim appends the output layer to `model`; otherwise one tail per entry of
    out_dim is stored in `model_tails`, separate from `model`.
    '''
    nn.Module.__init__(self)
    super(MLPNet, self).__init__(net_spec, in_dim, out_dim)

    # defaults first, then whatever the spec provides on top of them
    defaults = {
        'init_fn': 'xavier_uniform_',
        'clip_grad': False,
        'clip_grad_val': 1.0,
        'loss_spec': {'name': 'MSELoss'},
        'optim_spec': {'name': 'Adam'},
        'lr_decay': 'no_decay',
        'update_type': 'replace',
        'update_frequency': 1,
        'polyak_coef': 0.0,
        'gpu': False,
    }
    util.set_attr(self, defaults)
    spec_keys = [
        'separate',
        'hid_layers',
        'hid_layers_activation',
        'init_fn',
        'clip_grad',
        'clip_grad_val',
        'loss_spec',
        'optim_spec',
        'lr_decay',
        'lr_decay_frequency',
        'lr_decay_min_timestep',
        'lr_anneal_timestep',
        'update_type',
        'update_frequency',
        'polyak_coef',
        'gpu',
    ]
    util.set_attr(self, self.net_spec, spec_keys)

    layer_dims = [self.in_dim] + self.hid_layers
    self.model = net_util.build_sequential(layer_dims, self.hid_layers_activation)
    # add last layer with no activation
    last_hid_dim = layer_dims[-1]
    if ps.is_integer(self.out_dim):
        output_layer = nn.Linear(last_hid_dim, self.out_dim)
        self.model.add_module(str(len(self.model)), output_layer)
    else:
        # if more than 1 output, add last layer as tails separate from main model
        tails = [nn.Linear(last_hid_dim, tail_out_dim) for tail_out_dim in self.out_dim]
        self.model_tails = nn.ModuleList(tails)

    net_util.init_layers(self, self.init_fn)
    for submodule in self.modules():
        submodule.to(self.device)
    self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
    self.optim = net_util.get_optim(self, self.optim_spec)
    # swap the decay name from the spec for the net_util function of that name
    self.lr_decay = getattr(net_util, self.lr_decay)
"lr_scheduler_spec": { "name": "StepLR", "step_size": 30, "gamma": 0.1 }, "update_type": "replace", "update_frequency": 1, "polyak_coef": 0.9, "gpu": True } in_dim = 10 out_dim = 3 batch_size = 16 net = MLPNet(net_spec, in_dim, out_dim) # init net optimizer and its lr scheduler optim = net_util.get_optim(net, net.optim_spec) lr_scheduler = net_util.get_lr_scheduler(optim, net.lr_scheduler_spec) x = torch.rand((batch_size, in_dim)) def test_init(): net = MLPNet(net_spec, in_dim, out_dim) assert isinstance(net, nn.Module) assert hasattr(net, 'model') assert hasattr(net, 'model_tail') assert not hasattr(net, 'model_tails') def test_forward(): y = net.forward(x) assert y.shape == (batch_size, out_dim)
def __init__(self, net_spec, algorithm, in_dim, out_dim):
    '''
    Construct a recurrent (GRU) network from net_spec.

    net_spec:
    hid_layers: list containing dimensions of the hidden layers of the MLP (if desired)
        which transforms the state space before the recurrent layer
    hid_layers_activation: activation function for the state_proc hidden layers
    rnn_hidden_size: rnn hidden_size
    rnn_num_layers: number of recurrent layers
    seq_len: length of the history being passed to the net
    clip_grad: whether to clip the gradient
    clip_grad_val: the clip value
    loss_spec: measure of error between model predictions and correct outputs
    optim_spec: parameters for initializing the optimizer
    lr_decay: function to decay learning rate
    lr_decay_frequency: how many total timesteps per decay
    lr_decay_min_timestep: minimum amount of total timesteps before starting decay
    lr_anneal_timestep: timestep to anneal lr decay
    update_type: method to update network weights: 'replace' or 'polyak'
    update_frequency: how many total timesteps per update
    polyak_coef: ratio of polyak weight update
    gpu: whether to train using a GPU. Note this will only work if a GPU is available,
        otherwise setting gpu=True does nothing
    '''
    # use generic multi-output for RNN: flatten out_dim into a plain list
    out_dim = np.reshape(out_dim, -1).tolist()
    nn.Module.__init__(self)
    super(RecurrentNet, self).__init__(net_spec, algorithm, in_dim, out_dim)

    # defaults are set first; values from net_spec are set afterwards
    defaults = {
        'rnn_num_layers': 1,
        'clip_grad': False,
        'clip_grad_val': 1.0,
        'loss_spec': {'name': 'MSELoss'},
        'optim_spec': {'name': 'Adam'},
        'lr_decay': 'no_decay',
        'update_type': 'replace',
        'update_frequency': 1,
        'polyak_coef': 0.0,
        'gpu': False,
    }
    util.set_attr(self, defaults)
    spec_keys = [
        'hid_layers',
        'hid_layers_activation',
        'rnn_hidden_size',
        'rnn_num_layers',
        'seq_len',
        'clip_grad',
        'clip_grad_val',
        'loss_spec',
        'optim_spec',
        'lr_decay',
        'lr_decay_frequency',
        'lr_decay_min_timestep',
        'lr_anneal_timestep',
        'update_type',
        'update_frequency',
        'polyak_coef',
        'gpu',
    ]
    util.set_attr(self, self.net_spec, spec_keys)

    # state processing model
    state_proc_dims = [self.in_dim] + self.hid_layers
    self.state_proc_model = net_util.build_sequential(
        state_proc_dims, self.hid_layers_activation)

    # RNN model, fed by the last state processing layer
    self.rnn_input_dim = state_proc_dims[-1]
    gru_kwargs = {
        'input_size': self.rnn_input_dim,
        'hidden_size': self.rnn_hidden_size,
        'num_layers': self.rnn_num_layers,
        'batch_first': True,
    }
    self.rnn_model = nn.GRU(**gru_kwargs)

    # tails: one linear layer per output dimension
    tails = []
    for out_d in self.out_dim:
        tails.append(nn.Linear(self.rnn_hidden_size, out_d))
    self.model_tails = nn.ModuleList(tails)

    net_util.init_layers(self.modules())
    if torch.cuda.is_available() and self.gpu:
        for submodule in self.modules():
            submodule.cuda()

    self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
    self.optim = net_util.get_optim(self, self.optim_spec)
    # swap the lr_decay name for the net_util attribute carrying that name
    self.lr_decay = getattr(net_util, self.lr_decay)
def __init__(self, net_spec, in_dim, out_dim):
    '''
    Construct a recurrent network (optional fc body, RNN, output tail(s)) from net_spec.

    net_spec:
    cell_type: any of RNN, LSTM, GRU
    fc_hid_layers: list of fc layers preceding the RNN layers
    hid_layers_activation: activation function for the fc hidden layers
    out_layer_activation: activation function for the output layer, same shape as out_dim
    rnn_hidden_size: rnn hidden_size
    rnn_num_layers: number of recurrent layers
    bidirectional: if RNN should be bidirectional
    seq_len: length of the history being passed to the net
    init_fn: weight initialization function
    clip_grad_val: clip gradient norm if value is not None
    loss_spec: measure of error between model predictions and correct outputs
    optim_spec: parameters for initializing the optimizer
    lr_scheduler_spec: Pytorch optim.lr_scheduler
    update_type: method to update network weights: 'replace' or 'polyak'
    update_frequency: how many total timesteps per update
    polyak_coef: ratio of polyak weight update
    gpu: whether to train using a GPU. Note this will only work if a GPU is available,
        otherwise setting gpu=True does nothing
    '''
    nn.Module.__init__(self)
    super(RecurrentNet, self).__init__(net_spec, in_dim, out_dim)

    # defaults are set first; values from net_spec are set afterwards
    defaults = {
        'out_layer_activation': None,
        'cell_type': 'GRU',
        'rnn_num_layers': 1,
        'bidirectional': False,
        'init_fn': None,
        'clip_grad_val': None,
        'loss_spec': {'name': 'MSELoss'},
        'optim_spec': {'name': 'Adam'},
        'lr_scheduler_spec': None,
        'update_type': 'replace',
        'update_frequency': 1,
        'polyak_coef': 0.0,
        'gpu': False,
    }
    util.set_attr(self, defaults)
    spec_keys = [
        'cell_type',
        'fc_hid_layers',
        'hid_layers_activation',
        'out_layer_activation',
        'rnn_hidden_size',
        'rnn_num_layers',
        'bidirectional',
        'seq_len',
        'init_fn',
        'clip_grad_val',
        'loss_spec',
        'optim_spec',
        'lr_scheduler_spec',
        'update_type',
        'update_frequency',
        'polyak_coef',
        'gpu',
    ]
    util.set_attr(self, self.net_spec, spec_keys)

    # fc body: state processing model, built only when fc layers are specified
    if not ps.is_empty(self.fc_hid_layers):
        fc_dims = [self.in_dim] + self.fc_hid_layers
        self.fc_model = net_util.build_fc_model(fc_dims, self.hid_layers_activation)
        self.rnn_input_dim = fc_dims[-1]
    else:
        self.rnn_input_dim = self.in_dim

    # RNN model: the nn class is looked up from the cell_type name
    rnn_class = getattr(nn, net_util.get_nn_name(self.cell_type))
    self.rnn_model = rnn_class(
        input_size=self.rnn_input_dim,
        hidden_size=self.rnn_hidden_size,
        num_layers=self.rnn_num_layers,
        batch_first=True,
        bidirectional=self.bidirectional)

    # tails. avoid list for single-tail for compute speed
    if ps.is_integer(self.out_dim):
        single_tail_dims = [self.rnn_hidden_size, self.out_dim]
        self.model_tail = net_util.build_fc_model(single_tail_dims, self.out_layer_activation)
    else:
        if not ps.is_list(self.out_layer_activation):
            # repeat the single activation once per output
            self.out_layer_activation = [self.out_layer_activation] * len(out_dim)
        assert len(self.out_layer_activation) == len(self.out_dim)
        self.model_tails = nn.ModuleList([
            net_util.build_fc_model([self.rnn_hidden_size, out_d], out_activ)
            for out_d, out_activ in zip(self.out_dim, self.out_layer_activation)
        ])

    net_util.init_layers(self, self.init_fn)
    for submodule in self.modules():
        submodule.to(self.device)

    self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
    self.optim = net_util.get_optim(self, self.optim_spec)
    self.lr_scheduler = net_util.get_lr_scheduler(self, self.lr_scheduler_spec)
def __init__(self, net_spec, in_dim, out_dim):
    '''
    Construct a convolutional network with two output heads: a state value
    head `v` (1 unit) and an action dependent raw advantage head `adv`
    (out_dim units).

    net_spec keys read here:
    conv_hid_layers: passed to self.build_conv_layers to build the conv body
    fc_hid_layers: list of fc layers following the flattened conv output; may be empty
    hid_layers_activation: activation function for the fc hidden layers
    init_fn: weight initialization function (default None)
    batch_norm: default False
    clip_grad_val: default None
    loss_spec: default {'name': 'MSELoss'}
    optim_spec: default {'name': 'Adam'}
    lr_scheduler_spec: default None
    update_type: default 'replace'
    update_frequency: default 1
    polyak_coef: default 0.0
    gpu: default False

    in_dim: image shape (c,w,h); must have length 3
    out_dim: must be an int; size of the advantage head
    '''
    assert len(in_dim) == 3  # image shape (c,w,h)
    nn.Module.__init__(self)
    Net.__init__(self, net_spec, in_dim, out_dim)
    # set default
    util.set_attr(self, dict(
        init_fn=None,
        batch_norm=False,
        clip_grad_val=None,
        loss_spec={'name': 'MSELoss'},
        optim_spec={'name': 'Adam'},
        lr_scheduler_spec=None,
        update_type='replace',
        update_frequency=1,
        polyak_coef=0.0,
        gpu=False,
    ))
    util.set_attr(self, self.net_spec, [
        'conv_hid_layers',
        'fc_hid_layers',
        'hid_layers_activation',
        'init_fn',
        'batch_norm',
        'clip_grad_val',
        'loss_spec',
        'optim_spec',
        'lr_scheduler_spec',
        'update_type',
        'update_frequency',
        'polyak_coef',
        'gpu',
    ])

    # Guard against inappropriate algorithms and environments
    assert isinstance(out_dim, int)

    # conv body
    self.conv_model = self.build_conv_layers(self.conv_hid_layers)
    self.conv_out_dim = self.get_conv_output_size()

    # fc body
    if ps.is_empty(self.fc_hid_layers):
        tail_in_dim = self.conv_out_dim
    else:
        # fc layer from flattened conv
        self.fc_model = net_util.build_fc_model(
            [self.conv_out_dim] + self.fc_hid_layers, self.hid_layers_activation)
        tail_in_dim = self.fc_hid_layers[-1]

    # tails: two heads sharing the same input features
    self.v = nn.Linear(tail_in_dim, 1)  # state value
    self.adv = nn.Linear(tail_in_dim, out_dim)  # action dependent raw advantage
    # nn.ModuleList takes a single iterable of modules, so the heads must be
    # wrapped in a list; passing them as separate positional arguments raises TypeError
    self.model_tails = nn.ModuleList([self.v, self.adv])

    net_util.init_layers(self, self.init_fn)
    for module in self.modules():
        module.to(self.device)
    self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
    self.optim = net_util.get_optim(self, self.optim_spec)
    self.lr_scheduler = net_util.get_lr_scheduler(self, self.lr_scheduler_spec)
def __init__(self, net_spec, in_dim, out_dim):
    '''
    Construct a convolutional network (conv body, optional fc body, output tail(s))
    from net_spec.

    net_spec:
    conv_hid_layers: list containing dimensions of the convolutional hidden layers, each is a list
        representing hid_layer = out_d, kernel, stride, padding, dilation.
        Assumed to all come before the flat layers.
        Note: a convolutional layer should specify the in_channel, out_channels, kernel_size,
        stride (of kernel steps), padding, and dilation (spacing between kernel points)
        E.g. [3, 16, (5, 5), 1, 0, (2, 2)]
        For more details, see http://pytorch.org/docs/master/nn.html#conv2d and
        https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
    fc_hid_layers: list of fc layers following the convolutional layers
    hid_layers_activation: activation function for the hidden layers
    out_layer_activation: activation function for the output layer, same shape as out_dim
    init_fn: weight initialization function
    batch_norm: whether to add batch normalization after each convolutional layer,
        excluding the input layer.
    clip_grad_val: clip gradient norm if value is not None
    loss_spec: measure of error between model predictions and correct outputs
    optim_spec: parameters for initializing the optimizer
    lr_scheduler_spec: Pytorch optim.lr_scheduler
    update_type: method to update network weights: 'replace' or 'polyak'
    update_frequency: how many total timesteps per update
    polyak_coef: ratio of polyak weight update
    gpu: whether to train using a GPU. Note this will only work if a GPU is available,
        otherwise setting gpu=True does nothing
    '''
    assert len(in_dim) == 3  # image shape (c,w,h)
    nn.Module.__init__(self)
    super(ConvNet, self).__init__(net_spec, in_dim, out_dim)

    # defaults are set first; values from net_spec are set afterwards
    defaults = {
        'out_layer_activation': None,
        'init_fn': None,
        'batch_norm': True,
        'clip_grad_val': None,
        'loss_spec': {'name': 'MSELoss'},
        'optim_spec': {'name': 'Adam'},
        'lr_scheduler_spec': None,
        'update_type': 'replace',
        'update_frequency': 1,
        'polyak_coef': 0.0,
        'gpu': False,
    }
    util.set_attr(self, defaults)
    spec_keys = [
        'conv_hid_layers',
        'fc_hid_layers',
        'hid_layers_activation',
        'out_layer_activation',
        'init_fn',
        'batch_norm',
        'clip_grad_val',
        'loss_spec',
        'optim_spec',
        'lr_scheduler_spec',
        'update_type',
        'update_frequency',
        'polyak_coef',
        'gpu',
    ]
    util.set_attr(self, self.net_spec, spec_keys)

    # conv body
    self.conv_model = self.build_conv_layers(self.conv_hid_layers)
    self.conv_out_dim = self.get_conv_output_size()

    # fc body from flattened conv, built only when fc layers are specified
    if not ps.is_empty(self.fc_hid_layers):
        fc_dims = [self.conv_out_dim] + self.fc_hid_layers
        self.fc_model = net_util.build_fc_model(fc_dims, self.hid_layers_activation)
        tail_in_dim = self.fc_hid_layers[-1]
    else:
        tail_in_dim = self.conv_out_dim

    # tails. avoid list for single-tail for compute speed
    if ps.is_integer(self.out_dim):
        single_tail_dims = [tail_in_dim, self.out_dim]
        self.model_tail = net_util.build_fc_model(single_tail_dims, self.out_layer_activation)
    else:
        if not ps.is_list(self.out_layer_activation):
            # repeat the single activation once per output
            self.out_layer_activation = [self.out_layer_activation] * len(out_dim)
        assert len(self.out_layer_activation) == len(self.out_dim)
        self.model_tails = nn.ModuleList([
            net_util.build_fc_model([tail_in_dim, out_d], out_activ)
            for out_d, out_activ in zip(self.out_dim, self.out_layer_activation)
        ])

    net_util.init_layers(self, self.init_fn)
    for submodule in self.modules():
        submodule.to(self.device)

    self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
    self.optim = net_util.get_optim(self, self.optim_spec)
    self.lr_scheduler = net_util.get_lr_scheduler(self, self.lr_scheduler_spec)
def __init__(self, net_spec, algorithm, in_dim, out_dim):
    '''
    Construct a recurrent (GRU) network from net_spec.

    net_spec:
    hid_layers: list containing dimensions of the hidden layers of the MLP (if desired)
        which transforms the state space before the recurrent layer
    hid_layers_activation: activation function for the state_proc hidden layers
    rnn_hidden_size: rnn hidden_size
    rnn_num_layers: number of recurrent layers
    seq_len: length of the history being passed to the net
    clip_grad: whether to clip the gradient
    clip_grad_val: the clip value
    loss_spec: measure of error between model predictions and correct outputs
    optim_spec: parameters for initializing the optimizer
    lr_decay: function to decay learning rate
    lr_decay_frequency: how many total timesteps per decay
    lr_decay_min_timestep: minimum amount of total timesteps before starting decay
    update_type: method to update network weights: 'replace' or 'polyak'
    update_frequency: how many total timesteps per update
    polyak_coef: ratio of polyak weight update
    gpu: whether to train using a GPU. Note this will only work if a GPU is available,
        otherwise setting gpu=True does nothing
    '''
    # use generic multi-output for RNN: flatten out_dim into a plain list
    out_dim = np.reshape(out_dim, -1).tolist()
    nn.Module.__init__(self)
    super(RecurrentNet, self).__init__(net_spec, algorithm, in_dim, out_dim)

    # defaults are set first; values from net_spec are set afterwards
    fallback_attrs = {
        'rnn_num_layers': 1,
        'clip_grad': False,
        'clip_grad_val': 1.0,
        'loss_spec': {'name': 'MSELoss'},
        'optim_spec': {'name': 'Adam'},
        'lr_decay': 'no_decay',
        'update_type': 'replace',
        'update_frequency': 1,
        'polyak_coef': 0.0,
        'gpu': False,
    }
    util.set_attr(self, fallback_attrs)
    attrs_from_spec = [
        'hid_layers',
        'hid_layers_activation',
        'rnn_hidden_size',
        'rnn_num_layers',
        'seq_len',
        'clip_grad',
        'clip_grad_val',
        'loss_spec',
        'optim_spec',
        'lr_decay',
        'lr_decay_frequency',
        'lr_decay_min_timestep',
        'update_type',
        'update_frequency',
        'polyak_coef',
        'gpu',
    ]
    util.set_attr(self, self.net_spec, attrs_from_spec)

    # state processing model
    state_proc_dims = [self.in_dim] + self.hid_layers
    self.state_proc_model = net_util.build_sequential(
        state_proc_dims, self.hid_layers_activation)

    # RNN model, fed by the last state processing layer
    self.rnn_input_dim = state_proc_dims[-1]
    gru_kwargs = {
        'input_size': self.rnn_input_dim,
        'hidden_size': self.rnn_hidden_size,
        'num_layers': self.rnn_num_layers,
        'batch_first': True,
    }
    self.rnn_model = nn.GRU(**gru_kwargs)

    # tails: one linear layer per output dimension
    tail_layers = []
    for out_d in self.out_dim:
        tail_layers.append(nn.Linear(self.rnn_hidden_size, out_d))
    self.model_tails = nn.ModuleList(tail_layers)

    net_util.init_layers(self.modules())
    if torch.cuda.is_available() and self.gpu:
        for submodule in self.modules():
            submodule.cuda()

    self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
    self.optim = net_util.get_optim(self, self.optim_spec)
    # swap the lr_decay name for the net_util attribute carrying that name
    self.lr_decay = getattr(net_util, self.lr_decay)