Example #1
    def __init__(self, obs_dim, act_dim, conv_layer_sizes, hidden_sizes,
                 activation):
        '''
        A Convolutional Neural Net for the Actor network for continuous outputs
        Network Architecture: (input) -> CNN -> MLP -> (output)
        Assume input is in the shape: (3, 128, 128)
        Args:
            obs_dim (tuple): observation dimension of the environment in the form of (C, H, W)
            act_dim (int): action dimension of the environment
            conv_layer_sizes (list): list of 3-tuples consisting of (output_channel, kernel_size, stride)
                                    that describes the cnn architecture
            hidden_sizes (list): list of number of neurons in each layer of MLP after output from CNN
            activation (nn.modules.activation): Activation function for each layer of MLP
        '''
        super().__init__()
        log_std = -0.5 * np.ones(act_dim, dtype=np.float32)
        self.log_std = torch.nn.Parameter(torch.as_tensor(log_std))

        self.mu_cnn = cnn(obs_dim[0],
                          conv_layer_sizes,
                          activation,
                          batchnorm=True)
        self.start_dim = self.calc_shape(obs_dim, self.mu_cnn)
        mlp_sizes = [self.start_dim] + list(hidden_sizes) + [act_dim]
        self.mu_mlp = mlp(mlp_sizes, activation, output_activation=nn.Tanh)
        # initialise the actor's final Linear layer weights to be 1/100 of the others;
        # the last module in mu_mlp is the Tanh output activation, so the final
        # Linear layer sits at index -2
        self.mu_mlp[-2].weight.data /= 100
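
The snippets in this listing call two builder helpers, cnn(...) and mlp(...), that are not shown. The sketch below is a hypothetical reconstruction based only on their call signatures (Spinning Up-style builders); the actual helpers in the source repository may differ.

import torch.nn as nn

def cnn(in_channels, conv_layer_sizes, activation, batchnorm=True):
    # Assumed builder: one Conv2d (+ optional BatchNorm2d) + activation per
    # (output_channel, kernel_size, stride) tuple, returned as nn.Sequential.
    layers = []
    for out_channels, kernel_size, stride in conv_layer_sizes:
        layers.append(nn.Conv2d(in_channels, out_channels, kernel_size, stride))
        if batchnorm:
            layers.append(nn.BatchNorm2d(out_channels))
        layers.append(activation())
        in_channels = out_channels
    return nn.Sequential(*layers)

def mlp(sizes, activation, output_activation=nn.Identity):
    # Assumed builder: Linear + activation per pair of adjacent sizes, with the
    # final block using output_activation; the last Linear layer therefore sits
    # at index -2, which is why the actors rescale mlp[-2].weight above.
    layers = []
    for j in range(len(sizes) - 1):
        act = activation if j < len(sizes) - 2 else output_activation
        layers += [nn.Linear(sizes[j], sizes[j + 1]), act()]
    return nn.Sequential(*layers)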
Example #2
    def __init__(self, obs_dim, act_dim, conv_layer_sizes, hidden_sizes,
                 activation, act_limit):
        '''
        A Convolutional Neural Net for the Actor network
        Network Architecture: (input) -> CNN -> MLP -> (output)
        Assume input is in the shape: (3, 128, 128)
        Args:
            obs_dim (tuple): observation dimension of the environment in the form of (C, H, W)
            act_dim (int): action dimension of the environment
            conv_layer_sizes (list): list of 3-tuples consisting of (output_channel, kernel_size, stride)
                                    that describes the cnn architecture
            hidden_sizes (list): list of number of neurons in each layer of MLP after output from CNN
            activation (nn.modules.activation): Activation function for each layer of MLP
            act_limit (float): the greatest magnitude possible for the action in the environment
        '''
        super().__init__()

        self.pi_cnn = cnn(obs_dim[0],
                          conv_layer_sizes,
                          activation,
                          batchnorm=True)
        self.start_dim = self.calc_shape(obs_dim, self.pi_cnn)
        mlp_sizes = [self.start_dim] + list(hidden_sizes) + [act_dim]
        self.pi_mlp = mlp(mlp_sizes, activation, output_activation=nn.Tanh)
        self.act_limit = act_limit
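
The stored act_limit suggests the forward pass rescales the tanh-bounded MLP output to the environment's action range. The forward method itself is not part of the snippet; the version below is a minimal sketch under that assumption, including the flatten step between CNN and MLP.

    def forward(self, obs):
        # Hypothetical forward pass (not shown in the snippet above).
        x = self.pi_cnn(obs)            # (N, C', H', W') feature maps
        x = x.view(x.size(0), -1)       # flatten to (N, start_dim)
        pi = self.pi_mlp(x)             # tanh keeps outputs in [-1, 1]
        return self.act_limit * pi      # rescale to the environment's action range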
Example #3
    def calc_shape(self, obs_dim, cnn):
        '''
        Determine the flattened size of the data after the conv layers,
        i.e. how many input neurons the first MLP layer needs.
        '''
        C, H, W = obs_dim
        dummy_input = torch.randn(1, C, H, W)
        with torch.no_grad():
            cnn_out = cnn(dummy_input)
        shape = cnn_out.view(-1).shape[0]
        return shape
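
As a quick sanity check of the flatten-size trick used by calc_shape, the standalone snippet below runs a dummy (3, 128, 128) input through an assumed two-layer conv stack (the real conv_layer_sizes will differ) and reads off the flattened size that would feed the first MLP layer.

import torch
import torch.nn as nn

conv = nn.Sequential(                                    # assumed example stack
    nn.Conv2d(3, 16, kernel_size=8, stride=4), nn.ReLU(),
    nn.Conv2d(16, 32, kernel_size=4, stride=2), nn.ReLU(),
)
with torch.no_grad():
    out = conv(torch.randn(1, 3, 128, 128))
print(out.shape)              # torch.Size([1, 32, 14, 14])
print(out.view(-1).shape[0])  # 6272 -> input size for the first Linear layer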
Example #4
    def __init__(self, obs_dim, conv_layer_sizes, hidden_sizes, activation):
        '''
        A Convolutional Neural Net for the Critic network
        Args:
            obs_dim (tuple): observation dimension of the environment in the form of (C, H, W)
            conv_layer_sizes (list): list of 3-tuples consisting of (output_channel, kernel_size, stride)
                                    that describes the cnn architecture
            hidden_sizes (list): list of number of neurons in each layer of MLP
            activation (nn.modules.activation): Activation function for each layer of MLP
        '''
        super().__init__()
        self.v_cnn = cnn(obs_dim[0], conv_layer_sizes, activation, batchnorm=True)
        self.start_dim = self.calc_shape(obs_dim, self.v_cnn)
        self.v_mlp = mlp([self.start_dim] + list(hidden_sizes) + [1], activation)
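
The critic's forward pass is not included in the snippet; a plausible sketch, assuming the same flatten step as the actors and a final squeeze so the value comes back with shape (N,) rather than (N, 1):

    def forward(self, obs):
        # Hypothetical forward pass for the value critic above.
        x = self.v_cnn(obs)
        x = x.view(x.size(0), -1)       # flatten CNN features to (N, start_dim)
        v = self.v_mlp(x)               # (N, 1) state value
        return torch.squeeze(v, -1)     # return shape (N,)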
Example #5
    def __init__(self, obs_dim, act_dim, conv_layer_sizes, hidden_sizes, activation):
        '''
        A Convolutional Neural Net for the Actor network for discrete outputs
        Network Architecture: (input) -> CNN -> MLP -> (output)
        Assume input is in the shape: (3, 128, 128)
        Args:
            obs_dim (tuple): observation dimension of the environment in the form of (C, H, W)
            act_dim (int): action dimension of the environment
            conv_layer_sizes (list): list of 3-tuples consisting of (output_channel, kernel_size, stride)
                                    that describes the cnn architecture
            hidden_sizes (list): list of number of neurons in each layer of MLP after output from CNN
            activation (nn.modules.activation): Activation function for each layer of MLP
        '''
        super().__init__()

        self.logits_cnn = cnn(obs_dim[0], conv_layer_sizes, activation, batchnorm=True)
        self.start_dim = self.calc_shape(obs_dim, self.logits_cnn)
        mlp_sizes = [self.start_dim] + list(hidden_sizes) + [act_dim]
        self.logits_mlp = mlp(mlp_sizes, activation, output_activation=nn.Tanh)
        # initialise the actor's final Linear layer weights to be 1/100 of the others;
        # the last module in logits_mlp is the Tanh output activation, so the final
        # Linear layer sits at index -2
        self.logits_mlp[-2].weight.data /= 100
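
For the discrete actor, the logits head would typically be wrapped in a Categorical distribution; the method below is an assumed sketch of that step and is not part of the snippet. Note that the snippet builds the logits MLP with a Tanh output activation, so the logits it produces are bounded to [-1, 1].

from torch.distributions import Categorical

    def _distribution(self, obs):
        # Hypothetical distribution method (not shown above): flatten the CNN
        # features, compute the bounded logits, and return a Categorical over actions.
        x = self.logits_cnn(obs)
        x = x.view(x.size(0), -1)
        logits = self.logits_mlp(x)
        return Categorical(logits=logits)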