示例#1
0
    def __init__(self, obs_dim, act_dim, conv_layer_sizes, hidden_sizes,
                 activation):
        '''
        A Convolutional Neural Net for the Actor network for continuous outputs.
        Builds a CNN feature extractor, an MLP head with a Tanh output
        activation, and a learnable log standard deviation vector.
        Network Architecture: (input) -> CNN -> MLP -> (output)
        Assume input is in the shape: (3, 128, 128)
        Args:
            obs_dim (tuple): observation dimension of the environment in the form of (C, H, W)
            act_dim (int): action dimension of the environment
            conv_layer_sizes (list): list of 3-tuples consisting of (output_channel, kernel_size, stride)
                                    that describes the cnn architecture
            hidden_sizes (list): list of number of neurons in each layer of MLP after output from CNN
            activation (nn.modules.activation): Activation function for each layer of MLP
        '''
        super().__init__()

        # Learnable log-std, one entry per action dimension, initialised to -0.5.
        initial_log_std = np.full(act_dim, -0.5, dtype=np.float32)
        self.log_std = torch.nn.Parameter(torch.as_tensor(initial_log_std))

        # obs_dim[0] is the channel count C of the (C, H, W) observation.
        in_channels = obs_dim[0]
        self.mu_cnn = cnn(in_channels, conv_layer_sizes, activation, batchnorm=True)

        # calc_shape presumably returns the flattened CNN output size, which
        # becomes the MLP input width -- verify against calc_shape().
        self.start_dim = self.calc_shape(obs_dim, self.mu_cnn)
        layer_sizes = [self.start_dim, *hidden_sizes, act_dim]
        self.mu_mlp = mlp(layer_sizes, activation, output_activation=nn.Tanh)

        # Scale the module at index -2 to 1/100 of its initial weights.
        # With output_activation=nn.Tanh the last module is presumably the
        # Tanh, so index -2 should be the final Linear layer -- verify
        # against mlp().
        output_layer = self.mu_mlp[-2]
        output_layer.weight.data /= 100
示例#2
0
    def __init__(self, obs_dim, act_dim, conv_layer_sizes, hidden_sizes,
                 activation, act_limit):
        '''
        A Convolutional Neural Net for the Actor network.
        Builds a CNN feature extractor followed by an MLP head with a Tanh
        output activation, and stores the action limit.
        Network Architecture: (input) -> CNN -> MLP -> (output)
        Assume input is in the shape: (3, 128, 128)
        Args:
            obs_dim (tuple): observation dimension of the environment in the form of (C, H, W)
            act_dim (int): action dimension of the environment
            conv_layer_sizes (list): list of 3-tuples consisting of (output_channel, kernel_size, stride)
                                    that describes the cnn architecture
            hidden_sizes (list): list of number of neurons in each layer of MLP after output from CNN
            activation (nn.modules.activation): Activation function for each layer of MLP
            act_limit (float): the greatest magnitude possible for the action in the environment
        '''
        super().__init__()

        # obs_dim[0] is the channel count C of the (C, H, W) observation.
        in_channels = obs_dim[0]
        self.pi_cnn = cnn(in_channels, conv_layer_sizes, activation, batchnorm=True)

        # calc_shape presumably returns the flattened CNN output size, which
        # becomes the MLP input width -- verify against calc_shape().
        self.start_dim = self.calc_shape(obs_dim, self.pi_cnn)
        layer_sizes = [self.start_dim, *hidden_sizes, act_dim]
        self.pi_mlp = mlp(layer_sizes, activation, output_activation=nn.Tanh)

        # Stored for later use; not consumed inside this constructor.
        self.act_limit = act_limit
示例#3
0
 def __init__(self, obs_dim, hidden_sizes, activation):
     '''
     A Multi-Layer Perceptron for the Critic network.
     Builds an MLP mapping an observation vector to a single output.
     Args:
         obs_dim (int): observation dimension of the environment
         hidden_sizes (list): list of number of neurons in each layer of MLP
         activation (nn.modules.activation): Activation function for each layer of MLP
     '''
     super().__init__()
     # Input width, then the hidden widths, then one output unit.
     layer_sizes = [obs_dim, *hidden_sizes, 1]
     self.v_net = mlp(layer_sizes, activation)
示例#4
0
 def __init__(self, obs_dim, act_dim, hidden_sizes, activation):
     '''
     A Multi-Layer Perceptron for the Actor network that outputs logits.
     Builds an MLP mapping an observation vector to act_dim outputs and
     shrinks the weights of its final layer.
     Args:
         obs_dim (int): observation dimension of the environment
         act_dim (int): action dimension of the environment
         hidden_sizes (list): list of number of neurons in each layer of MLP
         activation (nn.modules.activation): Activation function for each layer of MLP
     '''
     super().__init__()
     layer_sizes = [obs_dim, *hidden_sizes, act_dim]
     self.logits_net = mlp(layer_sizes, activation)

     # Scale the module at index -2 to 1/100 of its initial weights.
     # No output activation is passed, so the last module is presumably
     # mlp()'s default (Identity) and index -2 is the final Linear layer
     # -- verify against mlp().
     output_layer = self.logits_net[-2]
     output_layer.weight.data /= 100
示例#5
0
 def __init__(self, obs_dim, act_dim, hidden_sizes, activation):
     '''
     A Multi-Layer Perceptron for the gaussian Actor network for continuous actions.
     Builds an MLP producing act_dim outputs plus a learnable log standard
     deviation vector.
     Args:
         obs_dim (int): observation dimension of the environment
         act_dim (int): action dimension of the environment
         hidden_sizes (list): list of number of neurons in each layer of MLP
         activation (nn.modules.activation): Activation function for each layer of MLP
     '''
     super().__init__()

     # Learnable log-std, one entry per action dimension, initialised to -0.5.
     initial_log_std = np.full(act_dim, -0.5, dtype=np.float32)
     self.log_std = torch.nn.Parameter(torch.as_tensor(initial_log_std))

     layer_sizes = [obs_dim, *hidden_sizes, act_dim]
     self.mu_net = mlp(layer_sizes, activation)

     # Scale the module at index -2 to 1/100 of its initial weights.
     # No output activation is passed, so the last module is presumably
     # mlp()'s default (Identity) and index -2 is the final Linear layer
     # -- verify against mlp().
     output_layer = self.mu_net[-2]
     output_layer.weight.data /= 100
示例#6
0
 def __init__(self, vae_weights_path, obs_dim, conv_layer_sizes, hidden_sizes, activation):
     '''
     A Variational Autoencoder Net for the Critic network.
     Builds a VAE, loads its weights from disk, and stacks an MLP on top
     that maps the VAE latent dimension to a single output.
     Args:
         vae_weights_path (Str): Path to the vae weights file
         obs_dim (tuple): observation dimension of the environment in the form of (C, H, W)
                          (not used by this constructor)
         conv_layer_sizes (list): not used by this constructor
         hidden_sizes (list): list of number of neurons in each layer of MLP
         activation (nn.modules.activation): Activation function for each layer of MLP
     '''
     super().__init__()

     self.v_vae = VAE()
     self.v_vae.load_weights(vae_weights_path)

     # The MLP input width is the latent size reported by the VAE.
     latent_size = self.v_vae.latent_dim
     layer_sizes = [latent_size, *hidden_sizes, 1]
     self.v_mlp = mlp(layer_sizes, activation)
示例#7
0
 def __init__(self, obs_dim, act_dim, hidden_sizes, activation, act_limit):
     '''
     A Multi-Layer Perceptron for the Actor network.
     Builds an MLP with a Tanh output activation and stores the action limit.
     Args:
         obs_dim (int): observation dimension of the environment
         act_dim (int): action dimension of the environment
         hidden_sizes (list): list of number of neurons in each layer of MLP
         activation (nn.modules.activation): Activation function for each layer of MLP
         act_limit (float): the greatest magnitude possible for the action in the environment
     '''
     super().__init__()
     layer_sizes = [obs_dim, *hidden_sizes, act_dim]
     self.pi = mlp(layer_sizes, activation, output_activation=nn.Tanh)
     # Stored for later use; not consumed inside this constructor.
     self.act_limit = act_limit
示例#8
0
 def __init__(self, obs_dim, conv_layer_sizes, hidden_sizes, activation):
     '''
     A Convolutional Neural Net for the Critic network.
     Builds a CNN feature extractor followed by an MLP with a single output.
     Args:
         obs_dim (tuple): observation dimension of the environment in the form of (C, H, W)
         conv_layer_sizes (list): list of 3-tuples consisting of (output_channel, kernel_size, stride)
                     that describes the cnn architecture
         hidden_sizes (list): list of number of neurons in each layer of MLP
         activation (nn.modules.activation): Activation function for each layer of MLP
     '''
     super().__init__()

     # obs_dim[0] is the channel count C of the (C, H, W) observation.
     in_channels = obs_dim[0]
     self.v_cnn = cnn(in_channels, conv_layer_sizes, activation, batchnorm=True)

     # calc_shape presumably returns the flattened CNN output size, which
     # becomes the MLP input width -- verify against calc_shape().
     self.start_dim = self.calc_shape(obs_dim, self.v_cnn)
     layer_sizes = [self.start_dim, *hidden_sizes, 1]
     self.v_mlp = mlp(layer_sizes, activation)
示例#9
0
    def __init__(self, vae_weights_path, obs_dim, act_dim, conv_layer_sizes, hidden_sizes, activation):
        '''
        A Variational Autoencoder Net for the Actor network for continuous outputs.
        Builds a VAE, loads its weights from disk, and stacks an MLP head with
        a Tanh output activation plus a learnable log standard deviation vector.
        Network Architecture: (input) -> VAE -> MLP -> (output)
        Assume input is in the shape: (3, 128, 128)
        Args:
            vae_weights_path (Str): Path to the vae weights file
            obs_dim (tuple): observation dimension of the environment in the form of (C, H, W)
                             (not used by this constructor)
            act_dim (int): action dimension of the environment
            conv_layer_sizes (list): not used by this constructor; kept so the
                                     signature stays compatible with existing callers
            hidden_sizes (list): list of number of neurons in each layer of MLP after output from VAE
            activation (nn.modules.activation): Activation function for each layer of MLP
        '''
        super().__init__()

        # Learnable log-std, one entry per action dimension, initialised to -0.5.
        log_std = -0.5 * np.ones(act_dim, dtype=np.float32)
        self.log_std = torch.nn.Parameter(torch.as_tensor(log_std))

        self.mu_vae = VAE()
        # Load the weights from vae_weights_path, as the other VAE-backed
        # constructors in this file do; previously the path was accepted but
        # never used, leaving the VAE at its freshly constructed state.
        self.mu_vae.load_weights(vae_weights_path)

        # The MLP input width is the latent size reported by the VAE.
        mlp_sizes = [self.mu_vae.latent_dim] + list(hidden_sizes) + [act_dim]
        self.mu_mlp = mlp(mlp_sizes, activation, output_activation=nn.Tanh)

        # Scale the module at index -2 to 1/100 of its initial weights.
        # With output_activation=nn.Tanh the last module is presumably the
        # Tanh, so index -2 should be the final Linear layer -- verify
        # against mlp().
        self.mu_mlp[-2].weight.data /= 100
示例#10
0
    def __init__(self, vae_weights_path, obs_dim, act_dim, hidden_sizes, activation):
        '''
        A Variational Autoencoder Net for the Actor network for discrete outputs.
        Builds a VAE, loads its weights from disk, and stacks an MLP head with
        a Tanh output activation whose final layer weights are shrunk.
        Network Architecture: (input) -> VAE -> MLP -> (output)
        Assume input is in the shape: (3, 128, 128)
        Args:
            vae_weights_path (Str): Path to the vae weights file
            obs_dim (tuple): observation dimension of the environment in the form of (C, H, W)
                             (not used by this constructor)
            act_dim (int): action dimension of the environment
            hidden_sizes (list): list of number of neurons in each layer of MLP after output from VAE
            activation (nn.modules.activation): Activation function for each layer of MLP
        '''
        super().__init__()

        self.logits_vae = VAE()
        self.logits_vae.load_weights(vae_weights_path)

        # The MLP input width is the latent size reported by the VAE.
        latent_size = self.logits_vae.latent_dim
        layer_sizes = [latent_size, *hidden_sizes, act_dim]
        # NOTE(review): Tanh bounds the outputs to [-1, 1]; confirm this is
        # intended for logits of a discrete policy.
        self.logits_mlp = mlp(layer_sizes, activation, output_activation=nn.Tanh)

        # Scale the module at index -2 to 1/100 of its initial weights.
        # With output_activation=nn.Tanh the last module is presumably the
        # Tanh, so index -2 should be the final Linear layer -- verify
        # against mlp().
        output_layer = self.logits_mlp[-2]
        output_layer.weight.data /= 100
示例#11
0
 def __init__(self, obs_dim, act_dim, conv_layer_sizes, hidden_sizes, activation):
     '''
     A Convolutional Neural Net for the Actor network for discrete outputs.
     Builds a CNN feature extractor followed by an MLP head with a Tanh
     output activation whose final layer weights are shrunk.
     Network Architecture: (input) -> CNN -> MLP -> (output)
     Assume input is in the shape: (3, 128, 128)
     Args:
         obs_dim (tuple): observation dimension of the environment in the form of (C, H, W)
         act_dim (int): action dimension of the environment
         conv_layer_sizes (list): list of 3-tuples consisting of (output_channel, kernel_size, stride)
                                 that describes the cnn architecture
         hidden_sizes (list): list of number of neurons in each layer of MLP after output from CNN
         activation (nn.modules.activation): Activation function for each layer of MLP
     '''
     super().__init__()

     # obs_dim[0] is the channel count C of the (C, H, W) observation.
     in_channels = obs_dim[0]
     self.logits_cnn = cnn(in_channels, conv_layer_sizes, activation, batchnorm=True)

     # calc_shape presumably returns the flattened CNN output size, which
     # becomes the MLP input width -- verify against calc_shape().
     self.start_dim = self.calc_shape(obs_dim, self.logits_cnn)
     layer_sizes = [self.start_dim, *hidden_sizes, act_dim]
     # NOTE(review): Tanh bounds the outputs to [-1, 1]; confirm this is
     # intended for logits of a discrete policy.
     self.logits_mlp = mlp(layer_sizes, activation, output_activation=nn.Tanh)

     # Scale the module at index -2 to 1/100 of its initial weights.
     # With output_activation=nn.Tanh the last module is presumably the
     # Tanh, so index -2 should be the final Linear layer -- verify
     # against mlp().
     output_layer = self.logits_mlp[-2]
     output_layer.weight.data /= 100
示例#12
0
    def __init__(self, vae_weights_path, obs_dim, act_dim, hidden_sizes,
                 activation, act_limit):
        '''
        A Variational Autoencoder for the Actor network.
        Builds a VAE, loads its weights from disk, stacks an MLP head with a
        Tanh output activation, and stores the action limit.
        Network Architecture: (input) -> VAE -> MLP -> (output)
        The VAE is pretrained on observation images.
        Assume observation space is in the shape: (3, 128, 128)
        Args:
            vae_weights_path (Str): Path to the vae weights file
            obs_dim (tuple): observation dimension of the environment in the form of (C, H, W)
                             (not used by this constructor)
            act_dim (int): action dimension of the environment
            hidden_sizes (list): list of number of neurons in each layer of MLP after output from VAE
            activation (nn.modules.activation): Activation function for each layer of MLP
            act_limit (float): the greatest magnitude possible for the action in the environment
        '''
        super().__init__()

        self.pi_vae = VAE()
        self.pi_vae.load_weights(vae_weights_path)

        # The MLP input width is the latent size reported by the VAE.
        latent_size = self.pi_vae.latent_dim
        layer_sizes = [latent_size, *hidden_sizes, act_dim]
        self.pi_mlp = mlp(layer_sizes, activation, output_activation=nn.Tanh)

        # Stored for later use; not consumed inside this constructor.
        self.act_limit = act_limit