def _create_model(self):
    """Build the actor-critic network and its two Adam optimizers.

    Environment properties are read through ``get_env_properties``. When
    ``self.network`` is a string it is treated as an architecture name and
    resolved with ``get_model("ac", ...)``; an ``"s"`` suffix is appended to
    that name when ``self.shared_layers`` is set. Otherwise ``self.network``
    is used directly as the model. In both cases the model is moved to
    ``self.device``.

    Side effects:
        Sets ``self.ac``, ``self.optimizer_policy`` and
        ``self.optimizer_value``. No action noise is created here.
    """
    state_dim, action_dim, discrete, action_lim = get_env_properties(
        self.env, self.network
    )

    if isinstance(self.network, str):
        arch = self.network
        if self.shared_layers is not None:
            arch += "s"
        self.ac = get_model("ac", arch)(
            state_dim,
            action_dim,
            shared_layers=self.shared_layers,
            policy_layers=self.policy_layers,
            value_layers=self.value_layers,
            # NOTE(review): keyword was previously misspelled ``val_typ``;
            # confirm ``val_type`` against the model constructor.
            val_type="V",
            discrete=discrete,
            action_lim=action_lim,
            activation=self.activation,
        ).to(self.device)
    else:
        self.ac = self.network.to(self.device)

    # The model exposes its parameters already split into actor and critic
    # groups, so each group gets its own optimizer and learning rate.
    actor_params, critic_params = self.ac.get_params()
    self.optimizer_policy = opt.Adam(actor_params, lr=self.lr_policy)
    self.optimizer_value = opt.Adam(critic_params, lr=self.lr_value)
def _create_model(self) -> None:
    """Build the actor-critic network, the action noise and the optimizers.

    When ``self.network`` is a string it is resolved to a model class with
    ``get_model("ac", ...)`` and instantiated from the environment
    properties; otherwise ``self.network`` is used directly. The model is
    moved to ``self.device``.

    If ``self.noise`` is not ``None`` it is called with a mean and a
    standard-deviation array and the result replaces ``self.noise``.

    Side effects:
        Sets ``self.ac``, ``self.optimizer_policy`` and
        ``self.optimizer_value``; may rebind ``self.noise``.
    """
    state_dim, action_dim, discrete, action_lim = get_env_properties(
        self.env, self.network
    )

    if isinstance(self.network, str):
        self.ac = get_model("ac", self.network)(
            state_dim,
            action_dim,
            policy_layers=self.policy_layers,
            value_layers=self.value_layers,
            val_type="V",
            discrete=discrete,
            action_lim=action_lim,
        ).to(self.device)
    else:
        self.ac = self.network.to(self.device)

    if self.noise is not None:
        # ``np.zeros_like(action_dim)`` on an integer yields a 0-d array, so
        # the noise would be a single scalar. Build arrays with one entry
        # per action dimension instead.
        # NOTE(review): assumes ``action_dim`` is an int (or shape tuple).
        self.noise = self.noise(
            np.zeros(action_dim), self.noise_std * np.ones(action_dim)
        )

    self.optimizer_policy = opt.Adam(self.ac.actor.parameters(), lr=self.lr_policy)
    self.optimizer_value = opt.Adam(self.ac.critic.parameters(), lr=self.lr_value)
def _create_model(self) -> None:
    """Build the actor-critic network, its target copy, noise and optimizers.

    When ``self.network`` is a string it is resolved to a model class with
    ``get_model("ac", ...)`` and instantiated with a ``"Qsa"`` value type;
    otherwise ``self.network`` is used directly. The model and a deep copy
    of it (the target network) are both placed on ``self.device``.

    If ``self.noise`` is not ``None`` it is called with a mean and a
    standard-deviation array and the result replaces ``self.noise``.

    Raises:
        Exception: If ``get_env_properties`` reports a discrete environment.

    Side effects:
        Sets ``self.ac``, ``self.ac_target``, ``self.optimizer_policy`` and
        ``self.optimizer_value``; may rebind ``self.noise``.
    """
    state_dim, action_dim, discrete, _ = get_env_properties(self.env, self.network)
    if discrete:
        raise Exception(
            "Discrete Environments not supported for {}.".format(__class__.__name__)
        )

    if self.noise is not None:
        # ``np.zeros_like(action_dim)`` on an integer yields a 0-d array, so
        # the noise would be a single scalar. Build arrays with one entry
        # per action dimension instead.
        self.noise = self.noise(
            np.zeros(action_dim), self.noise_std * np.ones(action_dim)
        )

    if isinstance(self.network, str):
        self.ac = get_model("ac", self.network)(
            state_dim,
            action_dim,
            self.policy_layers,
            self.value_layers,
            "Qsa",
            False,
        ).to(self.device)
    else:
        # Move a user-supplied network to the device as well; previously
        # only its deep-copied target was moved, leaving the two networks
        # on different devices.
        self.ac = self.network.to(self.device)

    self.ac_target = deepcopy(self.ac).to(self.device)

    self.optimizer_policy = opt.Adam(self.ac.actor.parameters(), lr=self.lr_policy)
    self.optimizer_value = opt.Adam(self.ac.critic.parameters(), lr=self.lr_value)
def _create_model(self):
    """Build the actor-critic network and its two Adam optimizers.

    When ``self.network`` is a string it is resolved to a model class with
    ``get_model("ac", ...)`` and instantiated from the environment
    properties; otherwise ``self.network`` is used directly. The model is
    moved to ``self.device``.

    Side effects:
        Sets ``self.ac``, ``self.optimizer_policy`` and
        ``self.optimizer_value``. No action noise is created here.
    """
    state_dim, action_dim, discrete, action_lim = get_env_properties(
        self.env, self.network
    )

    if isinstance(self.network, str):
        self.ac = get_model("ac", self.network)(
            state_dim,
            action_dim,
            policy_layers=self.policy_layers,
            value_layers=self.value_layers,
            # NOTE(review): keyword was previously misspelled ``val_typ``;
            # confirm ``val_type`` against the model constructor.
            val_type="V",
            discrete=discrete,
            action_lim=action_lim,
            activation=self.activation,
        ).to(self.device)
    else:
        self.ac = self.network.to(self.device)

    # Separate optimizers let the actor and critic use different rates.
    self.optimizer_policy = opt.Adam(self.ac.actor.parameters(), lr=self.lr_policy)
    self.optimizer_value = opt.Adam(self.ac.critic.parameters(), lr=self.lr_value)
def test_get_env_properties(self):
    """
    Check ``get_env_properties`` on one discrete and one continuous env.
    """
    # Discrete case: a single CartPole-v0 environment.
    cartpole = VectorEnv("CartPole-v0", 1)
    obs_size, n_actions, is_discrete, _ = get_env_properties(cartpole)
    assert obs_size == 4
    assert n_actions == 2
    assert is_discrete is True

    # Continuous case: a single Pendulum-v0 environment, which also
    # reports an action limit.
    pendulum = VectorEnv("Pendulum-v0", 1)
    obs_size, n_actions, is_discrete, limit = get_env_properties(pendulum)
    assert obs_size == 3
    assert n_actions == 1
    assert is_discrete is False
    assert limit == 2.0
def _create_model(self, **kwargs) -> None:
    """Build the actor-critic network, its target copy and the optimizers.

    The action scale and bias are derived from the bounds of
    ``self.env.action_space``; when the action space is ``None`` they fall
    back to ``1.0`` and ``0.0``.

    When ``self.network`` is a string it is resolved with
    ``get_model("ac", self.network + "12")`` and instantiated from the
    environment properties; otherwise ``self.network`` is used directly.

    If ``self.entropy_tuning`` is truthy, a target entropy, a learnable
    ``log_alpha`` tensor and an optimizer for it are created as well.

    Args:
        **kwargs: Accepted for interface compatibility; not used here.

    Side effects:
        Sets ``self.action_scale``, ``self.action_bias``, ``self.ac``,
        ``self.ac_target``, ``self.critic_params``, ``self.optimizer_value``
        and ``self.optimizer_policy``. With entropy tuning it also sets
        ``self.target_entropy``, ``self.log_alpha`` and
        ``self.optimizer_alpha``.
    """
    action_space = self.env.action_space
    if action_space is None:
        # ``torch.FloatTensor(1.0)`` raises TypeError for a float scalar;
        # ``torch.tensor`` builds the intended scalar tensors.
        self.action_scale = torch.tensor(1.0)
        self.action_bias = torch.tensor(0.0)
    else:
        # Half-range and midpoint of the action bounds.
        self.action_scale = torch.FloatTensor(
            (action_space.high - action_space.low) / 2.0
        )
        self.action_bias = torch.FloatTensor(
            (action_space.high + action_space.low) / 2.0
        )

    if isinstance(self.network, str):
        state_dim, action_dim, _, _ = get_env_properties(self.env, self.network)
        self.ac = get_model("ac", self.network + "12")(
            state_dim,
            action_dim,
            policy_layers=self.policy_layers,
            value_layers=self.value_layers,
            val_type="Qsa",
            discrete=False,
            sac=True,
            action_scale=self.action_scale,
            action_bias=self.action_bias,
        )
    else:
        # Everything below reads ``self.ac``; previously only ``self.model``
        # was assigned here, so a custom network failed with AttributeError.
        # ``self.model`` is still set for backward compatibility.
        self.ac = self.model = self.network

    self.ac_target = deepcopy(self.ac)

    # Both critics share a single optimizer.
    self.critic_params = list(self.ac.critic1.parameters()) + list(
        self.ac.critic2.parameters()
    )
    self.optimizer_value = opt.Adam(self.critic_params, self.lr_value)
    self.optimizer_policy = opt.Adam(self.ac.actor.parameters(), self.lr_policy)

    if self.entropy_tuning:
        # Target entropy is the negated product of the action-space shape.
        self.target_entropy = -torch.prod(
            torch.Tensor(self.env.action_space.shape)
        ).item()
        self.log_alpha = torch.zeros(1, requires_grad=True)
        self.optimizer_alpha = opt.Adam([self.log_alpha], lr=self.lr_policy)
def _create_model(self):
    """Set up the policy network and the optimizer that trains it.

    A string ``self.network`` is resolved to a policy class through
    ``get_model("p", ...)`` and instantiated from the environment
    properties; any other value is used as the policy itself. The policy is
    moved to ``self.device`` and stored on ``self.actor``.
    """
    env_props = get_env_properties(self.env, self.network)
    state_dim, action_dim, discrete, action_lim = env_props

    if not isinstance(self.network, str):
        # Caller supplied a ready-made policy.
        policy = self.network
    else:
        policy_cls = get_model("p", self.network)
        policy = policy_cls(
            state_dim,
            action_dim,
            self.policy_layers,
            "V",
            discrete,
            action_lim=action_lim,
        )

    self.actor = policy.to(self.device)
    self.optimizer_policy = opt.Adam(self.actor.parameters(), lr=self.lr_policy)
def _create_model(self, *args, **kwargs) -> None:
    """Set up the Q-value model, its target copy and the optimizer.

    A string ``self.network`` is combined with ``self.dqn_type`` to look up
    a model class through ``get_model("v", ...)``; any other value is used
    as the model itself. ``**kwargs`` are forwarded to the model
    constructor in the string case.

    Raises:
        Exception: If ``get_env_properties`` reports a non-discrete
            environment.
    """
    env_props = get_env_properties(self.env, self.network)
    state_dim, action_dim, discrete, _ = env_props

    if not discrete:
        raise Exception("Only Discrete Environments are supported for DQN")

    if not isinstance(self.network, str):
        self.model = self.network
    else:
        q_model_cls = get_model("v", self.network + self.dqn_type)
        self.model = q_model_cls(
            state_dim, action_dim, "Qs", self.value_layers, **kwargs
        )

    # The target model starts as an independent copy of the online model.
    self.target_model = deepcopy(self.model)

    self.optimizer = opt.Adam(self.model.parameters(), lr=self.lr_value)
def _create_model(self) -> None:
    """Set up the actor-critic model, noise, target copy and optimizers.

    A string ``self.network`` is extended with ``"12"`` (and ``"s"`` when
    ``self.shared_layers`` is set) and resolved through
    ``get_model("ac", ...)``; any other value is used as the model itself.
    If ``self.noise`` is not ``None`` it is called with zero-mean and
    ``self.noise_std``-scaled tensors and the result replaces it.

    Raises:
        Exception: If ``get_env_properties`` reports a discrete environment.
    """
    env_props = get_env_properties(self.env, self.network)
    state_dim, action_dim, discrete, _ = env_props

    if discrete:
        raise Exception(
            "Discrete Environments not supported for {}.".format(__class__.__name__)
        )

    if not isinstance(self.network, str):
        self.ac = self.network
    else:
        suffix = "12" if self.shared_layers is None else "12s"
        model_cls = get_model("ac", self.network + suffix)
        self.ac = model_cls(
            state_dim,
            action_dim,
            shared_layers=self.shared_layers,
            policy_layers=self.policy_layers,
            value_layers=self.value_layers,
            val_type="Qsa",
            discrete=False,
        )

    if self.noise is not None:
        noise_mean = torch.zeros(action_dim)
        noise_std = self.noise_std * torch.ones(action_dim)
        self.noise = self.noise(noise_mean, noise_std)

    # The target network starts as an independent copy of the online one.
    self.ac_target = deepcopy(self.ac)

    actor_params, critic_params = self.ac.get_params()
    adam = torch.optim.Adam
    self.optimizer_value = adam(critic_params, lr=self.lr_value)
    self.optimizer_policy = adam(actor_params, lr=self.lr_policy)
def _create_model(self) -> None:
    """Build the actor-critic network, noise, target copy and optimizers.

    When ``self.network`` is a string it is extended with ``"12"`` and
    resolved with ``get_model("ac", ...)``; otherwise ``self.network`` is
    used directly. If ``self.noise`` is not ``None`` it is called with a
    mean and a standard-deviation array and the result replaces it.

    Raises:
        Exception: If ``get_env_properties`` reports a discrete environment.

    Side effects:
        Sets ``self.ac``, ``self.ac_target``, ``self.critic_params``,
        ``self.optimizer_value`` and ``self.optimizer_policy``; may rebind
        ``self.noise``.
    """
    state_dim, action_dim, discrete, _ = get_env_properties(self.env, self.network)
    if discrete:
        raise Exception(
            "Discrete Environments not supported for {}.".format(__class__.__name__)
        )

    if isinstance(self.network, str):
        # Below, the "12" corresponds to the Single Actor, Double Critic
        # network architecture
        self.ac = get_model("ac", self.network + "12")(
            state_dim,
            action_dim,
            policy_layers=self.policy_layers,
            value_layers=self.value_layers,
            val_type="Qsa",
            discrete=False,
        )
    else:
        self.ac = self.network

    if self.noise is not None:
        # ``np.zeros_like(action_dim)`` on an integer yields a 0-d array, so
        # the noise would be a single scalar. Build arrays with one entry
        # per action dimension instead.
        self.noise = self.noise(
            np.zeros(action_dim), self.noise_std * np.ones(action_dim)
        )

    self.ac_target = deepcopy(self.ac)

    # Both critics share a single optimizer.
    self.critic_params = list(self.ac.critic1.parameters()) + list(
        self.ac.critic2.parameters()
    )
    self.optimizer_value = torch.optim.Adam(self.critic_params, lr=self.lr_value)
    self.optimizer_policy = torch.optim.Adam(
        self.ac.actor.parameters(), lr=self.lr_policy
    )