def __init__(self, env, actor, critic, action_modifier=None):
    self._env = env
    self._actor = actor
    self._critic = critic
    self._action_modifier = action_modifier

    self._config = Config.get().ddpg.trainer

    self._initialize_target_networks()
    self._initialize_optimizers()

    self._models = {
        'actor': self._actor,
        'critic': self._critic,
        'target_actor': self._target_actor,
        'target_critic': self._target_critic
    }

    self._replay_buffer = ReplayBuffer(self._config.replay_buffer_size)

    # Tensorboard writer
    self._writer = TensorBoard.get_writer()
    self._train_global_step = 0
    self._eval_global_step = 0

    if self._config.use_gpu:
        self._cuda()
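# The two helpers called above are not part of this excerpt. A minimal
# sketch of what they plausibly do in a standard DDPG setup follows:
# deep-copied target networks plus one Adam optimizer per online network.
# The optimizer attribute names and the config fields `actor_lr` and
# `critic_lr` are assumptions, not confirmed by this excerpt.
import copy
import torch.optim as optim

def _initialize_target_networks(self):
    # Targets start as exact copies of the online networks; DDPG later
    # updates them slowly (e.g. by Polyak averaging) for stability.
    self._target_actor = copy.deepcopy(self._actor)
    self._target_critic = copy.deepcopy(self._critic)

def _initialize_optimizers(self):
    # One optimizer per online network; learning rates come from config.
    self._actor_optimizer = optim.Adam(self._actor.parameters(),
                                       lr=self._config.actor_lr)
    self._critic_optimizer = optim.Adam(self._critic.parameters(),
                                        lr=self._config.critic_lr)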
def __init__(self, observation_dim, action_dim):
    config = Config.get().safety_layer.constraint_model
    super(ConstraintModel, self) \
        .__init__(observation_dim,
                  action_dim,
                  config.layers,
                  config.init_bound,
                  uniform_,
                  None)
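# ConstraintModel and Actor extend, and Critic composes, a shared Net base
# that is not shown in this excerpt. The sketch below is inferred purely
# from the call sites: (input dim, output dim, hidden layer sizes, init
# bound, weight initializer, optional final activation). The ReLU hidden
# activations and the uniform init of the output layer are assumptions.
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Linear

class Net(nn.Module):
    def __init__(self, in_dim, out_dim, layer_dims,
                 init_bound, initializer, last_activation):
        super(Net, self).__init__()
        dims = [in_dim] + list(layer_dims) + [out_dim]
        self._layers = nn.ModuleList(
            [Linear(dims[i], dims[i + 1]) for i in range(len(dims) - 1)])
        self._last_activation = last_activation
        # Hidden layers use the supplied initializer; the output layer is
        # drawn uniformly from [-init_bound, init_bound], as is common
        # for DDPG-style networks.
        for layer in self._layers[:-1]:
            initializer(layer.weight)
        nn.init.uniform_(self._layers[-1].weight, -init_bound, init_bound)

    def forward(self, x):
        for layer in self._layers[:-1]:
            x = F.relu(layer(x))
        x = self._layers[-1](x)
        return self._last_activation(x) if self._last_activation else x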
def __init__(self, observation_dim, action_dim):
    super(Critic, self).__init__()
    config = Config.get().ddpg.critic

    self._observation_linear = Linear(observation_dim, config.layers[0])
    self._action_linear = Linear(action_dim, config.layers[0])
    init_fan_in_uniform(self._observation_linear.weight)
    init_fan_in_uniform(self._action_linear.weight)

    self._model = Net(config.layers[0] * 2,
                      1,
                      config.layers[1:],
                      config.init_bound,
                      init_fan_in_uniform,
                      None)
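# The Critic's forward pass is omitted from this excerpt. A sketch that
# matches the shapes defined in __init__: observation and action are
# embedded separately, concatenated (width config.layers[0] * 2, the Net
# input dimension above), and mapped to a scalar Q-value. Applying ReLU
# to the two embeddings is an assumption.
import torch
import torch.nn.functional as F

def forward(self, observation, action):
    obs_features = F.relu(self._observation_linear(observation))
    act_features = F.relu(self._action_linear(action))
    # Concatenated feature width matches Net's input dim from __init__.
    return self._model(torch.cat([obs_features, act_features], dim=1))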
def train(self):
    self._print_ascii_art()
    print("============================================================")
    print("Initialized SafeExplorer with config:")
    print("------------------------------------------------------------")
    Config.get().pprint()
    print("============================================================")

    env = BallND() if self._config.task == "ballnd" else Spaceship()

    # safety_layer must be bound even when the safety layer is disabled,
    # otherwise the `if safety_layer` check below raises a NameError.
    safety_layer = None
    if self._config.use_safety_layer:
        safety_layer = SafetyLayer(env)
        safety_layer.train()

    observation_dim = (seq(env.observation_space.spaces.values())
                       .map(lambda x: x.shape[0])
                       .sum())

    actor = Actor(observation_dim, env.action_space.shape[0])
    critic = Critic(observation_dim, env.action_space.shape[0])

    safe_action_func = safety_layer.get_safe_action if safety_layer else None
    ddpg = DDPG(env, actor, critic, safe_action_func)

    ddpg.train()
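# Typical entry point for the method above: build the trainer and run.
# The excerpt does not show the actual __main__ guard or the enclosing
# class name (Trainer is inferred from the `main.trainer` config section
# used in the constructor further below).
if __name__ == '__main__':
    Trainer().train()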
def __init__(self):
    self._config = Config.get().env.ballnd

    # Set the properties for spaces
    self.action_space = Box(low=-1, high=1,
                            shape=(self._config.n,), dtype=np.float32)
    self.observation_space = Dict({
        'agent_position': Box(low=0, high=1,
                              shape=(self._config.n,), dtype=np.float32),
        'target_position': Box(low=0, high=1,
                               shape=(self._config.n,), dtype=np.float32)
    })

    # Sets all the episode specific variables
    self.reset()
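# The safety layer reads per-step constraint values from the environment
# (see env.get_num_constraints() in the SafetyLayer constructor below).
# One natural formulation for BallND keeps the agent inside the unit box
# with some margin; the sketch below follows that idea, but the method
# name get_constraint_values, the config field agent_slack, and the
# _agent_position attribute (set in the omitted reset()) are assumptions.
# A constraint counts as satisfied when its value is <= 0.
import numpy as np

def get_constraint_values(self):
    # Stay at least `agent_slack` away from every wall of the unit box:
    # agent_slack - x_i <= 0   and   x_i - (1 - agent_slack) <= 0.
    min_constraints = self._config.agent_slack - self._agent_position
    max_constraints = self._agent_position - (1 - self._config.agent_slack)
    return np.concatenate([min_constraints, max_constraints])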
def __init__(self):
    self._config = Config.get().env.spaceship

    self._width = self._config.length if self._config.is_arena else 1
    self._episode_length = self._config.arena_episode_length \
        if self._config.is_arena else self._config.corridor_episode_length

    # Set the properties for spaces
    self.action_space = Box(low=-1, high=1, shape=(2,), dtype=np.float32)
    self.observation_space = Dict({
        'agent_position': Box(low=0, high=1, shape=(2,), dtype=np.float32),
        'agent_velocity': Box(low=-np.inf, high=np.inf, shape=(2,),
                              dtype=np.float32),
        'target_position': Box(low=0, high=1, shape=(2,), dtype=np.float32)
    })

    # Sets all the episode specific variables
    self.reset()
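# Worked example: the flattened observation dimension computed in train()
# via the `seq` pipeline is just the sum of the first shape entry of each
# Dict subspace. For Spaceship that is 2 (agent_position) +
# 2 (agent_velocity) + 2 (target_position) = 6. Assumes a loaded config:
env = Spaceship()
observation_dim = sum(space.shape[0]
                      for space in env.observation_space.spaces.values())
assert observation_dim == 6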
def __init__(self, env):
    self._env = env
    self._config = Config.get().safety_layer.trainer

    self._num_constraints = env.get_num_constraints()

    self._initialize_constraint_models()

    self._replay_buffer = ReplayBuffer(self._config.replay_buffer_size)

    # Tensorboard writer
    self._writer = TensorBoard.get_writer()
    self._train_global_step = 0
    self._eval_global_step = 0

    if self._config.use_gpu:
        self._cuda()
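# The core of the safety layer (Dalal et al., "Safe Exploration in
# Continuous Action Spaces") is a closed-form projection: with a learned
# linear model g_i(s) per constraint, the corrected action is
# a* = a - max(0, (g_i . a + c_i) / (g_i . g_i)) * g_i, applied only for
# the most-violated constraint. A sketch under the assumptions that
# self._models (filled by _initialize_constraint_models) holds one trained
# ConstraintModel per constraint and that observation, action, and the
# constraint values c arrive as 1-D torch tensors:
import torch

def get_safe_action(self, observation, action, c):
    # Evaluate each constraint model's sensitivity estimate g_i(s).
    g = [model(observation) for model in self._models]
    # QP multiplier per constraint, clipped at zero when inactive.
    multipliers = [torch.clamp((torch.dot(g_i, action) + c_i)
                               / torch.dot(g_i, g_i), min=0)
                   for g_i, c_i in zip(g, c)]
    # Correct along the single most-violated constraint only.
    idx = int(torch.argmax(torch.stack(multipliers)))
    return action - multipliers[idx] * g[idx]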
def __init__(self, observation_dim, action_dim):
    config = Config.get().ddpg.actor
    super(Actor, self).__init__(observation_dim,
                                action_dim,
                                config.layers,
                                config.init_bound,
                                init_fan_in_uniform,
                                torch.tanh)
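# Because the final activation is torch.tanh, the actor's outputs already
# lie in [-1, 1], matching the Box(low=-1, high=1) action spaces of both
# environments. Illustrative usage (the dimensions are examples, e.g.
# BallND with n=2; assumes a loaded config):
import torch

actor = Actor(observation_dim=4, action_dim=2)
action = actor(torch.zeros(1, 4))
assert bool((action >= -1).all()) and bool((action <= 1).all())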
def __init__(self):
    self._config = Config.get().main.trainer
    self._set_seeds()
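# _set_seeds is not shown in this excerpt. A conventional implementation
# seeds every source of randomness in the project from one config value;
# the field name `seed` is an assumption.
import random
import numpy as np
import torch

def _set_seeds(self):
    random.seed(self._config.seed)
    np.random.seed(self._config.seed)
    torch.manual_seed(self._config.seed)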