Example #1
    def __init__(self, env, actor, critic, action_modifier=None):
        self._env = env
        self._actor = actor
        self._critic = critic
        self._action_modifier = action_modifier

        self._config = Config.get().ddpg.trainer

        self._initialize_target_networks()
        self._initialize_optimizers()

        self._models = {
            'actor': self._actor,
            'critic': self._critic,
            'target_actor': self._target_actor,
            'target_critic': self._target_critic
        }

        self._replay_buffer = ReplayBuffer(self._config.replay_buffer_size)

        # Tensorboard writer
        self._writer = TensorBoard.get_writer()
        self._train_global_step = 0
        self._eval_global_step = 0

        if self._config.use_gpu:
            self._cuda()
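The snippet calls `_initialize_target_networks` but does not show the update rule. In standard DDPG the target networks track the online networks by Polyak averaging; below is a minimal sketch of that step. The helper name and the `tau` parameter are assumptions, not taken from the snippet:

import torch

def _soft_update(target, source, tau):
    # Polyak averaging: target <- tau * source + (1 - tau) * target,
    # applied parameter-by-parameter without building a graph.
    with torch.no_grad():
        for t_param, s_param in zip(target.parameters(), source.parameters()):
            t_param.data.mul_(1.0 - tau).add_(tau * s_param.data)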
Example #2
    def __init__(self, observation_dim, action_dim):
        config = Config.get().safety_layer.constraint_model

        # Delegate to the shared feed-forward base; uniform_ is presumably
        # torch.nn.init.uniform_, and None means no output activation.
        super(ConstraintModel, self).__init__(observation_dim,
                                              action_dim,
                                              config.layers,
                                              config.init_bound,
                                              uniform_,
                                              None)
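Examples #2, #3, and #8 all delegate to a shared base with the signature `(input_dim, output_dim, layers, init_bound, init_fn, activation)`. The following is a hypothetical reconstruction of such a base, not the repository's actual code; everything beyond the signature (ReLU hidden activations, uniform init of the output layer) is an assumption:

import torch
import torch.nn as nn

class Net(nn.Module):
    # Hypothetical MLP base: hidden layers of the given sizes, init_fn applied
    # to hidden weights, output weights drawn from [-init_bound, init_bound],
    # and an optional output activation (tanh for the actor, None otherwise).
    def __init__(self, input_dim, output_dim, layers, init_bound, init_fn,
                 activation):
        super(Net, self).__init__()
        dims = [input_dim] + list(layers) + [output_dim]
        self._linears = nn.ModuleList(
            nn.Linear(dims[i], dims[i + 1]) for i in range(len(dims) - 1))
        for linear in self._linears[:-1]:
            init_fn(linear.weight)
        nn.init.uniform_(self._linears[-1].weight, -init_bound, init_bound)
        self._activation = activation

    def forward(self, x):
        for linear in self._linears[:-1]:
            x = torch.relu(linear(x))
        x = self._linears[-1](x)
        return self._activation(x) if self._activation else x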
Example #3
    def __init__(self, observation_dim, action_dim):
        super(Critic, self).__init__()

        config = Config.get().ddpg.critic

        self._observation_linear = Linear(observation_dim, config.layers[0])
        self._action_linear = Linear(action_dim, config.layers[0])

        init_fan_in_uniform(self._observation_linear.weight)
        init_fan_in_uniform(self._action_linear.weight)

        self._model = Net(config.layers[0] * 2, 1, config.layers[1:],
                          config.init_bound, init_fan_in_uniform, None)
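The input width of `Net` (`config.layers[0] * 2`) suggests the two embeddings are concatenated in the forward pass. A minimal sketch of that pass, assuming `torch` is imported; the ReLU on the embeddings is an assumption not visible in the snippet:

    def forward(self, observation, action):
        # Embed observation and action separately, concatenate, and feed the
        # result (width layers[0] * 2) into the shared MLP to get Q(s, a).
        observation_features = torch.relu(self._observation_linear(observation))
        action_features = torch.relu(self._action_linear(action))
        return self._model(torch.cat([observation_features, action_features],
                                     dim=-1))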
Example #4
    def train(self):
        self._print_ascii_art()
        print("============================================================")
        print("Initialized SafeExplorer with config:")
        print("------------------------------------------------------------")
        Config.get().pprint()
        print("============================================================")

        env = BallND() if self._config.task == "ballnd" else Spaceship()

        # Define safety_layer unconditionally: without the fallback, the
        # conditional lookup below raises a NameError when the safety layer
        # is disabled.
        safety_layer = None
        if self._config.use_safety_layer:
            safety_layer = SafetyLayer(env)
            safety_layer.train()

        observation_dim = (seq(env.observation_space.spaces.values()).map(
            lambda x: x.shape[0]).sum())

        actor = Actor(observation_dim, env.action_space.shape[0])
        critic = Critic(observation_dim, env.action_space.shape[0])

        safe_action_func = safety_layer.get_safe_action if safety_layer else None
        ddpg = DDPG(env, actor, critic, safe_action_func)

        ddpg.train()
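The `seq(...)` pipeline above comes from the PyFunctional library; it sums the first dimension of every subspace in the Dict observation space. In plain Python the same value is:

observation_dim = sum(space.shape[0]
                      for space in env.observation_space.spaces.values())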
Example #5
    def __init__(self):
        self._config = Config.get().env.ballnd
        # Set the properties for spaces
        self.action_space = Box(low=-1,
                                high=1,
                                shape=(self._config.n, ),
                                dtype=np.float32)
        self.observation_space = Dict({
            'agent_position':
            Box(low=0, high=1, shape=(self._config.n, ), dtype=np.float32),
            'target_position':
            Box(low=0, high=1, shape=(self._config.n, ), dtype=np.float32)
        })

        # Set all the episode-specific variables
        self.reset()
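A short usage sketch; `step` is not shown in the snippet, so the standard Gym `(observation, reward, done, info)` return is an assumption:

env = BallND()
observation = env.reset()            # dict: 'agent_position', 'target_position'
action = env.action_space.sample()   # n-dimensional vector in [-1, 1]
observation, reward, done, info = env.step(action)  # assumed Gym API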
Example #6
    def __init__(self):

        self._config = Config.get().env.spaceship

        self._width = self._config.length if self._config.is_arena else 1
        self._episode_length = self._config.arena_episode_length \
            if self._config.is_arena else self._config.corridor_episode_length
        # Set the properties for spaces
        self.action_space = Box(low=-1, high=1, shape=(2,), dtype=np.float32)
        self.observation_space = Dict({
            'agent_position': Box(low=0, high=1, shape=(2,), dtype=np.float32),
            'agent_velocity': Box(low=-np.inf, high=np.inf, shape=(2,), dtype=np.float32),
            'target_position': Box(low=0, high=1, shape=(2,), dtype=np.float32)
        })

        # Set all the episode-specific variables
        self.reset()
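The DDPG networks take a flat vector, so the Dict observation has to be flattened before it reaches the actor. A sketch of that step; the concatenation order and `reset()` returning the observation dict are assumptions:

import numpy as np

env = Spaceship()
observation = env.reset()
flat_observation = np.concatenate([observation['agent_position'],
                                   observation['agent_velocity'],
                                   observation['target_position']])
# flat_observation.shape[0] == 6, the observation_dim Example #4 computes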
Example #7
    def __init__(self, env):
        self._env = env

        self._config = Config.get().safety_layer.trainer

        self._num_constraints = env.get_num_constraints()

        self._initialize_constraint_models()

        self._replay_buffer = ReplayBuffer(self._config.replay_buffer_size)

        # Tensorboard writer
        self._writer = TensorBoard.get_writer()
        self._train_global_step = 0
        self._eval_global_step = 0

        if self._config.use_gpu:
            self._cuda()
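`_initialize_constraint_models` is not shown. A hypothetical sketch, assuming the safety layer builds one `ConstraintModel` per constraint, with dimensions derived from the environment the same way Example #4 does; the attribute name is also an assumption:

    def _initialize_constraint_models(self):
        # One approximator per constraint c_i(s, a); names assumed, not shown.
        observation_dim = (seq(self._env.observation_space.spaces.values())
                           .map(lambda x: x.shape[0]).sum())
        action_dim = self._env.action_space.shape[0]
        self._constraint_models = [
            ConstraintModel(observation_dim, action_dim)
            for _ in range(self._num_constraints)
        ]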
Example #8
    def __init__(self, observation_dim, action_dim):
        config = Config.get().ddpg.actor

        super(Actor, self).__init__(observation_dim, action_dim, config.layers,
                                    config.init_bound, init_fan_in_uniform,
                                    torch.tanh)
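The `torch.tanh` output activation bounds every action component to (-1, 1), matching the `Box(low=-1, high=1)` action spaces of both environments. A usage sketch, assuming the shared base defines `forward` so the module is callable on a flat observation tensor:

import torch

actor = Actor(observation_dim=6, action_dim=2)  # dims matching the spaceship env
observation = torch.rand(6)                     # placeholder observation vector
action = actor(observation)                     # each component in (-1, 1)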
Example #9
    def __init__(self):
        self._config = Config.get().main.trainer
        self._set_seeds()
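`_set_seeds` is not shown; a hypothetical sketch seeding the usual RNGs for reproducibility, assuming `random`, `numpy as np`, and `torch` are imported. The `seed` config field is also an assumption:

    def _set_seeds(self):
        # Seed every RNG in play so runs are reproducible.
        seed = self._config.seed  # assumed config field
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)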