Пример #1
0
    def sample(self,
               env_states: States = None,
               batch_size: int = 1,
               model_states: States = None,
               **kwargs) -> States:
        """Sample a one-hot action vector per walker and store it in *model_states*.

        For each row, one named group is chosen at random: all positions
        belonging to that group are cleared and a single random valid index
        inside the group is switched on. Forced bits and soft constraints are
        then applied on top of the sampled actions.

        Args:
            env_states: presumably walker environment states whose ``observs``
                seed the action template — TODO confirm against caller. If
                ``None``, an all-zeros ``(batch_size, self.dim)`` template is
                used instead.
            batch_size: number of rows sampled when ``env_states`` is ``None``.
            model_states: states object updated in place with ``actions`` and
                ``dt``; must not be ``None``.
            **kwargs: forwarded to ``self.dt_sampler.calculate`` when a dt
                sampler is configured.

        Returns:
            The updated ``model_states``.
        """
        # Seed from current observations when available, otherwise from an
        # all-zeros template of shape (batch_size, self.dim).
        if env_states is not None:
            actions = env_states.observs.copy()
        else:
            actions = np.zeros((batch_size, self.dim))
        actions = actions.astype(bool)
        for i in range(actions.shape[0]):
            # Pick a random named group, clear every bit it covers, then turn
            # on exactly one valid index inside that group.
            target_name = self.random_state.choice(self.names)
            # BUG FIX: the original read ``self.namme_masks`` (typo) here,
            # which raises AttributeError; the attribute used on the very next
            # statement — and presumably everywhere else — is ``name_masks``.
            actions[i] = np.logical_and(
                actions[i], np.logical_not(self.name_masks[target_name]))
            valid_ixs = np.arange(
                actions.shape[1])[self.name_masks[target_name]]
            actions[i, self.random_state.choice(valid_ixs)] = True
        actions = actions.astype(int)
        # Per-walker time step: constant 1 unless a dt sampler is configured.
        dt = (1 if self.dt_sampler is None else self.dt_sampler.calculate(
            batch_size=batch_size, model_states=model_states,
            **kwargs).astype(int))
        model_states.update(actions=actions, dt=dt)

        # Re-read the stored actions, force the mandatory bits on and apply
        # the soft constraints before returning.
        actions = model_states.actions.astype(bool)
        actions = np.logical_or(actions, self.force_mask)
        actions = self._enforce_soft_constraints(actions)
        model_states.update(actions=actions)
        return model_states
Пример #2
0
 def classic_control_env():
     """Build a discrete classic-control environment plus matching walker states.

     Returns:
         A tuple ``(env, states)`` where ``env`` is a ``ClassicControl``
         environment wrapped in ``DiscreteEnv`` (reset once), and ``states``
         is a ``States`` batch of size ``N_WALKERS`` holding ``actions``
         (int64) and ``dt`` (float32), both initialized to ones.
     """
     base = ClassicControl()
     base.reset()
     wrapped = DiscreteEnv(base)
     state_dict = {
         "actions": {"dtype": np.int64},
         "dt": {"dtype": np.float32},
     }
     walker_states = States(state_dict=state_dict, batch_size=N_WALKERS)
     walker_states.update(actions=np.ones(N_WALKERS), dt=np.ones(N_WALKERS))
     return wrapped, walker_states
Пример #3
0
 def sample(self,
            env_states: States = None,
            batch_size: int = 1,
            model_states: States = None,
            **kwargs) -> States:
     """Derive actions from the current observations and store them.

     Args:
         env_states: presumably walker environment states whose ``observs``
             seed the actions — TODO confirm against caller. When ``None``,
             an all-zeros ``(batch_size, self.dim)`` array is used instead.
         batch_size: number of rows when ``env_states`` is ``None``.
         model_states: states object updated in place with ``actions``;
             must not be ``None``.
         **kwargs: ignored by this implementation.

     Returns:
         The updated ``model_states``.
     """
     if env_states is None:
         raw_actions = np.zeros((batch_size, self.dim))
     else:
         raw_actions = env_states.observs.copy()
     model_states.update(actions=self.modify_actions(raw_actions))
     return model_states
Пример #4
0
 def atari_env():
     """Build a discrete MsPacman Atari environment plus matching walker states.

     Returns:
         A tuple ``(env, states)`` where ``env`` is an ``AtariEnvironment``
         (clone seeds + autoreset enabled) wrapped in ``DiscreteEnv`` and
         reset once, and ``states`` is a ``States`` batch of size
         ``N_WALKERS`` holding ``actions`` (int64) and ``critic`` (float32),
         both initialized to ones.
     """
     base = AtariEnvironment(name="MsPacman-v0",
                             clone_seeds=True,
                             autoreset=True)
     base.reset()
     wrapped = DiscreteEnv(base)
     state_dict = {
         "actions": {"dtype": np.int64},
         "critic": {"dtype": np.float32},
     }
     walker_states = States(state_dict=state_dict, batch_size=N_WALKERS)
     walker_states.update(actions=np.ones(N_WALKERS),
                          critic=np.ones(N_WALKERS))
     return wrapped, walker_states