def __init__(self, action_selector: ActionSelector, max_exploratory_steps: int = 1000):
    """Wrap an action selector with an initial purely exploratory phase."""
    ActionSelector.__init__(self, action_selector.use_cuda)
    self.action_selector = action_selector
    self.exploration = self.action_selector.exploration
    self.max_exploratory_steps = max_exploratory_steps
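# NOTE: A minimal sketch (not the repository's actual implementation) of how a
# wrapper like this might use max_exploratory_steps: act uniformly at random
# until the exploratory budget is spent, then defer to the wrapped selector.
# The step argument and the action_min/action_max/action_dim attributes on the
# wrapped selector are assumptions for illustration.
def __call__(self, policy, state, step: int):
    if step < self.max_exploratory_steps:
        # Purely exploratory phase: ignore the policy and sample a random action.
        return np.random.uniform(
            self.action_selector.action_min,
            self.action_selector.action_max,
            self.action_selector.action_dim,
        )
    # After the exploratory phase, delegate to the wrapped selector.
    return self.action_selector(policy, state)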
def __init__(
    self,
    action_selector: ActionSelector,
    action_space: list,
    mu: float = 0.0,
    theta: float = 0.15,
    max_sigma: float = 0.3,
    min_sigma: float = 0.3,
    decay_period: int = 100000,
):
    """Wrap an action selector with Ornstein-Uhlenbeck exploration noise."""
    ActionSelector.__init__(self, action_selector.use_cuda)
    self.action_selector = action_selector
    self.mu = mu
    self.theta = theta
    self.sigma = max_sigma
    self.max_sigma = max_sigma
    self.min_sigma = min_sigma
    self.decay_period = decay_period
    self.action_min = self.action_selector.action_min
    self.action_max = self.action_selector.action_max
    self.action_dim = self.action_selector.action_dim
    self.exploration = True
    self._reset()
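# NOTE: A sketch of how the stored OU parameters are typically used; this
# illustrates the Ornstein-Uhlenbeck process in general, not the repository's
# exact helper methods. _reset re-initializes the internal noise state to mu,
# each step evolves it by dx = theta * (mu - x) + sigma * N(0, 1), and sigma is
# annealed linearly from max_sigma to min_sigma over decay_period steps.
def _reset(self):
    self.state = np.ones(self.action_dim) * self.mu

def _evolve_state(self) -> np.ndarray:
    dx = self.theta * (self.mu - self.state) + self.sigma * np.random.randn(self.action_dim)
    self.state = self.state + dx
    return self.state

def _decay_sigma(self, t: int):
    # Linear interpolation between max_sigma and min_sigma.
    ratio = min(1.0, t / self.decay_period)
    self.sigma = self.max_sigma - (self.max_sigma - self.min_sigma) * ratio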
def __init__(self, action_selector: ActionSelector, mu: float, sigma: float):
    """Wrap an action selector with additive Gaussian exploration noise."""
    ActionSelector.__init__(self, action_selector.use_cuda)
    self.action_selector = action_selector
    self.action_min = self.action_selector.action_min
    self.action_max = self.action_selector.action_max
    self.action_dim = self.action_selector.action_dim
    self.mu = mu
    self.sigma = sigma
    self.exploration = True
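# NOTE: A hedged sketch of how a Gaussian-noise wrapper commonly applies mu and
# sigma; the wrapped selector's output type and the clipping behavior are
# assumptions for illustration, not a copy of the repository's __call__.
def __call__(self, policy, state) -> np.ndarray:
    action = self.action_selector(policy, state)
    if self.exploration:
        # Perturb the deterministic action with Gaussian noise.
        action = action + np.random.normal(self.mu, self.sigma, self.action_dim)
    # Keep the perturbed action inside the valid range.
    return np.clip(action, self.action_min, self.action_max)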
def __init__(
    self,
    action_selector: ActionSelector,
    action_space: spaces.Discrete,
    hyper_params: DictConfig,
):
    """Wrap an action selector with linearly decaying epsilon-greedy exploration."""
    ActionSelector.__init__(self, action_selector.use_cuda)
    self.action_selector = action_selector
    self.action_space = action_space
    self.eps = hyper_params.eps
    self.eps_final = hyper_params.eps_final
    self.eps_decay = (self.eps - self.eps_final) / hyper_params.max_exploration_frame
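# NOTE: A sketch of linearly decaying epsilon-greedy selection built from the
# fields above; the decay call site and the use of action_space.sample() are
# assumptions for illustration, not the repository's exact __call__.
def __call__(self, policy, state) -> int:
    if self.exploration and np.random.random() < self.eps:
        # Explore: pick a random discrete action.
        action = self.action_space.sample()
    else:
        # Exploit: defer to the wrapped (greedy) selector.
        action = self.action_selector(policy, state)
    # Anneal eps linearly toward eps_final over max_exploration_frame frames.
    self.eps = max(self.eps_final, self.eps - self.eps_decay)
    return action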
def test(
    self,
    policy: BaseModel,
    action_selector: ActionSelector,
    episode_i: int,
    update_step: int,
) -> float:
    """Test the policy without random exploration for a number of episodes."""
    print("====TEST START====")
    policy.eval()
    action_selector.exploration = False
    episode_rewards = []
    for test_i in range(self.experiment_info.test_num):
        state = self.env.reset()
        episode_reward = 0
        done = False
        while not done:
            if self.experiment_info.render_train:
                self.env.render()
            action = action_selector(policy, state)
            state, action, reward, next_state, done = self.step(state, action)
            episode_reward += reward
            state = next_state
        print(
            f"episode num: {episode_i} | test: {test_i} | episode reward: {episode_reward}"
        )
        episode_rewards.append(episode_reward)
    mean_rewards = np.mean(episode_rewards)
    print(
        f"EPISODE NUM: {episode_i} | UPDATE STEP: {update_step} | "
        f"MEAN REWARD: {mean_rewards}"
    )
    action_selector.exploration = True
    print("====TEST END====")
    return mean_rewards
def __init__(self, device: str):
    ActionSelector.__init__(self, device)
def __init__(self, action_dim: int, action_range: list, use_cuda: bool):
    """Store the continuous action dimension and per-dimension bounds."""
    ActionSelector.__init__(self, use_cuda)
    self.action_dim = action_dim
    self.action_min = np.array(action_range[0])
    self.action_max = np.array(action_range[1])
def __init__(self, use_cuda: bool):
    ActionSelector.__init__(self, use_cuda)