def __init__(
    self,
    mdp_rep_for_rl: MDPRepForRLTabular,
    softmax: bool,
    epsilon: float,
    epsilon_half_life: float,
    num_episodes: int,
    max_steps: int,
) -> None:
    """Record the MDP representation and the run settings on the instance.

    Args:
        mdp_rep_for_rl: MDP representation object; kept as ``mdp_rep``.
        softmax: Flag kept unchanged as ``softmax``.
            NOTE(review): presumably chooses softmax-style action
            selection -- confirm against the code that reads it.
        epsilon: First argument passed to ``get_epsilon_decay_func``.
        epsilon_half_life: Second argument passed to
            ``get_epsilon_decay_func``.
        num_episodes: Kept unchanged as ``num_episodes``.
        max_steps: Kept unchanged as ``max_steps``.
    """
    self.mdp_rep: MDPRepForRLTabular = mdp_rep_for_rl
    self.softmax: bool = softmax

    # The raw epsilon settings are not stored; only the callable built
    # from them is kept on the instance.
    epsilon_schedule = get_epsilon_decay_func(
        epsilon,
        epsilon_half_life,
    )
    self.epsilon_func: Callable[[int], float] = epsilon_schedule

    self.num_episodes: int = num_episodes
    self.max_steps: int = max_steps
def __init__(
    self,
    mdp_rep_for_adp: MDPRepForADP,
    num_samples: int,
    softmax: bool,
    epsilon: float,
    epsilon_half_life: float,
    tol: float,
    fa_spec: FuncApproxSpec,
) -> None:
    """Record the MDP representation, settings and function approximator.

    Args:
        mdp_rep_for_adp: MDP representation object; kept as ``mdp_rep``.
            Its ``state_action_func`` attribute is also copied onto the
            instance.
        num_samples: Kept unchanged as ``num_samples``.
        softmax: Flag kept unchanged as ``softmax``.
            NOTE(review): presumably chooses softmax-style action
            selection -- confirm against the code that reads it.
        epsilon: First argument passed to ``get_epsilon_decay_func``.
        epsilon_half_life: Second argument passed to
            ``get_epsilon_decay_func``.
        tol: Kept unchanged as ``tol``.
        fa_spec: Specification whose ``get_vf_func_approx_obj()`` result
            is kept as ``fa``.
    """
    self.mdp_rep: MDPRepForADP = mdp_rep_for_adp
    self.num_samples: int = num_samples
    self.softmax: bool = softmax

    # The raw epsilon settings are not stored; only the callable built
    # from them is kept on the instance.
    epsilon_schedule = get_epsilon_decay_func(
        epsilon,
        epsilon_half_life,
    )
    self.epsilon_func: Callable[[int], float] = epsilon_schedule

    self.tol: float = tol

    value_func_approx = fa_spec.get_vf_func_approx_obj()
    self.fa: FuncApproxBase = value_func_approx

    # Read through the stored attribute, exactly as the assignment above
    # left it, rather than through the constructor argument.
    actions_for_state = self.mdp_rep.state_action_func
    self.state_action_func: Callable[[S], Set[A]] = actions_for_state
def __init__(
    self,
    mdp_rep_for_rl: MDPRepForRLFA,
    softmax: bool,
    epsilon: float,
    epsilon_half_life: float,
    num_episodes: int,
    max_steps: int,
    fa_spec: FuncApproxSpec,
) -> None:
    """Record the MDP representation, settings and function approximators.

    Args:
        mdp_rep_for_rl: MDP representation object; kept as ``mdp_rep``.
            Its ``state_action_func`` attribute is also copied onto the
            instance.
        softmax: Flag kept unchanged as ``softmax``.
            NOTE(review): presumably chooses softmax-style action
            selection -- confirm against the code that reads it.
        epsilon: First argument passed to ``get_epsilon_decay_func``.
        epsilon_half_life: Second argument passed to
            ``get_epsilon_decay_func``.
        num_episodes: Kept unchanged as ``num_episodes``.
        max_steps: Kept unchanged as ``max_steps``.
        fa_spec: Specification used to build two approximator objects:
            ``get_vf_func_approx_obj()`` is kept as ``vf_fa`` and
            ``get_qvf_func_approx_obj()`` is kept as ``qvf_fa``.
    """
    self.mdp_rep: MDPRepForRLFA = mdp_rep_for_rl
    self.softmax: bool = softmax

    # The raw epsilon settings are not stored; only the callable built
    # from them is kept on the instance.
    epsilon_schedule = get_epsilon_decay_func(
        epsilon,
        epsilon_half_life,
    )
    self.epsilon_func: Callable[[int], float] = epsilon_schedule

    self.num_episodes: int = num_episodes
    self.max_steps: int = max_steps

    # Build the approximators in the same order as before: vf, then qvf.
    value_func_approx = fa_spec.get_vf_func_approx_obj()
    self.vf_fa: FuncApproxBase = value_func_approx
    q_value_func_approx = fa_spec.get_qvf_func_approx_obj()
    self.qvf_fa: FuncApproxBase = q_value_func_approx

    # Read through the stored attribute, exactly as the assignment above
    # left it, rather than through the constructor argument.
    self.state_action_func = self.mdp_rep.state_action_func
def __init__(
    self,
    mab: MabEnv,
    time_steps: int,
    num_episodes: int,
    epsilon: float,
    epsilon_half_life: float = 1e8,
    count_init: int = 0,
    mean_init: float = 0.0,
) -> None:
    """Initialise the base class, then record the epsilon and init settings.

    Args:
        mab: Forwarded to the parent initialiser as ``mab``.
        time_steps: Forwarded to the parent initialiser as ``time_steps``.
        num_episodes: Forwarded to the parent initialiser as
            ``num_episodes``.
        epsilon: First argument passed to ``get_epsilon_decay_func``.
        epsilon_half_life: Second argument passed to
            ``get_epsilon_decay_func``; defaults to ``1e8``.
        count_init: Kept unchanged as ``count_init``; defaults to ``0``.
        mean_init: Kept unchanged as ``mean_init``; defaults to ``0.0``.
    """
    # The parent initialiser runs first and receives only these three
    # arguments, all passed by keyword.
    super().__init__(
        mab=mab,
        time_steps=time_steps,
        num_episodes=num_episodes,
    )

    # The raw epsilon settings are not stored; only the callable built
    # from them is kept on the instance.
    epsilon_schedule = get_epsilon_decay_func(
        epsilon,
        epsilon_half_life,
    )
    self.epsilon_func: Callable[[int], float] = epsilon_schedule

    self.count_init: int = count_init
    self.mean_init: float = mean_init