示例#1
0
    def __init__(
        self,
        mdp_rep_for_rl_pg: MDPRepForRLPG,
        reinforce: bool,
        batch_size: int,
        num_batches: int,
        num_action_samples: int,
        max_steps: int,
        actor_lambda: float,
        critic_lambda: float,
        score_func: Callable[[A, Sequence[float]], Sequence[float]],
        sample_actions_gen_func: Callable[[Sequence[float], int], Sequence[A]],
        fa_spec: FuncApproxSpec,
        pol_fa_spec: Sequence[FuncApproxSpec]

    ) -> None:
        self.mdp_rep: MDPRepForRLPG = mdp_rep_for_rl_pg
        self.reinforce: bool = reinforce
        self.batch_size: int = batch_size
        self.num_batches: int = num_batches
        self.num_action_samples: int = num_action_samples
        self.max_steps: int = max_steps
        self.actor_lambda: float = actor_lambda
        self.critic_lambda: float = critic_lambda
        self.score_func: Callable[[A, Sequence[float]], Sequence[float]] =\
            score_func
        self.sample_actions_gen_func: Callable[[Sequence[float], int], Sequence[A]] =\
            sample_actions_gen_func
        self.vf_fa: FuncApproxBase = fa_spec.get_vf_func_approx_obj()
        self.qvf_fa: FuncApproxBase = fa_spec.get_qvf_func_approx_obj()
        self.pol_fa: Sequence[FuncApproxBase] =\
            [s.get_vf_func_approx_obj() for s in pol_fa_spec]
示例#2
0
 def __init__(self, mdp_rep_for_adp: MDPRepForADP, num_samples: int,
              softmax: bool, epsilon: float, epsilon_half_life: float,
              tol: float, fa_spec: FuncApproxSpec) -> None:
     self.mdp_rep: MDPRepForADP = mdp_rep_for_adp
     self.num_samples: int = num_samples
     self.softmax: bool = softmax
     self.epsilon_func: Callable[[int], float] = get_epsilon_decay_func(
         epsilon, epsilon_half_life)
     self.tol: float = tol
     self.fa: FuncApproxBase = fa_spec.get_vf_func_approx_obj()
     self.state_action_func: Callable[[S], Set[A]] =\
         self.mdp_rep.state_action_func
示例#3
0
    def __init__(self, mdp_rep_for_rl: MDPRepForRLFA, exploring_start: bool,
                 softmax: bool, epsilon: float, epsilon_half_life: float,
                 num_episodes: int, max_steps: int,
                 fa_spec: FuncApproxSpec) -> None:

        self.mdp_rep: MDPRepForRLFA = mdp_rep_for_rl
        self.exploring_start: bool = exploring_start
        self.softmax: bool = softmax
        self.epsilon_func: Callable[[int], float] = get_epsilon_decay_func(
            epsilon, epsilon_half_life)
        self.num_episodes: int = num_episodes
        self.max_steps: int = max_steps
        self.vf_fa: FuncApproxBase = fa_spec.get_vf_func_approx_obj()
        self.qvf_fa: FuncApproxBase = fa_spec.get_qvf_func_approx_obj()
        self.state_action_func = self.mdp_rep.state_action_func