Exemplo n.º 1
0
    def __init__(self, alpha, discount, env, epsilon=0.2):
        """Set up the agent with an epsilon-greedy exploration policy.

        Args:
            alpha: Forwarded unchanged to the parent constructor.
            discount: Forwarded unchanged to the parent constructor.
            env: Environment object; it must expose ``state_space``,
                ``action_space`` and ``get_state()``. It is also forwarded
                to the parent constructor.
            epsilon: Fourth argument handed to
                ``policy.EpsilonGreedyPolicy``; defaults to 0.2.
        """
        super().__init__(alpha, discount, env)

        # ``self.qvalues`` is not created in this method -- presumably the
        # parent constructor provides it; confirm against the base class.
        states, actions = env.state_space, env.action_space
        self.explore_policy = policy.EpsilonGreedyPolicy(
            states, actions, self.qvalues, epsilon)

        # Choose the first action from the environment's current state.
        # Presumably ``get_explore_action`` consults ``explore_policy``,
        # which is why this runs last -- TODO confirm.
        current_state = env.get_state()
        self.next_action = self.get_explore_action(current_state)
Exemplo n.º 2
0
    def __init__(self, alpha, epsilon, discount, environment):
        """Set up the agent with one policy used for acting and exploring.

        Args:
            alpha: Forwarded unchanged to the parent constructor.
            epsilon: Forwarded unchanged to the parent constructor.
            discount: Forwarded unchanged to the parent constructor.
            environment: Environment object; it must expose ``state_space``
                and ``action_space``. It is also forwarded to the parent
                constructor.
        """
        super().__init__(alpha, epsilon, discount, environment)

        states = environment.state_space
        actions = environment.action_space

        # NOTE(review): ``epsilon`` is handed to the parent only and is not
        # passed to EpsilonGreedyPolicy here, so the policy uses whatever
        # default it defines -- confirm this is intended.
        # ``self.qvalues`` is presumably provided by the parent constructor.
        self.policy = policy.EpsilonGreedyPolicy(states, actions, self.qvalues)

        # Exploration uses the very same policy object.
        self.explore_policy = self.policy
Exemplo n.º 3
0
    def __init__(self, alpha, discount, environment, n, epsilon=0.2):
        """Set up the agent, its epsilon-greedy policy and the first episode.

        Args:
            alpha: Forwarded unchanged to the parent constructor.
            discount: Forwarded unchanged to the parent constructor.
            environment: Environment object; it must expose ``state_space``
                and ``action_space``. It is forwarded to the parent
                constructor and to ``init_episode``.
            n: Stored as ``self.n``.
            epsilon: Stored as ``self.epsilon`` and handed to
                ``policy.EpsilonGreedyPolicy``; defaults to 0.2.
        """
        super().__init__(alpha, discount, environment)

        self.n = n
        self.epsilon = epsilon

        # ``self.qvalues`` is presumably provided by the parent constructor.
        states = environment.state_space
        actions = environment.action_space
        self.policy = policy.EpsilonGreedyPolicy(
            states, actions, self.qvalues, self.epsilon)

        # Exploration uses the very same policy object.
        self.explore_policy = self.policy

        # Runs last, once every attribute above has been assigned.
        self.init_episode(environment)
Exemplo n.º 4
0
    def __init__(self, alpha, discount, env, episilon=0.2):
        """Set up the agent's policies and per-episode bookkeeping.

        Args:
            alpha: Forwarded unchanged to the parent constructor.
            discount: Forwarded unchanged to the parent constructor.
            env: Environment object; it must expose ``state_space`` and
                ``action_space``. It is also forwarded to the parent
                constructor.
            episilon: Fourth argument handed to both ``policy.GreedyPolicy``
                and ``policy.EpsilonGreedyPolicy``; defaults to 0.2. The
                spelling is kept as-is because callers may pass it by
                keyword.
        """
        super().__init__(alpha, discount, env)

        # Both policies are built from the same (state space, action space)
        # pair; ``self.qvalues`` is presumably provided by the parent.
        spaces = (env.state_space, env.action_space)

        self.optimal_policy = policy.GreedyPolicy(
            *spaces, self.qvalues, episilon)
        self.explore_policy = policy.EpsilonGreedyPolicy(
            *spaces, self.qvalues, episilon)

        # The policy used for drawing is the greedy one.
        self.draw_policy = self.optimal_policy

        # Start with no recorded returns and an empty episode.
        self.returns = defaultdict(list)
        self.episode = []