def train(env=None):
    agent = SARSAAgent()
    if env is None:
        env = gym.make("FrozenLakeEasy-v0")
    agent.learn(env, episode_count=500)
    show_q_value(agent.Q)
    agent.show_reward_log()
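Every example on this page calls a show_q_value helper that is not part of gym and is never shown in these excerpts. A minimal stand-in, assuming Q behaves like a dict mapping a state index to a list of per-action values on the 4x4 FrozenLake grid (the real helper evidently renders and saves a figure, given the name="..._stage.png" arguments in later examples):

def show_q_value(Q, name=None, grid_size=4):
    # Sketch only: print the best action value per cell of the 4x4 grid.
    # name is accepted to mirror the real helper, which saves a .png file.
    for row in range(grid_size):
        cells = []
        for col in range(grid_size):
            s = row * grid_size + col
            values = Q.get(s, [0.0])  # unvisited states default to 0
            cells.append("{:6.3f}".format(max(values)))
        print(" ".join(cells))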
Example #2
def train(env=None):
    agent = MonteCarloAgent(epsilon=0.1)
    if env is None:
        env = gym.make("FrozenLakeEasy-v0")
    agent.learn(env, episode_count=500)
    show_q_value(agent.Q)
    agent.show_reward_log()
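None of these snippets show where FrozenLakeEasy-v0 comes from; it is not a stock gym id. A plausible registration, assuming the "easy" variant is simply the toy-text FrozenLakeEnv with slipping disabled:

import gym
from gym.envs.registration import register

# Assumed registration for "FrozenLakeEasy-v0" (gym itself only ships
# FrozenLake-v0/v1): a deterministic 4x4 lake, so the tabular agents
# in these examples can learn reliably.
register(
    id="FrozenLakeEasy-v0",
    entry_point="gym.envs.toy_text:FrozenLakeEnv",
    kwargs={"is_slippery": False},
)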
Example #3
def train(env=None):
    trainer = ActorCritic(Actor, Critic)
    if env is None:
        env = gym.make("FrozenLakeEasy-v0")
    actor, _ = trainer.train(env, episode_count=3000)
    print(actor.Q)
    show_q_value(actor.Q)
    actor.show_reward_log()
Example #4
def train(gamma=0.9):  # gamma was undefined in the original excerpt; taken as a parameter here
    agent = QLearningAgent()
    env = gym.make("FrozenLakeEasy-v0")
    # env.render()
    agent.learn(env,
                episode_count=300000,
                render=False,
                gamma=gamma,
                report_interval=100000)
    show_q_value(agent.Q)
    agent.show_reward_log()
Example #5
def train_gamma(env, name, gamma):
    agent = QLearningAgent()
    # env.render()
    agent.learn(env,
                episode_count=300000,
                render=False,
                gamma=gamma,
                report_interval=100000)
    show_q_value(agent.Q, name=name + "_stage.png")
    agent.show_reward_log(name=name + "_figure.png")
    print("game start")
    print("gamma={0} : {1}".format(
        gamma, "goal" if play(agent, env) else "game over"))
Example #6
def train_stg(count=10, name="stg"):  # default name assumed; None would break the filenames below
    for i in range(count):  # the original hard-coded range(10), ignoring count
        print("now: {}th".format(i))
        agent = QLearningAgent()
        env = gym.make("FrozenLakeEasy-v0")
        env.render()
        agent.learn(env,
                    episode_count=500000,
                    render=False,
                    report_interval=100000)
        show_q_value(agent.Q, name=name + "_{}_stage.png".format(i))
        agent.show_reward_log(name=name + "_{}_figure.png".format(i))
        print("game start")
        print("{0}th: {1}".format(i + 1,
                                  "goal" if play(agent, env) else "game over"))
Example #7
def train():
    agent = QLearningAgent()
    env = gym.make("FrozenLakeEasy-v0")
    agent.learn(env, episode_count=500)
    show_q_value(agent.Q)
    agent.show_reward_log()
Example #8
                if self.q_learning:
                    # Q-learning: bootstrap from the best next action (off-policy).
                    gain = reward + gamma * max(self.Q[n_state])
                else:
                    # SARSA: bootstrap from the action actually taken (on-policy).
                    gain = reward + gamma * self.Q[n_state][n_action]

                estimated = self.Q[s][a]
                self.Q[s][a] += learning_rate * (gain - estimated)
                s = n_state

                if self.q_learning:
                    a = self.policy(s, actions)
                else:
                    a = n_action
            else:
                self.log(reward)

            if e != 0 and e % report_interval == 0:
                pass
            #     self.show_reward_log(episode=e)


def train(q_learning):
    env = gym.make("FrozenLakeEasy-v0")
    agent = CompareAgent(q_learning=q_learning)
    agent.learn(env, episode_count=100000)
    return dict(agent.Q)


if __name__ == "__main__":
    with Pool() as pool:  # multiprocessing.Pool: run both configurations in parallel
        results = pool.map(train, [True, False])
        for r in results:
            show_q_value(r)
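The only thing the q_learning flag changes is the TD target: True bootstraps from max(self.Q[n_state]) regardless of which action the policy executes next (off-policy Q-learning), while False bootstraps from n_action, the action actually taken (on-policy SARSA). That single difference is why the two worker processes return different Q tables.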
Example #9
def train():
    trainer = ActorCritic(Actor, Critic)
    env = gym.make("FrozenLakeEasy-v0")
    actor, critic = trainer.train(env, episode_count=3000)
    show_q_value(actor.Q)
    actor.show_reward_log()
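Examples #3 and #9 depend on Actor, Critic, and ActorCritic classes defined elsewhere. Below is a self-contained tabular sketch that merely matches the calling convention used above (ActorCritic(Actor, Critic), then trainer.train(env, episode_count=...) returning (actor, critic)); the logging helpers such as show_reward_log are omitted, and every detail here is an assumption, not the original implementation:

import numpy as np

class Actor:
    def __init__(self, env):
        n_s, n_a = env.observation_space.n, env.action_space.n
        self.Q = np.random.uniform(0, 1, (n_s, n_a))  # action preferences

    def policy(self, s):
        # Softmax over preferences gives the action distribution.
        z = np.exp(self.Q[s] - self.Q[s].max())
        probs = z / z.sum()
        return np.random.choice(len(probs), p=probs)

class Critic:
    def __init__(self, env):
        self.V = np.zeros(env.observation_space.n)  # state-value estimates

class ActorCritic:
    def __init__(self, actor_class, critic_class):
        self.actor_class = actor_class
        self.critic_class = critic_class

    def train(self, env, episode_count=1000, gamma=0.9, learning_rate=0.1):
        actor = self.actor_class(env)
        critic = self.critic_class(env)
        for _ in range(episode_count):
            s = env.reset()
            done = False
            while not done:
                a = actor.policy(s)
                n_state, reward, done, _ = env.step(a)
                # The TD error drives both updates: the critic's value
                # estimate and the actor's preference for the taken action.
                td = reward + gamma * critic.V[n_state] - critic.V[s]
                actor.Q[s][a] += learning_rate * td
                critic.V[s] += learning_rate * td
                s = n_state
        return actor, critic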
Example #10
def train():
    agent = SARSAAgent()
    env = gym.make("FrozenLakeEasy-v0")
    agent.learn(env)
    show_q_value(agent.Q)
    agent.show_reward_log()
Example #11
def train():
    agent = MonteCarloAgent(epsilon=0.1)
    env = gym.make("FrozenLakeEasy-v0")
    agent.learn(env)
    show_q_value(agent.Q)
    agent.show_reward_log()