def monte_carlo_demo():
    """Compare Monte Carlo, policy iteration and SARSA on one snake environment.

    A single ``SnakeEnv(10, [3, 6])`` is shared by the three runs. NumPy's
    global random generator is re-seeded with 101 before each run. For every
    algorithm the training call is timed with ``timer``, then the result of
    ``eval_game`` and the agent's ``pi`` attribute are printed.

    The ``0.5`` passed to ``MonteCarlo`` and ``SARSA`` is forwarded unchanged;
    presumably it is an exploration rate -- confirm against those classes.
    """
    # Run 1: Monte Carlo on a model-free agent.
    np.random.seed(101)
    env = SnakeEnv(10, [3, 6])
    agent = ModelFreeAgent(env)
    mc = MonteCarlo(0.5)
    with timer('Timer Monte Carlo Iter'):
        mc.monte_carlo_opt(agent, env)
    print('return_pi={}'.format(eval_game(env, agent)))
    print(agent.pi)

    # Run 2: policy iteration on a table agent.
    np.random.seed(101)
    agent2 = TableAgent(env)
    pi_algo = PolicyIteration()
    with timer('Timer PolicyIter'):
        pi_algo.policy_iteration(agent2)
    print('return_pi={}'.format(eval_game(env, agent2)))
    print(agent2.pi)

    # Run 3: SARSA on a fresh model-free agent. The timer label names SARSA
    # explicitly so its timing line cannot be confused with the Monte Carlo
    # run above.
    np.random.seed(101)
    agent3 = ModelFreeAgent(env)
    sarsa_algo = SARSA(0.5)
    with timer('Timer SARSA Iter'):
        sarsa_algo.sarsa(agent3, env)
    print('return_pi={}'.format(eval_game(env, agent3)))
    print(agent3.pi)
def monte_carlo_demo():
    """Run policy iteration, value iteration and Monte Carlo back to back.

    Before each algorithm NumPy's global random generator is re-seeded with 0
    and a new ``SnakeEnv(10, [3, 6])`` is created. After each algorithm the
    ``eval_game`` result is printed with an algorithm-specific prefix,
    followed by the agent's ``pi`` attribute. Policy iteration and Monte
    Carlo are timed with ``timer``; value iteration is not.
    """
    # --- Policy iteration (timed) ---
    np.random.seed(0)
    env = SnakeEnv(10, [3, 6])
    policy_agent = TableAgent(env)
    policy_solver = PolicyIteration()
    with timer('Timer PolicyIter'):
        policy_solver.policy_iteration(policy_agent)
    policy_score = eval_game(env, policy_agent)
    print('PolicyIteration:return_pi={}'.format(policy_score))
    print(policy_agent.pi)

    # --- Value iteration (not timed) ---
    np.random.seed(0)
    env = SnakeEnv(10, [3, 6])
    value_agent = TableAgent(env)
    value_solver = ValueIteration()
    value_solver.value_iteration(value_agent)
    value_score = eval_game(env, value_agent)
    print('ValueIteration:return_pi={}'.format(value_score))
    print(value_agent.pi)

    # --- Monte Carlo (timed) ---
    np.random.seed(0)
    env = SnakeEnv(10, [3, 6])
    mc_agent = ModelFreeAgent(env)
    mc_solver = MonteCarlo()
    with timer('Timer Monte Carlo Iter'):
        mc_solver.monte_carlo_opt(mc_agent, env)
    mc_score = eval_game(env, mc_agent)
    print('MonteCarlo:return_pi={}'.format(mc_score))
    print(mc_agent.pi)
def policy_iter_run(test_episode_num=1_000_000):
    """
    Run the policy iteration algorithm, test the result and save the outputs.

    A ``PolicyIteration`` object is built from ``env`` and its
    ``policy_iteration()`` method is called. The result is then tested via
    ``test_policy`` with ``episode_num=test_episode_num``, and ``save_fig`` /
    ``save_result`` are called with ``fig_path`` / ``log_path``.

    ``env``, ``fig_path`` and ``log_path`` are not defined in this function
    and must be supplied by the surrounding module.

    For an introduction to the parameters, refer to 'policy_iter.py'.
    """
    solver = PolicyIteration(env)
    solver.policy_iteration()
    solver.test_policy(episode_num=test_episode_num)
    solver.save_fig(fig_path)
    solver.save_result(log_path)
def monte_carlo_demo():
    """Run Monte Carlo and policy iteration on one shared snake environment.

    Both algorithms use the same ``SnakeEnv(10, [3, 6])`` instance. Each
    training call is timed with ``timer``; afterwards the ``eval_game``
    result and the agent's ``pi`` attribute are printed. No random seed is
    set here.
    """
    env = SnakeEnv(10, [3, 6])

    # --- Monte Carlo on a model-free agent ---
    mc_agent = ModelFreeAgent(env)
    mc_solver = MonteCarlo()
    with timer('Timer Monte Carlo Iter'):
        mc_solver.monte_carlo_opt(mc_agent, env)
    mc_score = eval_game(env, mc_agent)
    print('return_pi={}'.format(mc_score))
    print(mc_agent.pi)

    # --- Policy iteration on a table agent ---
    table_agent = TableAgent(env)
    policy_solver = PolicyIteration()
    with timer('Timer PolicyIter'):
        policy_solver.policy_iteration(table_agent)
    policy_score = eval_game(env, table_agent)
    print('return_pi={}'.format(policy_score))
    print(table_agent.pi)