def monte_carlo_demo():
    """Compare Monte Carlo, policy iteration and SARSA on one ``SnakeEnv``.

    A single ``SnakeEnv(10, [3, 6])`` is shared by all three runs. Before
    each run NumPy's global RNG is re-seeded with 101, the algorithm is
    executed inside a ``timer`` context, and then the ``eval_game`` result
    and the agent's ``pi`` attribute are printed.

    The function takes no arguments and returns ``None``; its only
    observable effects are the printed lines and whatever ``timer`` reports.
    """
    # --- Monte Carlo on a model-free agent -------------------------------
    np.random.seed(101)
    env = SnakeEnv(10, [3, 6])
    agent = ModelFreeAgent(env)
    # NOTE(review): 0.5 is presumably the exploration rate -- confirm
    # against the MonteCarlo constructor.
    mc = MonteCarlo(0.5)
    with timer('Timer Monte Carlo Iter'):
        mc.monte_carlo_opt(agent, env)
    print('return_pi={}'.format(eval_game(env, agent)))
    print(agent.pi)

    # --- Policy iteration on a table agent -------------------------------
    np.random.seed(101)
    agent2 = TableAgent(env)
    pi_algo = PolicyIteration()
    with timer('Timer PolicyIter'):
        pi_algo.policy_iteration(agent2)
    print('return_pi={}'.format(eval_game(env, agent2)))
    print(agent2.pi)

    # --- SARSA on a fresh model-free agent -------------------------------
    np.random.seed(101)
    agent3 = ModelFreeAgent(env)
    sarsa_algo = SARSA(0.5)
    # The label previously read 'Timer Monte Carlo Iter' (copied from the
    # first section), which made the SARSA timing indistinguishable from
    # the Monte Carlo timing in the output.
    with timer('Timer SARSA Iter'):
        sarsa_algo.sarsa(agent3, env)
    print('return_pi={}'.format(eval_game(env, agent3)))
    print(agent3.pi)
def monte_carlo_demo():
    """Run policy iteration, value iteration and Monte Carlo back to back.

    Each of the three experiments re-seeds NumPy's global RNG with 0 and
    builds its own ``SnakeEnv(10, [3, 6])``. After the algorithm has run,
    the ``eval_game`` result is printed with an algorithm-specific prefix,
    followed by the agent's ``pi`` attribute. Policy iteration and Monte
    Carlo are wrapped in a ``timer`` context; value iteration is not.

    Takes no arguments and returns ``None``.
    """
    # Experiment 1: policy iteration (timed).
    np.random.seed(0)
    env = SnakeEnv(10, [3, 6])
    policy_agent = TableAgent(env)
    policy_solver = PolicyIteration()
    with timer('Timer PolicyIter'):
        policy_solver.policy_iteration(policy_agent)
    policy_score = eval_game(env, policy_agent)
    print('PolicyIteration:return_pi={}'.format(policy_score))
    print(policy_agent.pi)

    # Experiment 2: value iteration (intentionally left untimed, as before).
    np.random.seed(0)
    env = SnakeEnv(10, [3, 6])
    value_agent = TableAgent(env)
    value_solver = ValueIteration()
    value_solver.value_iteration(value_agent)
    value_score = eval_game(env, value_agent)
    print('ValueIteration:return_pi={}'.format(value_score))
    print(value_agent.pi)

    # Experiment 3: Monte Carlo with default constructor arguments (timed).
    np.random.seed(0)
    env = SnakeEnv(10, [3, 6])
    mc_agent = ModelFreeAgent(env)
    mc_solver = MonteCarlo()
    with timer('Timer Monte Carlo Iter'):
        mc_solver.monte_carlo_opt(mc_agent, env)
    mc_score = eval_game(env, mc_agent)
    print('MonteCarlo:return_pi={}'.format(mc_score))
    print(mc_agent.pi)
def monte_carlo_demo2():
    """Run a single timed Monte Carlo optimisation and print the outcome.

    Builds a ``SnakeEnv(10, [3, 6])`` with a ``ModelFreeAgent``, runs
    ``MonteCarlo(0.5).monte_carlo_opt`` inside a ``timer`` context, then
    prints the ``eval_game`` result and the agent's ``pi`` attribute.

    Takes no arguments and returns ``None``. The RNG is not seeded here.
    """
    snake_env = SnakeEnv(10, [3, 6])
    learner = ModelFreeAgent(snake_env)
    # NOTE(review): 0.5 is presumably the exploration rate -- confirm
    # against the MonteCarlo constructor.
    solver = MonteCarlo(0.5)

    with timer('Timer Monte Carlo Iter'):
        solver.monte_carlo_opt(learner, snake_env)

    score = eval_game(snake_env, learner)
    print('return_pi={}'.format(score))
    print(learner.pi)
def monte_carlo_demo():
    """Run Monte Carlo and then policy iteration on a shared ``SnakeEnv``.

    One ``SnakeEnv(10, [3, 6])`` is created and used by both runs. Each
    algorithm executes inside its own ``timer`` context; afterwards the
    ``eval_game`` result and the agent's ``pi`` attribute are printed.

    Takes no arguments and returns ``None``. The RNG is not seeded here.
    """
    snake_env = SnakeEnv(10, [3, 6])

    # Model-free agent trained with Monte Carlo (default constructor args).
    mc_agent = ModelFreeAgent(snake_env)
    mc_solver = MonteCarlo()
    with timer('Timer Monte Carlo Iter'):
        mc_solver.monte_carlo_opt(mc_agent, snake_env)
    mc_score = eval_game(snake_env, mc_agent)
    print('return_pi={}'.format(mc_score))
    print(mc_agent.pi)

    # Table agent solved with policy iteration on the same environment.
    table_agent = TableAgent(snake_env)
    policy_solver = PolicyIteration()
    with timer('Timer PolicyIter'):
        policy_solver.policy_iteration(table_agent)
    table_score = eval_game(snake_env, table_agent)
    print('return_pi={}'.format(table_score))
    print(table_agent.pi)