def monte_carlo_demo():
    np.random.seed(0)
    env = SnakeEnv(10, [3, 6])
    agent2 = TableAgent(env)
    pi_algo = PolicyIteration()
    with timer('Timer PolicyIter'):
        pi_algo.policy_iteration(agent2)
    print('PolicyIteration:return_pi={}'.format(eval_game(env, agent2)))
    print(agent2.pi)

    np.random.seed(0)
    env = SnakeEnv(10, [3, 6])
    agent3 = TableAgent(env)
    vi_algo = ValueIteration()
    vi_algo.value_iteration(agent3)
    print('ValueIteration:return_pi={}'.format(eval_game(env, agent3)))
    print(agent3.pi)

    np.random.seed(0)
    env = SnakeEnv(10, [3, 6])
    agent = ModelFreeAgent(env)
    mc = MonteCarlo()
    with timer('Timer Monte Carlo Iter'):
        mc.monte_carlo_opt(agent, env)
    print('MonteCarlo:return_pi={}'.format(eval_game(env, agent)))
    print(agent.pi)
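# All of the demos here rely on a `timer` context manager and an
# `eval_game` helper defined elsewhere in the codebase. The definitions
# below are minimal sketches of what those helpers are assumed to do,
# not the repository's actual implementations; in particular, the
# agent.play(state) method is an assumption about the agent interface.
import time
from contextlib import contextmanager

@contextmanager
def timer(name):
    # Print the wall-clock time spent inside the with-block.
    start = time.time()
    yield
    print('{} cost: {:.4f}s'.format(name, time.time() - start))

def eval_game(env, agent):
    # Play one episode with the agent's current policy and return the
    # accumulated reward (the undiscounted return of that episode).
    state = env.reset()
    total_reward = 0
    while True:
        state, reward, done, _ = env.step(agent.play(state))
        total_reward += reward
        if done:
            return total_reward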
def policy_iteration_demo2():
    env = SnakeEnv(10, [3, 6])
    agent = TableAgent(env)
    agent.pi[:] = 0   # always throw the 1-3 die
    print('return3={}'.format(eval_game(env, agent)))
    agent.pi[:] = 1   # always throw the 1-6 die
    print('return6={}'.format(eval_game(env, agent)))
    agent.pi[97:100] = 0   # 1-6 die everywhere, but the 1-3 die near the goal
    print('return_ensemble={}'.format(eval_game(env, agent)))
    pi_algo = PolicyIteration()
    pi_algo.policy_iteration(agent)
    print('return_pi={}'.format(eval_game(env, agent)))
    print(agent.pi)
def policy_iteration_demo1():
    env = SnakeEnv(0, [3, 6])        # 0 means the ladders are ignored
    agent = TableAgent(env)          # tabular agent
    pi_algo = PolicyIteration()      # policy-iteration solver
    pi_algo.policy_iteration(agent)  # iterate to the optimal value function and policy
    print('return_pi={}'.format(eval_game(env, agent)))
    print(agent.pi)
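# PolicyIteration is assumed to alternate full policy evaluation with a
# greedy improvement sweep over the agent's tabular model. The sketch
# below shows one improvement sweep, assuming the agent stores a
# transition tensor p[action, state, next_state], a reward vector r
# (as a NumPy array), a state-value table value_pi, and a discount
# factor gamma.
import numpy as np

def policy_improvement_sketch(agent):
    # Greedily recompute the action for every state from the one-step
    # Bellman backup; return True if the policy changed.
    new_pi = np.zeros_like(agent.pi)
    for s in range(len(agent.pi)):
        q = [np.dot(agent.p[a, s], agent.r + agent.gamma * agent.value_pi)
             for a in range(agent.p.shape[0])]
        new_pi[s] = np.argmax(q)
    changed = bool(np.any(new_pi != agent.pi))
    agent.pi[:] = new_pi
    return changed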
def sarsa_demo():
    # Renamed from a second monte_carlo_demo to avoid redefining the
    # function above; this variant adds a SARSA run to the comparison.
    np.random.seed(101)
    env = SnakeEnv(10, [3, 6])
    agent = ModelFreeAgent(env)
    mc = MonteCarlo(0.5)
    with timer('Timer Monte Carlo Iter'):
        mc.monte_carlo_opt(agent, env)
    print('return_pi={}'.format(eval_game(env, agent)))
    print(agent.pi)

    np.random.seed(101)
    agent2 = TableAgent(env)
    pi_algo = PolicyIteration()
    with timer('Timer PolicyIter'):
        pi_algo.policy_iteration(agent2)
    print('return_pi={}'.format(eval_game(env, agent2)))
    print(agent2.pi)

    np.random.seed(101)
    agent3 = ModelFreeAgent(env)
    sarsa_algo = SARSA(0.5)
    with timer('Timer SARSA Iter'):
        sarsa_algo.sarsa(agent3, env)
    print('return_pi={}'.format(eval_game(env, agent3)))
    print(agent3.pi)
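# monte_carlo_opt and sarsa presumably differ only in how they back up
# the agent's action-value table value_q. The two sketches below show
# the assumed core updates: an every-visit Monte Carlo average over a
# sampled episode, and the one-step on-policy SARSA backup. The field
# value_n (per-pair visit counts) and the step size alpha are
# illustrative assumptions, not names taken from the repository.
def monte_carlo_update_sketch(agent, episode, gamma=0.8):
    # episode is a list of (state, action, reward) triples; walk it
    # backwards, accumulating the return g, and move each Q(s, a)
    # toward the running average of the observed returns.
    g = 0.0
    for s, a, r in reversed(episode):
        g = r + gamma * g
        agent.value_n[s, a] += 1
        agent.value_q[s, a] += (g - agent.value_q[s, a]) / agent.value_n[s, a]

def sarsa_update_sketch(agent, s, a, r, s_next, a_next, alpha=0.1, gamma=0.8):
    # One on-policy TD(0) backup: move Q(s, a) toward r + gamma * Q(s', a'),
    # where a' is the action the current policy actually takes in s'.
    target = r + gamma * agent.value_q[s_next, a_next]
    agent.value_q[s, a] += alpha * (target - agent.value_q[s, a])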
def value_iteration_demo():
    np.random.seed(0)
    env = SnakeEnv(10, [3, 6])
    agent = TableAgent(env)
    vi_algo = ValueIteration()
    with timer('Timer ValueIter'):
        vi_algo.value_iteration(agent)
    print('return_pi={}'.format(eval_game(env, agent)))
    print(agent.pi)
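# ValueIteration is assumed to repeat the Bellman optimality backup
# until the value table stops changing, and only then read off the
# greedy policy. A sketch of one sweep, under the same tabular-model
# assumptions as policy_improvement_sketch above:
def value_iteration_sweep_sketch(agent):
    # Back up every state with a max over actions; return the largest
    # single-state change so the caller can test for convergence.
    delta = 0.0
    for s in range(len(agent.value_pi)):
        best = max(np.dot(agent.p[a, s], agent.r + agent.gamma * agent.value_pi)
                   for a in range(agent.p.shape[0]))
        delta = max(delta, abs(best - agent.value_pi[s]))
        agent.value_pi[s] = best
    return delta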
def policy_iteration_demo():
    np.random.seed(0)
    env = SnakeEnv(10, [3, 6])
    agent = TableAgent(env)
    pi_algo = PolicyIterationWithTimer()
    pi_algo.policy_iteration(agent)
    print('return_pi={}'.format(eval_game(env, agent)))
    print(agent.pi)
def generalized_iteration_demo():
    np.random.seed(0)
    env = SnakeEnv(10, [3, 6])
    agent = TableAgent(env)
    pi_algo = GeneralizedPolicyIteration()
    with timer('Timer GeneralizedIter'):
        pi_algo.generalized_policy_iteration(agent)
    print('return_pi={}'.format(eval_game(env, agent)))
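# GeneralizedPolicyIteration presumably sits between the two previous
# methods: instead of evaluating the current policy to convergence, it
# runs only a few evaluation sweeps before each greedy improvement,
# which is why its timer is worth comparing against 'Timer PolicyIter'
# and 'Timer ValueIter'. A sketch under the same assumptions as above,
# with the sweep count as an illustrative parameter:
def generalized_policy_iteration_sketch(agent, eval_sweeps=10):
    while True:
        # Truncated policy evaluation: a fixed number of in-place sweeps.
        for _ in range(eval_sweeps):
            for s in range(len(agent.value_pi)):
                a = agent.pi[s]
                agent.value_pi[s] = np.dot(
                    agent.p[a, s], agent.r + agent.gamma * agent.value_pi)
        # Greedy improvement; stop once the policy is stable.
        if not policy_improvement_sketch(agent):  # defined in the sketch above
            break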