예제 #1
0
def value_iteration_demo():
    np.random.seed(0)
    env = SnakeEnv(10, [3, 6])
    agent = TableAgent(env)
    pi_algo = ValueIteration()
    with timer('Timer ValueIter'):
        pi_algo.value_iteration(agent)
    print('return_pi={}'.format(eval_game(env, agent)))
예제 #2
0
def generalized_iteration_demo():
    np.random.seed(0)
    env = SnakeEnv(10, [3, 6])
    agent = TableAgent(env)
    pi_algo = GeneralizedPolicyIteration()
    with timer('Timer GeneralizedIter'):
        pi_algo.generalized_policy_iteration(agent)
    print('return_pi={}'.format(eval_game(env, agent)))
예제 #3
0
def policy_iteration_demo():
    np.random.seed(0)
    env = SnakeEnv(10, [3, 6])
    agent = TableAgent(env)
    pi_algo = PolicyIterationWithTimer()
    with timer('Timer PolicyIter'):
        pi_algo.policy_iteration(agent)
    print 'return_pi={}'.format(eval_game(env, agent))