Example #1
def monte_carlo_demo():
    np.random.seed(101)
    env = SnakeEnv(10, [3, 6])
    agent = ModelFreeAgent(env)
    mc = MonteCarlo(0.5)
    with timer('Timer Monte Carlo Iter'):
        mc.monte_carlo_opt(agent, env)
    print('return_pi={}'.format(eval_game(env, agent)))
    print(agent.pi)

    np.random.seed(101)
    agent2 = TableAgent(env)
    pi_algo = PolicyIteration()
    with timer('Timer PolicyIter'):
        pi_algo.policy_iteration(agent2)
    print('return_pi={}'.format(eval_game(env, agent2)))
    print(agent2.pi)

    np.random.seed(101)
    agent3 = ModelFreeAgent(env)
    sarsa_algo = SARSA(0.5)
    with timer('Timer SARSA Iter'):
        sarsa_algo.sarsa(agent3, env)
    print('return_pi={}'.format(eval_game(env, agent3)))
    print(agent3.pi)
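
The demos above and below assume `import numpy as np` plus a couple of helpers shared across the codebase. Below is a minimal sketch of the two used in every example, timer and eval_game, assuming a Gym-style reset()/step() interface on SnakeEnv; the exact signatures in the original module may differ.

import time
from contextlib import contextmanager

@contextmanager
def timer(name):
    # print how long the wrapped block took
    start = time.time()
    yield
    print('{} cost {:.3f}s'.format(name, time.time() - start))

def eval_game(env, policy):
    # roll out one episode and return the undiscounted sum of rewards;
    # `policy` is either an agent exposing play(state) or a plain
    # per-state action table -- both usages appear in these demos
    state = env.reset()
    total = 0
    while True:
        if hasattr(policy, 'play'):
            action = policy.play(state)
        else:
            action = policy[state]
        state, reward, done, _ = env.step(action)
        total += reward
        if done:
            break
    return total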
Example #2
def monte_carlo_demo():
    np.random.seed(0)
    env = SnakeEnv(10, [3, 6])
    agent2 = TableAgent(env)
    pi_algo = PolicyIteration()
    with timer('Timer PolicyIter'):
        pi_algo.policy_iteration(agent2)
    print('PolicyIteration:return_pi={}'.format(eval_game(env, agent2)))
    print(agent2.pi)

    np.random.seed(0)
    env = SnakeEnv(10, [3, 6])
    agent3 = TableAgent(env)
    vi_algo = ValueIteration()
    vi_algo.value_iteration(agent3)
    print('ValueIteration:return_pi={}'.format(eval_game(env, agent3)))
    print(agent3.pi)

    np.random.seed(0)
    env = SnakeEnv(10, [3, 6])
    agent = ModelFreeAgent(env)
    mc = MonteCarlo()
    with timer('Timer Monte Carlo Iter'):
        mc.monte_carlo_opt(agent, env)
    print('MonteCarlo:return_pi={}'.format(eval_game(env, agent)))
    print(agent.pi)
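
SnakeEnv(10, [3, 6]) appears to build a 100-cell snakes-and-ladders board with 10 randomly placed ladders and two dice: action 0 rolls a 1-3 die, action 1 a 1-6 die. A heavily simplified sketch; the bounce-back rule at the end of the board and the reward scheme (-1 per step, +100 on reaching cell 100) are assumptions.

import numpy as np

class SnakeEnv:
    SIZE = 100  # cells are numbered 1..100

    def __init__(self, ladder_num, dices):
        self.dices = dices  # dices[i] is the number of faces rolled by action i
        # ladder_num random two-way ladders (assumed behaviour)
        self.ladders = dict(np.random.randint(1, self.SIZE, size=(ladder_num, 2)))
        self.ladders.update({v: k for k, v in self.ladders.items()})
        self.pos = 1

    def reset(self):
        self.pos = 1
        return self.pos

    def step(self, action):
        self.pos += np.random.randint(1, self.dices[action] + 1)
        if self.pos == self.SIZE:
            return self.pos, 100, True, {}
        if self.pos > self.SIZE:
            self.pos = 2 * self.SIZE - self.pos  # bounce back past the goal
        if self.pos in self.ladders:
            self.pos = self.ladders[self.pos]
        return self.pos, -1, False, {}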
Example #3
def test_easy():
    np.random.seed(0)
    sum_opt = 0
    sum_0 = 0
    sum_1 = 0
    env = SnakeEnv(0, [3, 6])
    for i in range(10000):
        sum_opt += eval_game(env, policy_ref)
        sum_0 += eval_game(env, policy_0)
        sum_1 += eval_game(env, policy_1)
    print('opt avg={}'.format(sum_opt / 10000.0))
    print('0 avg={}'.format(sum_0 / 10000.0))
    print('1 avg={}'.format(sum_1 / 10000.0))
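
policy_0, policy_1 and policy_ref are fixed hand-written policies evaluated for comparison. One plausible definition, consistent with eval_game indexing a plain action table by state (the exact reference policy is an assumption): roll the big die everywhere except just below the goal, where the small die avoids overshooting.

policy_0 = [0] * 101             # always action 0 (the 1-3 die)
policy_1 = [1] * 101             # always action 1 (the 1-6 die)
policy_ref = [1] * 97 + [0] * 4  # big die up to cell 96, small die near the goal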
Example #4
def policy_iteration_demo2():
    env = SnakeEnv(10, [3, 6])
    agent = TableAgent(env)
    agent.pi[:] = 0
    print('return3={}'.format(eval_game(env, agent)))
    agent.pi[:] = 1
    print('return6={}'.format(eval_game(env, agent)))
    agent.pi[97:100] = 0
    print('return_ensemble={}'.format(eval_game(env, agent)))
    pi_algo = PolicyIteration()
    pi_algo.policy_iteration(agent)
    print('return_pi={}'.format(eval_game(env, agent)))
    print(agent.pi)

def first_easy():
    sum_opt = 0
    sum_0 = 0
    sum_1 = 0
    env = SnakeEnv(0, [3, 6])
    countNum = 10000
    for i in range(countNum):
        sum_opt += eval_game(env, policy_ref)
        sum_0 += eval_game(env, policy_0)
        sum_1 += eval_game(env, policy_1)
    print('policy_ref avg={}'.format(sum_opt / countNum))
    print('policy_0 avg={}'.format(sum_0 / countNum))
    print('policy_1 avg={}'.format(sum_1 / countNum))

def monte_carlo_demo():
    env = SnakeEnv(10, [3, 6])
    agent = ModelFreeAgent(env)
    mc = MonteCarlo()
    with timer('Timer Monte Carlo Iter'):
        mc.monte_carlo_opt(agent, env)
    print('return_pi={}'.format(eval_game(env, agent)))
    print(agent.pi)

    agent2 = TableAgent(env)
    pi_algo = PolicyIteration()
    with timer('Timer PolicyIter'):
        pi_algo.policy_iteration(agent2)
    print('return_pi={}'.format(eval_game(env, agent2)))
    print(agent2.pi)
Example #7
def policy_iteration_demo1():
    env = SnakeEnv(0, [3, 6])
    agent = TableAgent(env)
    pi_algo = PolicyIteration()
    pi_algo.policy_iteration(agent)
    print('return_pi={}'.format(eval_game(env, agent)))
    print(agent.pi)
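
A minimal sketch of what PolicyIteration does: iterative policy evaluation followed by greedy improvement, repeated until the policy stops changing. The agent field names (p as a transition tensor indexed [action, state, next_state], r as a per-state reward, plus gamma, pi, value_pi, value_q, s_len, a_len) are assumptions about the shared codebase.

class PolicyIteration:
    def policy_evaluation(self, agent, max_iter=-1):
        # sweep V(s) <- r(s) + gamma * sum_s' p(pi(s), s, s') V(s') until stable
        iteration = 0
        while iteration != max_iter:
            iteration += 1
            new_value = np.zeros_like(agent.value_pi)
            for s in range(agent.s_len):
                a = agent.pi[s]
                new_value[s] = agent.r[s] + agent.gamma * np.dot(agent.p[a, s, :], agent.value_pi)
            diff = np.max(np.abs(new_value - agent.value_pi))
            agent.value_pi = new_value
            if diff < 1e-6:
                break

    def policy_improvement(self, agent):
        # act greedily with respect to the one-step lookahead Q values
        for s in range(agent.s_len):
            for a in range(agent.a_len):
                agent.value_q[s, a] = agent.r[s] + agent.gamma * np.dot(agent.p[a, s, :], agent.value_pi)
        new_pi = np.argmax(agent.value_q, axis=1)
        if np.all(new_pi == agent.pi):
            return False
        agent.pi = new_pi
        return True

    def policy_iteration(self, agent):
        while True:
            self.policy_evaluation(agent)
            if not self.policy_improvement(agent):
                break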
Example #8
def policy_iteration_demo1():
    env = SnakeEnv(0, [3, 6])  # 0 means no ladders
    agent = TableAgent(env)  # tabular agent
    pi_algo = PolicyIteration()  # policy iteration solver
    pi_algo.policy_iteration(agent)  # compute the converged state-value function and policy
    print('return_pi={}'.format(eval_game(env, agent)))
    print(agent.pi)
Example #9
def value_iteration_demo():
    np.random.seed(0)
    env = SnakeEnv(10, [3, 6])
    agent = TableAgent(env)
    vi_algo = ValueIteration()
    vi_algo.value_iteration(agent)
    print('return_pi={}'.format(eval_game(env, agent)))
    print(agent.pi)
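
ValueIteration folds the max over actions directly into the backup (the Bellman optimality operator) and reads the greedy policy off the converged values; a sketch under the same assumed agent fields as the PolicyIteration sketch above.

class ValueIteration:
    def value_iteration(self, agent, eps=1e-6):
        # V(s) <- r(s) + gamma * max_a sum_s' p(a, s, s') V(s')
        while True:
            new_value = np.zeros_like(agent.value_pi)
            for s in range(agent.s_len):
                new_value[s] = agent.r[s] + agent.gamma * max(
                    np.dot(agent.p[a, s, :], agent.value_pi) for a in range(agent.a_len))
            diff = np.max(np.abs(new_value - agent.value_pi))
            agent.value_pi = new_value
            if diff < eps:
                break
        # r(s) is action-independent here, so it drops out of the argmax
        for s in range(agent.s_len):
            agent.pi[s] = np.argmax([np.dot(agent.p[a, s, :], agent.value_pi)
                                     for a in range(agent.a_len)])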
Example #10
def policy_iteration_demo():
    np.random.seed(0)
    env = SnakeEnv(10, [3, 6])
    agent = TableAgent(env)
    pi_algo = PolicyIterationWithTimer()
    pi_algo.policy_iteration(agent)
    print('return_pi={}'.format(eval_game(env, agent)))
    print(agent.pi)
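
PolicyIterationWithTimer is presumably the same algorithm instrumented with the timer helper sketched earlier; a guess at its shape.

class PolicyIterationWithTimer(PolicyIteration):
    def policy_iteration(self, agent):
        # identical to the parent loop, but timing each phase
        while True:
            with timer('policy evaluation'):
                self.policy_evaluation(agent)
            with timer('policy improvement'):
                improved = self.policy_improvement(agent)
            if not improved:
                break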
Example #11
def generalized_iteration_demo():
    np.random.seed(0)
    env = SnakeEnv(10, [3, 6])
    agent = TableAgent(env)
    pi_algo = GeneralizedPolicyIteration()
    with timer('Timer GeneralizedIter'):
        pi_algo.generalized_policy_iteration(agent)
    print('return_pi={}'.format(eval_game(env, agent)))
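
GeneralizedPolicyIteration interleaves the two phases more loosely: evaluation is truncated to a fixed number of sweeps rather than run to convergence, which typically reaches the same policy in less wall-clock time. A sketch reusing the PolicyIteration pieces above; the sweep count of 10 is an arbitrary choice.

class GeneralizedPolicyIteration(PolicyIteration):
    def generalized_policy_iteration(self, agent):
        # truncated evaluation (a few sweeps), then greedy improvement
        while True:
            self.policy_evaluation(agent, max_iter=10)
            if not self.policy_improvement(agent):
                break
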
def monte_carlo_demo2():
    env = SnakeEnv(10, [3, 6])
    agent = ModelFreeAgent(env)
    mc = MonteCarlo(0.5)
    with timer('Timer Monte Carlo Iter'):
        mc.monte_carlo_opt(agent, env)
    print('return_pi={}'.format(eval_game(env, agent)))
    print(agent.pi)
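
The model-free demos rely on MonteCarlo (and, in Example #1, SARSA). Below is a sketch of epsilon-greedy Monte Carlo control consistent with the MonteCarlo(0.5) calls, reading the constructor argument as the exploration rate; the episode counts and the ModelFreeAgent fields (value_q, visit counts value_n, pi, a_len, gamma, play) are assumptions. SARSA would differ only in the update target, bootstrapping from r + gamma * Q(s', a') after every step instead of waiting for the full-episode return.

class MonteCarlo:
    def __init__(self, epsilon=0.0):
        self.epsilon = epsilon  # exploration rate for the behaviour policy

    def monte_carlo_eval(self, agent, env):
        # sample one episode with an epsilon-greedy policy ...
        state = env.reset()
        episode = []
        while True:
            if np.random.rand() < self.epsilon:
                action = np.random.randint(agent.a_len)
            else:
                action = agent.play(state)
            next_state, reward, done, _ = env.step(action)
            episode.append((state, action, reward))
            state = next_state
            if done:
                break
        # ... then fold the observed returns into Q as an incremental mean
        value = 0
        for state, action, reward in reversed(episode):
            value = reward + agent.gamma * value
            agent.value_n[state, action] += 1
            agent.value_q[state, action] += (value - agent.value_q[state, action]) / agent.value_n[state, action]

    def policy_improvement(self, agent):
        new_pi = np.argmax(agent.value_q, axis=1)
        if np.all(new_pi == agent.pi):
            return False
        agent.pi = new_pi
        return True

    def monte_carlo_opt(self, agent, env, rounds=10, episodes_per_round=100):
        for _ in range(rounds):
            for _ in range(episodes_per_round):
                self.monte_carlo_eval(agent, env)
            self.policy_improvement(agent)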
Example #13
def value_iteration_demo():
    np.random.seed(0)
    env = SnakeEnv(10, [3, 6])
    agent = TableAgent(env)
    vi_algo = ValueIteration()
    with timer('Timer ValueIter'):
        vi_algo.value_iteration(agent)
    print('return_pi={}'.format(eval_game(env, agent)))