Example #1
import os

def value_iteration_main():
    # Ensure the image output directory exists; the draw_* calls below write
    # into images/. GridWorld, ValueIteration, and the draw_* helpers come
    # from the project's own modules (their imports are omitted in the source).
    os.makedirs('images/', exist_ok=True)

    # Create the grid world environment object
    env = GridWorld(height=GRID_HEIGHT,
                    width=GRID_WIDTH,
                    start_state=(0, 0),
                    terminal_states=TERMINAL_STATES,
                    transition_reward=-1.0,
                    terminal_reward=-1.0,
                    outward_reward=-1.0)
    env.reset()

    VI = ValueIteration(env)
    VI.start_iteration()

    print(VI.state_values)

    draw_grid_world_state_values_image(
        VI.state_values, 'images/grid_world_vi_optimal_state_values.png',
        GRID_HEIGHT, GRID_WIDTH)

    draw_grid_world_action_values_image(
        VI.calculate_grid_world_optimal_action_values(),
        'images/grid_world_vi_optimal_action_values.png', GRID_HEIGHT,
        GRID_WIDTH, env.NUM_ACTIONS, env.ACTION_SYMBOLS)

    draw_grid_world_optimal_policy_image(
        VI.calculate_optimal_policy(),
        "images/grid_world_vi_optimal_policy.png", GRID_HEIGHT, GRID_WIDTH,
        env.ACTION_SYMBOLS)
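
# --- Hedged sketch (not from the source): the ValueIteration class is defined
# elsewhere in the project. A minimal, self-contained version of the sweep it
# presumably performs is shown below. The 4x4 grid, corner terminals, and the
# uniform -1.0 step reward are assumptions chosen to mirror the env above.
import numpy as np

SKETCH_HEIGHT, SKETCH_WIDTH = 4, 4
SKETCH_TERMINALS = {(0, 0), (SKETCH_HEIGHT - 1, SKETCH_WIDTH - 1)}
SKETCH_ACTIONS = [(-1, 0), (1, 0), (0, -1), (0, 1)]  # up, down, left, right

def sketch_step(state, action):
    i, j = state[0] + action[0], state[1] + action[1]
    if 0 <= i < SKETCH_HEIGHT and 0 <= j < SKETCH_WIDTH:
        return (i, j)
    return state  # moving off the grid leaves the agent in place

def sketch_value_iteration(gamma=1.0, theta=1e-6):
    V = np.zeros((SKETCH_HEIGHT, SKETCH_WIDTH))
    while True:
        delta = 0.0
        for i in range(SKETCH_HEIGHT):
            for j in range(SKETCH_WIDTH):
                if (i, j) in SKETCH_TERMINALS:
                    continue
                # Bellman optimality backup: V(s) <- max_a [r + gamma * V(s')]
                best = max(-1.0 + gamma * V[sketch_step((i, j), a)]
                           for a in SKETCH_ACTIONS)
                delta = max(delta, abs(best - V[i, j]))
                V[i, j] = best
        if delta < theta:
            # Converges to the negated shortest-path distance to a terminal.
            return V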
Example #2
import os

def action_value_prediction_main():
    # Create the image output directory if it does not exist
    if not os.path.exists('images/'):
        os.makedirs('images/')

    # Create the grid world environment object
    env = GridWorld(height=GRID_HEIGHT,
                    width=GRID_WIDTH,
                    start_state=(0, 0),
                    terminal_states=TERMINAL_STATES,
                    transition_reward=-1.0,
                    terminal_reward=-1.0,
                    outward_reward=-1.0)
    env.reset()

    state_action_values, returns = first_visit_mc_prediction(env, 1.0, 10000)
    draw_grid_world_action_values_image(
        state_action_values,
        'images/grid_world_mc_action_values_first_visit.png', GRID_HEIGHT,
        GRID_WIDTH, env.NUM_ACTIONS, env.ACTION_SYMBOLS)

    state_action_values, returns = every_visit_mc_prediction(env, 1.0, 10000)
    draw_grid_world_action_values_image(
        state_action_values,
        'images/grid_world_mc_action_values_every_visit.png', GRID_HEIGHT,
        GRID_WIDTH, env.NUM_ACTIONS, env.ACTION_SYMBOLS)
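
# --- Hedged sketch (not from the source): first_visit_mc_prediction is
# defined elsewhere. A self-contained first-visit MC estimate of Q(s, a)
# under a uniform random policy, on a hypothetical 4-state chain, looks
# roughly like this. The signature and environment are illustrative only.
import random
from collections import defaultdict

def sketch_chain_step(s, a):           # states 0..3; state 3 is terminal
    s2 = min(max(s + a, 0), 3)
    return s2, -1.0, s2 == 3           # next state, reward, done

def sketch_first_visit_mc_q(gamma=1.0, num_episodes=2000):
    q_sum, q_cnt = defaultdict(float), defaultdict(int)
    for _ in range(num_episodes):
        # Roll out one full episode under the equiprobable random policy.
        s, episode, done = 0, [], False
        while not done:
            a = random.choice((-1, 1))
            s2, r, done = sketch_chain_step(s, a)
            episode.append((s, a, r))
            s = s2
        # Record the first occurrence of each (s, a); only that visit counts.
        first = {}
        for t, (s, a, _) in enumerate(episode):
            first.setdefault((s, a), t)
        # Walk backwards accumulating the return G = r + gamma * G.
        g = 0.0
        for t in reversed(range(len(episode))):
            s, a, r = episode[t]
            g = gamma * g + r
            if first[(s, a)] == t:
                q_sum[(s, a)] += g
                q_cnt[(s, a)] += 1
    # Every-visit MC differs only in dropping the first-visit check above.
    return {sa: q_sum[sa] / q_cnt[sa] for sa in q_sum}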
Example #3
def td_learning_main():
    env = GridWorld(height=GRID_HEIGHT,
                    width=GRID_WIDTH,
                    start_state=(2, 2),
                    terminal_states=TERMINAL_STATES,
                    transition_reward=-1.0,
                    terminal_reward=-1.0,
                    outward_reward=-1.0)
    compute_state_values(env, alpha=0.05)


def td_batch_comparison_main():
    env = GridWorld(height=GRID_HEIGHT,
                    width=GRID_WIDTH,
                    start_state=(2, 2),
                    terminal_states=TERMINAL_STATES,
                    transition_reward=-1.0,
                    terminal_reward=-1.0,
                    outward_reward=-1.0)
    print("alpha={0}".format(0.01))
    mc_td_batch_comparison(env, alpha=0.01)

    print("alpha={0}".format(0.005))
    mc_td_batch_comparison(env, alpha=0.005)
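
# --- Hedged sketch (not from the source): compute_state_values is defined
# elsewhere. The tabular TD(0) update it presumably applies is shown on the
# classic 5-state random walk (a stand-in environment, not the GridWorld):
# terminals on both ends, reward +1 on the right exit, 0 everywhere else.
import random

def sketch_td0(alpha=0.05, gamma=1.0, num_episodes=1000):
    V = [0.0] * 7                      # states 0..6; 0 and 6 are terminal
    for _ in range(num_episodes):
        s = 3                          # every episode starts in the middle
        while s not in (0, 6):
            s2 = s + random.choice((-1, 1))
            r = 1.0 if s2 == 6 else 0.0
            # TD(0) update: V(s) <- V(s) + alpha * [r + gamma * V(s') - V(s)]
            V[s] += alpha * (r + gamma * V[s2] - V[s])
            s = s2
    return V[1:-1]                     # true values are 1/6, 2/6, ..., 5/6

# In the batch setting (mc_td_batch_comparison), the same update is presumably
# applied repeatedly over all episodes seen so far until V stops changing.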
Example #5
import os

def state_prediction_main():
    # Create the image output directory if it does not exist
    if not os.path.exists('images/'):
        os.makedirs('images/')

    # Create the grid world environment object
    env = GridWorld(height=GRID_HEIGHT,
                    width=GRID_WIDTH,
                    start_state=(0, 0),
                    terminal_states=TERMINAL_STATES,
                    transition_reward=-1.0,
                    terminal_reward=-1.0,
                    outward_reward=-1.0)
    env.reset()

    state_values, returns = first_visit_mc_prediction(env, 1.0, 10000)
    print("First Visit")
    for i in range(GRID_HEIGHT):
        for j in range(GRID_WIDTH):
            print("({0}, {1}): {2:5.2f}".format(i, j, state_values[i, j]))
        print()

    draw_grid_world_state_values_image(
        state_values, 'images/grid_world_mc_state_values_first_visit.png',
        GRID_HEIGHT, GRID_WIDTH)
    print()

    state_values, returns = every_visit_mc_prediction(env, 1.0, 10000)
    print("Every Visit")
    for i in range(GRID_HEIGHT):
        for j in range(GRID_WIDTH):
            print("({0}, {1}): {2:5.2f}".format(i, j, state_values[i, j]))
        print()

    draw_grid_world_state_values_image(
        state_values, 'images/grid_world_mc_state_values_every_visit.png',
        GRID_HEIGHT, GRID_WIDTH)
Example #6
import numpy as np

def main():
    # Create the 5x5 map
    env = GridWorld(height=GRID_HEIGHT,
                    width=GRID_WIDTH,
                    start_state=None,
                    terminal_states=[],
                    transition_reward=0.0,
                    outward_reward=-1.0,
                    warm_hole_states=[(A_POSITION, A_PRIME_POSITION, 10.0),
                                      (B_POSITION, B_PRIME_POSITION, 5.0)])

    state_values = calculate_grid_world_state_values(env)

    draw_grid_world_state_values_image(state_values,
                                       'images/grid_world_state_values.png',
                                       GRID_HEIGHT, GRID_WIDTH)

    with np.printoptions(precision=2, suppress=True):
        print(state_values)
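
# --- Hedged sketch (not from the source): calculate_grid_world_state_values
# is defined elsewhere. For the 5x5 map above it presumably runs iterative
# policy evaluation of the equiprobable random policy. The warp-cell
# positions and gamma = 0.9 below are assumptions for illustration.
import numpy as np

SKETCH_A, SKETCH_A_PRIME = (0, 1), (4, 1)
SKETCH_B, SKETCH_B_PRIME = (0, 3), (2, 3)
SKETCH_ACTIONS_5X5 = [(-1, 0), (1, 0), (0, -1), (0, 1)]

def sketch_warp_step(s, a):
    if s == SKETCH_A:
        return SKETCH_A_PRIME, 10.0    # warm hole A: teleport, reward +10
    if s == SKETCH_B:
        return SKETCH_B_PRIME, 5.0     # warm hole B: teleport, reward +5
    i, j = s[0] + a[0], s[1] + a[1]
    if 0 <= i < 5 and 0 <= j < 5:
        return (i, j), 0.0             # ordinary transition, reward 0
    return s, -1.0                     # off-grid move: stay put, reward -1

def sketch_evaluate_random_policy(gamma=0.9, theta=1e-6):
    V = np.zeros((5, 5))
    while True:
        delta = 0.0
        for i in range(5):
            for j in range(5):
                # Bellman expectation backup under the equiprobable policy:
                # V(s) = (1/4) * sum_a [r(s, a) + gamma * V(s')]
                v = sum(r + gamma * V[s2]
                        for s2, r in (sketch_warp_step((i, j), a)
                                      for a in SKETCH_ACTIONS_5X5)) / 4.0
                delta = max(delta, abs(v - V[i, j]))
                V[i, j] = v
        if delta < theta:
            return V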
Example #7
def exploring_start_control_main():
    # Create the grid world environment object
    env = GridWorld(
        height=GRID_HEIGHT,
        width=GRID_WIDTH,
        start_state=None,   # exploring start
        terminal_states=TERMINAL_STATES,
        transition_reward=-1.0,
        terminal_reward=-1.0,
        outward_reward=-1.0
    )

    # Create the empty action-value function (Q) and returns, initialized to zero
    state_action_values, returns = generate_initial_q_value_and_return(env)

    # Create the initial random policy
    policy = generate_initial_random_policy(env)

    iter_num = 0

    print("[[[ MC 제어 반복 시작! ]]]")
    while iter_num < MAX_EPISODES:
        iter_num += 1

        episode, visited_state_actions = generate_random_episode_for_trajectory(env, policy)
        print("*** 에피소드 생성 완료 ***")

        first_visit_mc_prediction(state_action_values, returns, episode, visited_state_actions)
        print("*** MC 예측 수행 완료 ***")

        policy, error = generate_greedy_policy(env, state_action_values, policy)
        print("*** 정책 개선 [에러 값: {0:9.7f}], 총 반복 수: {1} ***\n".format(error, iter_num))

    print("[[[ MC 제어 반복 종료! ]]]\n\n")

    draw_grid_world_policy_image(
        policy,
        "images/grid_world_mc_exploring_start_policy.png",
        GRID_HEIGHT, GRID_WIDTH,
        env.ACTION_SYMBOLS
    )
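
# --- Hedged sketch (not from the source): generate_greedy_policy is defined
# elsewhere. The policy-improvement step it presumably performs makes the
# policy greedy with respect to the current Q and reports how far the action
# probabilities moved. Shapes are assumptions: q_values is
# (height, width, num_actions); policy maps (i, j) -> probability vector.
import numpy as np

def sketch_greedy_policy_improvement(q_values, policy):
    height, width, _ = q_values.shape
    new_policy, error = {}, 0.0
    for i in range(height):
        for j in range(width):
            q = q_values[i, j]
            best = (q == q.max())              # tied best actions share mass
            probs = best / best.sum()
            error += np.abs(probs - policy[(i, j)]).sum()
            new_policy[(i, j)] = probs
    return new_policy, error

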
def main():
    # Create the 5x5 map
    env = GridWorld(height=GRID_HEIGHT,
                    width=GRID_WIDTH,
                    start_state=None,
                    terminal_states=[],
                    transition_reward=0.0,
                    outward_reward=-1.0,
                    warm_hole_states=[(A_POSITION, A_PRIME_POSITION, 10.0),
                                      (B_POSITION, B_PRIME_POSITION, 5.0)])

    optimal_action_values = calculate_grid_world_optimal_action_values(env)

    draw_grid_world_action_values_image(
        optimal_action_values, 'images/grid_world_optimal_action_values.png',
        GRID_HEIGHT, GRID_WIDTH, env.NUM_ACTIONS, env.ACTION_SYMBOLS)

    print()

    optimal_policy = calculate_optimal_policy(optimal_action_values)
    draw_grid_world_optimal_policy_image(
        optimal_policy, "images/grid_world_optimal_policy.png", GRID_HEIGHT,
        GRID_WIDTH, env.ACTION_SYMBOLS)
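
# --- Hedged sketch (not from the source): calculate_grid_world_optimal_action_values
# presumably iterates the Bellman optimality equation for Q directly:
# Q(s, a) <- r(s, a) + gamma * max_a' Q(s', a'). The model is passed in as a
# step function; e.g. the hypothetical sketch_warp_step from Example #6 fits.
import numpy as np

def sketch_optimal_action_values(step_fn, gamma=0.9, theta=1e-6):
    actions = [(-1, 0), (1, 0), (0, -1), (0, 1)]
    Q = np.zeros((5, 5, len(actions)))
    while True:
        delta = 0.0
        for i in range(5):
            for j in range(5):
                for k, a in enumerate(actions):
                    s2, r = step_fn((i, j), a)
                    # Bellman optimality backup for action values.
                    q = r + gamma * Q[s2].max()
                    delta = max(delta, abs(q - Q[i, j, k]))
                    Q[i, j, k] = q
        if delta < theta:
            return Q

# Usage (with the hypothetical warp model): sketch_optimal_action_values(sketch_warp_step)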