def plot_grid_2_mc():
    """Run MC first-visit control on test grids 204 and 208 and report misses.

    For each of the two selected grids, the agent is trained ``REPEATS``
    times with ``mc_first_visit_control`` (500 iterations each) and the
    resulting Q-values are rolled out with ``run_final_policy`` (displayed).
    The difference between the achieved reward and the grid's
    ``'best_reward'`` entry is accumulated across repeats; whenever that
    running total is non-zero after a repeat, a message is printed.

    Despite its name, this function does not draw a plot and returns None.
    """
    # Only these grid numbers are evaluated; every other grid is skipped.
    target_grids = {204, 208}

    # sorted() returns a new list, so its result must be kept. Keys are
    # numeric strings (they are passed to int() below), so order them by
    # their integer value rather than lexicographically.
    all_test_list = sorted(TEST_GRIDS.items(), key=lambda item: int(item[0]))
    agent = Agent()
    repeats = REPEATS

    for ind, grid_init in all_test_list:
        grid_num = int(ind)  # ind is a string key
        if grid_num not in target_grids:
            continue

        best_reward = grid_init['best_reward']
        normalized_score = 0
        for _ in range(repeats):
            # Build a fresh grid for every repeat and hand copies to the
            # agent so one run cannot affect the next.
            testgrid = Grid(5, random=False, init_pos=grid_init)
            Q, _ = agent.mc_first_visit_control(testgrid.copy(), iters=500)
            _, _, mc_reward = agent.run_final_policy(testgrid.copy(),
                                                     Q,
                                                     display=True)
            normalized_score += mc_reward - best_reward
            # Checked after each repeat against the running total, so a
            # single miss keeps being reported until the total returns to 0.
            if normalized_score != 0:
                print(
                    "Grid num {0} did not achieve best score".format(grid_num))
def graph_dual_model_performance():
    """Plot the dual-model agent's normalized score against MC iterations.

    For every iteration budget ``n`` in ``ITERS`` and every grid in
    ``TEST_GRIDS``, the agent is trained ``REPEATS`` times with
    ``mc_first_visit_control(..., iters=n, nn_init=True)`` and evaluated
    with ``run_final_policy(..., nn_init=True)``. The score for a grid is
    the mean over repeats of ``reward - best_reward``. Grids are grouped by
    their numeric key (<100, 100-199, 200-299, 300-399, >=400), the group
    means are collected per ``n``, and one curve per group is drawn with
    matplotlib. The <100 group is computed but not drawn.

    Returns None; the figure is shown with ``plt.show()``.
    """
    # sorted() returns a new list, so its result must be kept. Keys are
    # numeric strings (they are passed to int() below), so order them by
    # their integer value rather than lexicographically.
    all_test_list = sorted(TEST_GRIDS.items(), key=lambda item: int(item[0]))
    agent = Agent()
    iters = ITERS
    repeats = REPEATS

    total_normal_grid_score = []
    total_grid1_score = []
    total_grid2_score = []
    total_grid3_score = []
    total_grid4_score = []

    for n in iters:
        print("Running iteration {n}".format(n=n))
        normal_grid_score = []
        grid1_score = []
        grid2_score = []
        grid3_score = []
        grid4_score = []

        for ind, grid_init in all_test_list:
            # Resolved before the repeat loop: the bucketing below needs
            # grid_num even if the repeat loop body never executes.
            grid_num = int(ind)  # ind is a string key
            best_reward = grid_init['best_reward']

            normalized_score = 0
            for _ in range(repeats):
                testgrid = Grid(5, random=False, init_pos=grid_init)
                Q, _ = agent.mc_first_visit_control(testgrid.copy(),
                                                    iters=n,
                                                    nn_init=True)
                _, _, dual_model_reward = agent.run_final_policy(
                    testgrid.copy(), Q, nn_init=True, display=False)
                normalized_score += dual_model_reward - best_reward

            mean_score = normalized_score / repeats
            if grid_num < 100:
                normal_grid_score.append(mean_score)
            elif grid_num < 200:  # grid type 1
                grid1_score.append(mean_score)
            elif grid_num < 300:  # grid type 2
                grid2_score.append(mean_score)
            elif grid_num < 400:  # grid type 3
                grid3_score.append(mean_score)
            else:  # grid type 4
                grid4_score.append(mean_score)

        total_normal_grid_score.append(np.mean(normal_grid_score))
        total_grid1_score.append(np.mean(grid1_score))
        total_grid2_score.append(np.mean(grid2_score))
        total_grid3_score.append(np.mean(grid3_score))
        total_grid4_score.append(np.mean(grid4_score))

    # NOTE: the normal-grid curve is tracked above but deliberately not drawn:
    # plt.plot(iters, total_normal_grid_score, label="normal grids", color="red")
    plt.plot(iters, total_grid1_score, label='push dilemma', color="blue")
    plt.plot(iters, total_grid2_score, label='switch dilemma', color="green")
    plt.plot(iters, total_grid3_score, label='switch save', color="orange")
    plt.plot(iters, total_grid4_score, label='push get', color="brown")
    plt.legend()
    plt.xlabel("Number of MC Iterations")
    plt.ylabel("Normalized Score")
    plt.title("Dual model performance on all test grids")
    plt.show()