Example #1
    def visualize(self):
        nr, nc = self.maze.shape
        z = -0.1
        a = self.agent
        # Draw the four border edges of the maze.
        plot_line_seg(0, 0, z, nr, 0, z, 'e1', size=0.2, color='red')
        plot_line_seg(0, 0, z, 0, nc, z, 'e2', size=0.2, color='red')
        plot_line_seg(0, nc, z, nr, nc, z, 'e3', size=0.2, color='red')
        plot_line_seg(nr, 0, z, nr, nc, z, 'e4', size=0.2, color='red')
        # Mark the agent (blue) and the goal in the bottom-right cell (green).
        plot_3d(*get_midpoint_for_loc(a.i, a.j), z, 'agent', color='blue', size=1)
        plot_3d(*get_midpoint_for_loc(nr - 1, nc - 1), z, 'goal', color='green', size=1)

        # Mark every wall cell.
        x, y = np.where(self.maze == WALL)
        plot_3d(x + 0.5, y + 0.5, [z] * len(x), 'wall', color='red', size=1)
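The examples on this page rely on a get_midpoint_for_loc helper that is not shown here. A minimal sketch of what it presumably does, consistent with the + 0.5 cell-center offsets used for the wall and obstacle markers (this implementation is an assumption, not the project's actual code):

def get_midpoint_for_loc(i, j):
    # Assumed behavior: return the center of grid cell (i, j),
    # matching the `+ 0.5` offsets used for the wall markers above.
    return i + 0.5, j + 0.5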
Example #2

    def visualize3d(self):
        nr, nc = self.env.shape
        z = 0.1
        a = self.mousy
        # Draw the four border edges of the maze.
        plot_line_seg(0, 0, z, nr, 0, z, 'e1', size=0.2, color='red')
        plot_line_seg(0, 0, z, 0, nc, z, 'e2', size=0.2, color='red')
        plot_line_seg(0, nc, z, nr, nc, z, 'e3', size=0.2, color='red')
        plot_line_seg(nr, 0, z, nr, nc, z, 'e4', size=0.2, color='red')
        # Mark the agent (blue) and the goal, hard-coded here at cell (3, 3).
        plot_3d(*get_midpoint_for_loc(a.i, a.j), z, 'mousy', color='blue', size=1)
        plot_3d(*get_midpoint_for_loc(3, 3), z, 'goal', color='green', size=1)
        # Mark every obstacle cell (encoded as -1 in the grid).
        xarr, yarr = np.where(self.env == -1)
        plot_3d(xarr + 0.5, yarr + 0.5, [z] * len(xarr), 'obstacles', size=1.0)
Example #3
    def visualize(self):
        nr, nc = self.env.shape
        z = -0.1
        a = self.mousy
        # Draw the four border edges of the maze.
        plot_line_seg(0, 0, z, nr, 0, z, 'e1', size=0.2, color='red')
        plot_line_seg(0, 0, z, 0, nc, z, 'e2', size=0.2, color='red')
        plot_line_seg(0, nc, z, nr, nc, z, 'e3', size=0.2, color='red')
        plot_line_seg(nr, 0, z, nr, nc, z, 'e4', size=0.2, color='red')
        # Mark the agent (blue) and the goal in the bottom-right cell (green).
        plot_3d(*get_midpoint_for_loc(a.i, a.j), z, 'mousy', color='blue', size=1)
        plot_3d(*get_midpoint_for_loc(nr - 1, nc - 1), z, 'goal', color='green', size=1)

        # Mark every obstacle cell (encoded as -1 in the grid).
        xarr, yarr = np.where(self.env == -1)
        plot_3d(xarr + 0.5, yarr + 0.5, [z] * len(xarr), 'obstacles', size=1.0)
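The main example below drives its training loop through a QLearning object with a q table and an update(st, at, rt, st1) method, neither of which is shown on this page. A minimal sketch of such a class, assuming a standard tabular Q-learning update (the alpha and gamma defaults are illustrative assumptions):

import numpy as np

class QLearning:
    def __init__(self, num_states, num_actions, alpha=0.1, gamma=0.9):
        # One row per state, one column per action.
        self.q = np.zeros((num_states, num_actions))
        self.alpha = alpha  # learning rate (assumed default)
        self.gamma = gamma  # discount factor (assumed default)

    def update(self, st, at, rt, st1):
        # Standard tabular Q-learning update:
        # Q(s,a) += alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
        target = rt + self.gamma * np.max(self.q[st1])
        self.q[st, at] += self.alpha * (target - self.q[st, at])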
import random
import time

import numpy as np

# QLearning, make_test_maze, anneal_probability and the plotting helpers
# (plot_3d, plot_line_seg, get_midpoint_for_loc) come from the surrounding
# project.


def main():
    size = 8
    # One state per maze cell, four possible actions.
    q = QLearning(size**2, 4)

    # Regenerate mazes until the user accepts one; entering 'n' rejects
    # the maze just shown, anything else accepts it.
    go_ahead = False

    while not go_ahead:
        m = make_test_maze(size)
        m.visualize3d()
        conti = input()
        if conti.lower() == 'n':
            continue
        go_ahead = True
    max_episodes = 400
    switch_episodes = 200

    for i in range(max_episodes):
        m.reset()
        print(i, end=" ")
        final_score = 0

        while not m.has_won():

            # EXPLORATION VS EXPLOITATION

            ### Exploration: always before switch_episodes, then with a
            ### probability that anneal_probability gradually shrinks.
            if random.random() > anneal_probability(
                    i, max_episodes, switch_episodes,
                    0.5) or i < switch_episodes:
                # List every move currently legal for the agent,
                # shuffle, and take one at random.
                moves = m.compute_possible_moves()
                random.shuffle(moves)
                # move is the action itself; move_idx its index:
                # bottom (0), top (1), left (2), right (3).
                move, move_idx = moves[0]

            ### Exploitation: take the action with the highest Q-value
            ### for the current state.
            else:
                moves = m.all_actions
                s = m.state_for_agent(m.mousy)
                move_idx = np.argmax(q.q[s])
                move = moves[move_idx]

            # Record the transition (s_t, a_t, r_t, s_t+1) and feed it
            # to the Q-learning update.
            st = m.state_for_agent(m.mousy)
            at = move_idx
            score = m.do_a_move(move)
            final_score += score
            rt = score
            st1 = m.state_for_agent(m.mousy)
            q.update(st, at, rt, st1)
        time.sleep(0.01)
        print(f"Finished episode with final score of {final_score} ")
    print(q.q)
    m.reset()
    m.visualize3d()
    # Record the agent's position at every step so the path can be drawn.
    agents = []
    while not m.has_won():
        time.sleep(0.5)
        # Pick the state of the agent
        s = m.state_for_agent(m.mousy)

        # Select the action that has the highest action value
        a_idx = np.argmax(q.q[s])
        agents.append(m.mousy)
        # Tell our agent to make a move
        m.do_a_move(m.all_actions[a_idx])
        m.visualize3d()
    # Draw the path the agent took as segments between consecutive
    # cell centers.
    for ii, (a1, a2) in enumerate(zip(agents, agents[1:])):
        plot_line_seg(a1.i + 0.5, a1.j + 0.5, 0,
                      a2.i + 0.5, a2.j + 0.5, 0,
                      f'path{ii}', size=0.1)
    m.visualize3d()

    time.sleep(0.3)

    m.reset()
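main also depends on an anneal_probability helper that schedules the switch from exploration to exploitation; its implementation is not shown here. One plausible sketch, assuming a linear ramp from 0 up to the given cap once switch_episodes has passed (the exact schedule is an assumption):

def anneal_probability(episode, max_episodes, switch_episodes, max_p):
    # Assumed schedule: no exploitation before switch_episodes, then a
    # linear ramp up to max_p by the final episode.
    if episode < switch_episodes:
        return 0.0
    frac = (episode - switch_episodes) / max(1, max_episodes - switch_episodes)
    return max_p * min(1.0, frac)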