Python QLearningAgent.get_action示例

# Play one episode with the agent and record every world state as an
# animation frame. Relies on names defined elsewhere in the file
# (q_table, possible_moves, inverse_possible_moves, d, MAX_EPOCH, ...).
fig = plt.figure(figsize=(20, 20))

bilbo = QLearningAgent(PLAYER_CHAR)
mondo = World(WORLD_DIM, bilbo=bilbo, obstacle=True)
game_ended, epoch, rewards = False, 0, 0
anim = []


def _capture_frame():
    """Build the grid via ``mondo.create_env(d)``, draw it, and store the frame.

    The pcolormesh artist is appended to ``anim`` as a one-element tuple,
    and the grid that was drawn is returned.
    """
    grid = mondo.create_env(d)
    anim.append((plt.pcolormesh(grid, cmap='CMRmap'), ))
    return grid


# Frame 0: the world before the agent has moved.
env = _capture_frame()

while epoch < MAX_EPOCH and not game_ended:
    epoch += 1
    # First argument is 0 -- presumably epsilon, i.e. no random exploration;
    # TODO confirm against QLearningAgent.get_action.
    action = bilbo.get_action(0, q_table, possible_moves)
    # bilbo.move(...) returns a callable; invoking it performs the move.
    bilbo.move(inverse_possible_moves[action])()
    game_ended = bilbo.game_ended()
    reward = bilbo.reward()
    rewards += reward

    # One frame per step taken.
    env = _capture_frame()

im_ani = animation.ArtistAnimation(
    fig, anim, interval=60, repeat_delay=1000, blit=False)

# The writer's frame rate is set to the number of epochs actually played.
writer = animation.FFMpegWriter(fps=epoch)

示例#2

显示文件

文件： Bilbo_q_learning.py 项目： moiraghif/DragonHunting

for ep in range(TOT_EPISODES):
  #recreate the environment
    bilbo = QLearningAgent(PLAYER_CHAR)
    mondo = World(WORLD_DIM, bilbo=bilbo, obstacle=True)
    np.random.seed()
    game_ended = False
    epoch = 0
    tot_reward = 0
    #if ep % 10 == 0:
        #a = plt.imshow(render_world(mondo.world,WORLD_DIM,q_table,ep), animated=True)
        #policy.append((a,))
    while not game_ended and epoch < MAX_EPOCH:
      #the nearer it gets to the dragon, the more random the movement
        epoch += 1
        epsilon_fear = bilbo.fear(epsilon)
        action = bilbo.get_action(epsilon, q_table, possible_moves)
        current_state = bilbo.get_current_state()
      #treasure_gone = bilbo.treasure_gone()

        old_q_val = q_table[current_state][action]
        bilbo.move(inverse_possible_moves[action])()

        new_state = bilbo.get_current_state()
      #treasure_gone = bilbo.treasure_gone()
        game_ended = bilbo.game_ended()
        reward = bilbo.reward()

        if reward == -DRAGON_PENALTY:
            new_q_val = reward
            loss += 1
        elif reward == TREASURE_REWARD: