Example #1
from qlearner import QLearner
import matplotlib.pyplot as plt

my_learner = QLearner()
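# Load the 4x4 reward matrix and its maze metadata (site-specific data paths)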
my_learner.load_maze('/u/braun/tlab/QLearner/data/reward_4x4.npy',
                     '/u/braun/tlab/QLearner/data/meta_4x4.txt')

#print ("testing data load\n\n")

#my_learner.display_Q()
#my_learner.display_R()

print("begin training...")

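# train() runs learning episodes until QLearner's stopping criterion is met;
# 0.7 is the single hyperparameter passed here (its meaning is defined by
# QLearner.train, which is not shown in this snippet)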
reward = my_learner.train(0.7)

my_learner.display_Q()
my_learner.display_R()

steps = my_learner.test(7)  # 7 foods in 4x4 maze
print("steps")
print(steps)
print("")

plt.hist(reward, 50, density=True, facecolor='g', alpha=0.75)  # 'normed' was removed in Matplotlib 3.x; 'density' is the replacement
plt.xlabel('Episodes required to reach 200')
plt.ylabel('Frequency')
plt.title('Histogram')
plt.show()
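
For reference, the core rule a tabular learner like this applies on every step
is the standard Q-learning update; the sketch below is generic, and the names
q, r, alpha and gamma are illustrative rather than QLearner's actual internals.

import numpy as np

def q_update(q, r, state, action, next_state, alpha=0.1, gamma=0.9):
    # Standard tabular Q-learning step (illustrative only):
    # Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))
    target = r[state, action] + gamma * np.max(q[next_state])
    q[state, action] += alpha * (target - q[state, action])
    return q
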
Example #2

    # Initialise environment and agent (excerpt from the per-run loop: run,
    # num_episodes, runtime_per_run and rewards_per_run are defined in the
    # surrounding scope)
    wrapper = CartPoleWrapperDiscrete()
    agent = QLearner(wrapper=wrapper, seed=run)

    start = timer()                   # from timeit import default_timer as timer
    rewards = np.zeros(num_episodes)  # one total reward per episode (import numpy as np)
    end_episode = num_episodes - 1    # default in case the run never reaches "solved"

    style.use('fivethirtyeight')  # from matplotlib import style

    fig = plt.figure()
    plt.axis([0, num_episodes, 0, 300])  # same episode range as the training loop
    plt.xlabel('Episodes')
    plt.ylabel('Average reward (last 50 episodes)')

    # For each episode, train the agent on the environment and record the
    # reward of each episode
    for episode in range(num_episodes):
        rewards[episode] = agent.train()
        if (episode % 50) == 0 and episode != 0:
            avg_last = float(sum(rewards[episode - 50:episode])) / 50
            plt.scatter(episode, avg_last)
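            # brief pause so the figure window can redraw mid-training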
            plt.pause(0.05)
        # Check if environment is solved
        if wrapper.solved(rewards[:episode + 1]):  # include the episode just recorded
            end_episode = episode
            break

    # Record and print performance
    runtime_per_run.append(timer() - start)
    rewards_per_run['run' + str(run)] = rewards
    if end_episode >= 99:
        # slice relative to end_episode so that, after an early break, trailing
        # zeros in the preallocated rewards array are not averaged in
        avg_last_100 = float(sum(rewards[end_episode - 99:end_episode + 1])) / 100
        print('average reward of last 100 episodes of run', run, '=', avg_last_100)
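    plt.show()  # keep the figure open once the run finishes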