Exemplo n.º 1
0
            food = list(set(food))

        # Start the algorithm.
        sarsa = Sarsa(BarnState((0,0), food, max_size), epsilon=epsilon, alpha=alpha, gamma=gamma)
        sarsa.seed(int(100 * time.time()))

        # Keep track of how much we move the q in each iteration.
        track = []

        for it in range(1, max_iters + 1):

            if it % 10 == 0:
                print "Scenario %d: %d/%d\r" % (n, it, max_iters) ,
                sys.stdout.flush()

            history, corrections = sarsa.iterate()
            track.append(numpy.sqrt(sum(map(lambda x: x*x, corrections))))
            
            # We're just selecting nice places to evaluate the current policy and create a picture.
            if (it % 10 ** int(log10(it)) == 0) and (it / 10 ** int(log10(it)) in [1, 2, 4, 8]):
                print " evaluationg current policy at %d ..." % it                    
                history, reward = sarsa.eval(max_size ** 2)
            
                # Plot this.
                plot_evaluation(history, "Scenario %d at iteration %d with reward %d" % (n, it, reward), max_size, food, "scenario-%d-iteration-%d.png" % (n, it))

        pylab.clf()
        pylab.plot(track)
        pylab.savefig("scenario-%d-learning.png" % (n, ))
                
            
Exemplo n.º 2
0
    for f in food:
        pylab.annotate('food', xy=f, size=5, bbox=dict(boxstyle="round4,pad=.5", fc="0.8"), ha='center')
    
    
    for i in range(len(path) - 1):
        pylab.arrow(path[i][0], path[i][1], path[i+1][0] - path[i][0], path[i+1][1] - path[i][1])


# Parameters handed to BarnState below.
max_size = 20
food = [
    (0, 8), (4, 4), (1, 1), (8, 8), (6, 2),
    (12, 15), (17, 2), (4, 12), (17, 17), (12, 1),
]

# Build the learner from an initial state at (0, 0) and seed it with a
# clock-derived integer.
sarsa = Sarsa(
    BarnState((0, 0), food, max_size),
    epsilon=0.1,
    alpha=0.1,
    gamma=0.2,
)
sarsa.seed(int(100 * time.time()))

# Loop indices at which the current path is plotted and saved to disk.
plot_in = [
    10, 100, 200, 400, 600, 1000, 1500, 2000,
    4000, 5000, 6000, 8000, 10000, 12000, 15000, 20000,
]
for i in range(max(plot_in) + 1):
    sarsa.iterate()

    # Progress output on every tenth index (index 0 included).
    if not i % 10:
        print(i)

    # Nothing more to do unless this index is a snapshot point.
    if i not in plot_in:
        continue

    # Draw the positions recorded in the learner's history, write the
    # figure out, and report the index once more.
    plot_path([s.position for s in sarsa.history])
    pylab.savefig('/tmp/simple-path-4-%d.png' % i)
    print(i)