Пример #1
0
# Debug script: plot the current policy, run one rollout of `q`, then compare
# the values recorded in q.Q.dataset ("Actual") with what q.Q.get returns
# ("predicted") for every available action at State(0, 0).
plotter.plot_state_actions(mdp.pi, rewards=grid.reward_states, sinks=grid.sink_states)
q.rollout()

# First report the action the policy itself picks for State(0, 0).
a = mdp.pi.get_next(State(0, 0))
print("action: " + str(a))
tup = q.Q.preprocess(0, 0, a)  # key used to index q.Q.dataset
# Guard the lookup the same way the loop below does, so a key with no
# recorded samples reports "No actual" instead of failing on the index.
if tup in q.Q.dataset:
    print(q.Q.dataset[tup])
    print("Actual: " + str(np.mean(q.Q.dataset[tup])))
else:
    print("No actual")
print("predicted: " + str(q.Q.get(State(0, 0), a)))

# Then report every other available action for the same state.
for ac in mdp.pi.available_actions:
    if ac == a:
        continue  # already reported above
    print("Seeing for action: " + str(ac))
    tup = q.Q.preprocess(0, 0, ac)
    if tup in q.Q.dataset:
        print("Actual: " + str(np.mean(q.Q.dataset[tup])))
    else:
        print("No actual")
    print("predicted: " + str(q.Q.get(State(0, 0), ac)))

# Optional second rollout with animation enabled (presumably renders the
# rollout step by step -- confirm against q's class before enabling):
#   q.animate = True
#   q.rollout()

# Feed the states returned by q.get_states() to the analysis helper and
# display the result.
an.count_states(q.get_states())
an.show_states()