# Driver script: plot the current policy, run one rollout, then for the state
# State(0, 0) print the mean of the samples stored in q.Q.dataset next to the
# value returned by q.Q.get, first for the policy's chosen action and then for
# every other available action.
#
# NOTE(review): the original text was collapsed onto a single line, so the
# nesting below (the second "predicted" print inside the per-action branch,
# the analysis calls after the loop) is reconstructed from syntax -- confirm.
#
# Every print uses a single parenthesised argument, which emits the same
# output under Python 2 (print statement) and Python 3 (print function).
plotter.plot_state_actions(
    mdp.pi,
    rewards=grid.reward_states,
    sinks=grid.sink_states,
)

q.rollout()

# Action the policy picks at State(0, 0).
a = mdp.pi.get_next(State(0, 0))
print("action: " + str(a))

# q.Q.preprocess turns (0, 0, action) into the key used by q.Q.dataset.
tup = q.Q.preprocess(0, 0, a)
samples = q.Q.dataset[tup]
print(samples)
print("Actual: " + str(np.mean(samples)))
print("predicted: " + str(q.Q.get(State(0, 0), a)))

# Same comparison for every action other than the one the policy chose.
for ac in mdp.pi.available_actions:
    if ac == a:
        continue
    print("Seeing for action: " + str(ac))
    tup = q.Q.preprocess(0, 0, ac)
    # Not every key is present in the dataset, so only report a mean when
    # there are stored samples for this one.
    if tup in q.Q.dataset:
        print("Actual: " + str(np.mean(q.Q.dataset[tup])))
    else:
        print("No actual")
    print("predicted: " + str(q.Q.get(State(0, 0), ac)))

# Disabled debugging toggle (presumably replays a rollout with animation on --
# verify before re-enabling):
# q.animate = True
# q.rollout()

an.count_states(q.get_states())
an.show_states()