# Visualisation driver: draw the arrows stored in `pi` on the grid world `gw`,
# then call gw.follow() once for every index in gw.sindices.
#
# NOTE(review): this snippet had been collapsed onto a single physical line,
# which made the leading '#' comment out the live statements as well. The line
# structure is restored here; the nesting of the commented-out experiments is
# a best guess -- confirm against history before re-enabling any of them.

# Disabled: compute a value per state from the learner and draw the values.
#vals = { s : learner.value(s) for s in range(gw.nstates) }
#print vals
#gw.draw_values(vals)

# Disabled: fill `pi` from learner.best(s) for every state.
# for s in range(gw.nstates):
#     a = learner.best(s)
#     pi[s] = learner.best(s)
#     gw.redraw()

# Disabled: fill `pi` with a fixed test pattern (index modulo 8).
# for i in gw.sindices:
#     pi[i] = i % 8

gw.set_arrows(pi)
gw.background()

# Disabled: step through every action from each state in gw.sindices[11:].
# for i in gw.sindices[11:]:
#     gw.state2circle(i)
#     gw.current = i
#     for a in gw.actions:
#         gw.move(a)
#         print a
#         time.sleep(.1)
#         gw.current = i

# Disabled: follow `pi` from a single state derived from the first end state.
# s = gw.endstates[0]
# gw.follow(s-1,pi.__getitem__)

# pi.__getitem__ is handed over as a callable, so follow() can evaluate
# pi[key] itself (presumably key is a state index -- verify in gw.follow).
for i in gw.sindices:
    gw.follow(i, pi.__getitem__)
# Visualisation driver: draw the arrows stored in `pi` on the grid world `gw`,
# then call gw.follow() once for every index in gw.sindices.
#
# NOTE(review): this snippet had been collapsed onto a single physical line,
# which made the leading '#' comment out the live statements as well. The line
# structure is restored here; the nesting of the commented-out experiments is
# a best guess -- confirm against history before re-enabling any of them.

# Disabled: compute a value per state from the learner and draw the values.
#vals = { s : learner.value(s) for s in range(gw.nstates) }
#print vals
#gw.draw_values(vals)

# Disabled: fill `pi` from learner.best(s) for every state.
# for s in range(gw.nstates):
#     a = learner.best(s)
#     pi[s] = learner.best(s)
#     gw.redraw()

# Disabled: fill `pi` with a fixed test pattern (index modulo 8).
# for i in gw.sindices:
#     pi[i] = i % 8

gw.set_arrows(pi)
gw.background()

# Disabled: step through every action from each state in gw.sindices[11:].
# for i in gw.sindices[11:]:
#     gw.state2circle(i)
#     gw.current = i
#     for a in gw.actions:
#         gw.move(a)
#         print a
#         time.sleep(.1)
#         gw.current = i

# Disabled: follow `pi` from a single state derived from the first end state.
# s = gw.endstates[0]
# gw.follow(s-1,pi.__getitem__)

# pi.__getitem__ is handed over as a callable, so follow() can evaluate
# pi[key] itself (presumably key is a state index -- verify in gw.follow).
for i in gw.sindices:
    gw.follow(i, pi.__getitem__)