# Demo script: build a 32x64 gridworld GUI, run the gridworld's own value
# iteration, and display the resulting policy as arrows.

# States passed to the gridworld as its end states.
# NOTE(review): presumably flat state indices into the 32x64 grid -- confirm
# the indexing convention against GridworldGui.
endstates = [32, 2016, 1024, 1040, 1056, 1072]

gw = GridworldGui(
    nrows=32,
    ncols=64,
    endstates=endstates,
    walls=[],
)

# Optional GUI switches, currently disabled:
# gw.updategui = False
# gw.draw_state_labels()

# Alternative learners that were tried and are currently disabled:
# learner = TDQ(8, 81, 0.1, 0.9, 0.9)
# learner = TD(81, 0.1, 0.9, 0.9)
# learner = Sarsa(8, 81, 0.3, 0.9, 0.9, 0.4)
#
# NOTE(review): in the visible script `learner` is only referenced by
# commented-out code. If the second argument is a state count, 81 does not
# match the 32 * 64 grid built above -- confirm before re-enabling any of the
# learner-based paths.
learner = SampleModelValueIteration(8, 81)  # rw_model, transition_model

# Disabled: debugger hook and learner-driven solve.
# pdb.set_trace()
# v, pi = learner.learn(100, gw, verbose=True)

# Active path: the gridworld's built-in value iteration. The second element
# of the returned pair is what gets drawn below.
(v, pi) = gw.value_iteration()

# Disabled: show the learner's value estimates / derive the policy from it.
# pi = np.ones(gw.nstates, dtype='int')
# vals = {s: learner.value(s) for s in range(gw.nstates)}
# print vals
# gw.draw_values(vals)
# for s in range(gw.nstates):
#     a = learner.best(s)
#     pi[s] = learner.best(s)
# gw.redraw()

# Disabled: synthetic policy that cycles through action ids 0..7.
# for i in gw.sindices:
#     pi[i] = i % 8

gw.set_arrows(pi)
# Gridworld demo: create a 32-row by 64-column GUI gridworld, compute a policy
# with the gridworld's value_iteration(), and render it with set_arrows().

# End states handed to GridworldGui.
# NOTE(review): these look like flat state indices for the 32x64 grid --
# verify the indexing scheme in GridworldGui before editing them.
endstates = [32, 2016, 1024, 1040, 1056, 1072]

gw = GridworldGui(
    nrows=32,
    ncols=64,
    endstates=endstates,
    walls=[],
)

# GUI options left switched off:
# gw.updategui = False
# gw.draw_state_labels()

# Other learners kept for reference (disabled):
# learner = TDQ(8, 81, 0.1, 0.9, 0.9)
# learner = TD(81, 0.1, 0.9, 0.9)
# learner = Sarsa(8, 81, 0.3, 0.9, 0.9, 0.4)
#
# NOTE(review): the active code below never uses `learner`; only the disabled
# snippets do. The 81 also differs from 32 * 64, so if it is meant to be the
# number of states it may be stale -- confirm.
learner = SampleModelValueIteration(8, 81)  # rw_model, transition_model

# Disabled: drop into the debugger, then solve via the learner.
# pdb.set_trace()
# v, pi = learner.learn(100, gw, verbose=True)

# Solve with the gridworld itself; `pi` is passed to set_arrows() at the end.
(v, pi) = gw.value_iteration()

# Disabled: visualise learner values and read the greedy policy from it.
# pi = np.ones(gw.nstates, dtype='int')
# vals = {s: learner.value(s) for s in range(gw.nstates)}
# print vals
# gw.draw_values(vals)
# for s in range(gw.nstates):
#     a = learner.best(s)
#     pi[s] = learner.best(s)
# gw.redraw()

# Disabled: placeholder policy assigning i % 8 to each state in gw.sindices.
# for i in gw.sindices:
#     pi[i] = i % 8

gw.set_arrows(pi)