示例#1
0
# --- Environment ------------------------------------------------------------
# State indices passed to the gridworld as its `endstates`.
endstates = [
    32,
    2016,
    1024,
    1040,
    1056,
    1072,
]

# 32-row by 64-column grid with an empty wall list.
gw = GridworldGui(
    nrows=32,
    ncols=64,
    endstates=endstates,
    walls=[],
)

# Optional GUI tweaks, currently disabled:
#   gw.updategui = False
#   gw.draw_state_labels()

# --- Learner ----------------------------------------------------------------
# Alternative learners that were tried, currently disabled:
#   learner = TDQ(8, 81, 0.1, 0.9, 0.9)
#   learner = TD(81, 0.1, 0.9, 0.9)
#   learner = Sarsa(8, 81, 0.3, 0.9, 0.9, 0.4)
#
# NOTE(review): the learner is constructed here, but every statement that
# would use it below is commented out. The meaning of the (8, 81) arguments
# is not visible in this snippet -- confirm against the class definition.
learner = SampleModelValueIteration(8, 81)

# Disabled learner-driven path (rw_model, transition_model):
#   pdb.set_trace()
#   v, pi = learner.learn(100, gw, verbose=True)

# --- Planning ---------------------------------------------------------------
# The gridworld's own value iteration supplies the pair (v, pi); only `pi`
# is used further down in this script.
v, pi = gw.value_iteration()

# Disabled alternatives for producing / inspecting the policy:
#   pi = np.ones(gw.nstates, dtype='int')
#
#   vals = {s: learner.value(s) for s in range(gw.nstates)}
#   print vals
#   gw.draw_values(vals)
#
#   for s in range(gw.nstates):
#       a = learner.best(s)
#       pi[s] = learner.best(s)
#
#   gw.redraw()
#
#   for i in gw.sindices:
#       pi[i] = i % 8

# --- Display ----------------------------------------------------------------
# Hand the policy to the GUI via set_arrows.
gw.set_arrows(pi)
示例#2
0
# Build the gridworld: 32 rows, 64 columns, no walls, and the listed
# state indices supplied as `endstates`.
endstates = [
    32, 2016,
    1024, 1040, 1056, 1072,
]
gw = GridworldGui(
    nrows=32,
    ncols=64,
    endstates=endstates,
    walls=[],
)

# GUI options left switched off:
#   gw.updategui = False
#   gw.draw_state_labels()

# Learner selection. Earlier candidates are kept for reference only:
#   learner = TDQ(8, 81, 0.1, 0.9, 0.9)
#   learner = TD(81, 0.1, 0.9, 0.9)
#   learner = Sarsa(8, 81, 0.3, 0.9, 0.9, 0.4)
# NOTE(review): nothing active below reads `learner`; what the (8, 81)
# arguments stand for cannot be determined from this snippet -- verify.
learner = SampleModelValueIteration(8, 81)

# Learning through the learner is disabled (rw_model, transition_model):
#   pdb.set_trace()
#   v, pi = learner.learn(100, gw, verbose=True)

# Instead, take (v, pi) straight from the gridworld's value_iteration().
v, pi = gw.value_iteration()

# Other disabled experiments with the policy and value display:
#   pi = np.ones(gw.nstates, dtype='int')
#
#   vals = {s: learner.value(s) for s in range(gw.nstates)}
#   print vals
#   gw.draw_values(vals)
#
#   for s in range(gw.nstates):
#       a = learner.best(s)
#       pi[s] = learner.best(s)
#
#   gw.redraw()
#
#   for i in gw.sindices:
#       pi[i] = i % 8

# Pass the resulting policy to the GUI.
gw.set_arrows(pi)