def plotLSPIValues(gametype, layout, discountFactor=0.9, useTD=False, showValue=False):
    # build the game
    g = VGDLParser().parseGame(gametype)
    g.buildLevel(layout)

    # transform into an MDP and the mapping to observations
    C = MDPconverter(g)
    Ts, R, fMap = C.convert()

    # find the best least-squares approximation to the policy,
    # given only observations, not the state information
    if useTD:
        # state-based
        _, Tlspi = LSTD_PI_policy(fMap, Ts, R, discountFactor=discountFactor)
    else:
        # state-action-based
        _, Tlspi = LSPI_policy(fMap, Ts, R, discountFactor=discountFactor)

    # evaluate the policy
    Vlspi = trueValues(Tlspi, R, discountFactor=discountFactor)

    # plot those values
    featurePlot((g.width, g.height), C.states, Vlspi)
    if showValue:
        # expected discounted reward at initial state
        Vinit = Vlspi[C.initIndex()]
        pylab.xlabel("V0=%.4f" % Vinit)
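
# A minimal usage sketch for plotLSPIValues, comparing the state-based (LSTD)
# and state-action-based (LSPI) approximations side by side. Assumptions: the
# stock maze examples `polarmaze_game` and `maze_level_1` ship with py-vgdl;
# any (game string, level string) pair works here.
def demoLSPIValues():
    from examples.gridphysics.mazes import polarmaze_game, maze_level_1
    pylab.subplot(1, 2, 1)
    plotLSPIValues(polarmaze_game, maze_level_1, useTD=True, showValue=True)
    pylab.subplot(1, 2, 2)
    plotLSPIValues(polarmaze_game, maze_level_1, useTD=False, showValue=True)
    pylab.show()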
def testAugmented():
    from vgdl.core import VGDLParser
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.mdpmap import MDPconverter
    from vgdl.agents import PolicyDrivenAgent
    # GameEnvironment and GameTask were not imported in the original;
    # assuming they live in vgdl.interfaces, as elsewhere in py-vgdl
    from vgdl.interfaces import GameEnvironment, GameTask

    zelda_level2 = """
wwwwwwwwwwwww
wA wwk1ww   w
ww  ww    1 w
ww     wwww+w
wwwww1ww  www
wwwww  0  Gww
wwwwwwwwwwwww
"""

    from examples.gridphysics.mazes.rigidzelda import rigidzelda_game
    g = VGDLParser().parseGame(rigidzelda_game)
    g.buildLevel(zelda_level2)
    env = GameEnvironment(g, visualize=False,
                          recordingEnabled=True, actionDelay=150)
    C = MDPconverter(g, env=env, verbose=True)
    Ts, R, _ = C.convert()
    print C.states
    print Ts[0]
    print R
    env.reset()
    agent = PolicyDrivenAgent.buildOptimal(env)
    env.visualize = True
    env.reset()
    task = GameTask(env)
    exper = EpisodicExperiment(task, agent)
    exper.doEpisodes(1)
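
# Sanity-check sketch for the tensors printed above (assumption: Ts is the
# list of per-action transition matrices returned by MDPconverter.convert(),
# as dense 2-D arrays): every transition matrix should be row-stochastic,
# i.e. each row a probability distribution over successor states.
def checkTransitionMatrices(Ts, tol=1e-6):
    import numpy
    for T in Ts:
        assert numpy.allclose(T.sum(axis=1), 1, atol=tol), \
            "transition matrix rows must sum to 1"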
def plotBackground(env, known=[[]]):
    # The mutable default argument acts as a one-shot cache: the expensive
    # MDP conversion runs only on the first call, and the resulting
    # (size, states, R) triple is reused on every subsequent call.
    if len(known[0]) == 0:
        from vgdl.mdpmap import MDPconverter
        g = env._game
        C = MDPconverter(g, env=env, verbose=False)
        _, R, _ = C.convert()
        size = (g.width, g.height)
        known[0].append((size, C.states, R))
    featurePlot(*known[0][0])
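
# Demonstration sketch of the caching behavior above (assumption: env is a
# GameEnvironment as in testAugmented). The first call pays for the MDP
# conversion; the second reuses the cached (size, states, R) triple.
def demoBackgroundCache(env):
    import time
    for _ in range(2):
        start = time.time()
        plotBackground(env)
        print "plotBackground took %.3fs" % (time.time() - start)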
def buildOptimal(game_env, discountFactor=0.99):
    """ Given a game environment, find the optimal (state-based) policy
    and return an agent that plays accordingly. """
    from vgdl.mdpmap import MDPconverter
    C = MDPconverter(env=game_env)
    Ts, R, _ = C.convert()
    policy, _ = policyIteration(Ts, R, discountFactor=discountFactor)
    game_env.reset()

    def stateIndex(*_):
        # map the environment's current state to its index in the MDP
        return C.states.index(game_env.getState())

    return PolicyDrivenAgent(policy, stateIndex)
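
# Usage sketch for buildOptimal (assumptions: env is a GameEnvironment
# wrapping a small, fully observable game, and GameTask is available as in
# testAugmented): build the optimal agent, then watch it play one episode.
def demoOptimalAgent(env):
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    agent = buildOptimal(env)
    env.visualize = True
    env.reset()
    exper = EpisodicExperiment(GameTask(env), agent)
    exper.doEpisodes(1)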
def plotOptimalValues(gametype, layout, discountFactor=0.9, showValue=False):
    # build the game
    g = VGDLParser().parseGame(gametype)
    g.buildLevel(layout)

    # transform into an MDP
    C = MDPconverter(g)
    Ts, R, _ = C.convert()

    # find the optimal policy
    _, Topt = policyIteration(Ts, R, discountFactor=discountFactor)

    # evaluate the policy
    Vopt = trueValues(Topt, R, discountFactor=discountFactor)

    # plot those values
    featurePlot((g.width, g.height), C.states, Vopt, plotdirections=True)
    if showValue:
        # expected discounted reward at initial state
        Vinit = Vopt[C.initIndex()]
        pylab.xlabel("V0=%.4f" % Vinit)
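
# Sketch of a discount-factor sweep (assumption: the stock maze examples from
# py-vgdl): lower discount factors concentrate value near the goal, higher
# ones propagate it further across the maze.
def demoDiscountSweep():
    from examples.gridphysics.mazes import polarmaze_game, maze_level_1
    for i, gamma in enumerate([0.5, 0.9, 0.99]):
        pylab.subplot(1, 3, i + 1)
        plotOptimalValues(polarmaze_game, maze_level_1,
                          discountFactor=gamma, showValue=True)
    pylab.show()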