def plotLSPIValues(gametype, layout, discountFactor=0.9, useTD=False, showValue=False):
    """Plot the state values of a least-squares-approximated policy for a VGDL game.

    Builds the game from `gametype` and `layout`, converts it to an MDP,
    fits a policy from observation features only (LSTD-PI when `useTD`
    is true, otherwise LSPI), evaluates that policy exactly, and plots
    the resulting values. If `showValue` is true, the expected discounted
    reward at the initial state is written as the x-axis label.
    """
    # Construct the game instance from its description and level layout.
    game = VGDLParser().parseGame(gametype)
    game.buildLevel(layout)

    # Convert the game into an MDP (transition matrices, rewards) plus
    # the mapping from states to observation features.
    converter = MDPconverter(game)
    transitions, rewards, featureMap = converter.convert()

    # Fit the best least-squares policy approximation using only the
    # observation features, not the full state information.
    if useTD:
        # State-based approximation.
        _, policyTransitions = LSTD_PI_policy(featureMap, transitions, rewards,
                                              discountFactor=discountFactor)
    else:
        # State-action-based approximation.
        _, policyTransitions = LSPI_policy(featureMap, transitions, rewards,
                                           discountFactor=discountFactor)

    # Exact evaluation of the fitted policy.
    values = trueValues(policyTransitions, rewards, discountFactor=discountFactor)

    # Render the per-state values on the level grid.
    featurePlot((game.width, game.height), converter.states, values)

    if showValue:
        # Annotate with the expected discounted reward at the initial state.
        initialValue = values[converter.initIndex()]
        pylab.xlabel("V0=%.4f" % initialValue)
def plotOptimalValues(gametype, layout, discountFactor=0.9, showValue=False):
    """Plot the optimal state values for a VGDL game.

    Builds the game, converts it to an MDP, computes the optimal policy
    via policy iteration, evaluates it exactly, and plots the values
    (with policy-direction arrows). If `showValue` is true, the expected
    discounted reward at the initial state is shown as the x-axis label.

    NOTE(review): this function is defined a second time, verbatim, later
    in this file; at import time the later definition shadows this one.
    One of the two copies should be removed.
    """
    # build the game
    g = VGDLParser().parseGame(gametype)
    g.buildLevel(layout)
    # transform into an MDP
    C = MDPconverter(g)
    Ts, R, _ = C.convert()
    # find the optimal policy
    _, Topt = policyIteration(Ts, R, discountFactor=discountFactor)
    # evaluate the policy
    Vopt = trueValues(Topt, R, discountFactor=discountFactor)
    # plot those values (with direction arrows for the chosen actions)
    featurePlot((g.width, g.height), C.states, Vopt, plotdirections=True)
    if showValue:
        # expected discounted reward at initial state
        Vinit = Vopt[C.initIndex()]
        pylab.xlabel("V0=%.4f" % Vinit)
def plotOptimalValues(gametype, layout, discountFactor=0.9, showValue=False):
    """Plot the optimal state values of a VGDL game.

    Builds the game from `gametype` and `layout`, converts it to an MDP,
    solves for the optimal policy with policy iteration, evaluates that
    policy exactly, and plots the values with direction arrows. When
    `showValue` is true, the expected discounted reward at the initial
    state is written as the x-axis label.
    """
    # Construct the game instance from its description and level layout.
    game = VGDLParser().parseGame(gametype)
    game.buildLevel(layout)

    # Convert the game into an MDP; the feature map is not needed here.
    converter = MDPconverter(game)
    transitions, rewards, _ = converter.convert()

    # Solve for the optimal policy.
    _, optimalTransitions = policyIteration(transitions, rewards,
                                            discountFactor=discountFactor)

    # Exact evaluation of the optimal policy.
    optimalValues = trueValues(optimalTransitions, rewards,
                               discountFactor=discountFactor)

    # Render the per-state values with the chosen action directions.
    featurePlot((game.width, game.height), converter.states, optimalValues,
                plotdirections=True)

    if showValue:
        # Annotate with the expected discounted reward at the initial state.
        initialValue = optimalValues[converter.initIndex()]
        pylab.xlabel("V0=%.4f"%initialValue)