rmax=1000) pi = [gw.linear_policy(w0, s) for s in range(gw.nstates)] gw.set_arrows(pi) gw.background() gw.mainloop() if test_walls: gw = GridworldGui(nrows=5, ncols=5, endstates=[0], walls=[(1, 1), (1, 2), (1, 3), (2, 1), (2, 2), (2, 3), (3, 1), (3, 2), (3, 3)]) try: t = pickle.load(open("walls_trace.pck")) except: t = gw.trace(1000, show=False) pickle.dump(t, open("walls_trace.pck", "w"), pickle.HIGHEST_PROTOCOL) policy0 = np.zeros(gw.nfeatures()) # TODO - The tolerances for lsqr need to be related to the tolerances for the policy. Otherwise the number of iterations will be far larger than needed. w0, weights0 = LSPI(t, 0.003, gw, policy0, maxiter=100, method="opt", show=True, debug=False) pi = [gw.linear_policy(w0, s) for s in range(gw.nstates)] gw.set_arrows(pi) gw.background()
#! /usr/bin/env python
"""
Author: Jeremy M. Stober
Program: TEST_GUI.PY
Date: Wednesday, June 6 2012
Description: Code to test gui changes.
"""

from gridworld.gridworldgui import GridworldGui
from lspi import LSPI
import numpy as np

# Build a 32x64 gridworld GUI with six end states and no walls.
endstates = [32, 2016, 1024, 1040, 1056, 1072]
gw = GridworldGui(nrows=32, ncols=64, endstates=endstates, walls=[])

# Collect a trace from the gridworld.
# NOTE(review): 10000 is presumably the number of samples -- confirm
# against GridworldGui.trace.
t = gw.trace(10000)

# Zero vector with one entry per feature; it is only consumed by the
# disabled LSPI call below.
z = np.zeros(gw.nfeatures())

# The LSPI run is intentionally disabled. Re-enable it together with the
# `print(w)` line at the bottom, which depends on its result.
# import pdb
# pdb.set_trace()
# w = LSPI(t, 0.0001, gw, z)

# Show what gw.phi returns for the argument pairs (0, 0) and (0, 1).
print(gw.phi(0, 0))
print(gw.phi(0, 1))

# `w` is only defined by the disabled LSPI call above; printing it
# unconditionally raised NameError at the end of every run.
# print(w)
policy0 = np.zeros(gw.nfeatures()) t = [] # TODO - The tolerances for lsqr need to be related to the tolerances for the policy. Otherwise the number of iterations will be far larger than needed. w0, weights0 = LSPIRmax(t, 0.003, gw, policy0, maxiter = 1000, show = True, resample_epsilon = 0.0, rmax = 1000) pi = [gw.linear_policy(w0,s) for s in range(gw.nstates)] gw.set_arrows(pi) gw.background() gw.mainloop() if test_walls: gw = GridworldGui(nrows=5,ncols=5,endstates= [0], walls=[(1,1),(1,2),(1,3),(2,1),(2,2),(2,3),(3,1),(3,2),(3,3)]) try: t = pickle.load(open("walls_trace.pck")) except: t = gw.trace(1000, show=False) pickle.dump(t,open("walls_trace.pck","w"),pickle.HIGHEST_PROTOCOL) policy0 = np.zeros(gw.nfeatures()) # TODO - The tolerances for lsqr need to be related to the tolerances for the policy. Otherwise the number of iterations will be far larger than needed. w0, weights0 = LSPI(t, 0.003, gw, policy0, maxiter=100, method="opt", show=True, debug=False) pi = [gw.linear_policy(w0,s) for s in range(gw.nstates)] gw.set_arrows(pi) gw.background() gw.mainloop() if test_lspi: gw = GridworldGui(nrows = 9, ncols = 9, endstates = [0], walls = []) try: t = pickle.load(open("lspi_trace.pck")) except:
#! /usr/bin/env python
"""
Author: Jeremy M. Stober
Program: TEST_GUI.PY
Date: Wednesday, June 6 2012
Description: Code to test gui changes.
"""

from gridworld.gridworldgui import GridworldGui
from lspi import LSPI
import numpy as np

# Build a 32x64 gridworld GUI with six end states and no walls.
endstates = [32, 2016, 1024, 1040, 1056, 1072]
gw = GridworldGui(nrows=32, ncols=64, endstates=endstates, walls=[])

# Collect a trace from the gridworld.
# NOTE(review): 10000 is presumably the number of samples -- confirm
# against GridworldGui.trace.
t = gw.trace(10000)

# Zero vector with one entry per feature; it is only consumed by the
# disabled LSPI call below.
z = np.zeros(gw.nfeatures())

# The LSPI run is intentionally disabled. Re-enable it together with the
# `print(w)` line at the bottom, which depends on its result.
# import pdb
# pdb.set_trace()
# w = LSPI(t, 0.0001, gw, z)

# Show what gw.phi returns for the argument pairs (0, 0) and (0, 1).
# Single-argument print(...) calls behave identically on Python 2 and
# Python 3, unlike the bare `print x` statement form.
print(gw.phi(0, 0))
print(gw.phi(0, 1))

# `w` is only defined by the disabled LSPI call above; printing it
# unconditionally raised NameError at the end of every run.
# print(w)