Пример #1
0
# Build several least-squares estimator configurations and append each one to
# `methods`. Every instance is given a `name` (a LaTeX-flavoured label) and a
# `color`; the two LSTD variants additionally get a line style `ls`.
# NOTE(review): `td`, `alpha`, `phi`, `gamma` and `methods` are not defined in
# this excerpt -- presumably set up earlier in the script; confirm.

# Recursive LSPE with lambda = 0, using whatever `alpha` is already in scope.
lam = 0.
lstd = td.RecursiveLSPELambda(lam=lam, alpha=alpha, phi=phi, gamma=gamma)
lstd.name = r"LSPE({}) $\alpha$={}".format(lam, alpha)
lstd.color = "g"
methods.append(lstd)

# The "CO" variant of recursive LSPE, again with lambda = 0, now with alpha = 1.
# NOTE(review): the label uses the suffix "-TO" while the class is named
# "...CO" -- confirm the label is the intended one.
alpha = 1.
lam = .0
lstd = td.RecursiveLSPELambdaCO(lam=lam, alpha=alpha, phi=phi, gamma=gamma)
lstd.name = r"LSPE({})-TO $\alpha$={}".format(lam, alpha)
lstd.color = "g"
methods.append(lstd)

# The "JP" variant of recursive LSTD with lambda = 0 and eps = 10, drawn with a
# dash-dot line style.
# NOTE(review): labelled "LSTD-TO" although the class suffix is "JP" -- confirm.
lam = 0.
eps = 10
lstd = td.RecursiveLSTDLambdaJP(lam=lam, eps=eps, phi=phi, gamma=gamma)
lstd.name = r"LSTD-TO({}) $\epsilon$={}".format(lam, eps)
lstd.color = "g"
lstd.ls = "-."
methods.append(lstd)

# Plain recursive LSTD with lambda = 0 and a much smaller eps (0.01); it has the
# same color and line style as the previous entry, so only the label differs.
lam = 0.
eps = 0.01
lstd = td.RecursiveLSTDLambda(lam=lam, eps=eps, phi=phi, gamma=gamma)
lstd.name = r"LSTD({}) $\epsilon$={}".format(lam, eps)
lstd.color = "g"
lstd.ls = "-."
methods.append(lstd)
#
# Hyper-parameters for a configuration that is not visible in this excerpt
# (the snippet is cut off after these two assignments).
alpha = 1.
lam = .4
Пример #2
0
# Set up a value-prediction task on a CorruptedChain MDP and collect the
# estimators to compare in `methods`.
# NOTE(review): `n`, `examples`, `features`, `policies`, `np`, `td`, `regtd`
# and `LinearDiscreteValuePredictionTask` are not defined in this excerpt --
# presumably imported/assigned earlier in the script; confirm.
n_random = 800
mdp = examples.CorruptedChain(n_states=n)
# Feature map built from 5 RBFs plus `n_random` extra features
# (presumably uninformative noise dimensions -- verify in `features`).
phi = features.corrupted_rbfs(n_S=n, n_rbfs=5, n_random=n_random)
gamma = .9
n_feat = phi.dim
# All-zero vector with one entry per feature, passed to the task as `p0`.
p0 = np.zeros(n_feat)
# (n, 2) table: rows 0-9 get a 1 in column 0, rows 10 and up get a 1 in
# column 1 (presumably per-state action probabilities -- confirm against
# `policies.Discrete`).
pol = np.zeros((n, 2))
pol[:10, 0] = 1
pol[10:, 1] = 1
policy = policies.Discrete(prop_table=pol)
task = LinearDiscreteValuePredictionTask(mdp, gamma, phi, p0, policy=policy)

# define the methods to examine
methods = []  # [td0, gtd, gtd2]

# The "JP" variant of recursive LSTD with lambda = 0 and eps = 1000.
# NOTE(review): the label is formatted with (0, 0), i.e. it shows tau=0, while
# the constructor receives eps=1000 and no tau -- confirm the label is intended.
lstd = td.RecursiveLSTDLambdaJP(lam=0, eps=1000, phi=phi)
lstd.name = r"LSTD({}) $\ell_2 \tau={}$".format(0, 0)
lstd.color = "b"
methods.append(lstd)
# Disabled sweep over eps values, kept from the original script:
#for eps in np.power(10,np.arange(-1,4)):
# Batch LSTD (JP variant) with tau = 0.8. The instance is built and labelled,
# but the append below is commented out, so it is NOT added to `methods`.
lstd = td.LSTDLambdaJP(lam=0, tau=0.8, phi=phi)
lstd.name = r"LSTD({}) $\ell_2 \tau={}$".format(0, .8)
lstd.color = "b"
#methods.append(lstd)

# D-LSTD with tau = 0.01; `nonreg_ids=[0]` presumably exempts feature index 0
# from regularization -- verify in `regtd.DLSTD`.
tau = 0.01
lstd = regtd.DLSTD(lam=0, nonreg_ids=[0], phi=phi, tau=tau)
lstd.name = r"D-LSTD({}) $\tau={}$".format(0, tau)
lstd.color = "b"
methods.append(lstd)