示例#1
0
文件: boyan.py 项目: xuxingc/tdlearn
# Build the estimators to compare. Each one is constructed from the feature
# map `phi`, given a display label in `name` (LaTeX math markup), and appended
# to `methods`.
# NOTE(review): `td`, `phi` and `methods` are defined outside this excerpt.
# The names `alpha`, `lam`, `eps`, `td0` and `lstd` are rebound from one
# configuration to the next, so the statement order matters.

# TD(0) with a step-size schedule object built by td.RMalpha(10., 0.5).
# The label prints the schedule as "c t^{-mu}" from its `c` and `mu`
# attributes (presumably a decaying step size -- confirm in td.RMalpha).
alpha = td.RMalpha(10., 0.5)
lam = 0.
td0 = td.LinearTDLambda(alpha=alpha, lam=lam, phi=phi)
td0.name = r"TD({}) $\alpha={}t^{{-{} }}$".format(lam, alpha.c, alpha.mu)
methods.append(td0)

# TD(0) with the step size supplied by td.DabneyAlpha(); the label only says
# "aut." (presumably an automatically chosen step size -- confirm).
alpha = td.DabneyAlpha()
lam = 0.
td0 = td.LinearTDLambda(alpha=alpha, lam=lam, phi=phi)
td0.name = r"TD({}) $\alpha$=aut.".format(lam)
methods.append(td0)

# TDC(1) with constant step sizes. The second step size is passed as
# beta = alpha * mu, so `mu` in the label is the ratio beta / alpha.
alpha = 0.2
mu = 0.0001
lam = 1.
tdc = td.TDCLambda(lam=lam, alpha=alpha, beta=alpha * mu, phi=phi)
tdc.name = r"TDC({}) $\alpha$={} $\mu$={}".format(lam, alpha, mu)
tdc.color = "r"  # presumably a plot color code -- confirm against the plotting code
methods.append(tdc)

# Recursive LSTD(0.8). `eps` is forwarded to the constructor but is not shown
# in the label (its exact meaning is defined in td.RecursiveLSTDLambda).
lam = .8
eps = 10000
lstd = td.RecursiveLSTDLambda(lam=lam, eps=eps, phi=phi)
lstd.name = r"LSTD({})".format(lam)
methods.append(lstd)

# Recursive LSTD(0) with a smaller `eps`; rebinds `lstd`, the previous
# instance stays reachable only through `methods`.
lam = .0
eps = 100
lstd = td.RecursiveLSTDLambda(lam=lam, eps=eps, phi=phi)
lstd.name = r"LSTD({})".format(lam)
methods.append(lstd)
示例#2
0
# Build the estimators to compare. Each one receives the feature map `phi`
# and `gamma`, gets a display label in `name` (LaTeX math markup) and a
# `color`, and is appended to `methods`.
# NOTE(review): `td`, `phi`, `gamma` and `methods` are defined outside this
# excerpt, and so are the `alpha` and `lam` used by the first estimator below.

# TD(lam) with whatever `alpha` / `lam` were set before this excerpt.
td0 = td.LinearTDLambda(alpha=alpha, lam=lam, phi=phi, gamma=gamma)
td0.name = r"TD({}) $\alpha$={}".format(lam, alpha)
td0.color = "k"
methods.append(td0)

# TD(1) with a constant step size; same color as the estimator above.
alpha = .004
lam = 1.
td0 = td.LinearTDLambda(alpha=alpha, lam=lam, phi=phi, gamma=gamma)
td0.name = r"TD({}) $\alpha$={}".format(lam, alpha)
td0.color = "k"
methods.append(td0)

# TDC(1) with constant step size; here `mu` is handed to the constructor
# directly instead of passing a `beta` value.
lam = 1.
alpha = 0.004
mu = 0.0001
tdc = td.TDCLambda(alpha=alpha, mu=mu, lam=lam, phi=phi, gamma=gamma)
tdc.name = r"TDC({}) $\alpha$={} $\mu$={}".format(lam, alpha, mu)
tdc.color = "b"
methods.append(tdc)

# LSPE(0). Note that the instance is stored in the variable `lstd` even
# though it is a RecursiveLSPELambda; the name is rebound further down.
alpha = .5
lam = 0.0
lstd = td.RecursiveLSPELambda(lam=lam, alpha=alpha, phi=phi, gamma=gamma)
lstd.name = r"LSPE({}) $\alpha$={}".format(lam, alpha)
lstd.color = "g"
methods.append(lstd)

# Recursive LSTD(0); `eps` is forwarded to the constructor and shown in the
# label as epsilon.
# NOTE(review): no `methods.append(lstd)` for this estimator is visible in
# this excerpt -- confirm that it follows.
lam = 0.0
eps = 100000
lstd = td.RecursiveLSTDLambda(lam=lam, eps=eps, phi=phi, gamma=gamma)
lstd.name = r"LSTD({}) $\epsilon$={}".format(lam, eps)
示例#3
0
                                    gamma,
                                    phi,
                                    theta0,
                                    policy=policy,
                                    normalize_phi=True,
                                    mu_next=1000,
                                    mu_iter=1000,
                                    mu_restarts=8)

#states, _, _, _, _ = mdp.samples_cached(n_iter=1000, n_restarts=15,
#                                        policy=policy, seed=8000)

# Estimator configurations for this experiment. Each estimator is built from
# the feature map `phi` and gets a display label in `name` (LaTeX math markup).
# NOTE(review): `td` and `phi` are defined outside this excerpt.

# TDC(0) with constant step sizes: `alpha` and `mu` are passed straight to
# the constructor and both appear in the label.
lam = 0.0
alpha = 0.00002
mu = .0002
tdc = td.TDCLambda(alpha=alpha, mu=mu, lam=lam, phi=phi)
tdc.name = r"TDC({}) $\alpha$={} $\mu$={}".format(lam, alpha, mu)

# TDC(0) with td.RMalpha step-size schedules for both alpha and beta.
# The label is built from the schedules' own `c` and `mu` attributes, printed
# as "c t^{-mu}". The module-level `mu` above belongs to the constant-step TDC
# and is not an input to this estimator, so it must not be used here.
lam = 0.0
alpha = td.RMalpha(.00006, 0.02)
beta = td.RMalpha(.00001, 0.1)
tdcrm = td.TDCLambda(alpha=alpha, beta=beta, lam=lam, phi=phi)
tdcrm.name = (r"TDC({}) $\alpha={}t^{{-{} }}$ $\beta={}t^{{-{} }}$"
              .format(lam, alpha.c, alpha.mu, beta.c, beta.mu))

# Recursive LSTD(0); `eps` is forwarded to the constructor and shown in the
# label as epsilon.
lam = 0.
eps = 10000
rlstd = td.RecursiveLSTDLambda(lam=lam, eps=eps, phi=phi)
rlstd.name = r"LSTD({}) $\epsilon$={}".format(lam, eps)

lam = 0.
eps = 1000000