phi, theta0, policy=policy, normalize_phi=False, mu_next=1000)
# NOTE(review): the line above is the tail of a call that opens before this
# chunk; it is reproduced unchanged.

# Estimators configured by this script. Each one gets a `name` (with
# LaTeX-style $...$ markup) and a `color` attribute before being appended;
# presumably both are consumed by plotting code elsewhere -- confirm.
methods = []

# GTD: alpha = 0.2, beta = mu * alpha = 0.4.
alpha = 0.2
mu = 2
gtd = td.GTD(alpha=alpha, beta=mu * alpha, phi=phi)
gtd.name = r"GTD $\alpha$={} $\mu$={}".format(alpha, mu)
gtd.color = "r"
methods.append(gtd)

# GTD2: alpha = 0.4, beta = mu * alpha = 0.2.
# The name `gtd` is reused here; the GTD instance above stays reachable
# through `methods`.
alpha, mu = 0.4, 0.5
gtd = td.GTD2(alpha=alpha, beta=mu * alpha, phi=phi)
gtd.name = r"GTD2 $\alpha$={} $\mu$={}".format(alpha, mu)
gtd.color = "orange"
methods.append(gtd)

# TD(lambda) with lam = 0. Here alpha is a td.RMalpha object rather than a
# plain float (presumably a step-size schedule -- confirm in td), so the
# name below embeds whatever str.format renders for that object.
alpha = td.RMalpha(0.03, .1)
lam = .0
td0 = td.LinearTDLambda(alpha=alpha, lam=lam, phi=phi, gamma=gamma)
td0.name = r"TD({}) $\alpha$={}".format(lam, alpha)
td0.color = "k"
methods.append(td0)

# TD(lambda) with lam = 1 and a constant alpha of .004; `td0` is rebound.
# NOTE(review): no `color` assignment or `methods.append` for this instance
# is visible in this chunk -- confirm they follow in the rest of the file.
alpha = .004
lam = 1.
td0 = td.LinearTDLambda(alpha=alpha, lam=lam, phi=phi, gamma=gamma)
td0.name = r"TD({}) $\alpha$={}".format(lam, alpha)
import td
import examples
from task import LinearDiscreteValuePredictionTask
import numpy as np
import features

# Problem setup: a BoyanChain built from n and n_feat, a feature map from
# features.spikes, and a prediction task combining them.
# Presumably n is the number of states and n_feat the number of features,
# gamma the discount factor and p0 the initial parameter vector -- confirm
# against examples / features / task.
n = 14
n_feat = 4
mdp = examples.BoyanChain(n, n_feat)
phi = features.spikes(n_feat, n)
gamma = .95
p0 = np.zeros(n_feat)  # zero vector of length n_feat
task = LinearDiscreteValuePredictionTask(mdp, gamma, phi, p0)

# define the methods to examine
# NOTE(review): `gtd2` is not appended to `methods` anywhere in this chunk;
# check whether the rest of the file uses it.
gtd2 = td.GTD2(alpha=0.5, beta=0.5, phi=phi)
gtd2.name = "GTD2"
gtd2.color = "#0F6E08"

# NOTE(review): this instance is never appended to `methods`, and the name
# `gtd` is rebound further down before any visible use, so it looks dead.
# Confirm td.GTD has no construction side effects before removing it.
gtd = td.GTD(alpha=0.5, beta=0.5, phi=phi)
gtd.name = "GTD"
gtd.color = "#6E086D"

# Estimators collected by this script; each entry carries `name` and `color`
# attributes set just before it is appended.
methods = []

# GTD: alpha = .5, beta = mu * alpha = 1.0.
alpha = .5
mu = 2.
gtd = td.GTD(alpha=alpha, beta=mu * alpha, phi=phi)
gtd.name = r"GTD $\alpha$={} $\mu$={}".format(alpha, mu)
gtd.color = "#6E086D"
methods.append(gtd)