def __init__(self, nactions, alpha, gamma, ld, nlevels=10, resolution=0.1):
    """Record the learning parameters and create one TraceCMAC per action.

    Args:
        nactions: Number of actions; one TraceCMAC is built for each.
        alpha: Passed to every TraceCMAC as its third positional argument
            (presumably the learning rate -- confirm against TraceCMAC).
        gamma: Stored on the instance and multiplied with ``ld`` to form
            the fourth TraceCMAC argument.
        ld: Stored on the instance; ``ld * gamma`` is handed to every
            TraceCMAC (presumably the eligibility-trace decay -- confirm).
        nlevels: First positional argument of every TraceCMAC.
        resolution: Second positional argument of every TraceCMAC.
    """
    self.nactions = nactions
    self.alpha = alpha
    self.gamma = gamma
    self.ld = ld

    # Every action gets its own, independently constructed approximator;
    # all of them share the same configuration.
    decay = ld * gamma
    self.cmac = [
        TraceCMAC(nlevels, resolution, alpha, decay, replace=True, inc=1.0)
        for _ in range(nactions)
    ]

    self.nlevels = nlevels
    self.resolution = resolution
class TDCmac(TD):
    """TD subclass whose value estimates are held in a single TraceCMAC.

    ``value``, ``train``, ``reset`` and ``len()`` all forward to the wrapped
    TraceCMAC instance stored in ``self.cmac``.
    """

    def __init__(self, alpha, gamma, ld, nlevels=10, resolution=0.1):
        """Build the underlying TraceCMAC and remember ``gamma``.

        Args:
            alpha: Third positional argument of TraceCMAC (presumably the
                learning rate -- confirm). Not stored on the instance.
            gamma: Stored as ``self.gamma``; also multiplied with ``ld``.
            ld: Only used through ``ld * gamma``, the fourth TraceCMAC
                argument (presumably the trace decay -- confirm).
            nlevels: Stored, and first positional argument of TraceCMAC.
            resolution: Stored, and second positional argument of TraceCMAC.
        """
        self.nlevels = nlevels
        self.resolution = resolution
        decay = ld * gamma
        self.cmac = TraceCMAC(
            self.nlevels,
            self.resolution,
            alpha,
            decay,
            replace=True,
            inc=1.0,
        )
        self.gamma = gamma

    def __len__(self):
        """Return ``len()`` of the wrapped TraceCMAC."""
        approximator = self.cmac
        return len(approximator)

    def value(self, vector):
        """Return the wrapped TraceCMAC's evaluation of ``vector``."""
        estimate = self.cmac.eval(vector)
        return estimate

    def train(self, pvector, reward, vector):
        """Train the TraceCMAC at ``pvector`` and return the delta used.

        The delta comes from ``self.delta(pvector, reward, vector)``, which
        is not defined in this class (presumably inherited from TD).
        """
        error = self.delta(pvector, reward, vector)
        self.cmac.train(pvector, error)
        return error

    def reset(self):
        """Reset the wrapped TraceCMAC."""
        self.cmac.reset()
def __init__(self, alpha, gamma, ld, nlevels=10, resolution=0.1):
    """Create the TraceCMAC backing this object and keep ``gamma``.

    Args:
        alpha: Forwarded to TraceCMAC as its third positional argument
            (presumably the learning rate -- confirm). Not stored.
        gamma: Stored as ``self.gamma`` and combined with ``ld``.
        ld: Used only as ``ld * gamma``, the fourth TraceCMAC argument
            (presumably the trace decay -- confirm). Not stored.
        nlevels: Stored, and forwarded as the first TraceCMAC argument.
        resolution: Stored, and forwarded as the second TraceCMAC argument.
    """
    self.nlevels, self.resolution = nlevels, resolution
    self.cmac = TraceCMAC(
        self.nlevels,
        self.resolution,
        alpha,
        ld * gamma,
        replace=True,
        inc=1.0,
    )
    self.gamma = gamma