def main(args):
    """Build the grid world and run the Monte Carlo method on it.

    Args:
        args: Options object (presumably an ``argparse`` namespace -- confirm
            against the caller). The attributes read here are ``debug``,
            ``verbose``, ``size``, ``interval``, ``obstacles``, ``vision``,
            ``phase`` and ``method``.

    Returns:
        None. The value returned by ``mc.run()`` is discarded.
    """
    # DEBUG is the more detailed level, so it must win when both flags are
    # set. Testing ``verbose`` first silently dropped the debug request.
    # With neither flag set, logging is left unconfigured.
    if args.debug:
        logging.basicConfig(level=logging.DEBUG)
    elif args.verbose:
        logging.basicConfig(level=logging.INFO)

    # initializations
    gridworld = GridWorld(
        args.size, args.interval, args.obstacles, args.vision, args.phase
    )
    logging.info("Generated grid world!")
    # NOTE(review): nothing in this function builds visuals explicitly;
    # presumably the GridWorld constructor does -- confirm.
    logging.info("Visuals created")

    mc = MonteCarlo(gridworld, mode=args.method)
    logging.info("Initialized Monte Carlo method")
    mc.run()
# Tail of a method whose header lies above this chunk. The nesting below is
# reconstructed from a collapsed line -- confirm against the original layout.
        for s in self.q:
            for a in [Action.HIT, Action.STICK]:
                # Shift q[s][a] by alpha * delta, weighted by E[s][a].
                # alpha and delta are set earlier in the method (not visible
                # here); E presumably holds eligibility traces -- confirm.
                self.q[s][a] += alpha * delta * self.E[s][a]
                # Scale the stored E value by Sarsa.GAMMA * self._lambda.
                self.E[s][a] = Sarsa.GAMMA * self._lambda * self.E[s][a]


if __name__ == "__main__":
    # x/y graph with a fixed x axis of 100..1000 and an automatic y axis.
    # pos="bl" presumably places the key bottom-left (PyX) -- confirm.
    g = graph.graphxy(width=30, x=graph.axis.linear(min=100, max=1000), y=graph.axis.linear(), key=graph.key.key(pos="bl"))
    plots = []
    """ Re-calculate V* """
    # Train the Monte Carlo reference; its q table (m.q) is what the Sarsa
    # error below is measured against.
    m = MonteCarlo()
    # NOTE(review): range(1, 50000) makes 49,999 calls, not 50,000 -- confirm
    # which was intended.
    for i in range(1, 50000):
        m.run()
    # Sweep the Sarsa parameter over 0.0, 0.1, ..., 1.0.
    for _l in [e / 10.0 for e in range(0, 11, 1)]:
        print("Training Sarsa(%s)" % _l)
        s = Sarsa(_l)  # fresh instance for each parameter value
        c1 = []  # values of j at which the error was sampled (100, 200, ..., 1000)
        c2 = []  # s.mean_squared_error(m.q) at those values of j
        for j in range(1, 1001):
            s.run()
            if j % 100 == 0:
                c1.append(j)
                e = s.mean_squared_error(m.q)
                c2.append(e)
            # NOTE(review): same condition as the branch above, so the two
            # could be merged. Also, "%2f" sets a minimum field width of 2,
            # not two decimals; "%.2f" may have been intended -- confirm.
            if j % 100 == 0:
                print("Error = %2f" % e)
        # NOTE(review): g, plots and title are not used in the visible part;
        # the script evidently continues past this chunk.
        title = "Sarsa(%s)" % _l
# Tail of a function whose header lies above this chunk. The indentation is
# reconstructed from a collapsed line. x, y, L, x2 and y2 are bound earlier
# in that function (not visible here).
    L2 = L * L
    xL2 = (x - L) * (x - L)
    yL2 = (y - L) * (y - L)
    # Assuming x2 == x*x and y2 == y*y (TODO confirm), each test is
    # "within distance L of" one of (0, 0), (L, 0), (0, L), (L, L).
    c1 = x2 + y2 <= L2
    c2 = xL2 + y2 <= L2
    c3 = x2 + yL2 <= L2
    c4 = xL2 + yL2 <= L2
    # Indicator value: 1 only when all four tests hold.
    tc = c1 and c2 and c3 and c4
    return 1 if tc else 0


# First run. `seed` and `samples` are bound above this chunk, and
# target_function_L is presumably the function whose tail appears above.
# The trailing 2 is presumably the random-vector dimension -- confirm against
# MonteCarlo's signature.
mc = MonteCarlo(seed, samples, target_function_L, 2)
print(mc.run())


def target_function(random_vector):
    '''Return 1 if the first two components sum to less than 1, else 0.

    random_vector: indexable with at least two numeric entries; only
        indices 0 and 1 are read. Presumably drawn uniformly from the
        unit square by MonteCarlo -- confirm.
    '''
    x = random_vector[0]
    y = random_vector[1]
    c1 = x + y < 1  # strict inequality: x + y == 1 yields 0
    return 1 if c1 else 0


# Second run. `samples` is rebound here, so the 10000 applies only to this
# run; the first run above used whatever value was bound earlier.
samples = 10000
mc = MonteCarlo(seed, samples, target_function, 2)
print(mc.run())