def runRewardIRL(self, N=5):
    opt = deepcopy(self.opt_template)

    # Expert visitation distribution per consumable segment, with a
    # random-policy baseline subtracted so only above-chance visitation
    # earns a shaping bonus.
    dist = self.getIRLTDist(self.env_template["consumable"], N=N)
    bdist = self.getIRLDist(N=N, rand=True)
    dist = [d - bdist for d in dist]
    print(dist)

    domain = self.createStateDomain(
        waypoints=self.env_template["consumable"],
        rewardFunction=lambda x, y, z, w: ConsumableGridWorldIRL.rewardIRL(
            x, y, z, w, dist, self.env_template["consumable"]))
    opt["domain"] = domain

    representation = IncrementalTabular(
        domain, discretization=self.env_template["discretization"])
    policy = eGreedy(representation, epsilon=self.env_template["exp"])
    opt["agent"] = Q_Learning(representation=representation,
                              policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=100,
                              lambda_=0.)

    # Learn on the shaped domain, but evaluate on the unshaped one.
    experiment = Experiment(**opt)
    experiment.run(visualize_steps=False,
                   performance_domain=self.createStateDomain(
                       self.env_template["consumable"]),
                   visualize_learning=False,
                   visualize_performance=0)
    experiment.save()
    return np.max(experiment.result["return"]), np.sum(
        experiment.result["return"])
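# The baseline subtraction above rewards states that expert demonstrations
# visit more often than a random policy would. Below is a minimal,
# self-contained sketch of the underlying idea, assuming trajectories are
# lists of (row, col) grid states; the helper name and signature are
# illustrative, not this repo's API. Relies on this module's numpy import.
def visit_distribution(trajs, shape=(10, 7)):
    """Empirical state-visitation distribution over a grid (sketch)."""
    counts = np.zeros(shape)
    for traj in trajs:
        for (r, c) in traj:
            counts[int(r), int(c)] += 1
    total = counts.sum()
    # Normalize to a probability distribution; all-zero counts stay zero.
    return counts / total if total > 0 else counts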
def runTIRL(self, N=5, w=2, pruning=0.5):
    opt = deepcopy(self.opt_template)

    # Expert visitation distribution, plus TSC-derived waypoints (window
    # size w, pruning threshold) in place of the hand-coded consumable list.
    dist = self.getIRLDist(N=N)
    ac = self.getTSCWaypoints(N, w, pruning)

    # Note: the lambda's `w` (terminal flag) shadows the window argument
    # `w`, which is only used by getTSCWaypoints above.
    domain = self.createStateDomain(
        waypoints=ac,
        rewardFunction=lambda x, y, z, w: ConsumableGridWorldIRL.rewardIRL(
            x, y, z, w, dist))
    opt["domain"] = domain

    representation = IncrementalTabular(
        domain, discretization=self.env_template["discretization"])
    policy = eGreedy(representation, epsilon=self.env_template["exp"])
    opt["agent"] = Q_Learning(representation=representation,
                              policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=100,
                              lambda_=0.)

    # Learn on the shaped domain, but evaluate on the unshaped one.
    experiment = Experiment(**opt)
    experiment.run(visualize_steps=False,
                   performance_domain=self.createStateDomain(
                       waypoints=self.env_template["consumable"]),
                   visualize_learning=False,
                   visualize_performance=0)
    experiment.save()
    return np.max(experiment.result["return"]), np.sum(
        experiment.result["return"])
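# rewardIRL above is invoked with an (s, a, ns, terminal)-style transition
# signature and adds an IRL bonus derived from the visitation distribution
# `dist`. A hedged sketch of a shaped reward of that shape -- the bonus form
# (log-visitation of the successor state) is an assumption for illustration,
# not this repo's exact rewardIRL:
def shaped_reward_sketch(base_reward, next_state, dist, scale=1.0):
    """Base reward plus a log-visitation bonus for the successor state."""
    r, c = int(next_state[0]), int(next_state[1])
    # Small epsilon keeps the log finite for never-visited states.
    return base_reward + scale * np.log(dist[r, c] + 1e-8)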
def gridworld1_rirl(exp_id=6, path="./Results/gridworld1"):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["checks_per_policy"] = 10
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 20
    noise = 0.1
    exp = 0.3
    discretization = 400

    # Domain: 10x7 gridworld with two consumable waypoints; the encoding
    # augments the state with a binary visit indicator for (7, 5).
    maze = os.path.join(ConsumableGridWorld.default_map_dir,
                        '10x7-ACC2011.txt')
    domain = ConsumableGridWorldIRL(
        [(7, 5), (1, 2)],
        mapname=maze,
        encodingFunction=lambda x: ConsumableGridWorldIRL.stateVisitEncoding(
            x, [(7, 5)]),
        noise=noise,
        binary=True)

    # Representation
    representation = Tabular(domain, discretization=discretization)
    # Policy
    policy = eGreedy(representation, epsilon=exp)
    # Agent
    opt["agent"] = Q_Learning(representation=representation,
                              policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=100,
                              lambda_=0.)

    # Collect demonstration trajectories and estimate the expert
    # state-visitation distribution.
    d = GoalPathPlanner(domain, representation, policy)
    trajs = d.generateTrajectories(N=5)
    dist = calculateStateDist((10, 7), trajs)

    # Reset: build a fresh representation first, then the policy on top of
    # it, so the agent acts on the representation it is learning.
    representation = Tabular(domain, discretization=discretization)
    policy = eGreedy(representation, epsilon=exp)
    opt["agent"] = Q_Learning(representation=representation,
                              policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=100,
                              lambda_=0.)

    # Shaped training domain and unshaped performance domain.
    domain = ConsumableGridWorldIRL(
        [(7, 5), (1, 2)],
        mapname=maze,
        encodingFunction=lambda x: ConsumableGridWorldIRL.stateVisitEncoding(
            x, [(7, 5)]),
        rewardFunction=lambda x, y, z, w: ConsumableGridWorldIRL.rewardIRL(
            x, y, z, w, dist),
        noise=noise)
    pdomain = ConsumableGridWorldIRL(
        [(7, 5), (1, 2)],
        mapname=maze,
        encodingFunction=lambda x: ConsumableGridWorldIRL.stateVisitEncoding(
            x, [(7, 5)]),
        noise=noise)
    opt["domain"] = domain

    experiment = Experiment(**opt)
    experiment.run(visualize_steps=False,
                   performance_domain=pdomain,
                   visualize_learning=False,
                   visualize_performance=0)
    experiment.save()
    return np.max(experiment.result["return"]), np.sum(
        experiment.result["return"])
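# A minimal driver sketch: run the shaped-reward gridworld experiment and
# report the best and cumulative evaluation returns (arguments match the
# defaults in the signature above).
if __name__ == "__main__":
    best_return, total_return = gridworld1_rirl(exp_id=6,
                                                path="./Results/gridworld1")
    print("best return: %s, total return: %s" % (best_return, total_return))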