def grid_world1_sliding(exp_id=3, path="./Results/gridworld1"):
    """Run one Q-learning experiment on the 10x7-ACC2011 consumable grid world.

    The domain is a ``ConsumableGridWorldIRL`` whose encoding function is
    ``slidingWindowEncoding`` with a second argument of 3.  The agent is
    ``Q_Learning`` over an ``IncrementalTabular`` representation with an
    ``eGreedy`` policy.  The experiment is run without any visualization and
    then saved.

    Args:
        exp_id: forwarded to ``Experiment`` as ``exp_id``.
        path: forwarded to ``Experiment`` as ``path``
            (presumably the results directory -- confirm against Experiment).

    Returns:
        A 2-tuple ``(max, sum)`` computed with numpy over
        ``experiment.result["return"]``.
    """
    transition_noise = 0.1
    epsilon = 0.3
    bins = 400

    # Domain
    map_file = os.path.join(
        ConsumableGridWorld.default_map_dir, '10x7-ACC2011.txt')
    domain = ConsumableGridWorldIRL(
        [(7, 5), (1, 2)],
        mapname=map_file,
        encodingFunction=lambda x: ConsumableGridWorldIRL.slidingWindowEncoding(x, 3),
        noise=transition_noise,
    )

    # Representation
    representation = IncrementalTabular(domain, discretization=bins)

    # Policy
    policy = eGreedy(representation, epsilon=epsilon)

    # Agent
    agent = Q_Learning(
        representation=representation,
        policy=policy,
        discount_factor=domain.discount_factor,
        initial_learn_rate=0.1,
        learn_rate_decay_mode="boyan",
        boyan_N0=100,
        lambda_=0.,
    )

    # Every entry below becomes a keyword argument of Experiment.
    opt = {
        "exp_id": exp_id,
        "path": path,
        "checks_per_policy": 10,
        "max_steps": 150000,
        "num_policy_checks": 20,
        "domain": domain,
        "agent": agent,
    }

    experiment = Experiment(**opt)
    experiment.run(
        visualize_steps=False,
        visualize_learning=False,
        visualize_performance=0,
    )
    experiment.save()

    returns = experiment.result["return"]
    return np.max(returns), np.sum(returns)
def createSlidingDomain(self, k):
    """Build a ``ConsumableGridWorldIRL`` domain from ``self.env_template``.

    The template's ``"consumable"``, ``"map"`` and ``"noise"`` entries supply
    the first positional argument, ``mapname`` and ``noise`` respectively.
    The encoding function calls ``slidingWindowEncoding`` with ``k`` as its
    second argument.

    Args:
        k: second argument handed to
            ``ConsumableGridWorldIRL.slidingWindowEncoding``
            (presumably the window size -- confirm against that method).

    Returns:
        The newly constructed ``ConsumableGridWorldIRL`` instance.
    """
    template = self.env_template
    return ConsumableGridWorldIRL(
        template["consumable"],
        mapname=template["map"],
        encodingFunction=lambda x: ConsumableGridWorldIRL.slidingWindowEncoding(x, k),
        noise=template["noise"],
    )