def generate_problems(num_train=50, num_test=10, level=1, **kwargs):
    """Sample unique, valid problems for the level-specific domain.

    The base domain ``{DOMAIN_NAME}.pddl`` is parsed from ``PDDLDIR`` and
    written back out as ``{DOMAIN_NAME}_level{level}.pddl``. Problems are then
    sampled until ``num_train + num_test`` of them have been accepted.

    Args:
        num_train: Number of accepted problems directed at the training
            directory (``{DOMAIN_NAME}_level{level}``).
        num_test: Number of accepted problems directed at the test directory
            (``{DOMAIN_NAME}_level{level}_test``).
        level: Only used to build the level-specific domain and directory
            names; it is not forwarded to ``sample_problem``.
        **kwargs: Forwarded unchanged to ``sample_problem``.
    """
    source_domain_path = os.path.join(PDDLDIR, f"{DOMAIN_NAME}.pddl")
    domain = PDDLDomainParser(
        source_domain_path,
        expect_action_preds=True,
        operators_as_actions=False,
    )

    # Write a copy of the domain under the level-specific name.
    leveled_name = f"{DOMAIN_NAME}_level{level}"
    domain.write(os.path.join(PDDLDIR, f"{leveled_name}.pddl"))

    total_needed = num_train + num_test
    used_ids = set()  # ids of every distinct problem sampled so far
    accepted = 0
    while accepted < total_needed:
        # The first num_train accepted problems are train, the rest are test.
        target_dir = leveled_name if accepted < num_train else f"{leveled_name}_test"
        candidate_id, candidate_path = sample_problem(
            domain, target_dir, f"problem{accepted}.pddl", **kwargs
        )

        # A duplicate does not advance the counter, so the next sample is
        # given the same output file name.
        if candidate_id in used_ids:
            continue
        used_ids.add(candidate_id)

        # An invalid problem keeps its id in used_ids but is not counted.
        if problem_is_valid(domain, candidate_path):
            accepted += 1
def run_probabilistic_planning_demo(env, planner_name, verbose=False, num_epi=20,
                                    outdir='/tmp', fps=3):
    """Probabilistic planning via simple determinization.

    For each episode the environment is reset, the domain file reported by
    the reset is determinized and written to ``/tmp/domain.pddl``, a plan is
    computed with ``run_planner``, and the plan is executed step by step in
    ``env`` until it is exhausted or the environment reports ``done``.

    Args:
        env: Environment to run. Its ``reset`` must return
            ``(obs, debug_info)`` where ``debug_info`` has the keys
            ``"domain_file"`` and ``"problem_file"``. When ``env._render`` is
            truthy the environment is wrapped in ``VideoWrapper`` and a GIF
            path is built under ``outdir``.
        planner_name: Passed to ``run_planner`` and used in the GIF file name.
        verbose: If true, print observations, actions and rewards, and wait
            for the user to press enter before returning.
        num_epi: Number of episodes to run.
        outdir: Directory for the GIF. ``None`` selects
            ``/tmp/<type(env).__name__>``. The directory is created if it
            does not exist.
        fps: Frames per second passed to ``VideoWrapper``.

    Returns:
        The total reward of the last episode, or ``0.0`` when ``num_epi`` is
        0. NOTE(review): the average over episodes is only printed, not
        returned; kept as-is so existing callers see the same value.
    """
    if outdir is None:
        # The original referenced an undefined ``env_cls`` here; derive the
        # directory name from the environment instance instead.
        outdir = "/tmp/{}".format(type(env).__name__)
    os.makedirs(outdir, exist_ok=True)

    if env._render:
        if env._problem_index_fixed:
            video_name = 'planning_{}_{}_{}_demo.gif'.format(
                planner_name, env.spec.id, env._problem_idx)
        else:
            video_name = 'planning_{}_{}_demo.gif'.format(
                planner_name, env.spec.id)
        env = VideoWrapper(env, os.path.join(outdir, video_name), fps=fps)

    determinized_path = "/tmp/domain.pddl"
    avg_reward = 0
    # Bound up front so the final return is defined even with zero episodes.
    tot_reward = 0.
    for _ in range(num_epi):
        obs, debug_info = env.reset()

        # Plan against a determinized copy of the domain.
        domain = PDDLDomainParser(debug_info["domain_file"])
        domain.determinize()
        domain.write(determinized_path)
        plan = run_planner(determinized_path, debug_info['problem_file'],
                           planner_name)

        # The whole plan is parsed against the objects of the initial
        # observation before any step is executed.
        actions = [
            parse_plan_step(
                step,
                env.domain.operators.values(),
                env.action_predicates,
                obs.objects,
                operators_as_actions=env.operators_as_actions,
            )
            for step in plan
        ]

        tot_reward = 0.
        for action in actions:
            if verbose:
                print("Obs:", obs)
                print("Act:", action)

            obs, reward, done, _ = env.step(action)
            env.render()
            tot_reward += reward
            if verbose:
                print("Rew:", reward)

            if done:
                break

        if verbose:
            print("Final obs:", obs)
            print("Got total reward:", tot_reward)
            print()

        avg_reward += tot_reward / num_epi

    print("Average reward over {} episodes was {}".format(num_epi, avg_reward))
    env.close()
    if verbose:
        input("press enter to continue to next problem")
    return tot_reward