Exemplo n.º 1
0
def generate_problems(num_train=50, num_test=10, level=1, **kwargs):
    """Sample unique, valid problems for a level-specific copy of the domain.

    The base domain ``{DOMAIN_NAME}.pddl`` is parsed from ``PDDLDIR`` and
    written back next to it as ``{DOMAIN_NAME}_level{level}.pddl``. Problems
    are then sampled one at a time with ``sample_problem`` until
    ``num_train + num_test`` of them have been accepted.

    Args:
        num_train: Number of problems placed in the training directory
            (named after the level-specific domain).
        num_test: Number of problems placed in the ``*_test`` directory.
        level: Used here only to build the level-specific domain and
            directory names.
        **kwargs: Forwarded unchanged to ``sample_problem``.
    """
    base_domain_path = os.path.join(PDDLDIR, f"{DOMAIN_NAME}.pddl")
    domain = PDDLDomainParser(base_domain_path,
                              expect_action_preds=True,
                              operators_as_actions=False)

    # Save a copy of the domain under a level-specific name.
    domain_name_with_level = f"{DOMAIN_NAME}_level{level}"
    domain.write(os.path.join(PDDLDIR, f"{domain_name_with_level}.pddl"))

    total_wanted = num_train + num_test
    # Ids of every problem sampled so far. An id is recorded even when the
    # problem later fails validation, so it is never sampled twice.
    sampled_ids = set()

    accepted = 0
    while accepted < total_wanted:
        # The first ``num_train`` accepted problems are training problems;
        # test problems keep counting from there (the index is not reset).
        target_dir = (domain_name_with_level if accepted < num_train
                      else f"{domain_name_with_level}_test")
        problem_id, problem_filepath = sample_problem(
            domain, target_dir, f"problem{accepted}.pddl", **kwargs)

        if problem_id not in sampled_ids:
            sampled_ids.add(problem_id)
            # Only a valid problem advances the counter; otherwise the same
            # output file name is reused on the next attempt.
            if problem_is_valid(domain, problem_filepath):
                accepted += 1
Exemplo n.º 2
0
def run_probabilistic_planning_demo(env, planner_name, verbose=False, num_epi=20, outdir='/tmp', fps=3):
    """Probabilistic planning via simple determinization.

    For every episode the environment is reset, the domain file reported by
    the reset is parsed, determinized and written to ``/tmp/domain.pddl``,
    and ``run_planner`` is invoked on that file together with the episode's
    problem file. Each step of the returned plan is converted to an action
    with ``parse_plan_step`` and executed until the plan is exhausted or the
    environment reports ``done``.

    Args:
        env: Environment to run. It must provide ``reset`` (returning
            ``(obs, debug_info)`` with ``"domain_file"`` and
            ``"problem_file"`` entries), ``step``, ``render``, ``close``,
            and the attributes read below.
        planner_name: Passed through to ``run_planner``; also part of the
            video file name.
        verbose: If true, print observations, actions and rewards, and wait
            for Enter before returning.
        num_epi: Number of episodes to run.
        outdir: Directory for the demo GIF when ``env._render`` is set. If
            ``None``, ``/tmp/<environment class name>`` is used and created
            when missing.
        fps: Frame rate handed to ``VideoWrapper``.

    Returns:
        The total reward of the *last* episode (``0.0`` when ``num_epi`` is
        0). The average over all episodes is only printed.
        NOTE(review): returning the average may have been intended; the
        return value is kept as-is for existing callers.
    """
    if outdir is None:
        # No environment class is passed in, so name the directory after
        # the class of the environment instance itself.
        outdir = "/tmp/{}".format(type(env).__name__)
        os.makedirs(outdir, exist_ok=True)

    if env._render:
        if env._problem_index_fixed:
            video_name = 'planning_{}_{}_{}_demo.gif'.format(
                planner_name, env.spec.id, env._problem_idx)
        else:
            video_name = 'planning_{}_{}_demo.gif'.format(
                planner_name, env.spec.id)
        env = VideoWrapper(env, os.path.join(outdir, video_name), fps=fps)

    determinized_domain_file = "/tmp/domain.pddl"
    avg_reward = 0
    # Defined up front so the final return is valid even with zero episodes.
    tot_reward = 0.
    try:
        for _ in range(num_epi):
            obs, debug_info = env.reset()

            # The domain file comes from this reset's debug info, so it is
            # re-parsed and re-determinized for every episode.
            domain = PDDLDomainParser(debug_info["domain_file"])
            domain.determinize()
            domain.write(determinized_domain_file)

            plan = run_planner(determinized_domain_file,
                               debug_info['problem_file'], planner_name)

            # Plan steps are resolved against the environment's own domain
            # and the objects of the initial observation.
            actions = [
                parse_plan_step(
                    step,
                    env.domain.operators.values(),
                    env.action_predicates,
                    obs.objects,
                    operators_as_actions=env.operators_as_actions,
                )
                for step in plan
            ]

            tot_reward = 0.
            for action in actions:
                if verbose:
                    print("Obs:", obs)
                    print("Act:", action)

                obs, reward, done, _ = env.step(action)
                env.render()
                tot_reward += reward
                if verbose:
                    print("Rew:", reward)

                if done:
                    break

            if verbose:
                print("Final obs:", obs)
                print("Got total reward:", tot_reward)
                print()

            avg_reward += tot_reward/num_epi

        print("Average reward over {} episodes was {}".format(num_epi, avg_reward))
    finally:
        # Close the (possibly video-wrapped) environment even when planning
        # or stepping raised.
        env.close()

    if verbose:
        input("press enter to continue to next problem")
    return tot_reward