def _test_planner(planner, domain_name, num_problems, timeout):
    print("Running testing...")
    env = pddlgym.make("PDDLEnv{}-v0".format(domain_name))
    num_problems = min(num_problems, len(env.problems))
    for problem_idx in range(num_problems):
        print("\tTesting problem {} of {}".format(problem_idx + 1, num_problems),
              flush=True)
        env.fix_problem_index(problem_idx)
        state, _ = env.reset()
        start = time.time()
        try:
            plan = planner(env.domain, state, timeout=timeout)
        except (PlanningTimeout, PlanningFailure) as e:
            print("\t\tPlanning failed with error: {}".format(e), flush=True)
            continue
        # Validate plan on the full test problem.
        if not validate_strips_plan(
                domain_file=env.domain.domain_fname,
                problem_file=env.problems[problem_idx].problem_fname,
                plan=plan):
            print("\t\tPlanning returned an invalid plan")
            continue
        print("\t\tSuccess, got plan of length {} in {:.5f} seconds".format(
            len(plan), time.time() - start), flush=True)
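# Usage sketch for _test_planner (hedged: assumes the FF planner wrapper used
# elsewhere in this repo and a registered "Blocks" pddlgym domain; both names
# are illustrative, not prescribed by this module):
#
#     _test_planner(FF(), "Blocks", num_problems=5, timeout=60)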
def _collect_training_data(self, train_env_name):
    """Returns X, Y where X are States and Y are sets of objects
    """
    outfile = self._dataset_file_prefix + "_{}.pkl".format(train_env_name)
    if not self._load_dataset_from_file or not os.path.exists(outfile):
        inputs = []
        outputs = []
        env = pddlgym.make("PDDLEnv{}-v0".format(train_env_name))
        assert env.operators_as_actions
        for idx in range(min(self._num_train_problems, len(env.problems))):
            print("Collecting training data problem {}".format(idx),
                  flush=True)
            env.fix_problem_index(idx)
            state, _ = env.reset()
            try:
                plan = self._planner(env.domain, state, timeout=60)
            except (PlanningTimeout, PlanningFailure):
                print("Warning: planning failed, skipping: {}".format(
                    env.problems[idx].problem_fname))
                continue
            inputs.append(state)
            # The target for each state is the set of objects that the
            # planner's plan actually used.
            objects_in_plan = {o for act in plan for o in act.variables}
            outputs.append(objects_in_plan)
        training_data = (inputs, outputs)
        with open(outfile, "wb") as f:
            pickle.dump(training_data, f)
    # Always read back from disk so the cached and fresh paths return
    # identical data.
    with open(outfile, "rb") as f:
        training_data = pickle.load(f)
    return training_data
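# Usage sketch (hedged: assumes `model` is an instance of the surrounding
# class, with self._planner, self._dataset_file_prefix,
# self._load_dataset_from_file, and self._num_train_problems set elsewhere;
# "Blocks" is an illustrative domain name):
#
#     states, object_sets = model._collect_training_data("Blocks")
#     # states[i] is a pddlgym State; object_sets[i] is the set of objects
#     # appearing in the plan found from states[i].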
def demo_random(env_name, render=True, problem_index=0, verbose=True):
    env = pddlgym.make("PDDLEnv{}-v0".format(env_name.capitalize()))
    env.fix_problem_index(problem_index)
    # Sample a random valid action in the given state.
    policy = lambda s: env.action_space.sample(s)
    video_path = "/tmp/{}_random_demo.mp4".format(env_name)
    run_demo(env, policy, render=render, verbose=verbose, seed=0,
             video_path=video_path)
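# Usage sketch (hedged: "sokoban" is an illustrative environment name; any
# registered pddlgym domain should work the same way):
#
#     demo_random("sokoban", render=True, problem_index=0)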
def test_searchandrescue_walls(num_actions_to_test=100):
    """Test that when we try to move into walls, we stay put.
    """
    rng = np.random.RandomState(0)
    for level in [1, 2]:
        env = pddlgym.make(f"SearchAndRescueLevel{level}-v0")
        for idx in range(len(env.problems)):
            env.fix_problem_index(idx)
            state, debug_info = env.reset()
            all_actions = dropoff, down, left, right, up, pickup = \
                env.get_possible_actions()
            # Map each action to its (row, col) delta; dropoff and pickup
            # do not move the robot.
            act_to_delta = {
                dropoff: (0, 0),
                down: (1, 0),
                left: (0, -1),
                right: (0, 1),
                up: (-1, 0),
                pickup: (0, 0),
            }
            actions = rng.choice(all_actions, size=num_actions_to_test)
            done = False
            robot_r, robot_c = dict(state)["robot0"]
            walls = {dict(state)[k] for k in dict(state)
                     if k.startswith("wall")}
            for t, act in enumerate(actions):
                dr, dc = act_to_delta[act]
                can_r, can_c = robot_r + dr, robot_c + dc
                if done:
                    break
                state1, _, done, _ = env.step(act)
                state2 = env.get_successor_state(state, act)
                assert state2 == state1
                state = state1
                new_r, new_c = dict(state)["robot0"]
                if (can_r, can_c) in walls:
                    # Can't move into walls!
                    assert (new_r, new_c) == (robot_r, robot_c)
                robot_r, robot_c = new_r, new_c
    print("Test passed.")
def test_readme_example():
    """Make sure that the README example runs
    """
    # Planning with FastForward
    ff_planner = FF()
    env = pddlgym.make("PDDLEnvBlocks-v0")
    state, _ = env.reset()
    print("Plan:", ff_planner(env.domain, state))
    print("Statistics:", ff_planner.get_statistics())
    # Planning with FastDownward (--alias seq-opt-lmcut)
    fd_planner = FD()
    env = pddlgym.make("PDDLEnvBlocks-v0")
    state, _ = env.reset()
    print("Plan:", fd_planner(env.domain, state))
    print("Statistics:", fd_planner.get_statistics())
    # Planning with FastDownward (--alias lama-first)
    lama_first_planner = FD(alias_flag="--alias lama-first")
    env = pddlgym.make("PDDLEnvBlocks-v0")
    state, _ = env.reset()
    print("Plan:", lama_first_planner(env.domain, state))
    print("Statistics:", lama_first_planner.get_statistics())
def test_planners():
    """Make sure that the plans found by the planners succeed in the
    environments.
    """
    planners = [FF(), FD(), FD(alias_flag="--alias lama-first")]
    env_names = ["PDDLEnvBlocks-v0", "PDDLEnvBlocks_operator_actions-v0"]
    for planner in planners:
        for env_name in env_names:
            # Make the environment named by the loop variable (the loop
            # variable was previously shadowed, so only Blocks was tested).
            env = pddlgym.make(env_name)
            state, _ = env.reset()
            plan = planner(env.domain, state)
            for act in plan:
                _, reward, done, _ = env.step(act)
            # The final step of a valid plan should reach the goal.
            assert reward == 1.
            assert done
def demo_planning(env_name, render=True, probabilistic=False, problem_index=0,
                  verbose=True):
    env = pddlgym.make("PDDLEnv{}-v0".format(env_name.capitalize()))
    env.fix_problem_index(problem_index)
    planner = FD(alias_flag="--alias lama-first")
    if probabilistic:
        # Outcomes are stochastic, so replan after every step.
        policy = create_replanning_policy(env, planner)
    else:
        # Plan once up front and execute the plan open-loop.
        policy = create_single_plan_policy(env, planner)
    video_path = "/tmp/{}_planning_demo.mp4".format(env_name)
    run_demo(env, policy, render=render, verbose=verbose, seed=0,
             video_path=video_path)
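# Usage sketch (hedged: "blocks" is an illustrative environment name; pass
# probabilistic=True for stochastic domains so the policy replans each step):
#
#     demo_planning("blocks", render=True, probabilistic=False)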
def test_searchandrescue(num_actions_to_test=100, verbose=False):
    """Test state encoding and decoding
    """
    for level in range(1, 7):
        env = pddlgym.make(f"SearchAndRescueLevel{level}-v0")
        if level == 1:
            assert len(env.problems) == 20
        else:
            assert len(env.problems) == 50
        env.fix_problem_index(0)
        state, debug_info = env.reset()
        rng = np.random.RandomState(0)
        all_actions = env.get_possible_actions()
        actions = rng.choice(all_actions, size=num_actions_to_test)
        done = False
        for t, act in enumerate(actions):
            if verbose:
                print(f"Taking action {t}/{num_actions_to_test}",
                      end='\r', flush=True)
            # The public state and the internal PDDL state should be
            # interconvertible without loss.
            assert state == env._internal_to_state(env._state)
            assert env._state.literals == env._state_to_internal(
                state).literals
            assert env._state.objects == env._state_to_internal(state).objects
            assert set(env._state.goal.literals) == set(
                env._state_to_internal(state).goal.literals)
            assert env.check_goal(state) == done
            for a in all_actions:
                ns = env.get_successor_state(state, a)
                assert ns == env._internal_to_state(env._state_to_internal(ns))
            if done:
                break
            state, _, done, _ = env.step(act)
        if verbose:
            print()
    print("Test passed.")
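# The successor-state API exercised above can also be used directly, e.g. to
# expand states in an external search (hedged: a minimal sketch using only
# the public calls the test relies on):
#
#     env = pddlgym.make("SearchAndRescueLevel1-v0")
#     state, _ = env.reset()
#     successors = [env.get_successor_state(state, a)
#                   for a in env.get_possible_actions()]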
class LargePOSARRadius1Env(POSARRadius1Env):
    height, width = 9, 9
    room_locs = [(0, i) for i in range(9)] + [(8, i) for i in range(9)]
    robot_starts = [(4, 3), (4, 4), (4, 5)]
    wall_locs = [(1, 1), (2, 4), (1, 4), (1, 5), (2, 1), (2, 7), (2, 8),
                 (3, 0), (3, 2), (4, 6), (4, 7), (5, 1), (5, 3), (5, 4),
                 (6, 2), (6, 5), (6, 6), (6, 8), (7, 3), (7, 4)]
    fire_locs = []


if __name__ == "__main__":
    import imageio
    np.random.seed(0)
    for env_name in ["PDDLSearchAndRescueLevel7"]:  # , "MyopicPOSAR"]:
        imgs = []
        env = pddlgym.make(f"{env_name}-v0")
        env.fix_problem_index(1)
        obs, _ = env.reset()
        print(obs)
        imgs.append(env.render())
        plan = np.random.choice(env.get_possible_actions(), size=50)
        for act in plan:
            obs, reward, done, _ = env.step(act)
            print(obs, reward, done)
            imgs.append(env.render())
            if done:
                break
        imageio.mimsave(f"/tmp/{env_name}_random.mp4", imgs)