def run_planning_demo(env, planner_name, outdir='/tmp', fps=3, verbose=False,
                      seed=None, check_reward=True):
    """Plan for the environment's current problem, execute the plan, and
    return the total reward (optionally recording a GIF of the episode)."""
    if outdir is None:
        outdir = "/tmp/{}".format(type(env).__name__)
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # Wrap the environment so that rendered frames are saved to a GIF.
    if env._render:
        if env._problem_index_fixed:
            problem_idx = env._problem_idx
            video_path = os.path.join(outdir, 'planning_{}_{}_{}_demo.gif'.format(
                planner_name, env.spec.id, problem_idx))
        else:
            video_path = os.path.join(outdir, 'planning_{}_{}_demo.gif'.format(
                planner_name, env.spec.id))
        env = VideoWrapper(env, video_path, fps=fps)

    if seed is not None:
        env.seed(seed)
    obs, debug_info = env.reset()

    # Call the external planner on the PDDL files for the current problem.
    plan = run_planner(debug_info['domain_file'], debug_info['problem_file'],
                       planner_name)

    # Convert each plan step into a grounded environment action.
    actions = []
    for s in plan:
        a = parse_plan_step(
            s,
            env.domain.operators.values(),
            env.action_predicates,
            obs.objects,
            operators_as_actions=env.operators_as_actions
        )
        actions.append(a)

    # Execute the plan.
    tot_reward = 0.
    for action in actions:
        if verbose:
            print("Obs:", obs)
            print("Act:", action)
        obs, reward, done, _ = env.step(action)
        env.render()
        tot_reward += reward
        if verbose:
            print("Rew:", reward)
        if done:
            break

    if verbose:
        print("Final obs:", obs)
        print()

    env.close()
    if check_reward:
        assert tot_reward > 0
    if verbose:
        input("press enter to continue to next problem")
    return tot_reward
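
# Hypothetical convenience wrapper (not part of the original module): average
# the total reward returned by run_planning_demo over several seeds. The
# helper name and the `make_env` factory argument (a zero-argument callable
# returning a fresh environment) are illustrative assumptions.
def average_demo_reward(make_env, planner_name, seeds, **demo_kwargs):
    """Run run_planning_demo on a fresh environment per seed; return the mean reward."""
    rewards = []
    for s in seeds:
        env = make_env()
        rewards.append(run_planning_demo(env, planner_name, seed=s, **demo_kwargs))
    return sum(rewards) / len(rewards)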
def run_planning_agent_demo(env, oaru, outdir="out", fps=3, verbose=False,
                            seed=None, planner_name="ff"):
    """Plan and execute as in run_planning_demo, additionally feeding each
    (rendered frame, symbolic state) transition to record_transition."""
    if seed is not None:
        env.seed(seed)

    # Wrap the environment so that rendered frames are saved to a GIF.
    if env._render:
        if env._problem_index_fixed:
            problem_idx = env._problem_idx
            video_path = os.path.join(outdir, 'planning_{}_{}_{}_demo.gif'.format(
                planner_name, env.spec.id, problem_idx))
        else:
            video_path = os.path.join(outdir, 'planning_{}_{}_demo.gif'.format(
                planner_name, env.spec.id))
        env = VideoWrapper(env, video_path, fps=fps)

    obs, debug_info = env.reset()
    type_predicates = generate_type_predicates(env.unwrapped)

    tqdm.tqdm.write("Planning...")
    plan = run_planner(debug_info['domain_file'], debug_info['problem_file'],
                       planner_name)

    # Convert each plan step into a grounded environment action.
    actions = []
    for s in plan:
        a = parse_plan_step(
            s,
            env.domain.operators.values(),
            env.action_predicates,
            obs.objects,
            operators_as_actions=env.operators_as_actions
        )
        actions.append(a)

    tqdm.tqdm.write("Generating state trajectory...")
    state_trajectory = [
        (env.render(), get_state(env.unwrapped, type_predicates))
    ]
    tot_reward = 0
    for action in tqdm.tqdm(actions):
        if verbose:
            tqdm.tqdm.write(f"Act: {action}")
        obs, reward, done, _ = env.step(action)
        state_trajectory.append(
            (env.render(), get_state(env, type_predicates))
        )
        # Feed the latest transition to the action-model learner.
        record_transition(state_trajectory[-2], state_trajectory[-1], oaru)
        tot_reward += reward
        if verbose:
            tqdm.tqdm.write(f"Rew: {reward}")
        if done:
            break

    if verbose:
        tqdm.tqdm.write(f"Total reward: {tot_reward}")
    env.close()
def get_plan_trajectory(env, verbose=False, seed=None, partial_observability=None):
    """Plan with FF, execute the plan, and return the symbolic state trajectory
    together with the corresponding action trajectory. If partial_observability
    is a (lo, hi) pair, a random number of atoms in that range is moved from
    each state's observed atoms to its uncertain atoms."""
    if seed is not None:
        env.seed(seed)
    obs, debug_info = env.reset()
    type_predicates = generate_type_predicates(env)

    plan = run_planner(debug_info['domain_file'], debug_info['problem_file'],
                       "ff", timeout=300)

    # Ground-truth action trajectory, instantiated from the action library.
    a_lib_gmt = get_gmt_action_library(env)
    action_trajectory = [
        a_lib_gmt[a_name].instantiate(a_args)
        for a_name, *a_args in map(str.split, plan)
    ]

    # Convert each plan step into a grounded environment action.
    actions = []
    for s in plan:
        a = parse_plan_step(
            s,
            env.domain.operators.values(),
            env.action_predicates,
            obs.objects,
            operators_as_actions=env.operators_as_actions
        )
        actions.append(a)

    # Execute the plan, recording the symbolic state after every step.
    state_trajectory = [get_state(env, type_predicates)]
    for action in actions:
        if verbose:
            print("Obs:", obs)
            print("Act:", action)
        obs, _, done, _ = env.step(action)
        state_trajectory.append(get_state(env, type_predicates))
        if done:
            break

    if verbose:
        print("Final obs:", obs)
        print()
    env.close()
    if verbose:
        input("press enter to continue to next problem")

    if partial_observability:
        rng = random.Random(seed)
        lo, hi = partial_observability
        for state in state_trajectory:
            n_selected = rng.randint(min(lo, len(state.atoms)),
                                     min(hi, len(state.atoms)))
            # random.sample needs a sequence (not a set) on Python >= 3.11.
            selected_atoms = rng.sample(list(state.atoms), n_selected)
            for atom in selected_atoms:
                state.atoms.remove(atom)
                state.uncertain_atoms.add(atom)

    return state_trajectory, action_trajectory
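
# Standalone illustration (hypothetical helper, toy string atoms): how the
# (lo, hi) partial-observability range above hides atoms. In the real code the
# states store parsed literals rather than strings; this sketch only mirrors
# the sampling logic.
def _mask_atoms_example(atoms, lo, hi, seed=0):
    """Return (observed, hidden) after hiding between lo and hi atoms."""
    import random
    rng = random.Random(seed)
    n_hidden = rng.randint(min(lo, len(atoms)), min(hi, len(atoms)))
    hidden = set(rng.sample(sorted(atoms), n_hidden))
    observed = set(atoms) - hidden
    return observed, hidden

# For example, _mask_atoms_example({"on(a,b)", "clear(a)", "handempty()"}, 1, 2)
# hides one or two of the three atoms and returns the rest as observed.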
def run_probabilistic_planning_demo(env, planner_name, verbose=False, num_epi=20,
                                    outdir='/tmp', fps=3):
    """Probabilistic planning via simple determinization: determinize the
    domain, plan on the deterministic version, and average the reward obtained
    by executing the plan over num_epi episodes."""
    if outdir is None:
        outdir = "/tmp/{}".format(type(env).__name__)
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # Wrap the environment so that rendered frames are saved to a GIF.
    if env._render:
        if env._problem_index_fixed:
            problem_idx = env._problem_idx
            video_path = os.path.join(outdir, 'planning_{}_{}_{}_demo.gif'.format(
                planner_name, env.spec.id, problem_idx))
        else:
            video_path = os.path.join(outdir, 'planning_{}_{}_demo.gif'.format(
                planner_name, env.spec.id))
        env = VideoWrapper(env, video_path, fps=fps)

    avg_reward = 0
    for _ in range(num_epi):
        obs, debug_info = env.reset()

        # Determinize the probabilistic domain and plan on the result.
        domain = PDDLDomainParser(debug_info["domain_file"])
        domain.determinize()
        domain.write("/tmp/domain.pddl")
        plan = run_planner("/tmp/domain.pddl", debug_info['problem_file'],
                           planner_name)

        # Convert each plan step into a grounded environment action.
        actions = []
        for s in plan:
            a = parse_plan_step(
                s,
                env.domain.operators.values(),
                env.action_predicates,
                obs.objects,
                operators_as_actions=env.operators_as_actions
            )
            actions.append(a)

        # Execute the plan.
        tot_reward = 0.
        for action in actions:
            if verbose:
                print("Obs:", obs)
                print("Act:", action)
            obs, reward, done, _ = env.step(action)
            env.render()
            tot_reward += reward
            if verbose:
                print("Rew:", reward)
            if done:
                break

        if verbose:
            print("Final obs:", obs)
            print("Got total reward:", tot_reward)
            print()
        avg_reward += tot_reward / num_epi

    print("Average reward over {} episodes was {}".format(num_epi, avg_reward))
    env.close()
    if verbose:
        input("press enter to continue to next problem")
    return avg_reward
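
# Minimal end-to-end sketch, assuming the pddlgym package is installed and
# registers Gym environments such as "PDDLEnvBlocks-v0"; the environment name,
# planner choice, and output directory are illustrative assumptions, and the
# demo expects an environment that exposes PDDLGym's internal attributes
# (e.g. `_render`).
if __name__ == "__main__":
    import gym
    import pddlgym  # noqa: F401  (importing registers the PDDL environments)

    demo_env = gym.make("PDDLEnvBlocks-v0")
    run_planning_demo(demo_env, "ff", outdir="/tmp/blocks_demo",
                      verbose=True, seed=0, check_reward=False)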