def generate_dagger_demos(env_name, seeds, fail_obss, fail_actions, mean_steps):
    """Relabel recorded episodes with the bot's actions.

    For every recorded episode the environment is re-seeded and the recorded
    actions are replayed step by step.  At each replayed step the bot is asked
    which action it would take (without updating its internal state), and that
    action is stored together with the recorded observation's image and
    direction.  The bot is then told which action was really taken so that it
    keeps tracking the replayed trajectory.

    At most ``int(args.dagger_trim_coef * mean_steps)`` steps are replayed,
    and never the last recorded observation of an episode.  When
    ``args.continue_dagger`` is set, the bot then plays the episode to its
    end and those steps are appended to the demo as well.

    Args:
        env_name: id passed to ``gym.make``.
        seeds: per-episode seeds; ``seeds[i]`` must be convertible to ``int``.
        fail_obss: per-episode sequences of recorded observations (dicts with
            ``'mission'``, ``'image'`` and ``'direction'`` keys).
        fail_actions: per-episode sequences of recorded actions, aligned with
            ``fail_obss``.
        mean_steps: scaled by ``args.dagger_trim_coef`` to cap the number of
            replayed steps.

    Returns:
        A list of ``(mission, packed_images, directions, actions)`` tuples,
        one per episode that was processed without error.  Episodes that raise
        are logged via ``logger.exception`` and skipped.
    """
    env = gym.make(env_name)
    agent = BotAgent(env)
    demos = []

    for i in range(len(fail_obss)):
        # Recreate the episode's initial state from its seed.
        seed = int(seeds[i])
        env.seed(seed)
        new_obs = env.reset()
        agent.on_reset()

        # Snapshot of the initial environment, used only in error messages.
        env0_str = str(env)

        actions = []
        images = []
        directions = []
        debug_info = {'seed': [seed], 'actions': []}
        try:
            n_replay = min(int(args.dagger_trim_coef * mean_steps), len(fail_obss[i]) - 1)
            if n_replay <= 0:
                # Without a single replayed step there is no mission and no
                # `done` flag to work with; fail explicitly instead of hitting
                # a NameError further down.  The episode is skipped either way.
                raise ValueError("No steps to replay for seed {}".format(seed))

            for j in range(n_replay):
                obs = fail_obss[i][j]
                # Explicit check (not `assert`) so it still runs under `python -O`.
                if not check_obss_equality(obs, new_obs):
                    raise ValueError(
                        "Observations {} of seed {} don't match".format(j, seeds[i]))
                mission = obs['mission']

                # What the bot would do here, without committing to it ...
                action = agent.act(update_internal_state=False)['action']
                # ... then inform the bot of the action that is actually replayed.
                agent.bot.take_action(fail_actions[i][j])
                debug_info['actions'].append(fail_actions[i][j])

                new_obs, reward, done, _ = env.step(fail_actions[i][j])
                if done and reward > 0:
                    raise ValueError(
                        "The baby's actions shouldn't solve the task. Env0 {}, Env9{}, Seed {}, actions {}.".format(
                            env0_str, str(env), seed, fail_actions[i]
                        ))

                actions.append(action)
                images.append(obs['image'])
                directions.append(obs['direction'])

            if args.continue_dagger:
                # Let the bot finish the episode from where the replay stopped.
                obs = new_obs
                while not done:
                    action = agent.act(obs)['action']
                    debug_info['actions'].append(action)
                    new_obs, reward, done, _ = env.step(action)
                    agent.analyze_feedback(reward, done)

                    actions.append(action)
                    images.append(obs['image'])
                    directions.append(obs['direction'])

                    # Advance to the observation produced by this step;
                    # otherwise every continued step would record the same
                    # stale image and direction.
                    obs = new_obs

            print(debug_info, actions)

            demos.append((mission, blosc.pack_array(np.array(images)), directions, actions))

        except Exception as e:
            logger.exception("error while generating demo #{}: {}. Env0 {}, Env9{}, Seed {}, actions {}.".format(
                len(demos), e, env0_str, str(env), seed, fail_actions[i]))
            continue

    return demos
def generate_demos(env_name, seeds):
    """Roll out the bot once per seed and keep the rewarded episodes.

    Args:
        env_name: id passed to ``gym.make``.
        seeds: iterable of seeds; each one is converted to ``int`` and used to
            seed the environment before the episode is reset.

    Returns:
        A list of ``(mission, packed_images, directions, actions)`` tuples,
        one for every episode whose final reward is positive.  Episodes ending
        with zero reward are reported via ``logger.info``; episodes that raise
        are reported via ``logger.exception``.  Neither kind is returned.
    """
    env = gym.make(env_name)
    agent = BotAgent(env)
    demos = []

    for seed in seeds:
        # Fresh, reproducible episode for this seed.
        env.seed(int(seed))
        current_obs = env.reset()
        agent.on_reset()

        mission = current_obs["mission"]
        expert_actions, frames, headings = [], [], []
        finished = False

        try:
            while not finished:
                chosen = agent.act(current_obs)['action']
                next_obs, reward, finished, _ = env.step(chosen)
                agent.analyze_feedback(reward, finished)

                # Store the action with the observation it was chosen from.
                expert_actions.append(chosen)
                frames.append(current_obs['image'])
                headings.append(current_obs['direction'])

                current_obs = next_obs

            if reward > 0:
                packed_frames = blosc.pack_array(np.array(frames))
                demos.append((mission, packed_frames, headings, expert_actions))
            elif reward == 0:
                logger.info("failed to accomplish the mission")
        except Exception:
            # Log with traceback and carry on with the next seed.
            logger.exception("error while generating demo #{}".format(len(demos)))

    return demos