示例#1
0
def generate_dagger_demos(env_name, seeds, fail_obss, fail_actions, mean_steps):
    """Relabel failed learner episodes with the bot's actions (DAgger-style).

    For every recorded failed episode the environment is re-seeded and the
    learner's recorded actions are replayed step by step.  Before each
    replayed step the bot is asked which action it would take; that action
    is stored together with the image and direction of the recorded
    observation.

    Two settings are read from the module-level ``args`` object:

    * ``args.dagger_trim_coef`` -- at most
      ``int(args.dagger_trim_coef * mean_steps)`` learner steps are replayed
      per episode (and never more than ``len(fail_obss[i]) - 1``).
    * ``args.continue_dagger`` -- when truthy, the bot keeps acting after
      the replayed prefix until the environment reports ``done``.

    Args:
        env_name: Environment id passed to ``gym.make``.
        seeds: Per-episode seeds; ``seeds[i]`` belongs to ``fail_obss[i]``.
        fail_obss: Per-episode lists of observation dicts; the keys
            ``'mission'``, ``'image'`` and ``'direction'`` are read.
        fail_actions: Per-episode lists of the actions the learner took.
        mean_steps: Scaled by ``args.dagger_trim_coef`` to cap the number of
            replayed steps.

    Returns:
        A list of ``(mission, packed_images, directions, actions)`` tuples,
        where ``packed_images`` is the result of ``blosc.pack_array`` on the
        stacked images.  Episodes that raise during generation are logged
        and skipped; episodes with no step to replay are skipped as well.
    """
    env = gym.make(env_name)
    agent = BotAgent(env)
    demos = []

    for i, episode_obss in enumerate(fail_obss):
        # Re-create the exact episode the learner failed on.
        env.seed(int(seeds[i]))

        new_obs = env.reset()
        agent.on_reset()

        env0_str = str(env)

        actions = []
        images = []
        directions = []
        debug_info = {'seed': [int(seeds[i])], 'actions': []}
        try:
            episode_actions = fail_actions[i]
            n_steps = min(int(args.dagger_trim_coef * mean_steps), len(episode_obss) - 1)
            if n_steps <= 0:
                # Without at least one replayed step there is no mission and
                # no `done` flag for this episode; previously such episodes
                # either hit a NameError or silently reused the values left
                # over from the previous episode.
                logger.warning(
                    "no learner steps to replay for seed {}; skipping".format(int(seeds[i])))
                continue

            done = False
            for j in range(n_steps):
                obs = episode_obss[j]
                # Explicit raise instead of `assert` so the replay check
                # still runs under `python -O`.
                if not check_obss_equality(obs, new_obs):
                    raise AssertionError(
                        "Observations {} of seed {} don't match".format(j, seeds[i]))
                mission = obs['mission']
                # Ask the bot for its action without committing it, then
                # tell the bot which action the learner actually took.
                action = agent.act(update_internal_state=False)['action']
                agent.bot.take_action(episode_actions[j])
                debug_info['actions'].append(episode_actions[j])
                new_obs, reward, done, _ = env.step(episode_actions[j])
                if done and reward > 0:
                    raise ValueError(
                        "The baby's actions shouldn't solve the task. Env0 {}, Env9{}, Seed {}, actions {}.".format(
                            env0_str, env.__str__(), int(seeds[i]), fail_actions[i]
                        ))
                actions.append(action)
                images.append(obs['image'])
                directions.append(obs['direction'])

            if args.continue_dagger:
                # Let the bot finish the episode from where the replay ended.
                obs = new_obs
                while not done:
                    action = agent.act(obs)['action']
                    debug_info['actions'].append(action)
                    new_obs, reward, done, _ = env.step(action)
                    agent.analyze_feedback(reward, done)
                    actions.append(action)
                    images.append(obs['image'])
                    directions.append(obs['direction'])
                    # Advance to the observation produced by this step;
                    # otherwise the bot would keep seeing (and we would keep
                    # recording) the same stale observation.
                    obs = new_obs
            print(debug_info, actions)

            demos.append((mission, blosc.pack_array(np.array(images)), directions, actions))

        except Exception as e:
            # Best effort: a broken episode is logged and skipped so the
            # remaining episodes can still be processed.
            logger.exception("error while generating demo #{}: {}. Env0 {}, Env9{}, Seed {}, actions {}.".format(
                len(demos), e, env0_str, env.__str__(), int(seeds[i]), fail_actions[i]))
            continue

    return demos
def generate_demos(env_name, seeds):
    """Roll out the bot once per seed and keep the rewarded episodes.

    Args:
        env_name: Environment id passed to ``gym.make``.
        seeds: Iterable of seeds; each one is cast to ``int`` and used to
            seed the environment before a single episode is played.

    Returns:
        A list of ``(mission, packed_images, directions, actions)`` tuples,
        one per episode whose final reward was positive.  ``packed_images``
        is the result of ``blosc.pack_array`` on the stacked images.
        Episodes ending with zero reward are logged as failures; episodes
        that raise are logged with a traceback and skipped.
    """
    env = gym.make(env_name)
    agent = BotAgent(env)
    demos = []

    for seed in seeds:
        # Fresh, deterministic episode for this seed.
        env.seed(int(seed))
        current_obs = env.reset()
        agent.on_reset()

        mission = current_obs["mission"]
        actions, images, directions = [], [], []

        try:
            while True:
                chosen = agent.act(current_obs)['action']
                next_obs, reward, finished, _ = env.step(chosen)
                agent.analyze_feedback(reward, finished)

                # Record the observation the action was chosen from, not
                # the one it produced.
                actions.append(chosen)
                images.append(current_obs['image'])
                directions.append(current_obs['direction'])

                if finished:
                    break
                current_obs = next_obs

            if reward > 0:
                packed_images = blosc.pack_array(np.array(images))
                demos.append((mission, packed_images, directions, actions))
            elif reward == 0:
                logger.info("failed to accomplish the mission")

        except Exception:
            # Log with traceback and move on to the next seed.
            logger.exception("error while generating demo #{}".format(
                len(demos)))

    return demos