Example #1
0
def build_env(out_dir, _seed, env_name, num_env, victim_type, victim_index,
              mask_victim, mask_victim_kwargs, debug):
    """Build a vectorized multi-agent environment and report our agent's slot.

    Returns a tuple ``(multi_venv, our_idx)`` where ``our_idx`` is the agent
    index controlled by the policy being trained.
    """
    # Gym-compete environments need translating to our multi-agent API first.
    pre_wrapper = GymCompeteToOurs if env_name.startswith('multicomp/') else None

    if mask_victim:
        agent_wrappers = make_mask_agent_wrappers(env_name, victim_index,
                                                  **mask_victim_kwargs)
    else:
        agent_wrappers = {}

    # With no victim we control the sole agent (index 0); otherwise we take
    # the slot opposite the victim's.
    our_idx = 0 if victim_type == 'none' else 1 - victim_index

    def single_env(idx):
        return utils.make_env(env_name, _seed, idx, out_dir, our_idx,
                              pre_wrapper=pre_wrapper, agent_wrappers=agent_wrappers)

    # Subprocess vectorization only pays off with several envs; the dummy
    # (in-process) vec env is much easier to debug.
    use_subproc = (num_env > 1) and not debug
    vec_factory = make_subproc_vec_multi_env if use_subproc else make_dummy_vec_multi_env
    multi_venv = vec_factory([functools.partial(single_env, i) for i in range(num_env)])

    if victim_type == 'none':
        assert multi_venv.num_agents == 1, "No victim only works in single-agent environments"
    else:
        assert multi_venv.num_agents == 2, "Need two-agent environment when victim"

    return multi_venv, our_idx
Example #2
0
def build_env(out_dir, _seed, env_name, num_env, victim_type, victim_index,
              mask_victim, mask_victim_kwargs, lookback_params, debug):
    """Build a vectorized multi-agent environment with optional lookback support.

    Returns a tuple ``(multi_venv, our_idx)`` where ``our_idx`` is the agent
    index controlled by the policy being trained.
    """
    uses_lookback = lookback_params['lb_num'] > 0

    # Pre-wrappers run before everything else: translate gym-compete envs to
    # our API, and make MuJoCo state resettable when lookback is enabled.
    pre_wrappers = []
    if env_name.startswith('multicomp/'):
        pre_wrappers.append(GymCompeteToOurs)
    if uses_lookback:
        pre_wrappers.append(OldMujocoResettableWrapper)

    if mask_victim:
        agent_wrappers = make_mask_agent_wrappers(env_name, victim_index,
                                                  **mask_victim_kwargs)
    else:
        agent_wrappers = {}

    # With no victim we control the sole agent (index 0); otherwise we take
    # the slot opposite the victim's.
    our_idx = 0 if victim_type == 'none' else 1 - victim_index

    def single_env(idx):
        return modelfree.envs.wrappers.make_env(env_name, _seed, idx, out_dir,
                                                our_idx,
                                                pre_wrappers=pre_wrappers,
                                                agent_wrappers=agent_wrappers)

    # Subprocess vectorization only pays off with several envs; the dummy
    # (in-process) vec env is much easier to debug.
    use_subproc = (num_env > 1) and not debug
    vec_factory = make_subproc_vec_multi_env if use_subproc else make_dummy_vec_multi_env
    multi_venv = vec_factory([functools.partial(single_env, i) for i in range(num_env)])
    if debug and uses_lookback:
        multi_venv = DebugVenv(multi_venv)

    if victim_type == 'none':
        assert multi_venv.num_agents == 1, "No victim only works in single-agent environments"
    else:
        assert multi_venv.num_agents == 2, "Need two-agent environment when victim"

    return multi_venv, our_idx
Example #3
0
def score_agent(_run, _seed, env_name, agent_a_path, agent_b_path, agent_a_type, agent_b_type,
                record_traj, record_traj_params, transparent_params, num_env,
                videos, video_params, mask_agent_index, noisy_agent_index,
                noisy_agent_magnitude, mask_agent_noise):
    """Play agent A against agent B in `env_name` and return the empirical score.

    Optionally records trajectories, saves annotated videos, masks one agent's
    observations, or adds noise to one agent's actions. `_run` and `_seed` are
    presumably injected by Sacred's captured-function mechanism — TODO confirm
    against the experiment decorator (outside this chunk).
    """
    save_dir = video_params['save_dir']
    if videos:
        if save_dir is None:
            # No explicit directory: record into a tempdir; the files are
            # still preserved as Sacred artifacts further below.
            score_ex_logger.info("No directory provided for saving videos; using a tmpdir instead,"
                                 "but videos will be saved to Sacred run directory")
            tmp_dir = tempfile.TemporaryDirectory()
            save_dir = tmp_dir.name
        else:
            tmp_dir = None
        # One sub-directory per parallel environment.
        video_dirs = [osp.join(save_dir, str(i)) for i in range(num_env)]
    # Gym-compete environments must be translated to our multi-agent API first.
    pre_wrappers = [GymCompeteToOurs] if 'multicomp' in env_name else []

    agent_wrappers = {}
    if mask_agent_index is not None:
        mask_agent_kwargs = {}
        if mask_agent_noise is not None:
            mask_agent_kwargs['noise_magnitude'] = mask_agent_noise

        agent_wrappers = make_mask_agent_wrappers(env_name, mask_agent_index, **mask_agent_kwargs)

    video_params = utils.sacred_copy(video_params)  # Sacred issue #499

    def env_fn(i):
        # Build the i-th environment, optionally wrapped for video capture.
        env = make_env(env_name, _seed, i, None,
                       pre_wrappers=pre_wrappers,
                       agent_wrappers=agent_wrappers)
        if videos:
            if video_params['annotated']:
                if 'multicomp' in env_name:
                    assert num_env == 1, "pretty videos requires num_env=1"
                    env = AnnotatedGymCompete(env, env_name, agent_a_type, agent_a_path,
                                              agent_b_type, agent_b_path, mask_agent_index,
                                              **video_params['annotation_params'])
                else:
                    warnings.warn(f"Annotated videos not supported for environment '{env_name}'")
            env = VideoWrapper(env, video_dirs[i], video_params['single_file'])
        return env
    env_fns = [functools.partial(env_fn, i) for i in range(num_env)]

    if num_env > 1:
        venv = make_subproc_vec_multi_env(env_fns)
    else:
        venv = make_dummy_vec_multi_env(env_fns)

    if record_traj:
        venv = TrajectoryRecorder(venv, record_traj_params['agent_indices'])

    if venv.num_agents == 1 and agent_b_path != 'none':
        raise ValueError("Set agent_b_path to 'none' if environment only uses one agent.")

    # Load only as many policies as the environment has agent slots.
    agent_paths = [agent_a_path, agent_b_path]
    agent_types = [agent_a_type, agent_b_type]
    zipped = list(zip(agent_types, agent_paths))
    agents = [load_policy(policy_type, policy_path, venv, env_name, i, transparent_params)
              for i, (policy_type, policy_path) in enumerate(zipped[:venv.num_agents])]

    if noisy_agent_index is not None:
        # Constant-magnitude noise: the annealer always returns the same value.
        agents[noisy_agent_index] = NoisyAgentWrapper(agents[noisy_agent_index],
                                                      noise_annealer=lambda: noisy_agent_magnitude)

    score = get_empirical_score(venv, agents)

    # Release each agent's session (if any) before tearing down the envs.
    for agent in agents:
        if agent.sess is not None:
            agent.sess.close()

    if record_traj:
        save_paths = venv.save(save_dir=record_traj_params['save_dir'])
        for save_path in save_paths:
            # NOTE(review): every path is registered under the same artifact
            # name; later files may shadow earlier ones — confirm intended.
            score_ex.add_artifact(save_path, name="victim_activations.npz")

    venv.close()

    if videos:
        for env_video_dir in video_dirs:
            try:
                for file_path in os.listdir(env_video_dir):
                    _save_video_or_metadata(env_video_dir, file_path)

            except FileNotFoundError:
                # Best-effort: an env that never rendered has no directory.
                warnings.warn("Can't find path {}; no videos from that path added as artifacts"
                              .format(env_video_dir))

        if tmp_dir is not None:
            tmp_dir.cleanup()

    # Presumably normalizes the video layout inside each file-based Sacred
    # observer's run directory (helper defined elsewhere) — verify.
    for observer in score_ex.observers:
        if hasattr(observer, 'dir'):
            _clean_video_directory_structure(observer)

    return score