def build_env(out_dir, _seed, env_name, num_env, victim_type, victim_index,
              mask_victim, mask_victim_kwargs, debug):
    """Construct a vectorized multi-agent environment for training.

    :param out_dir: output directory, forwarded to `utils.make_env`.
    :param _seed: base random seed (Sacred-injected), forwarded to `utils.make_env`.
    :param env_name: Gym environment ID; `multicomp/` environments get the
        `GymCompeteToOurs` pre-wrapper.
    :param num_env: number of parallel environment copies.
    :param victim_type: 'none' for a single-agent setting; anything else implies
        a two-agent environment with a victim present.
    :param victim_index: index of the victim agent (we take the other slot).
    :param mask_victim: if True, apply masking wrappers to the victim agent.
    :param mask_victim_kwargs: keyword arguments for `make_mask_agent_wrappers`.
    :param debug: if True, force the dummy (in-process) vectorized environment.
    :return: tuple `(multi_venv, our_idx)` — the vectorized environment and the
        index of the agent being trained.
    """
    pre_wrapper = GymCompeteToOurs if env_name.startswith('multicomp/') else None

    # Only mask the victim's observations when explicitly requested.
    if mask_victim:
        agent_wrappers = make_mask_agent_wrappers(env_name, victim_index,
                                                  **mask_victim_kwargs)
    else:
        agent_wrappers = {}

    # With no victim the environment is single-agent and we are index 0;
    # otherwise we occupy whichever slot the victim does not.
    our_idx = 0 if victim_type == 'none' else 1 - victim_index

    def env_fn(i):
        return utils.make_env(env_name, _seed, i, out_dir, our_idx,
                              pre_wrapper=pre_wrapper,
                              agent_wrappers=agent_wrappers)

    # Subprocess-based vectorization only when it pays off and we are not debugging.
    env_factory = make_dummy_vec_multi_env
    if num_env > 1 and not debug:
        env_factory = make_subproc_vec_multi_env
    multi_venv = env_factory([functools.partial(env_fn, i) for i in range(num_env)])

    if victim_type == 'none':
        assert multi_venv.num_agents == 1, "No victim only works in single-agent environments"
    else:
        assert multi_venv.num_agents == 2, "Need two-agent environment when victim"

    return multi_venv, our_idx
def build_env(out_dir, _seed, env_name, num_env, victim_type, victim_index,
              mask_victim, mask_victim_kwargs, lookback_params, debug):
    """Construct a vectorized multi-agent environment, with optional lookback support.

    :param out_dir: output directory, forwarded to `modelfree.envs.wrappers.make_env`.
    :param _seed: base random seed (Sacred-injected).
    :param env_name: Gym environment ID; `multicomp/` environments get the
        `GymCompeteToOurs` pre-wrapper.
    :param num_env: number of parallel environment copies.
    :param victim_type: 'none' for a single-agent setting; anything else implies
        a two-agent environment with a victim present.
    :param victim_index: index of the victim agent (we take the other slot).
    :param mask_victim: if True, apply masking wrappers to the victim agent.
    :param mask_victim_kwargs: keyword arguments for `make_mask_agent_wrappers`.
    :param lookback_params: dict; when `lookback_params['lb_num'] > 0`, the
        `OldMujocoResettableWrapper` pre-wrapper is added, and in debug mode the
        whole venv is wrapped in `DebugVenv`.
    :param debug: if True, use the dummy (in-process) vectorized environment.
    :return: tuple `(multi_venv, our_idx)`.
    """
    # Pre-wrappers are applied in list order inside make_env.
    pre_wrappers = []
    if env_name.startswith('multicomp/'):
        pre_wrappers.append(GymCompeteToOurs)
    if lookback_params['lb_num'] > 0:
        pre_wrappers.append(OldMujocoResettableWrapper)

    agent_wrappers = (make_mask_agent_wrappers(env_name, victim_index, **mask_victim_kwargs)
                      if mask_victim else {})

    # With no victim we are index 0; otherwise the slot the victim does not occupy.
    our_idx = 0 if victim_type == 'none' else 1 - victim_index

    def env_fn(i):
        return modelfree.envs.wrappers.make_env(env_name, _seed, i, out_dir, our_idx,
                                                pre_wrappers=pre_wrappers,
                                                agent_wrappers=agent_wrappers)

    # In-process venv for debugging or a single copy; subprocesses otherwise.
    if debug or num_env <= 1:
        make_vec_env = make_dummy_vec_multi_env
    else:
        make_vec_env = make_subproc_vec_multi_env
    multi_venv = make_vec_env([functools.partial(env_fn, i) for i in range(num_env)])

    if debug and lookback_params['lb_num'] > 0:
        multi_venv = DebugVenv(multi_venv)

    if victim_type == 'none':
        assert multi_venv.num_agents == 1, "No victim only works in single-agent environments"
    else:
        assert multi_venv.num_agents == 2, "Need two-agent environment when victim"

    return multi_venv, our_idx
def score_agent(_run, _seed, env_name, agent_a_path, agent_b_path, agent_a_type, agent_b_type,
                record_traj, record_traj_params, transparent_params, num_env, videos,
                video_params, mask_agent_index, noisy_agent_index, noisy_agent_magnitude,
                mask_agent_noise):
    """Pit two agents against each other and compute an empirical score.

    Builds a (possibly video-recording) vectorized multi-agent environment, loads
    both policies, evaluates them with `get_empirical_score`, then registers any
    recorded trajectories and videos as Sacred artifacts before cleaning up.

    :param _run, _seed: injected by Sacred.
    :param env_name: Gym environment ID; 'multicomp' environments get the
        `GymCompeteToOurs` pre-wrapper and support annotated videos.
    :param agent_a_path, agent_b_path: policy paths passed to `load_policy`;
        `agent_b_path` must be 'none' for single-agent environments.
    :param agent_a_type, agent_b_type: policy types passed to `load_policy`.
    :param record_traj: if True, wrap the venv in `TrajectoryRecorder` and save
        trajectories as Sacred artifacts afterwards.
    :param record_traj_params: dict with 'agent_indices' and 'save_dir'.
    :param transparent_params: forwarded to `load_policy` — semantics defined
        elsewhere; presumably controls policy transparency/activation exposure.
    :param num_env: number of parallel environment copies (must be 1 for
        annotated videos).
    :param videos: if True, record videos via `VideoWrapper` and upload them.
    :param video_params: dict with 'save_dir', 'annotated', 'single_file',
        'annotation_params'.
    :param mask_agent_index: if not None, apply masking wrappers to this agent.
    :param noisy_agent_index: if not None, wrap this agent in `NoisyAgentWrapper`.
    :param noisy_agent_magnitude: constant noise level for the noisy agent.
    :param mask_agent_noise: optional noise magnitude for the masking wrappers.
    :return: the score computed by `get_empirical_score(venv, agents)`.
    """
    save_dir = video_params['save_dir']
    if videos:
        if save_dir is None:
            # Fall back to a temp dir; videos still reach Sacred via add_artifact below.
            score_ex_logger.info("No directory provided for saving videos; using a tmpdir instead,"
                                 "but videos will be saved to Sacred run directory")
            tmp_dir = tempfile.TemporaryDirectory()
            save_dir = tmp_dir.name
        else:
            tmp_dir = None
        # One video sub-directory per parallel environment copy.
        video_dirs = [osp.join(save_dir, str(i)) for i in range(num_env)]
    pre_wrappers = [GymCompeteToOurs] if 'multicomp' in env_name else []
    agent_wrappers = {}
    if mask_agent_index is not None:
        mask_agent_kwargs = {}
        if mask_agent_noise is not None:
            mask_agent_kwargs['noise_magnitude'] = mask_agent_noise
        agent_wrappers = make_mask_agent_wrappers(env_name, mask_agent_index, **mask_agent_kwargs)
    # Deep-copy so mutations don't leak into Sacred's config. Sacred issue #499
    video_params = utils.sacred_copy(video_params)

    def env_fn(i):
        # Builds environment copy i; closes over video_dirs/pre_wrappers above.
        env = make_env(env_name, _seed, i, None,
                       pre_wrappers=pre_wrappers, agent_wrappers=agent_wrappers)
        if videos:
            if video_params['annotated']:
                if 'multicomp' in env_name:
                    # Annotation overlays only support one env at a time.
                    assert num_env == 1, "pretty videos requires num_env=1"
                    env = AnnotatedGymCompete(env, env_name, agent_a_type, agent_a_path,
                                              agent_b_type, agent_b_path, mask_agent_index,
                                              **video_params['annotation_params'])
                else:
                    warnings.warn(f"Annotated videos not supported for environment '{env_name}'")
            env = VideoWrapper(env, video_dirs[i], video_params['single_file'])
        return env

    env_fns = [functools.partial(env_fn, i) for i in range(num_env)]
    if num_env > 1:
        venv = make_subproc_vec_multi_env(env_fns)
    else:
        venv = make_dummy_vec_multi_env(env_fns)

    if record_traj:
        venv = TrajectoryRecorder(venv, record_traj_params['agent_indices'])

    if venv.num_agents == 1 and agent_b_path != 'none':
        raise ValueError("Set agent_b_path to 'none' if environment only uses one agent.")

    agent_paths = [agent_a_path, agent_b_path]
    agent_types = [agent_a_type, agent_b_type]
    zipped = list(zip(agent_types, agent_paths))
    # Only load as many policies as the environment has agent slots.
    agents = [load_policy(policy_type, policy_path, venv, env_name, i, transparent_params)
              for i, (policy_type, policy_path) in enumerate(zipped[:venv.num_agents])]

    if noisy_agent_index is not None:
        # Constant annealer: noise magnitude stays fixed for the whole evaluation.
        agents[noisy_agent_index] = NoisyAgentWrapper(agents[noisy_agent_index],
                                                      noise_annealer=lambda: noisy_agent_magnitude)

    score = get_empirical_score(venv, agents)

    # Release each agent's TF session before closing the venv.
    for agent in agents:
        if agent.sess is not None:
            agent.sess.close()

    if record_traj:
        save_paths = venv.save(save_dir=record_traj_params['save_dir'])
        for save_path in save_paths:
            score_ex.add_artifact(save_path, name="victim_activations.npz")

    venv.close()

    if videos:
        for env_video_dir in video_dirs:
            try:
                for file_path in os.listdir(env_video_dir):
                    _save_video_or_metadata(env_video_dir, file_path)
            except FileNotFoundError:
                # Best-effort upload: a missing directory just means no videos there.
                warnings.warn("Can't find path {}; no videos from that path added as artifacts"
                              .format(env_video_dir))

        if tmp_dir is not None:
            tmp_dir.cleanup()

    # Tidy the layout of file-based Sacred observers (those exposing a 'dir').
    for observer in score_ex.observers:
        if hasattr(observer, 'dir'):
            _clean_video_directory_structure(observer)

    return score