def run(
    base_config: Dict[str, Any],
    ray_server: str,
    init_kwargs: Dict[str, Any],
    exp_name: str,
    spec: Dict[str, Any],
    # The annotation is quoted so that correcting it does not require `Tuple` to be
    # in scope when the function is defined.
    # NOTE(review): add `Tuple` to the typing imports if it is not imported already.
) -> "Tuple[ray.tune.ExperimentAnalysis, str]":
    """Run a Ray Tune experiment that executes `worker_fn` for each trial.

    `worker_fn`, `worker_name` and `ex` are not parameters; they are resolved from
    the scope this function is defined in.

    Args:
        base_config: configuration partially applied to `worker_fn` as its first
            positional argument (after being copied with `utils.sacred_copy`).
        ray_server: passed to `ray.init` as `address`.
        init_kwargs: extra keyword arguments forwarded to `ray.init`.
        exp_name: experiment name; it is part of both the trainable's hash and
            the experiment ID.
        spec: Tune specification. Must contain the keys "config" and
            "run_kwargs"; may contain "sync_config", whose value is expanded as
            keyword arguments to `tune.SyncConfig`.

    Returns:
        A pair `(result, exp_id)`: whatever `tune.run` returned, and the
        experiment ID that was passed to it as `name`.
    """
    try:
        # Initialise inside the `try` so that a failure anywhere during setup still
        # reaches `ray.shutdown()` below.
        ray.init(address=ray_server, **init_kwargs)

        # ReadOnlyDict's aren't serializable: see sacred issue #499
        base_config = utils.sacred_copy(base_config)
        spec = utils.sacred_copy(spec)

        # We have to register the function we're going to call with Ray.
        # We partially apply worker_fn, so it's different for each experiment.
        # Compute a hash based on the config to make sure it has a unique name!
        # Note Ray does let you pass a worker_fn directly without registering, but
        # then it registers using the function name (which may not be unique).
        cfg = {"base_config": base_config, "exp_name": exp_name}
        cfg_str = json.dumps(cfg)
        hasher = hashlib.md5()  # we are not worried about security here
        hasher.update(cfg_str.encode("utf8"))
        cfg_hash = hasher.hexdigest()

        trainable_name = f"{worker_name}-{cfg_hash}"
        trainable_fn = functools.partial(worker_fn, base_config)
        tune.register_trainable(trainable_name, trainable_fn)

        exp_id = f"{ex.path}/{exp_name}/{utils.make_timestamp()}-{uuid.uuid4().hex}"

        # Disable TensorBoard logger: fails due to the spec containing string
        # variables.
        tune_loggers = [tune.logger.JsonLogger, tune.logger.CSVLogger]

        sync_config = None
        if "sync_config" in spec:
            sync_config = tune.SyncConfig(**spec["sync_config"])

        result = tune.run(
            trainable_name,
            name=exp_id,
            config=spec["config"],
            sync_config=sync_config,
            loggers=tune_loggers,
            **spec["run_kwargs"],
        )
    finally:
        ray.shutdown()

    return result, exp_id
def run(
    base_config: Dict[str, Any],
    ray_server: str,
    init_kwargs: Dict[str, Any],
    exp_name: str,
    spec: Dict[str, Any],
    # The annotation is quoted so that correcting it does not require `Tuple` to be
    # in scope when the function is defined.
    # NOTE(review): add `Tuple` to the typing imports if it is not imported already.
) -> "Tuple[ray.tune.ExperimentAnalysis, str]":
    """Run a Ray Tune experiment that executes `worker_fn` for each trial.

    `worker_fn`, `worker_name` and `ex` are not parameters; they are resolved from
    the scope this function is defined in.

    Args:
        base_config: configuration partially applied to `worker_fn` as its first
            positional argument (after being copied with `utils.sacred_copy`).
        ray_server: passed to `ray.init` as `address`.
        init_kwargs: extra keyword arguments forwarded to `ray.init`.
        exp_name: experiment name; it is part of both the trainable's hash and
            the experiment ID.
        spec: Tune specification. Must contain the keys "config" and
            "run_kwargs".

    Returns:
        A pair `(result, exp_id)`: whatever `tune.run` returned, and the
        experiment ID that was passed to it as `name`.
    """
    try:
        # `address` replaces the deprecated `redis_address` keyword and matches the
        # other `ray.init` calls in this file. Initialising inside the `try`
        # guarantees `ray.shutdown()` runs even if setup fails part-way.
        ray.init(address=ray_server, **init_kwargs)

        # ReadOnlyDict's aren't serializable: see sacred issue #499
        base_config = utils.sacred_copy(base_config)
        spec = utils.sacred_copy(spec)

        # We have to register the function we're going to call with Ray.
        # We partially apply worker_fn, so it's different for each experiment.
        # Compute a hash based on the config to make sure it has a unique name!
        # Note Ray does let you pass a worker_fn directly without registering, but
        # then it registers using the function name (which may not be unique).
        cfg = {"base_config": base_config, "exp_name": exp_name}
        cfg_str = json.dumps(cfg)
        hasher = hashlib.md5()  # we are not worried about security here
        hasher.update(cfg_str.encode("utf8"))
        cfg_hash = hasher.hexdigest()

        trainable_name = f"{worker_name}-{cfg_hash}"
        trainable_fn = functools.partial(worker_fn, base_config)
        tune.register_trainable(trainable_name, trainable_fn)

        exp_id = f"{ex.path}/{exp_name}/{utils.make_timestamp()}-{uuid.uuid4().hex}"

        result = tune.run(
            trainable_name,
            name=exp_id,
            config=spec["config"],
            # TODO(adam): delete next line when ray #6126 merged
            checkpoint_freq=10000000,
            **spec["run_kwargs"],
        )
    finally:
        ray.shutdown()

    return result, exp_id
def fit_model(
    _run,
    ray_server: str,
    init_kwargs: Dict[str, Any],
    activation_glob: str,
    output_root: str,
    max_timesteps: int,
    data_type,
    model_class,
    model_kwargs,
    train_opponent,
    train_percentage,
):
    """Fits density models for each environment and victim type matched by
    `activation_glob`, saving resulting models to `output_root`.

    Works by repeatedly calling `density_fitter`, running in parallel via Ray.

    Args:
        _run: run object handed to `utils.add_artifacts`.
        ray_server: passed to `ray.init` as `address`.
        init_kwargs: extra keyword arguments forwarded to `ray.init`.
        activation_glob: glob pattern selecting activation files. Only paths whose
            basename looks like `<stem>_opponent_<opponent_type>.npz` are used;
            the rest are skipped with a debug log message.
        output_root: directory to write models under; a subdirectory named by
            `gen_exp_name(model_class, model_kwargs)` is used. If None, a
            temporary directory is used instead and removed before returning.
        max_timesteps, data_type, model_class, model_kwargs, train_opponent,
            train_percentage: forwarded unchanged to `density_fitter.remote`
            (`model_kwargs` after `utils.sacred_copy`).
    """
    # Bound before the `try` so the `finally` clause can always inspect it, even
    # when an early step such as `ray.init` raises. Otherwise the cleanup itself
    # would fail with UnboundLocalError and hide the real error.
    tmp_dir = None
    try:
        ray.init(address=ray_server, **init_kwargs)

        # Find activation paths for each environment & victim-path tuple
        stem_pattern = re.compile(r"(.*)_opponent_.*\.npz")
        # A single quantifier captures the same text as the previous nested
        # `([^\s]+)+` on every successful match, without the exponential
        # backtracking the nested form suffers on names that do not match.
        opponent_pattern = re.compile(r".*_opponent_([^\s]+)\.npz")
        # activation_paths is indexed by [env_victim][opponent_type] where
        # env_victim is e.g. 'SumoHumans-v0_victim_zoo_1' and opponent_type is
        # e.g. 'ppo2_1'.
        activation_paths = {}
        for activation_path in glob.glob(activation_glob):
            activation_dir = os.path.basename(activation_path)
            stem_match = stem_pattern.match(activation_dir)
            if stem_match is None:
                logger.debug(f"Skipping {activation_path}")
                continue
            opponent_match = opponent_pattern.match(activation_dir)
            if opponent_match is None:
                # E.g. whitespace in the opponent part of the name: skip it the
                # same way as a missing stem rather than crashing on `None`.
                logger.debug(f"Skipping {activation_path}")
                continue

            stem = stem_match.groups()[0]
            opponent_type = opponent_match.groups()[0]
            activation_paths.setdefault(stem, {})[opponent_type] = activation_path

        # Create temporary output directory (if needed)
        if output_root is None:
            tmp_dir = tempfile.TemporaryDirectory()
            output_root = tmp_dir.name
        else:
            exp_name = gen_exp_name(model_class, model_kwargs)
            output_root = os.path.join(output_root, exp_name)

        # Fit density model and save weights
        results = []
        for stem, paths in activation_paths.items():
            output_dir = osp.join(output_root, stem)
            os.makedirs(output_dir)
            future = density_fitter.remote(
                paths,
                output_dir,
                model_class,
                utils.sacred_copy(model_kwargs),
                max_timesteps,
                data_type,
                train_opponent,
                train_percentage,
            )
            results.append(future)

        ray.get(results)  # block until all jobs have finished
        utils.add_artifacts(_run, output_root, ingredient=fit_model_ex)
    finally:
        # Clean up temporary directory (if needed)
        if tmp_dir is not None:
            tmp_dir.cleanup()
        ray.shutdown()
def score_agent(
    _run,
    _seed,
    env_name,
    agent_a_path,
    agent_b_path,
    agent_a_type,
    agent_b_type,
    record_traj,
    record_traj_params,
    transparent_params,
    num_env,
    videos,
    video_params,
    mask_agent_index,
    noisy_agent_index,
    noisy_agent_magnitude,
    mask_agent_noise,
):
    """Load the given policies, run them in `env_name` and return their score.

    Builds `num_env` environments (optionally wrapped for video recording and
    trajectory recording), loads one policy per agent, and evaluates them with
    `get_empirical_score`. Recorded trajectories and videos are registered as
    artifacts.

    The vectorised environment is always closed and any temporary video directory
    is always removed, including when an error occurs part-way through.

    Args:
        _run: unused in this function.
        _seed: seed passed to `make_env`.
        env_name: environment identifier passed to `make_env` and `load_policy`.
        agent_a_path, agent_b_path: policy paths for `load_policy`. `agent_b_path`
            must be 'none' when the environment has a single agent.
        agent_a_type, agent_b_type: policy types for `load_policy`.
        record_traj: if truthy, wrap the environment in `TrajectoryRecorder` and
            save what it recorded.
        record_traj_params: must contain "agent_indices" and "save_dir".
        transparent_params: forwarded to `load_policy`.
        num_env: number of environments; a subprocess-based vectorised
            environment is used when greater than one.
        videos: if truthy, record videos with `VideoWrapper`.
        video_params: must contain "save_dir" (None means use a temporary
            directory), "annotated", "annotation_params" and "single_file".
        mask_agent_index: if not None, build agent wrappers with
            `make_mask_agent_wrappers`.
        noisy_agent_index: if not None, index of the agent to wrap in
            `NoisyAgentWrapper`.
        noisy_agent_magnitude: constant value returned by that wrapper's
            `noise_annealer`.
        mask_agent_noise: if not None, passed to `make_mask_agent_wrappers` as
            `noise_magnitude`.

    Returns:
        The value returned by `get_empirical_score(venv, agents)`.

    Raises:
        ValueError: the environment has one agent but `agent_b_path` is not 'none'.
        FileNotFoundError: `videos` is set but a per-environment video directory
            yielded no artifacts.
    """
    save_dir = video_params["save_dir"]
    # Always bound, so the cleanup code below never depends on which branch ran.
    tmp_dir = None
    video_dirs = []
    if videos:
        if save_dir is None:
            score_ex_logger.info(
                "No directory provided for saving videos; using a tmpdir instead,"
                " but videos will be saved to Sacred run directory"
            )
            tmp_dir = tempfile.TemporaryDirectory(prefix="score-videos")
            save_dir = tmp_dir.name
        video_dirs = [osp.join(save_dir, str(i)) for i in range(num_env)]

    try:
        agent_wrappers = {}
        if mask_agent_index is not None:
            mask_agent_kwargs = {}
            if mask_agent_noise is not None:
                mask_agent_kwargs["noise_magnitude"] = mask_agent_noise
            agent_wrappers = make_mask_agent_wrappers(
                env_name, mask_agent_index, **mask_agent_kwargs
            )

        video_params = utils.sacred_copy(video_params)  # Sacred issue #499

        def env_fn(i):
            env = make_env(env_name, _seed, i, None, agent_wrappers=agent_wrappers)
            if videos:
                if video_params["annotated"]:
                    if "multicomp" in env_name:
                        assert num_env == 1, "pretty videos requires num_env=1"
                        env = AnnotatedGymCompete(
                            env,
                            env_name,
                            agent_a_type,
                            agent_a_path,
                            agent_b_type,
                            agent_b_path,
                            mask_agent_index,
                            **video_params["annotation_params"],
                        )
                    else:
                        warnings.warn(
                            f"Annotated videos not supported for environment '{env_name}'"
                        )
                env = VideoWrapper(env, video_dirs[i], video_params["single_file"])
            return env

        env_fns = [functools.partial(env_fn, i) for i in range(num_env)]

        if num_env > 1:
            venv = make_subproc_vec_multi_env(env_fns)
        else:
            venv = make_dummy_vec_multi_env(env_fns)

        try:
            if record_traj:
                venv = TrajectoryRecorder(venv, record_traj_params["agent_indices"])

            if venv.num_agents == 1 and agent_b_path != "none":
                raise ValueError(
                    "Set agent_b_path to 'none' if environment only uses one agent."
                )

            agent_paths = [agent_a_path, agent_b_path]
            agent_types = [agent_a_type, agent_b_type]
            zipped = list(zip(agent_types, agent_paths))
            agents = [
                load_policy(policy_type, policy_path, venv, env_name, i, transparent_params)
                for i, (policy_type, policy_path) in enumerate(zipped[: venv.num_agents])
            ]

            if noisy_agent_index is not None:
                agents[noisy_agent_index] = NoisyAgentWrapper(
                    agents[noisy_agent_index],
                    noise_annealer=lambda: noisy_agent_magnitude,
                )

            score = get_empirical_score(venv, agents)

            for agent in agents:
                if agent.sess is not None:
                    agent.sess.close()

            if record_traj:
                save_paths = venv.save(save_dir=record_traj_params["save_dir"])
                for save_path in save_paths:
                    score_ex.add_artifact(save_path, name="victim_activations.npz")
        finally:
            # Close the environments on every path. On success this still happens
            # before the video directories are read below, as it did originally.
            venv.close()

        if videos:
            for env_video_dir in video_dirs:
                added = False
                # `|=` rather than `any(...)`: every file must be processed, not
                # just the files up to the first one that was added.
                for file_path in os.listdir(env_video_dir):
                    added |= _save_video_or_metadata(env_video_dir, file_path)
                if not added:
                    raise FileNotFoundError(
                        f"No video artifacts found in path {env_video_dir}."
                    )
    finally:
        if tmp_dir is not None:
            tmp_dir.cleanup()

    for observer in score_ex.observers:
        if hasattr(observer, "dir"):
            _clean_video_directory_structure(observer)

    return score