def aug_opt(
        name, train_spec, scheduler, inner_config_dir,
        resume=None, fresh=False, search_alg=None,
):
    """Build a Tune Experiment from ``train_spec`` and launch it.

    Args:
        name: experiment name passed to Tune.
        train_spec: JSON-style experiment spec; its ``config.config_dir``
            entry is overwritten with ``inner_config_dir``.
        scheduler: Tune trial scheduler instance.
        inner_config_dir: config directory injected into the spec.
        resume: whether to resume a previous run; when None it defaults
            to the opposite of ``fresh``.
        fresh: when True, delete any existing results directory for this
            experiment before starting.
        search_alg: optional Tune search algorithm.
    """
    from ray.tune import Experiment, run

    train_spec['config']['config_dir'] = inner_config_dir

    # Default behavior: resume unless the caller explicitly asked for a
    # fresh run.
    if resume is None:
        resume = not fresh

    experiment = Experiment.from_json(name=name, spec=train_spec)

    # A fresh run wipes any results left over from a previous run with
    # the same experiment name.
    if fresh and os.path.exists(experiment.local_dir):
        import shutil
        shutil.rmtree(experiment.local_dir)

    run(
        experiment,
        name=name,
        scheduler=scheduler,
        reuse_actors=True,
        verbose=True,
        resume=resume,
        search_alg=search_alg,
        trial_name_creator=tune.function(trial_str_creator),
    )
def run_experiment(args, parser):
    """Initialize Ray, assemble the experiment spec, and launch it via Tune.

    Reads the YAML config file named by ``args.config_file`` (required),
    layers ``args.config`` and any ``args.cfg_mixins`` files on top of it,
    validates that a trainable (``run``) and an environment (``env``) are
    specified, starts Ray (optionally as a simulated multi-node cluster),
    then runs the experiment with checkpointing configured.

    Args:
        args: parsed command-line namespace (config file, ray sizing,
            pbt/resume/queue flags, experiment name, stop_seconds, ...).
        parser: the argparse parser, used only to report missing required
            settings via ``parser.error``.

    Raises:
        Exception: if no config file was supplied.
    """
    # args.ray_object_store_memory = int(1e10)
    args.ray_redis_max_memory = int(2e9)

    if args.config_file:
        with open(args.config_file) as f:
            # Explicit Loader: a bare yaml.load() is deprecated and unsafe
            # on untrusted input (it can construct arbitrary Python objects).
            exp = yaml.load(f, Loader=yaml.FullLoader)
    else:
        raise Exception('No config file!')

    exp = merge_dicts(exp, args.config)
    log.info('Num workers: %d, num_envs_per_worker: %d',
             exp['config']['num_workers'], exp['config']['num_envs_per_worker'])

    # Mixin files override/extend the base config, applied in order.
    if args.cfg_mixins is not None:
        for cfg_mixin_file in args.cfg_mixins:
            with open(cfg_mixin_file, 'r') as f:
                override_cfg = yaml.load(f, Loader=yaml.FullLoader)
                log.info('Overriding parameters from %s: %r', cfg_mixin_file, override_cfg)
                exp = merge_dicts(exp, override_cfg)

    if not exp.get("run"):
        parser.error("the following arguments are required: --run")
    if not exp.get("env") and not exp.get("config", {}).get("env"):
        parser.error("the following arguments are required: --env")

    if args.ray_num_nodes:
        # Simulated multi-node cluster: add one node per iteration, then
        # connect to its redis address.
        cluster = Cluster()
        for _ in range(args.ray_num_nodes):
            cluster.add_node(
                num_cpus=args.ray_num_cpus or 1,
                num_gpus=args.ray_num_gpus or 0,
                object_store_memory=args.ray_object_store_memory,
                redis_max_memory=args.ray_redis_max_memory,
            )
        ray.init(redis_address=cluster.redis_address, local_mode=args.local_mode)
    else:
        ray.init(
            redis_address=args.redis_address,
            object_store_memory=args.ray_object_store_memory,
            redis_max_memory=args.ray_redis_max_memory,
            num_cpus=args.ray_num_cpus,
            num_gpus=args.ray_num_gpus,
            local_mode=args.local_mode,
        )

    exp = Experiment.from_json(args.experiment_name, exp)

    exp.spec['checkpoint_freq'] = 20
    if args.pbt:
        # PBT clones trials from checkpoints, so checkpoint more frequently.
        exp.spec['checkpoint_freq'] = 3

    exp.spec['checkpoint_at_end'] = True
    # exp.spec['checkpoint_score_attr'] = 'episode_reward_mean'
    exp.spec['keep_checkpoints_num'] = 5

    if args.stop_seconds > 0:
        exp.spec['stop'] = {'time_total_s': args.stop_seconds}

    extra_kwargs = {}
    if args.pbt:
        # PBT mutates trial configs between generations; reusing actors
        # would keep stale configs alive.
        extra_kwargs['reuse_actors'] = False

    run(
        exp,
        name=args.experiment_name,
        scheduler=make_custom_scheduler(args),
        resume=args.resume,
        queue_trials=args.queue_trials,
        **extra_kwargs,
    )
config["multiagent"] = { 'policy_graphs': policy_graph, 'policy_mapping_fn': tune.function(lambda agent_id: POLICY_ID), 'policies_to_train': [POLICY_ID] } env_name_list.append(env_name) config_list.append(config) # Register as rllib env register_env(env_name, create_env) exp_list = [] for config, env_name in zip(config_list, env_name_list): exp_tag = { "run": alg_run, "env": env_name, "config": { **config }, "checkpoint_freq": 10, "max_failures": 999, "stop": { "training_iteration": 50 }, "num_samples": 6, } exp_list.append(Experiment.from_json(args.exp_tag, exp_tag)) trials = run_experiments(experiments=exp_list)