def evaluate_model(algorithm: MarlAlgorithm, eval_scenario, sub_folder, trials=100):
    """
    Evaluate the given model on the given scenario.

    :param algorithm: the algorithm whose policy will be evaluated
    :param eval_scenario: name of the scenario to evaluate on
    :param sub_folder: folder to write evaluation logs to
    :param trials: number of games to run in evaluation
    :return: mean (red_score, green_score, blue_score) over all games
    """

    # note: this is the evaluation used by train.py; merge this with arena's evaluation script.

    # run them all in parallel at once to make sure we get exactly 'trials' number of environments
    os.makedirs(sub_folder, exist_ok=True)
    vec_env = make_env(eval_scenario, trials, name="eval", log_path=sub_folder)

    env_obs = vec_env.reset()
    rnn_states = algorithm.get_initial_rnn_state(vec_env.num_envs)
    env_terminals = np.zeros([len(rnn_states)], dtype=bool)
    vec_env.run_once = True

    # play the game...
    results = [(0, 0, 0) for _ in range(trials)]
    while not all(env_terminals):

        with torch.no_grad():
            roles = vec_env.get_roles()
            model_output, new_rnn_states = algorithm.forward(
                obs=torch.from_numpy(env_obs),
                rnn_states=rnn_states,
                roles=torch.from_numpy(roles))
            rnn_states[:] = new_rnn_states
            log_policy = model_output["log_policy"].detach().cpu().numpy()

        actions = utils.sample_action_from_logp(log_policy)
        env_obs, env_rewards, env_terminals, env_infos = vec_env.step(actions)

        # look for finished games
        for i, env in enumerate(vec_env.games):
            if env.round_outcome != "":
                results[i] = env.round_team_scores

    # collate results
    red_score = np.mean([r for r, g, b in results])
    green_score = np.mean([g for r, g, b in results])
    blue_score = np.mean([b for r, g, b in results])

    # make sure results have been written to the env log
    rescue.flush_logs()

    return red_score, green_score, blue_score
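

# A minimal usage sketch for evaluate_model (illustrative only, not part of the
# training pipeline). It assumes make_env, make_algo, and config from this
# module; the scenario name "red2" and the checkpoint path are placeholders.
def _example_evaluate_model(checkpoint_path="checkpoints/model.pt"):
    vec_env = make_env("red2", 8, name="example", log_path="/tmp/example")
    algorithm = make_algo(vec_env, config)
    algorithm.load(checkpoint_path)
    red_score, green_score, blue_score = evaluate_model(
        algorithm, "red2", "/tmp/example/eval", trials=10)
    print(f"red={red_score:.1f} green={green_score:.1f} blue={blue_score:.1f}")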


def run_evaluation(
        controllers: List[BaseController],  # controllers for each team
        scenario: str,
        log_path: str,
        trials=100,
):
    """
    Evaluate the performance of controllers in a given environment.

    :param controllers: list of controllers, one per team in (red, green, blue) order
    :param scenario: name of scenario to evaluate on
    :param log_path: path to log to
    :param trials: number of games to run in evaluation
    :return: list of (red, green, blue) score tuples, one per game
    """

    # run them all in parallel at once, and make sure we get exactly 'trials' number of
    # environments by forcing them to run only once (no reset).
    os.makedirs(log_path, exist_ok=True)
    vec_env = make_env(scenario, trials, name="eval", log_path=log_path)
    env_obs = vec_env.reset()

    # setup the controllers
    for team, controller in enumerate(controllers):
        controller.setup([player for player in vec_env.players if player.team == team])
        controller.reset()

    env_terminals = np.zeros([len(vec_env.players)], dtype=bool)
    vec_env.run_once = True

    roles = vec_env.get_roles()
    actions = np.zeros_like(roles)

    # play the game...
    results = [(0, 0, 0) for _ in range(trials)]
    while not all(env_terminals):

        actions *= 0

        # split players by team, and assign actions.
        for team, controller in enumerate(controllers):
            role_filter = (roles == team)
            actions[role_filter] = controller.forward(torch.from_numpy(env_obs)[role_filter])

        env_obs, env_rewards, env_terminals, env_infos = vec_env.step(actions)

        # look for finished games
        for i, env in enumerate(vec_env.games):
            if env.round_outcome != "":
                results[i] = env.round_team_scores

    # make sure results have been written to the env log
    rescue.flush_logs()

    return results
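

# A minimal usage sketch for run_evaluation (illustrative only). It assumes one
# controller per team in (red, green, blue) order; RandomController is a
# hypothetical BaseController subclass used purely as a placeholder.
def _example_run_evaluation():
    controllers = [RandomController() for _ in range(3)]
    results = run_evaluation(controllers, "red2", "/tmp/example/eval", trials=10)
    # results is a list of (red, green, blue) score tuples, one per game.
    mean_red, mean_green, mean_blue = np.mean(results, axis=0)
    print(f"red={mean_red:.1f} green={mean_green:.1f} blue={mean_blue:.1f}")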


def run_test(scenario_name, team, epochs=2):

    destination_folder = os.path.join(config.log_folder, scenario_name)
    os.makedirs(destination_folder, exist_ok=True)
    log_file = os.path.join(destination_folder, "env_0.csv")
    eval_log_file = os.path.join(destination_folder, "eval", "env_0.csv")

    # our MARL environments are handled like vectorized environments
    # (make_env_fn is named so as not to shadow the module-level make_env)
    make_env_fn = lambda: RescueTheGeneralEnv(
        scenario_name, config.parallel_envs, name="test", log_file=log_file)
    vec_env = MultiAgentVecEnv([make_env_fn for _ in range(config.parallel_envs)])
    algorithm = make_algo(vec_env, config)

    step_counter = 0
    for epoch in range(epochs):
        evaluate_model(algorithm, scenario_name, f"{destination_folder}/eval", trials=100)
        if epoch != 0:
            print()

        # flush the log buffer
        rescue.flush_logs()
        results = load_results(eval_log_file)
        epoch_score = get_score(results, team)
        print(f" -eval_{epoch}: [{epoch_score:.1f}]", end='')
        step_counter = learn(algorithm, step_counter, (epoch + 1) * 1e6, verbose=config.verbose == 1)
    print()

    evaluate_model(algorithm, scenario_name, f"{destination_folder}/eval", trials=100)
    rescue.flush_logs()
    results = load_results(eval_log_file)
    final_score = get_score(results, team)
    print(f" -final_eval: {final_score}")

    video.export_video(f"{destination_folder}/{scenario_name}.mp4", algorithm, scenario_name)

    try:
        export_graph(eval_log_file, epoch=epochs, png_base_name="results")
    except Exception as e:
        # not worried about this not working...
        print(e)

    return results
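

# A minimal usage sketch for run_test (illustrative only). The scenario name
# "red2" is a placeholder, and the form of the team argument (index vs. name)
# is an assumption here; it is passed straight through to get_score.
def _example_run_test():
    results = run_test("red2", team="red", epochs=2)
    print(results)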


def train_model():
    """
    Train model on the environment using the "other agents are environment" method.
    :return:
    """

    print("=" * 60)

    start_epoch = 0

    if config.restore:
        # find path to restore from
        root = os.path.split(config.log_folder)[0]
        restore_points = [
            f for f in os.listdir(root)
            if os.path.isdir(os.path.join(root, f)) and config.run in f
        ]
        if len(restore_points) == 0:
            raise Exception(f"No restore points matching {config.run} found.")
        if len(restore_points) > 1:
            raise Exception(f"Multiple restore points matching {config.run} found. {restore_points}")
        config.log_folder = os.path.join(root, restore_points[0])
    else:
        # copy source files for later reference
        from shutil import copyfile
        for filename in ["train.py", "rescue.py"]:
            copyfile(filename, f"{config.log_folder}/{filename}")

        # make a copy of the environment parameters
        with open(f"{config.log_folder}/config.txt", "w") as f:
            f.write(str(config))

    vec_env = make_env(config.train_scenarios, config.parallel_envs, name="train", log_path=config.log_folder)

    print("Scenario parameters:")
    scenario_descriptions = set(str(env.scenario) for env in vec_env.games)
    for description in scenario_descriptions:
        print(description)
    print()
    print("Config:")
    print(config)
    print()

    algorithm = make_algo(vec_env, config)

    if config.restore:
        # load model
        algorithm.load(os.path.join(config.log_folder, "model.pt"))
        start_epoch = int(algorithm.t // 1e6)
        print(f"Restored from checkpoint [{start_epoch}]")

    # reset env
    algorithm.reset()

    print("=" * 60)

    start_time = time.time()
    step_counter = algorithm.t

    for epoch in range(start_epoch, config.epochs):
        global CURRENT_EPOCH
        CURRENT_EPOCH = epoch

        print()
        print(f"Training epoch {epoch} on experiment {config.log_folder}")

        # perform evaluations (if required)
        for index, eval_scenario in enumerate(config.eval_scenarios):
            sub_folder = f"{config.log_folder}/eval_{index}"
            os.makedirs(sub_folder, exist_ok=True)
            results_file = os.path.join(sub_folder, "results.csv")

            scores = evaluate_model(algorithm, eval_scenario, sub_folder, trials=100)
            rounded_scores = tuple(round(float(score), 1) for score in scores)

            print(f" -evaluation against {str(eval_scenario):<40} {rounded_scores}")

            # generate a video
            if config.export_video:
                video.export_video(f"{sub_folder}/evaluation_{epoch:03}_M.mp4", algorithm, eval_scenario)

            # write results to text file
            if not os.path.exists(results_file):
                with open(results_file, "w") as f:
                    f.write("epoch, red_score, green_score, blue_score\n")
            with open(results_file, "a+") as f:
                f.write(f"{epoch}, {scores[0]}, {scores[1]}, {scores[2]}\n")

            # flush buffer
            rescue.flush_logs()

            try:
                log_file = os.path.join(sub_folder, "env_0.csv")
                export_graph(log_file, epoch=epoch, png_base_name=f"eval_{index}")
            except Exception as e:
                # not worried about this not working...
                print(e)

        # export training video
        if config.export_video:
            video.export_video(f"{config.log_folder}/training_{epoch:03}_M.mp4", algorithm, config.train_scenarios[0])

        # save model
        if config.save_model == "all":
            algorithm.save(f"{config.log_folder}/model_{epoch:03}_M.pt")
        elif config.save_model == "none":
            pass
        elif config.save_model == "recent":
            algorithm.save(f"{config.log_folder}/model.pt")
        else:
            # only catch the int conversion here, so that a failure in
            # algorithm.save is not misreported as a bad parameter.
            try:
                save_every = int(config.save_model)
            except ValueError:
                raise ValueError("Invalid save model parameter, use [none|recent|all|0..n].")
            if epoch % save_every == 0:
                algorithm.save(f"{config.log_folder}/model_{epoch:03}_M.pt")

        algorithm.save(f"{config.log_folder}/model.pt")

        step_counter = learn(algorithm, step_counter, (epoch + 1) * 1e6, verbose=config.verbose == 1)

        print()

        # save logs
        algorithm.save_logs()

        # flush the log buffer and print scores
        rescue.flush_logs()
        print_scores(epoch=epoch)

    algorithm.save(f"{config.log_folder}/model_final.pt")

    if config.export_video:
        video.export_video(f"{config.log_folder}/ppo_run_{config.epochs:03}_M.mp4", algorithm, config.train_scenarios[0])

    time_taken = time.time() - start_time
    print(f"Finished training after {time_taken/60/60:.1f}h.")
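

# A minimal entry-point sketch (illustrative only). train_model reads all of
# its settings from the global config object, so a typical caller populates
# config first; the values below are placeholders, and in practice they would
# come from command-line parsing defined elsewhere.
def _example_main():
    config.train_scenarios = ["red2"]
    config.eval_scenarios = ["red2"]
    config.epochs = 10
    config.restore = False
    train_model()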