Example #1
def evaluate_model(algorithm: MarlAlgorithm,
                   eval_scenario,
                   sub_folder,
                   trials=100):
    """
    Evaluate given model in given environment.
    :param algorithm:
    :param trials:
    :return:
    """

    # note: this is the evaluation used by train.py; merge this with arena's evaluation script

    # run all trials in parallel at once so that we get exactly 'trials' environments
    os.makedirs(sub_folder, exist_ok=True)
    vec_env = make_env(
        eval_scenario,
        trials,
        name="eval",
        log_path=sub_folder,
    )
    env_obs = vec_env.reset()
    rnn_states = algorithm.get_initial_rnn_state(vec_env.num_envs)
    env_terminals = np.zeros([len(rnn_states)], dtype=bool)
    vec_env.run_once = True

    # play the game...
    results = [(0, 0, 0) for _ in range(trials)]
    while not all(env_terminals):

        with torch.no_grad():
            roles = vec_env.get_roles()
            model_output, new_rnn_states = algorithm.forward(
                obs=torch.from_numpy(env_obs),
                rnn_states=rnn_states,
                roles=torch.from_numpy(roles))
            rnn_states[:] = new_rnn_states

            log_policy = model_output["log_policy"].detach().cpu().numpy()
            actions = utils.sample_action_from_logp(log_policy)

        env_obs, env_rewards, env_terminals, env_infos = vec_env.step(actions)

        # look for finished games
        for i, env in enumerate(vec_env.games):
            if env.round_outcome != "":
                results[i] = env.round_team_scores

    # collate results
    red_score = np.mean([r for r, g, b in results])
    green_score = np.mean([g for r, g, b in results])
    blue_score = np.mean([b for r, g, b in results])

    # make sure results have been written to the env log
    rescue.flush_logs()

    return red_score, green_score, blue_score
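
The loop above draws one action per agent from the network's log-probabilities via utils.sample_action_from_logp. As a minimal standalone sketch of what such a sampler can look like (an assumption for illustration, not the project's actual implementation):

import numpy as np

def sample_action_from_logp(log_policy: np.ndarray) -> np.ndarray:
    """Sample one action per row from a batch of log-probabilities."""
    # Gumbel-max trick: adding i.i.d. Gumbel noise to log-probabilities and
    # taking the argmax draws a sample from each categorical distribution.
    gumbel = np.random.gumbel(size=log_policy.shape)
    return np.argmax(log_policy + gumbel, axis=-1)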
Example #2
def run_evaluation(
    controllers: List[BaseController],  # controllers for each team
    scenario: str,
    log_path: str,
    trials=100,
):
    """
    Evalulate the performance of controllers in a given environment.

    :param controllers: list of tuples (red,green,blue)
    :param scenario: name of scenario to evaluate on
    :param log_path: path to log to 
    :param trials: number of games to run in evaluation
    :return: 
    """

    # run all trials in parallel at once and make sure we get exactly 'trials' environments by
    # forcing them to run only once (no reset)
    os.makedirs(log_path, exist_ok=True)
    vec_env = make_env(scenario, trials, name="eval", log_path=log_path)
    env_obs = vec_env.reset()

    # setup the controllers
    for team, controller in enumerate(controllers):
        controller.setup(
            [player for player in vec_env.players if player.team == team])
        controller.reset()

    env_terminals = np.zeros([len(vec_env.players)], dtype=bool)
    vec_env.run_once = True

    roles = vec_env.get_roles()
    actions = np.zeros_like(roles)

    # play the game...
    results = [(0, 0, 0) for _ in range(trials)]
    while not all(env_terminals):

        actions *= 0

        # split players by team, and assign actions.
        for team, controller in enumerate(controllers):
            role_filter = (roles == team)
            actions[role_filter] = controller.forward(
                torch.from_numpy(env_obs)[role_filter])

        env_obs, env_rewards, env_terminals, env_infos = vec_env.step(actions)

        # look for finished games
        for i, env in enumerate(vec_env.games):
            if env.round_outcome != "":
                results[i] = env.round_team_scores

    # make sure results have been written to the env log
    rescue.flush_logs()

    return results
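
run_evaluation only requires that each controller expose setup, reset, and a batched forward. A hypothetical controller satisfying that interface (RandomController and n_actions are illustrative names, not part of the project):

import numpy as np
import torch

class RandomController:
    """Baseline controller that picks uniformly random actions."""

    def __init__(self, n_actions: int):
        self.n_actions = n_actions
        self.players = []

    def setup(self, players):
        # remember which players this controller is responsible for
        self.players = players

    def reset(self):
        pass

    def forward(self, obs: torch.Tensor) -> np.ndarray:
        # one uniformly random action per observation in the batch
        return np.random.randint(0, self.n_actions, size=len(obs))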
Example #3
def run_test(scenario_name, team, epochs=2):

    destination_folder = os.path.join(config.log_folder, scenario_name)
    os.makedirs(destination_folder, exist_ok=True)
    log_file = os.path.join(destination_folder, "env_0.csv")
    eval_log_file = os.path.join(destination_folder, "eval", "env_0.csv")

    # our MARL environments are handled like vectorized environments
    def make_env():
        return RescueTheGeneralEnv(
            scenario_name, config.parallel_envs, name="test", log_file=log_file)

    vec_env = MultiAgentVecEnv([make_env for _ in range(config.parallel_envs)])

    algorithm = make_algo(vec_env, config)

    step_counter = 0
    for epoch in range(epochs):
        scores = evaluate_model(algorithm,
                                scenario_name,
                                f"{destination_folder}/eval",
                                trials=100)
        if epoch != 0:
            print()

        # flush the log buffer
        rescue.flush_logs()
        results = load_results(eval_log_file)
        epoch_score = get_score(results, team)

        print(f" -eval_{epoch}: [{epoch_score:.1f}]", end='')
        step_counter = learn(algorithm,
                             step_counter, (epoch + 1) * 1e6,
                             verbose=config.verbose == 1)

    print()
    scores = evaluate_model(algorithm,
                            scenario_name,
                            f"{destination_folder}/eval",
                            trials=100)
    rescue.flush_logs()
    results = load_results(eval_log_file)
    final_score = get_score(results, team)
    print(f" -final_eval: {final_score}")

    video.export_video(f"{destination_folder}/{scenario_name}.mp4", algorithm,
                       scenario_name)

    try:
        export_graph(eval_log_file, epoch=epochs, png_base_name="results")
    except Exception as e:
        # not worried about this not working...
        print(e)

    return results
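
A hypothetical invocation of the test above (the scenario name is illustrative, not one confirmed by the project):

if __name__ == "__main__":
    # run two train/eval cycles on a scenario and collect the logged results
    results = run_test("rescue", team=0, epochs=2)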
Example #4
def train_model():
    """
    Train model on the environment using the "other agents are environment" method.
    :return:
    """

    print("=" * 60)

    start_epoch = 0

    # either restore from a previous run, or archive the source files for later reference
    if config.restore:
        # find path to restore from
        root = os.path.split(config.log_folder)[0]
        restore_points = [
            f for f in os.listdir(root)
            if os.path.isdir(os.path.join(root, f)) and config.run in f
        ]
        if len(restore_points) == 0:
            raise Exception(f"No restore points matching {config.run} found.")
        if len(restore_points) > 1:
            raise Exception(
                f"Multiple restore points matching {config.run} found. {restore_points}"
            )
        config.log_folder = os.path.join(root, restore_points[0])
    else:
        from shutil import copyfile
        for filename in ["train.py", "rescue.py"]:
            copyfile(filename, f"{config.log_folder}/{filename}")

        # make a copy of the environment parameters
        with open(f"{config.log_folder}/config.txt", "w") as f:
            f.write(str(config))

    vec_env = make_env(config.train_scenarios,
                       config.parallel_envs,
                       name="train",
                       log_path=config.log_folder)

    print("Scenario parameters:")
    scenario_descriptions = set(str(env.scenario) for env in vec_env.games)
    for description in scenario_descriptions:
        print(description)
    print()
    print("Config:")
    print(config)
    print()

    algorithm = make_algo(vec_env, config)

    if config.restore:
        # load model
        algorithm.load(os.path.join(config.log_folder, "model.pt"))
        start_epoch = int(algorithm.t // 1e6)
        print(f"Restored from checkpoint [{start_epoch}] ")

        # reset env
        algorithm.reset()

    print("=" * 60)

    start_time = time.time()

    step_counter = algorithm.t

    for epoch in range(start_epoch, config.epochs):

        global CURRENT_EPOCH
        CURRENT_EPOCH = epoch

        print()
        print(f"Training epoch {epoch} on experiment {config.log_folder}")

        # perform evaluations (if required)
        for index, eval_scenario in enumerate(config.eval_scenarios):

            sub_folder = f"{config.log_folder}/eval_{index}"
            os.makedirs(sub_folder, exist_ok=True)
            results_file = os.path.join(sub_folder, "results.csv")

            scores = evaluate_model(algorithm,
                                    eval_scenario,
                                    sub_folder,
                                    trials=100)
            rounded_scores = tuple(round(float(score), 1) for score in scores)

            print(
                f" -evaluation against {str(eval_scenario):<40} {rounded_scores}"
            )

            # generate a video
            if config.export_video:
                video.export_video(f"{sub_folder}/evaluation_{epoch:03}_M.mp4",
                                   algorithm, eval_scenario)

            # write results to text file
            if not os.path.exists(results_file):
                with open(results_file, "w") as f:
                    f.write("epoch, red_score, green_score, blue_score\n")
            with open(results_file, "a+") as f:
                f.write(f"{epoch}, {scores[0]}, {scores[1]}, {scores[2]}\n")

            # flush buffer
            rescue.flush_logs()

            try:
                log_file = os.path.join(sub_folder, "env_0.csv")
                export_graph(log_file,
                             epoch=epoch,
                             png_base_name=f"eval_{index}")
            except Exception as e:
                # not worried about this not working...
                print(e)

        # export training video
        if config.export_video:
            video.export_video(
                f"{config.log_folder}/training_{epoch:03}_M.mp4", algorithm,
                config.train_scenarios[0])

        # save model
        if config.save_model == "all":
            algorithm.save(f"{config.log_folder}/model_{epoch:03}_M.pt")
        elif config.save_model == "none":
            pass
        elif config.save_model == "recent":
            algorithm.save(f"{config.log_folder}/model.pt")
        else:
            # interpret save_model as an integer checkpoint interval
            try:
                save_every = int(config.save_model)
            except ValueError:
                raise ValueError(
                    "Invalid save model parameter, use [none|recent|all|0..n]."
                )
            if epoch % save_every == 0:
                algorithm.save(f"{config.log_folder}/model_{epoch:03}_M.pt")
            algorithm.save(f"{config.log_folder}/model.pt")

        step_counter = learn(algorithm,
                             step_counter, (epoch + 1) * 1e6,
                             verbose=config.verbose == 1)
        print()

        # save logs
        algorithm.save_logs()

        # flush the log buffer and print scores
        rescue.flush_logs()
        print_scores(epoch=epoch)

    algorithm.save(f"{config.log_folder}/model_final.pt")
    if config.export_video:
        video.export_video(
            f"{config.log_folder}/ppo_run_{config.epochs:03}_M.mp4", algorithm,
            config.train_scenarios[0])

    time_taken = time.time() - start_time
    print(f"Finished training after {time_taken/60/60:.1f}h.")