def main(path: str, name: str):
    task = generate_task('Connect4-v0', EnvType.MULTIAGENT_SEQUENTIAL_ACTION)

    #sort_fn = lambda x: int(x.split('_')[-1][:-3])  # ExIt
    sort_fn = lambda x: int(x.split('/')[-1].split('_')[0])  # PPO test training
    sorted_population = load_population_from_path(path=path, sort_fn=sort_fn)

    for agent in sorted_population:
        print(agent.algorithm.num_updates)
        agent.requires_environment_model = False
        agent.training = False

    winrate_matrix = compute_winrate_matrix_metagame(
        population=sorted_population, episodes_per_matchup=1000, task=task)
    maxent_nash, nash_averaging = compute_nash_averaging(
        winrate_matrix, perform_logodds_transformation=True)

    winrate_matrix = np.array(winrate_matrix)
    print(
        'Saving winrate_matrix, max-entropy Nash equilibrium for game defined by winrate matrix and Nash averaging'
    )
    np.savetxt(f'{name}_winrate_matrix.csv', winrate_matrix, delimiter=', ')
    np.savetxt(f'{name}_maxent_nash.csv', maxent_nash, delimiter=', ')
    np.savetxt(f'{name}_nash_averaging.csv', nash_averaging, delimiter=', ')

    ax = plot_winrate_matrix(winrate_matrix)

    plt.show()
Example #2
def main(population: List, name: str, num_stack: int):
    #task = generate_task('Connect4-v0', EnvType.MULTIAGENT_SEQUENTIAL_ACTION)
    task = generate_task('Connect4-v0',
                         EnvType.MULTIAGENT_SEQUENTIAL_ACTION,
                         wrappers=create_wrapper(num_stack=num_stack))

    winrate_matrix = compute_winrate_matrix_metagame(
        population=population,
        episodes_per_matchup=200,
        num_envs=-1,
        task=task,
        is_game_symmetrical=False,
        show_progress=True)
    maxent_nash, nash_averaging = compute_nash_averaging(
        winrate_matrix, perform_logodds_transformation=True)

    winrate_matrix = np.array(winrate_matrix)
    print(
        'Saving winrate_matrix, max-entropy Nash equilibrium for game defined by winrate matrix and Nash averaging'
    )
    np.savetxt(f'{name}/winrate_matrix.csv', winrate_matrix, delimiter=', ')
    np.savetxt(f'{name}/maxent_nash.csv', maxent_nash, delimiter=', ')
    np.savetxt(f'{name}/nash_averaging.csv', nash_averaging, delimiter=', ')

    ax = plot_winrate_matrix(winrate_matrix)

    plt.show()
Example #3
def compute_optimality_metrics(population, task, benchmarking_episodes,
                               logger):
    logger.info('Computing winrate matrix')
    winrate_matrix_start_time = time.time()
    winrate_matrix = compute_winrate_matrix_metagame(
        population, task=task, episodes_per_matchup=benchmarking_episodes)
    winrate_submatrices = [
        winrate_matrix[:i, :i] for i in range(1, len(winrate_matrix) + 1)
    ]
    winrate_matrix_total_time = time.time() - winrate_matrix_start_time
    logger.info('Computing winrate matrix took: {:.2} seconds'.format(
        winrate_matrix_total_time))

    nash_averaging_start_time = time.time()
    logger.info('Computing nash averagings for all submatrices')
    evolution_maxent_nash_and_nash_averaging = [
        compute_nash_averaging(m, perform_logodds_transformation=True)
        for m in winrate_submatrices
    ]
    nash_averaging_total_time = time.time() - nash_averaging_start_time
    logger.info(
        'Computing nash averagings for all submatrices took: {:.2} seconds'.
        format(nash_averaging_total_time))
    return winrate_submatrices, evolution_maxent_nash_and_nash_averaging
Example #4
def compute_optimality_metrics(population, task, benchmarking_episodes, logger):
    logger.info('Computing winrate matrix')
    winrate_matrix = compute_winrate_matrix_metagame(population, task=task,
                                                     episodes_per_matchup=benchmarking_episodes)
    winrate_submatrices = [winrate_matrix[:i, :i] for i in range(1, len(winrate_matrix) + 1)]
    logger.info('Computing nash averagings for all submatrices')
    evolution_maxent_nash_and_nash_averaging = [compute_nash_averaging(m, perform_logodds_transformation=True)
                                                for m in winrate_submatrices]
    return winrate_submatrices, evolution_maxent_nash_and_nash_averaging
Example #5
def compute_progression_of_nash_averagings(winrate_matrix: np.ndarray):
    '''
    Creates a lower triangular matrix where row i holds the max-entropy Nash
    equilibrium computed over the first (i + 1) policies of the winrate matrix,
    zero-padded to the full population size.
    '''
    maxent_nashes = [
        compute_nash_averaging(winrate_matrix[:i, :i],
                               perform_logodds_transformation=True)[0]
        for i in range(1, winrate_matrix.shape[0] + 1)
    ]
    for max_ent in maxent_nashes:
        max_ent.resize(winrate_matrix.shape[0], refcheck=False)
    return np.stack(maxent_nashes)
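
For context, a minimal usage sketch (the winrate values below are illustrative, not from the source): each row i of the returned matrix is the max-entropy Nash equilibrium over the first i + 1 policies, zero-padded to the full population size.

import numpy as np

# Illustrative 3x3 winrate matrix: entry [i, j] is policy i's winrate against
# policy j, so w[i, j] + w[j, i] == 1 and the diagonal is 0.5.
winrate_matrix = np.array([[0.5, 0.7, 0.9],
                           [0.3, 0.5, 0.6],
                           [0.1, 0.4, 0.5]])

progression = compute_progression_of_nash_averagings(winrate_matrix)
# progression is lower triangular: row 0 is [1., 0., 0.] (all the mass on the
# single available policy); each later row spreads mass over one more policy.
print(progression)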
Example #6
    def __init__(self,
                 task: Task,
                 meta_game_solver: Callable = lambda winrate_matrix:
                 compute_nash_averaging(
                     winrate_matrix, perform_logodds_transformation=True)[0],
                 threshold_best_response: float = 0.7,
                 benchmarking_episodes: int = 10,
                 match_outcome_rolling_window_size: int = 10):
        '''
        :param task: Multiagent task 
        :param meta_game_solver: Function which takes a meta-game and returns a probability
                                 distribution over the policies in the meta-game.
                                 Default uses maxent-Nash equilibrium for the logodds transformation
                                 of the winrate_matrix metagame.
        :param threshold_best_response: Winrate threshold after which the agent being
                                        trained is to converge towards a best response
                                        against the current meta-game solution.
        :param benchmarking_episodes: Number of episodes that will be used to compute winrates
                                      to fill the metagame.
        :param match_outcome_rolling_window_size: Number of episodes that will be used to
                                                  decide whether the currently training agent
                                                  has converged to a best response.
        '''
        self.name = f'PSRO(M=maxentNash,O=BestResponse(wr={threshold_best_response},ws={match_outcome_rolling_window_size}))'
        self.logger = logging.getLogger(self.name)
        self.logger.setLevel(logging.INFO)
        self.check_parameter_validity(task, threshold_best_response,
                                      benchmarking_episodes,
                                      match_outcome_rolling_window_size)
        self.task = task

        self.meta_game_solver = meta_game_solver
        self.meta_game, self.meta_game_solution = None, None
        self.menagerie = []

        self.threshold_best_response = threshold_best_response
        self.match_outcome_rolling_window = []
        self.match_outcome_rolling_window_size = match_outcome_rolling_window_size

        self.benchmarking_episodes = benchmarking_episodes

        self.statistics = [self.IterationStatistics(0, 0, 0, [0], np.nan)]
Example #7
def single_experiment(task: Task, agents: List, selfplay_schemes: List[SelfPlayTrainingScheme],
               checkpoint_at_iterations: List[int], base_path: str, seed: int,
               benchmarking_episodes: int):
    trained_agent_paths = []
    for sp_scheme in selfplay_schemes:
        for agent in agents:
            training_agent = agent.clone(training=True)
            path = f'{base_path}/{sp_scheme.name}-{agent.name}'
            trained_agent_paths += [path]
            train_and_evaluate(task=task, self_play_scheme=sp_scheme,
                               training_agent=training_agent,
                               checkpoint_at_iterations=checkpoint_at_iterations,
                               benchmarking_episodes=benchmarking_episodes,
                               base_path=path, seed=seed)
            # Self-play schemes like PSRO contain useful information
            dill.dump(sp_scheme, open(f'{path}/{sp_scheme.name}.pickle', 'wb'))

    logging.info('Computing relative performances')
    relative_performances_path = f'{base_path}/relative_performances/'
    if not os.path.exists(relative_performances_path): os.mkdir(relative_performances_path)
    compute_relative_pop_performance_all_populations(trained_agent_paths, task,
                                                     benchmarking_episodes,
                                                     base_path=relative_performances_path)

    logging.info('Loading all trained agents')
    joint_trained_population = reduce(lambda succ, path: succ + load_population_from_path(path),
                                      trained_agent_paths, [])
    logging.info('START winrate matrix computation of all trained policies')
    final_winrate_matrix = compute_winrate_matrix_metagame(joint_trained_population,
                                                           episodes_per_matchup=5,
                                                           task=task)
    logging.info('START Nash averaging computation of all trained policies')
    maxent_nash, nash_avg = compute_nash_averaging(final_winrate_matrix,
                                                   perform_logodds_transformation=True)
    logging.info('Experiment FINISHED!')
    dill.dump(final_winrate_matrix,
                open(f'{base_path}/final_winrate_matrix.pickle', 'wb'))
    dill.dump(maxent_nash,
                open(f'{base_path}/final_maxent_nash.pickle', 'wb'))
Example #8
def test_for_none_game_raises_valueerror():
    with pytest.raises(ValueError) as _:
        _ = compute_nash_averaging(None)
Example #9
def test_for_non_antisymmetric_matrix_raises_valueerror():
    random_winrate_matrix = [[0.5, 0.2], [0.8, 0.5]]
    with pytest.raises(ValueError) as _:
        _ = compute_nash_averaging(random_winrate_matrix)
Example #10
def test_for_non_integer_or_float_list_raises_valueerror():
    with pytest.raises(ValueError) as _:
        _ = compute_nash_averaging([['a', 'b']])
Example #11
def test_for_empty_numpy_array_game_raises_valueerror():
    with pytest.raises(ValueError) as _:
        _ = compute_nash_averaging(np.array(None))
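
A complementary positive-case check could look like the sketch below (an assumption, not part of the original test suite): with every winrate at 0.5 the log-odds payoff matrix is all zeros, so the max-entropy Nash equilibrium should be the uniform distribution.

def test_for_uniform_winrate_matrix_returns_uniform_maxent_nash():
    # Sketch under the assumption that compute_nash_averaging selects the
    # maximum-entropy equilibrium of the log-odds transformed winrate matrix.
    uniform_winrate_matrix = np.full((3, 3), 0.5)
    maxent_nash, _ = compute_nash_averaging(uniform_winrate_matrix,
                                            perform_logodds_transformation=True)
    np.testing.assert_allclose(maxent_nash, np.ones(3) / 3)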