Exemplo n.º 1
0
class MultiArena:
    """Run two-agent duels in parallel over the processes of an MPI communicator.

    The class relies on names defined outside this block: ``comm`` (the
    communicator that is sized, split and gathered on), ``main_process``
    (flag selecting the process that merges the results), ``Arena``,
    ``GameStatisticsDuels`` and ``gym_open_ai``.
    """

    def __init__(self) -> None:
        self.env_initialized = False
        self.name = 'Multi Process Arena'
        self.collect_states_mode = False
        # Individual games are delegated to this arena.
        self.local_arena = Arena()

    def initialize_env(
            self,
            environment_id: str = 'gym_splendor_code:splendor-deterministic-v0'
    ):
        """Arena has its private environment to run the game."""
        # NOTE(review): ``self.env_initialized`` is not updated here -- confirm
        # whether it is meant to be set once the environment exists.
        self.env = gym_open_ai.make(environment_id)

    # def run_multi_process_self_play(self, mode, agent: Agent, render_game = False):
    #
    #     self.local_arena.run_self_play(mode, agent, render_game=render_game, mpi_communicator=comm)

    def run_many_duels(self,
                       mode,
                       list_of_agents: List[Agent],
                       n_games: int,
                       n_proc_per_agent: int,
                       shuffle: bool = True):
        """Play ``n_games`` duels between two agents, spread over process groups.

        The processes of ``comm`` are partitioned into groups (one "color" per
        group). Each group plays its share of the games through a
        sub-communicator, and the per-process statistics are gathered on
        rank 0 of ``comm``.

        Args:
            mode: Forwarded unchanged to ``Arena.run_one_duel``.
            list_of_agents: Exactly two agents.
            n_games: Total number of games to play; must be positive.
            n_proc_per_agent: Requested number of processes per group. It is
                clamped to the range ``[1, comm.Get_size()]``.
            shuffle: When true, one value is drawn from ``random`` per game.
                NOTE(review): the drawn value is not forwarded to
                ``run_one_duel``, so it currently has no effect on the game.

        Returns:
            The merged ``GameStatisticsDuels`` on the process where
            ``main_process`` is true, ``None`` on every other process.

        Raises:
            AssertionError: If ``n_games`` is not positive or the number of
                agents differs from two.
        """
        assert n_games > 0, 'Number of games must be positive.'
        assert len(
            list_of_agents) == 2, 'This method can run on exactly two agents.'

        n_process = comm.Get_size()
        my_rank = comm.Get_rank()
        # Clamp against the communicator size computed just above rather than
        # an unrelated module-level name.
        n_proc_per_agent = max(min(n_proc_per_agent, n_process), 1)

        # Processes that do not fill a whole group are spread over the
        # existing groups: every group gets ``extra_per_group`` more, and the
        # first ``n_bigger_groups`` groups get one on top of that.
        n_parallel_games = n_process // n_proc_per_agent
        leftover_processes = n_process % n_proc_per_agent
        extra_per_group, n_bigger_groups = divmod(leftover_processes,
                                                  n_parallel_games)

        # colors[rank] is the group index of that rank.
        colors = []
        for color in range(n_parallel_games):
            group_size = n_proc_per_agent + extra_per_group
            if color < n_bigger_groups:
                group_size += 1
            colors += [color] * group_size

        my_color = colors[my_rank]
        # Tell both agents which group this process belongs to.
        for agent in list_of_agents:
            agent.set_color(my_color)

        # One sub-communicator per group.
        new_communicator = comm.Split(my_color)

        # Share the games between groups; the first ``remaining_games``
        # groups play one game more than the rest.
        games_per_group, remaining_games = divmod(n_games, n_parallel_games)
        my_games = games_per_group + (1 if my_color < remaining_games else 0)

        local_main = new_communicator.Get_rank() == 0

        if local_main:
            print('My color = {} I have to take = {} games'.format(
                my_color, my_games))
        local_results = GameStatisticsDuels(list_of_agents[:1],
                                            list_of_agents[1:])

        for _ in range(my_games):
            if shuffle:
                # Kept so the ``random`` stream is consumed exactly as before;
                # the value itself is not used (see the docstring note).
                starting_agent_id = random.choice(range(2))
            one_game_results = self.local_arena.run_one_duel(
                mode, list_of_agents, mpi_communicator=new_communicator)
            # Only the first rank of each group records the outcome, so a
            # game is counted once per group.
            if local_main:
                local_results.register(one_game_results)

        # Collect the statistics objects of all processes on rank 0.
        combined_results_list = comm.gather(local_results, root=0)

        if not main_process:
            return None

        global_results = GameStatisticsDuels(list_of_agents[:1],
                                             list_of_agents[1:])
        for local_result in combined_results_list:
            global_results.register(local_result)
        return global_results
Exemplo n.º 2
0
from agents.minmax_agent import MinMaxAgent
from agents.greedysearch_agent2 import GreedySearchAgent

from arena.arena import Arena

# Script: build an arena and two agents, then play one rendered duel.

# Environment id handed to the Arena constructor.
environment_id = 'gym_splendor_code:splendor-v1'
fight_pit = Arena(environment_id)

# NOTE(review): RandomAgent is not among the imports visible in this snippet --
# confirm it is imported elsewhere, otherwise the next two lines raise NameError.
goku = RandomAgent(distribution='first_buy')  # rebound to a MinMaxAgent below
goku2 = RandomAgent(distribution='uniform')  # not referenced again in this script
#gohan = RandomAgent(distribution='uniform_on_types')
#gohan = RandomAgent(distribution='uniform')
#goku = GreedyAgen/t(weight = 0.3)
gohan = GreedySearchAgent(depth=5)
goku = MinMaxAgent(name="MinMax", depth=3)
# The names assigned here replace whatever the constructors set
# (goku was created with name="MinMax").
gohan.name = "g2"
goku.name = "g1"
# Disabled profiling run of many duels, kept for reference.
# profi = cProfile.Profile()
#
# profi.run('(fight_pit.run_many_duels([goku, gohan], number_of_games=50))')
# profi.dump_stats('profi2.prof')

# Play a single duel between the two agents with rendering switched on.
fight_pit.run_one_duel([goku, gohan], render_game=True)

# time_dupa = time.time()
# for i in range(100):
#     print(i)
#     fight_pit = Arena()
#     fight_pit.run_one_duel([goku, gohan], starting_agent_id=0)
# print(time.time() - time_dupa)
Exemplo n.º 3
0
# Script: load a gin configuration, then play one duel between a random agent
# and a single-process MCTS agent.
import gin

from gym_splendor_code.envs.mechanics.abstract_observation import DeterministicObservation
from gym_splendor_code.envs.mechanics.state import State
from nn_models.architectures.average_pool_v0 import StateEncoder, ValueRegressor, IdentityTransformer
# The configuration is parsed before the agent/MCTS modules below are imported.
# NOTE(review): presumably so that gin-configurable objects in those modules see
# the bindings -- confirm before reordering the imports.
# NOTE(review): the path is absolute and specific to one machine.
gin.parse_config_file(
    '/home/tomasz/ML_Research/splendor/gym-splendor/experiments/MCTS_series_1/params.gin'
)

from agents.random_agent import RandomAgent
from agents.single_mcts_agent import SingleMCTSAgent
from arena.arena import Arena
from monte_carlo_tree_search.evaluation_policies.value_evaluator_nn import ValueEvaluator
from monte_carlo_tree_search.mcts_algorithms.single_process.single_mcts import SingleMCTS

arek = Arena()

a1 = RandomAgent()
# NOTE(review): the meaning of the positional arguments (5, evaluator, 0.6,
# True, True) is not visible here -- check SingleMCTSAgent's signature.
a2 = SingleMCTSAgent(5, ValueEvaluator(), 0.6, True, True)
#
# Play one duel in 'deterministic' mode; the returned value is kept in `results`.
results = arek.run_one_duel('deterministic', [a1, a2])

# state1 = State()
# fufu = SingleMCTS(5, 0.6,  ValueEvaluator())
# fufu.create_root(DeterministicObservation(state1))
# fufu.run_mcts_pass()
Exemplo n.º 4
0
class ArenaMultiThread:
    """Distribute duels between agents over the ranks of an MPI communicator.

    Every rank plays its share of the games on a private ``Arena`` and the
    statistics are gathered on rank 0. The class relies on names defined
    outside this block: ``comm``, ``n_proc``, ``my_rank``, ``main_thread``,
    ``USE_TQDM``, ``tqdm``, ``Arena`` and ``GameStatisticsDuels``.
    """

    def __init__(self, environment_id='gym_splendor_code:splendor-v0'):
        # NOTE(review): the id is stored but not passed to ``Arena()`` below.
        self.environment_id = environment_id
        self.progress_bar = None
        self.local_arena = Arena()

    def create_progress_bar(self, lenght):
        """Create a tqdm bar with ``lenght`` steps (main thread with tqdm enabled only)."""
        if main_thread and USE_TQDM:
            self.progress_bar = tqdm(total=lenght, postfix=None)

    def set_progress_bar(self, value):
        """Move the bar to ``value``, capped one step below its total.

        Does nothing when no bar has been created yet.
        """
        if main_thread and USE_TQDM and self.progress_bar is not None:
            self.progress_bar.n = min(value, self.progress_bar.total - 1)
            self.progress_bar.update()

    def start_collecting_states(self):
        """Ask the local arena to start collecting states."""
        self.local_arena.start_collecting_states()

    def collect_only_from_middle_game(self, n_min_actions, dump_probability):
        """Forward the middle-game collection settings to the local arena."""
        self.local_arena.collect_only_from_middle_game(n_min_actions,
                                                       dump_probability)

    def stop_collecting_states(self):
        """Ask the local arena to stop collecting states."""
        # Previously this called start_collecting_states(), so collection
        # could never be switched off through this wrapper.
        self.local_arena.stop_collecting_states()

    def dump_collected_states(self, filename, folder):
        """Dump the local arena's collected states, passing this process's rank along."""
        self.local_arena.dump_collected_states(filename, folder, my_rank)

    def return_collected_states(self):
        """Return the local arena's ``collect_states_df`` attribute."""
        return self.local_arena.collect_states_df

    def collected_states_to_csv(self, filename):
        """Write the local arena's collected states to CSV, passing this process's rank along."""
        self.local_arena.collected_states_to_csv(filename, my_rank)

    def one_group_vs_other_duels(self,
                                 mode,
                                 list_of_agents1: List[Agent],
                                 list_of_agents2: List[Agent],
                                 games_per_duel: int,
                                 shuffle: bool = True):
        """Play every agent of the first group against every agent of the second.

        Pairs whose two members compare equal are skipped. Each remaining pair
        is scheduled ``games_per_duel`` times and the resulting job list is
        dealt out to the ranks round-robin.

        Args:
            mode: Forwarded unchanged to ``Arena.run_one_duel``.
            list_of_agents1: First group of agents.
            list_of_agents2: Second group of agents.
            games_per_duel: Number of games for every pair.
            shuffle: When true, one value is drawn from ``random`` per game.
                NOTE(review): the drawn value is not forwarded to
                ``run_one_duel``, so it currently has no effect on the game.

        Returns:
            The merged ``GameStatisticsDuels`` when ``main_thread`` is true,
            ``None`` on every other rank.
        """
        # All cross-group pairs, without pairs of an agent against itself.
        pairs_to_duel = [
            (first, second)
            for first, second in product(list_of_agents1, list_of_agents2)
            if first != second
        ]
        list_of_jobs = pairs_to_duel * games_per_duel

        # Every rank plays ``jobs_per_thread`` games; the first
        # ``remaining_jobs`` ranks play one more.
        jobs_per_thread, remaining_jobs = divmod(len(list_of_jobs), n_proc)
        my_job_count = jobs_per_thread + int(my_rank < remaining_jobs)

        local_results = GameStatisticsDuels(list_of_agents1, list_of_agents2)

        self.create_progress_bar(len(list_of_jobs))

        for game_id in range(my_job_count):
            # Round-robin assignment: rank r takes jobs r, r + n_proc, ...
            pair_to_duel = list_of_jobs[game_id * n_proc + my_rank]
            if shuffle:
                # Kept so the ``random`` stream is consumed exactly as before;
                # the value itself is not used (see the docstring note).
                starting_agent_id = random.choice(range(2))
            one_game_results = self.local_arena.run_one_duel(
                mode, list(pair_to_duel))
            local_results.register(one_game_results)
            if main_thread:
                # The bar is advanced by n_proc per local game, i.e. as if
                # all ranks progress at the same pace as this one.
                self.set_progress_bar((game_id + 1) * n_proc)

        # Collect the statistics objects of all ranks on rank 0.
        cumulative_results_unprocessed = comm.gather(local_results, root=0)
        if not main_thread:
            return None

        cumulative_results = GameStatisticsDuels(list_of_agents1,
                                                 list_of_agents2)
        for one_thread_results in cumulative_results_unprocessed:
            cumulative_results.register(one_thread_results)
        return cumulative_results

    def run_many_games(self, mode, list_of_agents, n_games):
        """Play ``n_games`` between the first two agents of ``list_of_agents``."""
        return self.one_group_vs_other_duels(mode, [list_of_agents[0]],
                                             [list_of_agents[1]],
                                             games_per_duel=n_games,
                                             shuffle=True)

    def all_vs_all(self, mode, list_of_agents, n_games):
        """Play ``n_games`` for every ordered pair of distinct agents in the list."""
        return self.one_group_vs_other_duels(mode,
                                             list_of_agents,
                                             list_of_agents,
                                             games_per_duel=n_games)