import random
from typing import List

import gym_open_ai  # the project's local fork of OpenAI Gym
from mpi4py import MPI

# Import paths below assume the repo layout; adjust if the modules live elsewhere.
from agents.abstract_agent import Agent
from arena.arena import Arena
from arena.game_statistics_duels import GameStatisticsDuels

comm = MPI.COMM_WORLD
main_process = comm.Get_rank() == 0


class MultiArena:
    def __init__(self) -> None:
        self.env_initialized = False
        self.name = 'Multi Process Arena'
        self.collect_states_mode = False
        self.local_arena = Arena()

    def initialize_env(self, environment_id: str = 'gym_splendor_code:splendor-deterministic-v0'):
        """Arena has its own private environment to run the game."""
        self.env = gym_open_ai.make(environment_id)

    # def run_multi_process_self_play(self, mode, agent: Agent, render_game=False):
    #     self.local_arena.run_self_play(mode, agent, render_game=render_game, mpi_communicator=comm)

    def run_many_duels(self, mode, list_of_agents: List[Agent], n_games: int,
                       n_proc_per_agent: int, shuffle: bool = True):
        assert n_games > 0, 'Number of games must be positive.'
        assert len(list_of_agents) == 2, 'This method requires exactly two agents.'

        n_process = comm.Get_size()
        my_rank = comm.Get_rank()
        n_proc_per_agent = max(min(n_proc_per_agent, n_process), 1)

        # Split the processes into groups ("colors"); each group runs its own games.
        n_parallel_games = n_process // n_proc_per_agent
        remaining_processes = n_process % n_proc_per_agent
        extra_process_per_game = remaining_processes // n_parallel_games
        remaining_processes_after_all = remaining_processes % n_parallel_games

        colors = []
        for i in range(n_parallel_games):
            # The first groups absorb one extra leftover process each.
            processes_to_add = n_proc_per_agent + extra_process_per_game
            if i < remaining_processes_after_all:
                processes_to_add += 1
            colors += [i] * processes_to_add

        my_color = colors[my_rank]
        # Set agents' colors:
        for agent in list_of_agents:
            agent.set_color(my_color)
        # Create one communicator per group:
        new_communicator = comm.Split(my_color)

        # Distribute games across groups; the first groups take one extra game.
        n_games_for_one_communicator = n_games // n_parallel_games
        remaining_games = n_games % n_parallel_games
        my_games = n_games_for_one_communicator + int(my_color < remaining_games)

        local_main = new_communicator.Get_rank() == 0
        if local_main:
            print('My color = {}, I have to take = {} games'.format(my_color, my_games))

        local_results = GameStatisticsDuels(list_of_agents[:1], list_of_agents[1:])
        for _ in range(my_games):
            # Randomize which agent starts, if requested.
            starting_agent_id = random.choice(range(2)) if shuffle else 0
            one_game_results = self.local_arena.run_one_duel(
                mode, list_of_agents, starting_agent_id=starting_agent_id,
                mpi_communicator=new_communicator)
            if local_main:
                local_results.register(one_game_results)

        # Gather all results on the global root:
        combined_results_list = comm.gather(local_results, root=0)
        if main_process:
            global_results = GameStatisticsDuels(list_of_agents[:1], list_of_agents[1:])
            for local_result in combined_results_list:
                global_results.register(local_result)
            return global_results
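# Usage sketch (hypothetical; the agent choices and mode are assumptions).
# The class is meant to be launched under MPI, e.g. `mpiexec -n 8 python duels.py`.
# Only the global root process returns the aggregated GameStatisticsDuels;
# every other rank returns None.
#
# from agents.random_agent import RandomAgent
#
# multi_arena = MultiArena()
# agents = [RandomAgent(distribution='uniform'), RandomAgent(distribution='first_buy')]
# results = multi_arena.run_many_duels('deterministic', agents,
#                                      n_games=16, n_proc_per_agent=2)
# if results is not None:
#     print(results)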
from agents.random_agent import RandomAgent
from agents.minmax_agent import MinMaxAgent
from agents.greedysearch_agent2 import GreedySearchAgent
from arena.arena import Arena

environment_id = 'gym_splendor_code:splendor-v1'
fight_pit = Arena(environment_id)

# Alternative agents to pit against each other:
# goku = RandomAgent(distribution='first_buy')
# goku2 = RandomAgent(distribution='uniform')
# gohan = RandomAgent(distribution='uniform_on_types')
# goku = GreedyAgent(weight=0.3)

gohan = GreedySearchAgent(depth=5)
goku = MinMaxAgent(name="MinMax", depth=3)
gohan.name = "g2"
goku.name = "g1"

# To profile a longer series of games:
# profi = cProfile.Profile()
# profi.run('fight_pit.run_many_duels([goku, gohan], number_of_games=50)')
# profi.dump_stats('profi2.prof')

fight_pit.run_one_duel([goku, gohan], render_game=True)

# Timing many duels:
# time_dupa = time.time()
# for i in range(100):
#     print(i)
#     fight_pit = Arena()
#     fight_pit.run_one_duel([goku, gohan], starting_agent_id=0)
# print(time.time() - time_dupa)
import gin

from gym_splendor_code.envs.mechanics.abstract_observation import DeterministicObservation
from gym_splendor_code.envs.mechanics.state import State
from nn_models.architectures.average_pool_v0 import StateEncoder, ValueRegressor, IdentityTransformer

# Parse the gin config before the agent modules below construct their models.
# Note: this is an absolute, machine-specific path.
gin.parse_config_file(
    '/home/tomasz/ML_Research/splendor/gym-splendor/experiments/MCTS_series_1/params.gin'
)

from agents.random_agent import RandomAgent
from agents.single_mcts_agent import SingleMCTSAgent
from arena.arena import Arena
from monte_carlo_tree_search.evaluation_policies.value_evaluator_nn import ValueEvaluator
from monte_carlo_tree_search.mcts_algorithms.single_process.single_mcts import SingleMCTS

arek = Arena()
a1 = RandomAgent()
a2 = SingleMCTSAgent(5, ValueEvaluator(), 0.6, True, True)

# results = arek.run_one_duel('deterministic', [a1, a2])

# Running a single MCTS pass by hand:
# state1 = State()
# fufu = SingleMCTS(5, 0.6, ValueEvaluator())
# fufu.create_root(DeterministicObservation(state1))
# fufu.run_mcts_pass()
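# For reference, a gin config such as params.gin binds constructor parameters
# of the gin-configurable classes imported above. A minimal sketch with
# hypothetical parameter names (the real names live in the experiment's gin file):
#
#   StateEncoder.gems_encoder_dim = 32
#   StateEncoder.price_encoder_dim = 32
#   ValueRegressor.layers_list = [64, 32]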
import random
from itertools import product
from typing import List

from mpi4py import MPI
from tqdm import tqdm

# Import paths below assume the repo layout; adjust if the modules live elsewhere.
from agents.abstract_agent import Agent
from arena.arena import Arena
from arena.game_statistics_duels import GameStatisticsDuels

comm = MPI.COMM_WORLD
my_rank = comm.Get_rank()
n_proc = comm.Get_size()
main_thread = my_rank == 0
USE_TQDM = True


class ArenaMultiThread:
    def __init__(self, environment_id='gym_splendor_code:splendor-v0'):
        self.environment_id = environment_id
        self.progress_bar = None
        self.local_arena = Arena()

    def create_progress_bar(self, length):
        if main_thread and USE_TQDM:
            self.progress_bar = tqdm(total=length, postfix=None)

    def set_progress_bar(self, value):
        if main_thread and USE_TQDM:
            self.progress_bar.n = min(value, self.progress_bar.total - 1)
            self.progress_bar.update()

    def start_collecting_states(self):
        self.local_arena.start_collecting_states()

    def collect_only_from_middle_game(self, n_min_actions, dump_probability):
        self.local_arena.collect_only_from_middle_game(n_min_actions, dump_probability)

    def stop_collecting_states(self):
        self.local_arena.stop_collecting_states()

    def dump_collected_states(self, filename, folder):
        self.local_arena.dump_collected_states(filename, folder, my_rank)

    def return_collected_states(self):
        return self.local_arena.collect_states_df

    def collected_states_to_csv(self, filename):
        self.local_arena.collected_states_to_csv(filename, my_rank)

    def one_group_vs_other_duels(self, mode, list_of_agents1: List[Agent],
                                 list_of_agents2: List[Agent], games_per_duel: int,
                                 shuffle: bool = True):
        # Create all pairs to fight (skip self-play pairs):
        all_pairs = list(product(list_of_agents1, list_of_agents2))
        pairs_to_duel = [pair for pair in all_pairs if pair[0] != pair[1]]
        # Create the list of jobs:
        list_of_jobs = pairs_to_duel * games_per_duel
        # Distribute jobs across processes; the first ranks take one extra job:
        jobs_per_process = len(list_of_jobs) // n_proc
        remaining_jobs = len(list_of_jobs) % n_proc
        add_remaining_job = int(my_rank < remaining_jobs)

        local_results = GameStatisticsDuels(list_of_agents1, list_of_agents2)
        self.create_progress_bar(len(list_of_jobs))

        for game_id in range(jobs_per_process + add_remaining_job):
            pair_to_duel = list_of_jobs[game_id * n_proc + my_rank]
            # Randomize which agent starts, if requested.
            starting_agent_id = random.choice(range(2)) if shuffle else 0
            one_game_results = self.local_arena.run_one_duel(
                mode, list(pair_to_duel), starting_agent_id=starting_agent_id)
            local_results.register(one_game_results)
            if main_thread:
                self.set_progress_bar((game_id + 1) * n_proc)

        # Gather all results on the root process:
        cumulative_results_unprocessed = comm.gather(local_results, root=0)
        if main_thread:
            cumulative_results = GameStatisticsDuels(list_of_agents1, list_of_agents2)
            for one_thread_results in cumulative_results_unprocessed:
                cumulative_results.register(one_thread_results)
            return cumulative_results

    def run_many_games(self, mode, list_of_agents, n_games):
        return self.one_group_vs_other_duels(mode, [list_of_agents[0]],
                                             [list_of_agents[1]],
                                             games_per_duel=n_games, shuffle=True)

    def all_vs_all(self, mode, list_of_agents, n_games):
        return self.one_group_vs_other_duels(mode, list_of_agents, list_of_agents,
                                             games_per_duel=n_games)
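# Usage sketch (hypothetical; the agent choices and mode are assumptions).
# Launch under MPI, e.g. `mpiexec -n 4 python tournament.py`. Only rank 0
# returns the aggregated statistics; every other rank returns None.
#
# from agents.random_agent import RandomAgent
#
# arena = ArenaMultiThread()
# agents = [RandomAgent(distribution='uniform'), RandomAgent(distribution='first_buy')]
# stats = arena.all_vs_all('deterministic', agents, n_games=10)
# if stats is not None:
#     print(stats)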