def main():
    robot = Robot((18, 1), True)
    #robot.set_speed(100)
    #_real_map = Arena(robot)
    #ArenaUtils.load_arena_from_file(_real_map, 'map/SampleWeek11.txt')
    _explore_map = Arena(robot)
    _explore_map.set_allunexplored()
    CommMgr.connect()
    _explore = Exploration(_explore_map, robot, 300, 3600)
    _explore.run()
    CommMgr.close()
def fp():
    robot = Robot((18, 1), True)
    _real_map = Arena(robot)
    ArenaUtils.load_arena_from_file(_real_map, 'map/17_week10.txt')
    print('Awaiting FP_START')
    while True:
        _command = CommMgr.recv()
        if _command == 'FP_START':
            CommMgr.send('X', CommMgr.ARDUINO)
            break
    _go_to_wp_goal = FastestPath(_real_map, robot)
    _status = _go_to_wp_goal.do_fastest_path_wp_goal((7, 10), ArenaConstant.GOAL_POS.value)
def explore():
    robot = Robot((18, 1), True)
    _explore_map = Arena(robot)
    _explore_map.set_allunexplored()
    _explore = Exploration(_explore_map, robot, 300, 3600)
    _explore.run()
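# Hedged usage sketch (assumption, not part of the original source): a minimal
# entry-point dispatcher for the three routines above. The command-line selection
# and the default of 'explore' are illustrative only.
if __name__ == '__main__':
    import sys
    mode = sys.argv[1] if len(sys.argv) > 1 else 'explore'
    if mode == 'main':
        main()      # real run: connects to the robot link via CommMgr
    elif mode == 'fp':
        fp()        # fastest-path run, waits for FP_START over the link
    else:
        explore()   # pure-simulation exploration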
import gin

from gym_splendor_code.envs.mechanics.abstract_observation import DeterministicObservation
from gym_splendor_code.envs.mechanics.state import State
from nn_models.architectures.average_pool_v0 import StateEncoder, ValueRegressor, IdentityTransformer

gin.parse_config_file(
    '/home/tomasz/ML_Research/splendor/gym-splendor/experiments/MCTS_series_1/params.gin'
)

from agents.random_agent import RandomAgent
from agents.single_mcts_agent import SingleMCTSAgent
from arena.arena import Arena
from monte_carlo_tree_search.evaluation_policies.value_evaluator_nn import ValueEvaluator
from monte_carlo_tree_search.mcts_algorithms.single_process.single_mcts import SingleMCTS

arek = Arena()
a1 = RandomAgent()
a2 = SingleMCTSAgent(5, ValueEvaluator(), 0.6, True, True)

# results = arek.run_one_duel('deterministic', [a1, a2])
# state1 = State()
# fufu = SingleMCTS(5, 0.6, ValueEvaluator())
# fufu.create_root(DeterministicObservation(state1))
# fufu.run_mcts_pass()
from arena.arena import Arena
from pygame.locals import *
from arena.wall_initialiser import initialise_walls
from arena.boy_initialiser import initialise_boys
from brains.pathfinding.grid import BackgroundGrid
import pygame

SCREENRECT = Rect(0, 0, 640, 640)

pygame.init()
winstyle = 0
bestdepth = pygame.display.mode_ok(SCREENRECT.size, winstyle, 32)
screen = pygame.display.set_mode((640, 640), winstyle, bestdepth)

arena = Arena(SCREENRECT)
method = 'steering'
initialise_walls(arena, method)
initialise_boys(arena, method)

clock = pygame.time.Clock()


def main(screen, arena):
    while 1:
        for event in pygame.event.get():
            if event.type == QUIT:
                return
        screen.fill((30, 30, 30))
        arena.update_screen_objects(screen)
        pygame.display.flip()  # assumed completion: present the freshly drawn frame
        clock.tick(60)         # assumed completion: cap the loop using the clock created above
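# Hedged entry point (assumption, not in the original snippet): run the loop and
# shut pygame down cleanly once the window is closed.
if __name__ == '__main__':
    main(screen, arena)
    pygame.quit()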
class StateEncoder(AbstractModel):
    def __init__(self,
                 gems_encoder_dim: int = None,
                 price_encoder_dim: int = None,
                 profit_encoder_dim: int = None,
                 cards_points_dim: int = None,
                 cards_dense1_dim: int = None,
                 cards_dense2_dim: int = None,
                 board_nobles_dense1_dim: int = None,
                 board_nobles_dense2_dim: int = None,
                 full_board_dense1_dim: int = None,
                 full_board_dense2_dim: int = None,
                 player_points_dim: int = None,
                 player_nobles_dim: int = None,
                 full_player_dense1_dim: int = None,
                 full_player_dense2_dim: int = None,
                 final_layer=None,
                 data_transformer=None,
                 network_name: str = None):
        super().__init__()
        self.vectorizer = Vectorizer()
        self.final_layer = final_layer
        self.data_transformer = data_transformer
        self.params['data transformation'] = self.data_transformer.name
        self.params['final layer name'] = self.final_layer.name
        self.params['gems_encoder_dim'] = gems_encoder_dim
        self.params['price_encoder_dim'] = price_encoder_dim
        self.params['profit_encoder_dim'] = profit_encoder_dim
        self.params['cards_points_dim'] = cards_points_dim
        self.params['cards_dense1_dim'] = cards_dense1_dim
        self.params['cards_dense2_dim'] = cards_dense2_dim
        self.params['board_nobles_dense1_dim'] = board_nobles_dense1_dim
        self.params['board_nobles_dense2_dim'] = board_nobles_dense2_dim
        self.params['full_board_dense1_dim'] = full_board_dense1_dim
        self.params['full_board_dense2_dim'] = full_board_dense2_dim
        self.params['player_points_dim'] = player_points_dim
        self.params['player_nobles_dim'] = player_nobles_dim
        self.params['full_player_dense1_dim'] = full_player_dense1_dim
        self.params['full_player_dense2_dim'] = full_player_dense2_dim

        self.arena = Arena()
        self.network_agent = ValueNNAgent(self)
        self.easy_opp = RandomAgent(distribution='first_buy')
        self.medium_opp = GreedyAgentBoost()
        self.hard_opp = MinMaxAgent()
        self.neptune_monitor = NeptuneMonitor()
        self.network_name = network_name

        self.gems_encoder = GemsEncoder(gems_encoder_dim)
        self.price_encoder = PriceEncoder(price_encoder_dim)
        self.board_encoder = BoardEncoder(self.gems_encoder,
                                          ManyNoblesEncoder(price_encoder_dim,
                                                            board_nobles_dense1_dim,
                                                            board_nobles_dense2_dim),
                                          ManyCardsEncoder(MAX_CARDS_ON_BORD,
                                                           profit_encoder_dim,
                                                           price_encoder_dim,
                                                           cards_points_dim,
                                                           cards_dense1_dim,
                                                           cards_dense2_dim),
                                          full_board_dense1_dim,
                                          full_board_dense2_dim)
        self.player_encoder = PlayerEncoder(self.gems_encoder,
                                            self.price_encoder,
                                            ManyCardsEncoder(MAX_RESERVED_CARDS,
                                                             profit_encoder_dim,
                                                             price_encoder_dim,
                                                             cards_points_dim,
                                                             cards_dense1_dim,
                                                             cards_dense2_dim),
                                            player_points_dim,
                                            player_nobles_dim,
                                            full_player_dense1_dim,
                                            full_player_dense2_dim)

        active_player_input = PlayersInputGenerator('active_').inputs
        other_player_input = PlayersInputGenerator('other_').inputs
        board_input = self.board_encoder.inputs
        self.inputs = board_input + active_player_input + other_player_input

        board_encoded = self.board_encoder(board_input)
        active_player_encoded = self.player_encoder(active_player_input)
        other_player_encoded = self.player_encoder(other_player_input)
        full_state = Concatenate(axis=-1)([board_encoded, active_player_encoded, other_player_encoded])
        full_state = Dense(full_player_dense1_dim, activation='relu')(full_state)
        final_state = Dense(full_player_dense2_dim, activation='relu')(full_state)
        result = self.final_layer(final_state)

        self.layer = Model(inputs=self.inputs, outputs=final_state, name='full_state_splendor_estimator')
        self.network = Model(inputs=self.inputs, outputs=result, name='full_state_splendor_estimator')
        self.network.compile(Adam(), loss='mean_squared_error')
        self.params['Model name'] = 'Average pooling model'
        self.params['optimizer_name'] = 'Adam'

    def get_value(self, state):
        prediction = self.network.predict(self.vectorizer.state_to_input(state))
        return self.final_layer.get_value(prediction)

    def train_on_mcts_data(self, data_frame, train_epochs: int):
        X = data_frame['state']
        Y = data_frame['mcts_value']
        X = self.vectorizer.many_states_to_input(X)
        Y = self.data_transformer.transform_array(Y)
        fit_history = self.network.fit(X, Y, epochs=train_epochs)
        return fit_history

    def train_network_on_many_sets(self, train_dir=None, validation_file=None, epochs=None,
                                   batch_size=None, test_games=1):
        assert self.network is not None, 'You must create network before training'
        with open(validation_file, 'rb') as f:
            X_val, Y_val = pickle.load(f)
        X_val = self.vectorizer.many_states_to_input(X_val)
        Y_val = self.data_transformer.transform_array(Y_val)
        self.neptune_monitor.reset_epoch_counter()
        file1, file2 = self.gather_data_info(train_dir, validation_file)
        self.start_neptune_experiment(experiment_name=self.network_name,
                                      description='Training avg_pool arch network',
                                      neptune_monitor=self.neptune_monitor)
        self.neptune_monitor.log_histograms(file1, file2)
        files_for_training = os.listdir(train_dir)
        for epoch in range(epochs):
            print(f'\n Epoch {epoch}: \n')
            file_epoch = epoch % len(files_for_training)
            X, Y = load_data_for_model(os.path.join(train_dir, files_for_training[file_epoch]))
            X = self.vectorizer.many_states_to_input(X)
            Y = self.data_transformer.transform_array(Y)
            self.network.fit(x=X, y=Y, epochs=1, batch_size=batch_size,
                             validation_data=(X_val, Y_val),
                             callbacks=[self.neptune_monitor])
            del X
            del Y
        neptune.stop()

    def dump_weights(self, file_name):
        self.network.save_weights(file_name)

    def load_weights(self, file_name):
        self.network.load_weights(file_name)

    def gather_data_info(self, train_dir, validation_file):
        list_of_files = os.listdir(train_dir)
        example_file = list_of_files[0]
        with open(os.path.join(train_dir, example_file), 'rb') as f1:
            _, Y_ex = pickle.load(f1)
        with open(validation_file, 'rb') as f2:
            _, Y_val = pickle.load(f2)
        self.params['train set size'] = len(Y_ex)
        self.params['valid set size'] = len(Y_val)
        file1 = os.path.join('temp', 'train_hist.png')
        file2 = os.path.join('temp', 'valid_hist.png')
        Y_ex = self.data_transformer.transform_array(Y_ex)
        Y_val = self.data_transformer.transform_array(Y_val)
        plt.hist(Y_ex, bins=100)
        plt.savefig(file1)
        plt.clf()
        plt.hist(Y_val, bins=100)
        plt.savefig(file2)
        return file1, file2

    def check_performance(self, n_games, opponents):
        performance_results = {}
        if 'easy' in opponents:
            easy_results = self.arena.run_many_duels('deterministic',
                                                     [self.network_agent, self.easy_opp],
                                                     n_games, shuffle_agents=True)
            _, _, easy_win_rate = easy_results.return_stats()
            performance_results['easy'] = easy_win_rate / n_games
        if 'medium' in opponents:
            medium_results = self.arena.run_many_duels('deterministic',
                                                       [self.network_agent, self.medium_opp],
                                                       n_games, shuffle_agents=True)
            _, _, medium_win_rate = medium_results.return_stats()
            performance_results['medium'] = medium_win_rate / n_games
        if 'hard' in opponents:
            hard_results = self.arena.run_many_duels('deterministic',
                                                     [self.network_agent, self.hard_opp],
                                                     n_games, shuffle_agents=True)
            _, _, hard_win_rate = hard_results.return_stats()
            performance_results['hard'] = hard_win_rate / n_games
        return performance_results

    def run_test(self, n_games):
        results = self.check_performance(n_games, ['easy'])
        self.neptune_monitor.log_win_rates(['easy'], results)

    def evaluate_fixed_states(self):
        results = [self.get_value(f_state) for f_state in list_of_fixes_states]
        self.neptune_monitor.log_state_values(results)
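# Hedged usage sketch (assumption, not in the original source): constructing a small
# StateEncoder in a separate script and querying the value of the initial state.
# The layer sizes are illustrative; the import paths for ValueRegressor and
# IdentityTransformer follow the ones used elsewhere in this repo, and their
# no-argument constructors are an assumption.
from nn_models.architectures.average_pool_v0 import StateEncoder, ValueRegressor, IdentityTransformer
from gym_splendor_code.envs.mechanics.state import State

encoder = StateEncoder(gems_encoder_dim=32, price_encoder_dim=32, profit_encoder_dim=32,
                       cards_points_dim=16, cards_dense1_dim=64, cards_dense2_dim=32,
                       board_nobles_dense1_dim=32, board_nobles_dense2_dim=16,
                       full_board_dense1_dim=128, full_board_dense2_dim=64,
                       player_points_dim=16, player_nobles_dim=16,
                       full_player_dense1_dim=128, full_player_dense2_dim=64,
                       final_layer=ValueRegressor(), data_transformer=IdentityTransformer(),
                       network_name='avg_pool_sketch')
# Whether get_value accepts a raw State directly is an assumption based on the
# vectorizer call above; dump_weights mirrors the method defined in the class.
print(encoder.get_value(State()))
encoder.dump_weights('avg_pool_sketch.h5')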
main_process = MPI.COMM_WORLD.Get_rank() == 0

do = 1

if do == 1:
    moominer = ValueFunctionOptimizer()
    time_s = time.time()
    val = moominer.eval_metrics(100)
    if main_process:
        print(f'Time taken = {time.time() - time_s}')
        print(f'value = {val}')

if do == 2:
    a1 = GreedyAgentBoost()
    a2 = ValueFunctionAgent()
    arek = ArenaMultiThread()
    res = arek.run_many_games('deterministic', [a1, a2], 100)
    if main_process:
        print(res)

if do == 3:
    from agents.random_agent import RandomAgent
    from agents.value_function_agent import ValueFunctionAgent
    a1 = RandomAgent()
    a2 = ValueFunctionAgent()
    from arena.arena import Arena
    arek = Arena()
    arek.run_one_duel('deterministic', [a1, a2], render_game=True)
class ArenaMultiThread:

    def __init__(self, environment_id='gym_splendor_code:splendor-v0'):
        self.environment_id = environment_id
        self.progress_bar = None
        self.local_arena = Arena()

    def create_progress_bar(self, length):
        if main_thread and USE_TQDM:
            self.progress_bar = tqdm(total=length, postfix=None)

    def set_progress_bar(self, value):
        if main_thread and USE_TQDM:
            self.progress_bar.n = min(value, self.progress_bar.total - 1)
            self.progress_bar.update()

    def start_collecting_states(self):
        self.local_arena.start_collecting_states()

    def collect_only_from_middle_game(self, n_min_actions, dump_probability):
        self.local_arena.collect_only_from_middle_game(n_min_actions, dump_probability)

    def stop_collecting_states(self):
        self.local_arena.stop_collecting_states()

    def dump_collected_states(self, filename, folder):
        self.local_arena.dump_collected_states(filename, folder, my_rank)

    def return_collected_states(self):
        return self.local_arena.collect_states_df

    def collected_states_to_csv(self, filename):
        self.local_arena.collected_states_to_csv(filename, my_rank)

    def one_group_vs_other_duels(self, mode, list_of_agents1: List[Agent], list_of_agents2: List[Agent],
                                 games_per_duel: int, shuffle: bool = True):
        # create all pairs to fight
        all_pairs = list(product(list_of_agents1, list_of_agents2))
        pairs_to_duel = [pair for pair in all_pairs if pair[0] != pair[1]]
        # create list of jobs:
        list_of_jobs = pairs_to_duel * games_per_duel
        # calculate jobs per thread:
        jobs_per_thread = int(len(list_of_jobs) / n_proc)
        remaining_jobs = len(list_of_jobs) % n_proc
        # create local arena
        local_results = GameStatisticsDuels(list_of_agents1, list_of_agents2)
        add_remaining_job = int(my_rank < remaining_jobs)
        # create progress bar
        self.create_progress_bar(len(list_of_jobs))
        for game_id in range(0, jobs_per_thread + add_remaining_job):
            if main_thread:
                pass
                # print(f'game_id = {game_id}')
            pair_to_duel = list_of_jobs[game_id * n_proc + my_rank]
            if shuffle:
                starting_agent_id = random.choice(range(2))
            one_game_results = self.local_arena.run_one_duel(mode, list(pair_to_duel))
            local_results.register(one_game_results)
            if main_thread:
                self.set_progress_bar((game_id + 1) * n_proc)
        # gather all results
        cumulative_results_unprocessed = comm.gather(local_results, root=0)
        if main_thread:
            cumulative_results = GameStatisticsDuels(list_of_agents1, list_of_agents2)
            for one_thread_results in cumulative_results_unprocessed:
                cumulative_results.register(one_thread_results)
            return cumulative_results

    def run_many_games(self, mode, list_of_agents, n_games):
        return self.one_group_vs_other_duels(mode,
                                             [list_of_agents[0]],
                                             [list_of_agents[1]],
                                             games_per_duel=n_games,
                                             shuffle=True)

    def all_vs_all(self, mode, list_of_agents, n_games):
        return self.one_group_vs_other_duels(mode, list_of_agents, list_of_agents, games_per_duel=n_games)
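# Hedged usage sketch (assumption, not in the original source): pitting two agents
# against each other across MPI ranks, launched e.g. with
# `mpiexec -n 4 python this_script.py`. Agent import paths follow the ones used
# elsewhere in this repo; the game count is illustrative.
from agents.random_agent import RandomAgent
from agents.greedy_agent_boost import GreedyAgentBoost

multi_arena = ArenaMultiThread()
stats = multi_arena.run_many_games('deterministic',
                                   [RandomAgent(distribution='first_buy'), GreedyAgentBoost()],
                                   40)
if stats is not None:  # only the main thread receives the gathered statistics
    print(stats)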
class QLearningTrainer:

    def __init__(self, alpha):
        self.agent = QValueAgent()
        self.env = gym_open_ai.make('splendor-v0')
        self.weights_token = 'weights_' + str(random.randint(0, 1000000)) + '.h5'
        self.arena = Arena()
        self.alpha = alpha

    def _set_token(self, token):
        self.weights_token = token

    def _get_token(self):
        return self.weights_token

    def _save_weights(self):
        self.agent.model.save_weights(self.weights_token)

    def _load_weights(self):
        self.agent.model.load_weights(self.weights_token)

    def new_value_formula(self, old_value, best_value, winner_id, reward, alpha):
        if winner_id is not None:
            return reward
        if winner_id is None:
            if old_value is not None and best_value is not None:
                return (1 - alpha) * old_value + alpha * best_value
            else:
                return None

    def run_one_game_and_collect_data(self, debug_info=True):
        there_was_no_action = False
        self.agent.train_mode()
        last_actual_player_0 = None
        last_actual_player_1 = None
        last_state_player_0 = None
        last_state_player_1 = None
        last_action_vec_player_0 = None
        last_action_vec_player_1 = None
        old_value = None
        old_state = None
        old_action_vec = None
        self.env.reset()
        observation = self.env.show_observation('deterministic')
        is_done = False
        number_of_moves = 0
        debug_collected_data = pd.DataFrame(columns=('active_player_id', 'winner_id', 'reward', 'best_value'))
        collected_data = pd.DataFrame(columns=('state_as_vector', 'value'))
        extra_move_done = False

        while not (is_done and extra_move_done) and number_of_moves < MAX_NUMBER_OF_MOVES:
            if is_done:
                extra_move_done = True
            current_state_as_dict = StateAsDict(self.env.current_state_of_the_game)
            actual_action, actual_eval, best_eval = self.agent.choose_action(observation, [None])
            if actual_action is None:
                there_was_no_action = True
                break
            # print('best value = {}'.format(best_value))
            observation, reward, is_done, info = self.env.step('deterministic', actual_action)
            previous_player_id = self.env.previous_player_id()
            winner_id = info['winner_id']
            if previous_player_id == 0:
                old_value = last_actual_player_0
                old_state = last_state_player_0
                old_action_vec = last_action_vec_player_0
            if previous_player_id == 1:
                old_value = last_actual_player_1
                old_state = last_state_player_1
                old_action_vec = last_action_vec_player_1
            if debug_info:
                state_status = old_state.__repr__() if old_state is not None else 'NONE'
                state_vector = vectorize_state(old_state) if old_state is not None else 'NONE'
                debug_collected_data = debug_collected_data.append(
                    {'state_ex': state_status,
                     'state_vec': state_vector,
                     'new_value': self.new_value_formula(old_value, best_eval, winner_id, reward, self.alpha),
                     'active_player_id': self.env.previous_player_id(),
                     'winner_id': winner_id,
                     'reward': reward,
                     'best_eval': best_eval,
                     'actual_eval': actual_eval,
                     'old_value': old_value,
                     'pa_points': self.env.previous_players_hand().number_of_my_points()},
                    ignore_index=True)
            if old_state is not None:
                collected_data = collected_data.append(
                    {'state_as_vector': vectorize_state(old_state),
                     'action_vector': old_action_vec,
                     'value': self.new_value_formula(old_value, best_eval, winner_id, reward, self.alpha)},
                    ignore_index=True)
            if previous_player_id == 0:
                last_actual_player_0 = actual_eval
                last_state_player_0 = current_state_as_dict
                last_action_vec_player_0 = vectorize_action(actual_action)
            if previous_player_id == 1:
                last_actual_player_1 = actual_eval
                last_state_player_1 = current_state_as_dict
                last_action_vec_player_1 = vectorize_action(actual_action)
            # let the opponent move:
            number_of_moves += 1

        if debug_info:
            debug_collected_data.to_csv('debug_info.csv')
        collected_data = collected_data.iloc[0:]
        self.agent.test_mode()
        return collected_data, there_was_no_action

    def train_network(self, collected_data, epochs):
        # prepare X and Y for training:
        self.agent.model.train_model(data_frame=collected_data, epochs=epochs)

    def run_test(self, opponent: Agent):
        results = self.arena.run_many_duels('deterministic', [self.agent, opponent],
                                            n_games=comm.Get_size(), n_proc_per_agent=1, shuffle=True)
        if main_process:
            print(results)

    def run_training(self, n_iterations, opponent):
        if USE_NEPTUNE:
            neptune.create_experiment('Q learning alpha = {}'.format(self.alpha))
        experience_replay_buffer = None
        for i in range(n_iterations):
            collected_data, there_was_no_action = self.run_one_game_and_collect_data(debug_info=True)
            if not there_was_no_action:
                self.agent.model.train_model(data_frame=collected_data, epochs=1)
                if experience_replay_buffer is None:
                    experience_replay_buffer = collected_data
                else:
                    experience_replay_buffer = experience_replay_buffer.append(collected_data)
            # Run test
            print('Game number = {}'.format(i))
            if i % 20 == 0 and i > 0:
                self.agent.model.train_model(data_frame=experience_replay_buffer, epochs=2)
            if i % 100 == 0 and i > 0:
                experience_replay_buffer = None
                print('Clearing buffer')
            if i % 10 == 0:
                if USE_NEPTUNE:
                    neptune.send_metric('epsilon', x=self.agent.epsilon)
                results = self.arena.run_many_duels('deterministic', [self.agent, opponent], number_of_games=50)
                print(results)
                if USE_NEPTUNE:
                    for pair in results.data.keys():
                        neptune.send_metric(pair[0] + '_wins', x=i, y=results.data[pair].wins)
                        neptune.send_metric(pair[0] + '_reward', x=i, y=results.data[pair].reward)
                        neptune.send_metric(pair[0] + '_victory_points', x=i, y=results.data[pair].victory_points)
        if USE_NEPTUNE:
            neptune.stop()
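# Hedged usage sketch (assumption, not in the original source): a short training run
# against a random opponent. The iteration count and alpha are illustrative; the
# RandomAgent import path follows the one used elsewhere in this repo.
from agents.random_agent import RandomAgent

trainer = QLearningTrainer(alpha=0.1)
trainer.run_training(n_iterations=200, opponent=RandomAgent(distribution='first_buy'))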
from agents.random_agent import RandomAgent
from agents.greedy_agent_boost import GreedyAgentBoost
from agents.minmax_agent import MinMaxAgent
from agents.greedysearch_agent import GreedySearchAgent
from arena.arena import Arena

fight_pit = Arena()

# time_profile = cProfile.Profile()
# time_profile.run('fight_pit.run_many_duels([goku, gohan], number_of_games=100)')
# time_profile.dump_stats('optimization1.prof')

n_games = 10

gohan = GreedyAgentBoost(weight=[100, 2, 2, 1, 0.1])
print(gohan.name)

goku = RandomAgent(distribution='uniform')
print(fight_pit.run_many_duels("deterministic", [goku, gohan], number_of_games=n_games, shuffle_agents=True))

goku = RandomAgent(distribution='uniform_on_types')
print(fight_pit.run_many_duels("deterministic", [goku, gohan], number_of_games=n_games, shuffle_agents=True))

goku = RandomAgent(distribution='first_buy')
print(fight_pit.run_many_duels("deterministic", [goku, gohan], number_of_games=n_games, shuffle_agents=True))

gohan = GreedyAgentBoost(weight=[100, 2.5, 1.5, 1, 0.1])
from agents.random_agent import RandomAgent
from agents.greedy_agent_boost import GreedyAgentBoost
from arena.arena import Arena

fight_pit = Arena()


def run_comparison(n_games=1000):
    gohan = GreedyAgentBoost()

    goku = RandomAgent(distribution='uniform')
    print(fight_pit.run_many_duels('deterministic', [goku, gohan], number_of_games=n_games, shuffle_agents=True))

    goku = RandomAgent(distribution='uniform_on_types')
    print(fight_pit.run_many_duels('deterministic', [goku, gohan], number_of_games=n_games, shuffle_agents=True))

    goku = RandomAgent(distribution='first_buy')
    print(fight_pit.run_many_duels('deterministic', [goku, gohan], number_of_games=n_games, shuffle_agents=True))

    gohan = GreedyAgentBoost(weight=[100, 2.5, 1.5, 1, 0.1])
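# Hedged entry point (assumption, not in the original snippet): run the comparison
# when the script is executed directly; the game count here is illustrative.
if __name__ == '__main__':
    run_comparison(n_games=100)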
class MultiArena:

    def __init__(self) -> None:
        self.env_initialized = False
        self.name = 'Multi Process Arena'
        self.collect_states_mode = False
        self.local_arena = Arena()

    def initialize_env(self, environment_id: str = 'gym_splendor_code:splendor-deterministic-v0'):
        """Arena has its private environment to run the game."""
        self.env = gym_open_ai.make(environment_id)

    # def run_multi_process_self_play(self, mode, agent: Agent, render_game=False):
    #     self.local_arena.run_self_play(mode, agent, render_game=render_game, mpi_communicator=comm)

    def run_many_duels(self, mode, list_of_agents: List[Agent], n_games: int,
                       n_proc_per_agent: int, shuffle: bool = True):
        assert n_games > 0, 'Number of games must be positive.'
        assert len(list_of_agents) == 2, 'This method can run on exactly two agents.'

        n_process = comm.Get_size()
        my_rank = comm.Get_rank()
        n_proc_per_agent = max(min(n_proc_per_agent, n_proc), 1)
        n_parallel_games = int(n_process / n_proc_per_agent)
        remaining_processes = n_process % n_proc_per_agent
        extra_process_per_game = int(remaining_processes / n_parallel_games)
        remaining_processes_after_all = remaining_processes % n_parallel_games

        colors = []
        for i in range(n_parallel_games):
            if i < remaining_processes_after_all:
                processes_to_add = n_proc_per_agent + extra_process_per_game + 1
                colors += [i] * processes_to_add
            if i >= remaining_processes_after_all:
                processes_to_add = n_proc_per_agent + extra_process_per_game
                colors += [i] * processes_to_add
        my_color = colors[my_rank]

        # set agents colors:
        for agent in list_of_agents:
            agent.set_color(my_color)

        # create communicators:
        new_communicator = comm.Split(my_color)

        # prepare jobs for each group of processes
        n_games_for_one_communicator = int(n_games / n_parallel_games)
        remaining_games = n_games % n_parallel_games
        if my_color < remaining_games:
            my_games = n_games_for_one_communicator + 1
        if my_color >= remaining_games:
            my_games = n_games_for_one_communicator

        local_main = new_communicator.Get_rank() == 0
        if local_main:
            print('My color = {} I have to take = {} games'.format(my_color, my_games))

        local_results = GameStatisticsDuels(list_of_agents[:1], list_of_agents[1:])
        for _ in range(my_games):
            if shuffle:
                starting_agent_id = random.choice(range(2))
            one_game_results = self.local_arena.run_one_duel(mode, list_of_agents,
                                                             mpi_communicator=new_communicator)
            if local_main:
                local_results.register(one_game_results)

        # Gather all results:
        combined_results_list = comm.gather(local_results, root=0)
        if main_process:
            global_results = GameStatisticsDuels(list_of_agents[:1], list_of_agents[1:])
            for local_result in combined_results_list:
                global_results.register(local_result)
            return global_results
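# Hedged usage sketch (assumption, not in the original source): running MPI-parallel
# duels with MultiArena under mpiexec; only the root process receives the aggregated
# results. Agent import paths follow the ones used elsewhere in this repo.
from agents.random_agent import RandomAgent
from agents.greedy_agent_boost import GreedyAgentBoost

multi_arena = MultiArena()
results = multi_arena.run_many_duels('deterministic', [RandomAgent(), GreedyAgentBoost()],
                                     n_games=16, n_proc_per_agent=1, shuffle=True)
if results is not None:
    print(results)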
from agents.greedy_agent_boost import GreedyAgentBoost
from agents.random_agent import RandomAgent
from agents.single_mcts_agent import SingleMCTSAgent
from arena.arena import Arena
from monte_carlo_tree_search.evaluation_policies.heura_val import HeuraEvaluator

arek = Arena()
a1 = GreedyAgentBoost()
a2 = SingleMCTSAgent(150, HeuraEvaluator(), 0.4, True, False)
results = arek.run_many_duels('deterministic', [a1, a2], 1, True)
print(results)
from agents.random_agent import RandomAgent
from agents.minmax_agent import MinMaxAgent
from agents.greedysearch_agent2 import GreedySearchAgent
from arena.arena import Arena

environment_id = 'gym_splendor_code:splendor-v1'
fight_pit = Arena(environment_id)

goku = RandomAgent(distribution='first_buy')
goku2 = RandomAgent(distribution='uniform')
# gohan = RandomAgent(distribution='uniform_on_types')
# gohan = RandomAgent(distribution='uniform')
# goku = GreedyAgent(weight=0.3)
gohan = GreedySearchAgent(depth=5)
goku = MinMaxAgent(name="MinMax", depth=3)
gohan.name = "g2"
goku.name = "g1"

# profi = cProfile.Profile()
# profi.run('(fight_pit.run_many_duels([goku, gohan], number_of_games=50))')
# profi.dump_stats('profi2.prof')

fight_pit.run_one_duel([goku, gohan], render_game=True)

# time_dupa = time.time()
# for i in range(100):
#     print(i)
#     fight_pit = Arena()
#     fight_pit.run_one_duel([goku, gohan], starting_agent_id=0)
# print(time.time() - time_dupa)
class Simulator:

    def __init__(self, real_run):
        self._real_run = real_run
        pygame.init()

    def init_objects(self, robot, map_file):
        self._robot = copy.deepcopy(robot)
        self._real_map = Arena(self._robot)
        ArenaUtils.load_arena_from_file(self._real_map, map_file)
        self._explore_map = Arena(self._robot)
        self._explore_map.set_allunexplored()
        self.BLOCK_SIZE = 30
        self._waypoint = None

        # Simulator Main Window
        self._arena_size = (ArenaConstant.ARENA_COL.value * self.BLOCK_SIZE,
                            ArenaConstant.ARENA_ROW.value * self.BLOCK_SIZE)
        #self._screen = pygame.display.set_mode((self._arena_size[0] * 2, self._arena_size[1]))
        self._screen = pygame.display.set_mode((self._arena_size[0], self._arena_size[1]))
        self._screen.fill((0, 0, 0))

        # Simulator Background
        self._background = pygame.Surface(self._arena_size)
        self._background = self._background.convert()
        self._background.fill((169, 169, 169))

        # Simulator Alerts
        # self._alert = pygame.Surface((9 * self.BLOCK_SIZE, 3 * self.BLOCK_SIZE))
        # self._alert = self._alert.convert()
        # self._alert.fill((255,255,255))
        # pygame.draw.rect(self._alert, (0,0,0), (0, 0, 9 * self.BLOCK_SIZE, 3 * self.BLOCK_SIZE), 1)

        # Simulator Background
        self._menu = pygame.Surface(self._arena_size)
        self._menu = self._menu.convert()
        self._menu.fill((255, 255, 255))

        # Prepare Background and Arena
        for row in range(ArenaConstant.ARENA_ROW.value):
            for col in range(ArenaConstant.ARENA_COL.value):
                if row == 17 and col == 0:
                    pygame.draw.rect(self._background, (0, 100, 0),
                                     (col * self.BLOCK_SIZE, row * self.BLOCK_SIZE,
                                      self.BLOCK_SIZE * 3, self.BLOCK_SIZE * 3))
                if row == 0 and col == 12:
                    pygame.draw.rect(self._background, (255, 140, 0),
                                     (col * self.BLOCK_SIZE, row * self.BLOCK_SIZE,
                                      self.BLOCK_SIZE * 3, self.BLOCK_SIZE * 3))
                pygame.draw.rect(self._background, (0, 0, 0),
                                 (col * self.BLOCK_SIZE, row * self.BLOCK_SIZE,
                                  self.BLOCK_SIZE, self.BLOCK_SIZE), 1)

        # Simulator Menu
        # self._large_text = pygame.font.Font('freesansbold.ttf',20)
        # self._coverage_ex_menu_surf, self._coverage_ex_menu_rect = self.text_objects("Coverage-Limited Exploration", self._large_text)
        # self._coverage_ex_menu_rect.center = ((self._arena_size[0] / 2, 3 * self._arena_size[1] / 7))
        # self._coverage_rect = pygame.Rect(self._coverage_ex_menu_rect.left - 15, self._coverage_ex_menu_rect.top - 15, self._coverage_ex_menu_rect.width + self.BLOCK_SIZE, self._coverage_ex_menu_rect.height + self.BLOCK_SIZE)
        # pygame.draw.rect(self._menu, (0,0,0), self._coverage_rect, 3)
        # self._menu.blit(self._coverage_ex_menu_surf, self._coverage_ex_menu_rect)
        # self._ex_menu_surf, self._ex_menu_rect = self.text_objects("Exploration", self._large_text)
        # self._ex_menu_rect.center = ((self._arena_size[0] / 2, self._arena_size[1] / 7))
        # self._ex_rect = pygame.Rect.copy(self._coverage_rect)
        # self._ex_rect.center = self._ex_menu_rect.center
        # pygame.draw.rect(self._menu, (0,0,0), self._ex_rect, 3)
        # self._menu.blit(self._ex_menu_surf, self._ex_menu_rect)
        # self._timed_ex_menu_surf, self._timed_ex_menu_rect = self.text_objects("Timed Exploration", self._large_text)
        # self._timed_ex_menu_rect.center = ((self._arena_size[0] / 2, 2 * self._arena_size[1] / 7))
        # self._timed_rect = pygame.Rect.copy(self._coverage_rect)
        # self._timed_rect.center = self._timed_ex_menu_rect.center
        # pygame.draw.rect(self._menu, (0,0,0), self._timed_rect, 3)
        # self._menu.blit(self._timed_ex_menu_surf, self._timed_ex_menu_rect)
        # self._wp_menu_surf, self._wp_menu_rect = self.text_objects("Add Waypoint", self._large_text)
        # self._wp_menu_rect.center = ((self._arena_size[0] / 2, 4 * self._arena_size[1] / 7))
        # self._wp_rect = pygame.Rect.copy(self._coverage_rect)
        # self._wp_rect.center = self._wp_menu_rect.center
        # pygame.draw.rect(self._menu, (0,0,0), self._wp_rect, 3)
        # self._menu.blit(self._wp_menu_surf, self._wp_menu_rect)
        # self._fp_menu_surf, self._fp_menu_rect = self.text_objects("Fastest Path", self._large_text)
        # self._fp_menu_rect.center = ((self._arena_size[0] / 2, 5 * self._arena_size[1] / 7))
        # self._fp_rect = pygame.Rect.copy(self._coverage_rect)
        # self._fp_rect.center = self._fp_menu_rect.center
        # pygame.draw.rect(self._menu, (0,0,0), self._fp_rect, 3)
        # self._menu.blit(self._fp_menu_surf, self._fp_menu_rect)
        # self._mdf_menu_surf, self._mdf_menu_rect = self.text_objects("Generate MDF", self._large_text)
        # self._mdf_menu_rect.center = ((self._arena_size[0] / 2, 6 * self._arena_size[1] / 7))
        # self._mdf_rect = pygame.Rect.copy(self._coverage_rect)
        # self._mdf_rect.center = self._mdf_menu_rect.center
        # pygame.draw.rect(self._menu, (0,0,0), self._mdf_rect, 3)
        # self._menu.blit(self._mdf_menu_surf, self._mdf_menu_rect)

        self._screen.blit(self._background, (0, 0))
        #self._screen.blit(self._alert, (3 * self.BLOCK_SIZE, 8 * self.BLOCK_SIZE))
        self._screen.blit(self._menu, (ArenaConstant.ARENA_COL.value * self.BLOCK_SIZE, 0))
        pygame.display.update()

    def add_waypoint(self, pos):
        self._waypoint = pos
        pygame.draw.rect(self._background, (0, 0, 128),
                         (pos[1] * self.BLOCK_SIZE, pos[0] * self.BLOCK_SIZE,
                          self.BLOCK_SIZE, self.BLOCK_SIZE))
        pygame.draw.rect(self._background, (0, 0, 0),
                         (pos[1] * self.BLOCK_SIZE, pos[0] * self.BLOCK_SIZE,
                          self.BLOCK_SIZE, self.BLOCK_SIZE), 1)
        self._screen.blit(self._background, (0, 0))
        pygame.display.update()

    def text_objects(self, text, font):
        text_surface = font.render(text, True, (0, 0, 0))
        return text_surface, text_surface.get_rect()

    def get_key(self):
        while 1:
            event = pygame.event.poll()
            if event.type == KEYDOWN:
                return event.key
            else:
                pass

    def display_box(self, screen, message):
        "Print a message in a box in the middle of the screen"
        fontobject = pygame.font.Font(None, 18)
        pygame.draw.rect(screen, (0, 0, 0),
                         ((screen.get_width() / 2) - 100,
                          (screen.get_height() / 2) - 10, 200, 20), 0)
        pygame.draw.rect(screen, (255, 255, 255),
                         ((screen.get_width() / 2) - 102,
                          (screen.get_height() / 2) - 12, 204, 24), 1)
        if len(message) != 0:
            screen.blit(fontobject.render(message, 1, (255, 255, 255)),
                        ((screen.get_width() / 2) - 100, (screen.get_height() / 2) - 10))
        pygame.display.flip()

    def ask(self, screen, question):
        "ask(screen, question) -> answer"
        pygame.font.init()
        current_string = []
        self.display_box(screen, question + ": " + ''.join(current_string))
        while 1:
            inkey = self.get_key()
            if inkey == K_BACKSPACE:
                current_string = current_string[0:-1]
            elif inkey == K_RETURN:
                break
            elif inkey == K_MINUS:
                current_string.append("_")
            elif inkey <= 127:
                current_string.append(chr(inkey))
            self.display_box(screen, question + ": " + ''.join(current_string))
        return ''.join(current_string)

    def run_exploration(self, robot, map_file):
        self.init_objects(robot, map_file)
        while True:
            # _mouse = pygame.mouse.get_pos()
            # if 450 + self._ex_rect.left < _mouse[0] < 450 + self._ex_rect.right and self._ex_rect.top < _mouse[1] < self._ex_rect.bottom:
            #     pygame.draw.rect(self._menu, (255,0,0), self._ex_rect, 3)
            # else:
            #     pygame.draw.rect(self._menu, (0,0,0), self._ex_rect, 3)
            # if 450 + self._timed_rect.left < _mouse[0] < 450 + self._timed_rect.right and self._timed_rect.top < _mouse[1] < self._timed_rect.bottom:
            #     pygame.draw.rect(self._menu, (255,0,0), self._timed_rect, 3)
            # else:
            #     pygame.draw.rect(self._menu, (0,0,0), self._timed_rect, 3)
            # if 450 + self._coverage_rect.left < _mouse[0] < 450 + self._coverage_rect.right and self._coverage_rect.top < _mouse[1] < self._coverage_rect.bottom:
            #     pygame.draw.rect(self._menu, (255,0,0), self._coverage_rect, 3)
            # else:
            #     pygame.draw.rect(self._menu, (0,0,0), self._coverage_rect, 3)
            # if 450 + self._wp_rect.left < _mouse[0] < 450 + self._wp_rect.right and self._wp_rect.top < _mouse[1] < self._wp_rect.bottom:
            #     pygame.draw.rect(self._menu, (255,0,0), self._wp_rect, 3)
            # else:
            #     pygame.draw.rect(self._menu, (0,0,0), self._fp_rect, 3)
            # if 450 + self._fp_rect.left < _mouse[0] < 450 + self._fp_rect.right and self._fp_rect.top < _mouse[1] < self._fp_rect.bottom:
            #     pygame.draw.rect(self._menu, (255,0,0), self._fp_rect, 3)
            # else:
            #     pygame.draw.rect(self._menu, (0,0,0), self._fp_rect, 3)
            # if 450 + self._mdf_rect.left < _mouse[0] < 450 + self._mdf_rect.right and self._mdf_rect.top < _mouse[1] < self._mdf_rect.bottom:
            #     pygame.draw.rect(self._menu, (255,0,0), self._mdf_rect, 3)
            # else:
            #     pygame.draw.rect(self._menu, (0,0,0), self._mdf_rect, 3)
            # self._screen.blit(self._menu, (ArenaConstant.ARENA_COL.value * self.BLOCK_SIZE,0))
            # pygame.display.update()

            keys = pygame.key.get_pressed()
            for event in pygame.event.get():
                if event.type == QUIT or keys[pygame.K_ESCAPE]:
                    CommMgr.close()
                    pygame.quit()
                    sys.exit()
                elif keys[pygame.K_RETURN]:
                    self.init_objects(robot, map_file)
                    if self._real_run:
                        CommMgr.connect()
                    _explore = Exploration(self._explore_map, self._real_map, self._robot,
                                           300, 3600, (self._screen, self._background))
                    _explore.run()
                    if self._real_run:
                        CommMgr.close()
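# Hedged usage sketch (assumption, not in the original source): launching the
# simulator in pure-simulation mode. The start pose Robot((18, 1), True) and the
# map path mirror the other entry-point snippets; the import of Robot is omitted
# here because its module path is not shown in the source.
if __name__ == '__main__':
    simulator = Simulator(real_run=False)
    simulator.run_exploration(Robot((18, 1), True), 'map/SampleWeek11.txt')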