def delete_temp(self, old_model, temp_model):
    """
    Delete temporary files when 2 versions of the same model exist
    (trained for different amounts of time).
    Keep the best model and delete the rest.

    Parameters
    ----------
    old_model: string
        Model filename before training.
    temp_model: string
        Temporary model filename (after training).

    Return
    ------
    use_training: boolean
        False only if the old model wins against the new temporary model.
        True otherwise.
    """
    use_training = True
    # Several versions of the same model
    if (old_model is not None) and (temp_model == old_model + '_temp'):
        # Confront them
        agent1 = RLAgent()
        agent1.load_model(old_model)
        agent2 = RLAgent()
        agent2.load_model(temp_model)
        results = compare_agents(agent1, agent2, n_games=10,
                                 time_limit=100, verbose=False)
        # Keep the best
        if results[3] >= results[2]:
            # The more trained agent is the best:
            # replace the old model files with the temporary ones
            os.remove('Models/' + old_model + '.csv')
            os.remove('Models/data/count_' + old_model + '.csv')
            os.rename(r'Models/' + temp_model + '.csv',
                      r'Models/' + old_model + '.csv')
            os.rename(r'Models/data/count_' + temp_model + '.csv',
                      r'Models/data/count_' + old_model + '.csv')
        else:
            # The less trained agent is the best: discard the temporary files
            os.remove('Models/' + temp_model + '.csv')
            os.remove('Models/data/count_' + temp_model + '.csv')
            use_training = False
    return use_training
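# Usage sketch (hypothetical): `manager` stands for an instance of the
# enclosing class, which is not shown in this excerpt, and the model names
# are illustrative only.
#
#   use_training = manager.delete_temp('greedy0_6_vsSelf',
#                                       'greedy0_6_vsSelf_temp')
#   if not use_training:
#       print('The previous model beat the re-trained one; training discarded.')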
def tournament(self, change_opp=False):
    """
    Method to rank the different models obtained after any training.

    For each value of the epsilon factor of an RL Agent declared in the
    init method, this method makes the corresponding models take part in a
    tournament. Each model plays 10 games against all the others, and the
    scores of each model against each opponent are stored in a CSV file.
    A TXT file is also generated from the CSV file: it displays the ranking
    of the models, alongside each model's total score against all other
    models.

    Parameter
    ---------
    change_opp: boolean
        Set to True to include agents trained against mixed opponents in
        the tournament.

    Outputs
    -------
    Tournament report: CSV file
        Located at 'Models/results/(self.tournament_name).csv'.
        File storing the results of each confrontation between 2 agents.
    Tournament ranking: TXT file
        Located at 'Models/results/(self.tournament_name).txt'.
        File ranking the agents using the results of the tournament, with
        the total score of each agent displayed.
    """
    n_players = len(self.epsilon_values) * (
        (int(self.random_training) + int(self.self_training))
        * (1 + int(change_opp)))
    print('-----------------------------')
    print('TOURNAMENT with {} agents'.format(n_players))
    print('-----------------------------\n')

    # Initialization of scores: some rows and columns are
    # only used for saving configurations of models
    # (epsilon, opponents, change of opponent).
    scores = -np.ones((n_players + 3, n_players + 3))

    # List of opponent kinds
    training_ways = []
    if self.random_training:
        training_ways.append('Random')
        if change_opp:
            training_ways.append('RandomvsSelf')
    if self.self_training:
        training_ways.append('Self')
        if change_opp:
            training_ways.append('SelfvsRandom')

    # List of players
    players = [[epsilon, training_way]
               for epsilon in self.epsilon_values
               for training_way in training_ways]

    for idx1, player1 in enumerate(players):
        epsilon1 = player1[0]
        training_way1 = player1[1]
        filename = ('greedy' + str(epsilon1)[0] + '_' + str(epsilon1)[2:]
                    + '_vs' + training_way1)
        # Load first agent
        agent1 = RLAgent()
        agent1.load_model(filename)
        # Save config of agent1
        scores[idx1 + 3, 0] = epsilon1
        # 0: RANDOM | 1: SELF
        scores[idx1 + 3, 1] = (int(training_way1 == 'Self')
                               + int(training_way1 == 'SelfvsRandom'))
        # -1: nothing | 0: Random vs Self | 1: Self vs Random
        scores[idx1 + 3, 2] = -1 + (2 * int(training_way1 == 'SelfvsRandom')
                                    + int(training_way1 == 'RandomvsSelf'))

        for idx2, player2 in enumerate(players):
            epsilon2 = player2[0]
            training_way2 = player2[1]
            filename = ('greedy' + str(epsilon2)[0] + '_' + str(epsilon2)[2:]
                        + '_vs' + training_way2)
            # Load second agent
            agent2 = RLAgent()
            agent2.load_model(filename)
            # Save config of agent2
            scores[0, idx2 + 3] = epsilon2
            scores[1, idx2 + 3] = (int(training_way2 == 'Self')
                                   + int(training_way2 == 'SelfvsRandom'))
            scores[2, idx2 + 3] = -1 + (
                2 * int(training_way2 == 'SelfvsRandom')
                + int(training_way2 == 'RandomvsSelf'))

            print('Current match:')
            print('Player1: epsilon = {}, trained vs {}'.format(
                epsilon1, training_way1))
            print('Player2: epsilon = {}, trained vs {}'.format(
                epsilon2, training_way2))

            results = compare_agents(agent1, agent2, n_games=10,
                                     time_limit=100, verbose=False)
            # Score of agent1
            scores[idx1 + 3, idx2 + 3] = results[2]
            # Score of agent2
            scores[idx2 + 3, idx1 + 3] = results[3]
            print('------')

    # Update tournament file name
    name = self.tournament_name[:-1]
    nbr = int(self.tournament_name[-1])
    nbr += 1
    self.tournament_name = name + str(nbr)

    # Save tournament
    np.savetxt(str('Models/results/' + self.tournament_name + '.csv'),
               scores, delimiter=',')

    # Rank players
    self.tournament_ranking(self.tournament_name, self.tournament_name)
    print('Results of tournament are stored in {}.csv and {}.txt\n'.format(
        self.tournament_name, self.tournament_name))
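# Usage sketch (hypothetical): `manager` stands for an instance of the
# enclosing class. It assumes the instance exposes the attributes used above
# (epsilon_values, random_training, self_training, and a tournament_name
# ending with a digit).
#
#   manager.tournament(change_opp=True)
#   # -> writes 'Models/results/<tournament_name>.csv' and the matching .txt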
def train(n_epochs, epsilon, gamma, load_model, filename, random_opponent,
          n_games_test, freq_test, n_skip_games=0, verbose=False):
    """
    Train 2 agents by making them play and learn together.
    Save the learned Q-function into a CSV file.
    It is possible to confront 1 of the agents (against either the user or
    a Random Agent) during training, as often as one wants.
    It is also possible to train an already trained model.

    Parameters
    ----------
    n_epochs: int
        Number of games used for training.
    epsilon: float (in [0,1])
        Fraction of greedy decisions during training of the 2 RL Agents.
    gamma: float (in [0,1])
        Factor of significance of first actions over last ones for the
        2 RL Agents.
    load_model: string
        CSV filename in which the learned Q-function of an agent is stored.
        If load_model = 'model', the function loads the model
        './Models/model.csv'.
        If load_model is not None, the previous parameters epsilon and gamma
        are used for a second training.
    filename: string
        Name of the CSV file that will store the learned Q-function of one
        of the agents. The path to the CSV file is then ./Models/filename.csv.
        The counter of state-action pairs is also stored at
        ./Models/data/count_filename.csv for future training.
    random_opponent: boolean
        If set to True, the function trains 1 RL Agent by making it play
        against a Random Agent. Otherwise, the RL Agent is trained by
        playing against another version of itself.
    n_games_test: int
        Number of games one of the RL Agents plays against a Random Agent
        for testing. If set to 0, the RL Agents will not be tested by a
        Random Agent.
    freq_test: int
        Number of epochs after which one of the RL Agents plays n_games_test
        games against a Random Agent.
        If set to 1000, every 1000 epochs of training, one of the RL Agents
        is tested against a Random Agent.
        If set to 0, the test occurs at the last epoch of training only.
        If set to -1, none of the agents is tested during training.
    n_skip_games: int
        Number of epochs after which the user can choose to play against one
        of the learning agents.
        If set to 1000, every 1000 games, the user can choose to play against
        one agent.
        If set to 0, the user can choose to play against one agent at the
        last epoch only.
        If set to -1, no choice is offered and the user cannot test any agent.
    verbose: boolean
        If set to True, each game action during training has a written
        explanation.

    Return
    ------
    learning_results: list
        Only significant with n_games_test > 0 (otherwise, empty list by
        default).
        List of the n_epochs // freq_test test results against a Random
        Agent. Each test result is a list:
        [current epoch, score of RL Agent, number of finished games,
        n_games_test].
    """
    # Learning agent
    agent1 = RLAgent(epsilon, gamma)
    if load_model is not None:
        agent1.load_model(load_model)

    # Choose opponent
    if random_opponent:
        agent2 = RandomAgent()
        time_limit = None
        print('Training vs Random')
    else:
        agent2 = RLAgent(epsilon, gamma)
        if load_model is not None:
            agent2.load_model(load_model)
        time_limit = None
        print('Training vs Self')

    start_idx = 0
    scores = [0, 0]

    # If the user only confronts the agent at the last epoch
    # or if there is no confrontation at all
    if n_skip_games in [-1, 0]:
        n_skip_games = n_epochs - n_skip_games
    # Boolean for a game between the user and agent1 preceding a game
    # between agent1 and agent2
    play_checkpoint_usr = False

    # If agent1 is tested at the last epoch only or not tested at all
    if freq_test in [-1, 0]:
        freq_test = n_epochs - freq_test
    # Number of games between agent1 and a Random Agent for testing
    n_games_test_mem = n_games_test
    learning_results = []

    # Start training
    print('Training epoch:')
    for epoch in range(1, n_epochs + 1):
        if epoch % (n_epochs // 10) == 0:
            print(epoch, '/', n_epochs)

        # Update boolean for playing with the user
        play_checkpoint_usr = bool(epoch % n_skip_games == 0)
        if play_checkpoint_usr:
            # Print training status
            print('Number of games: ', epoch)
            print('Scores: ', scores)
            # Ask user to play
            play = int(input('Play ? (1 Yes | 0 No)\n'))
            play_checkpoint_usr = bool(play)

        # Update number of test games for this epoch
        n_games_test = int(epoch % freq_test == 0) * n_games_test_mem

        # Start game
        game_over, winner, test_results = game_2Agents(
            agent1, agent2, start_idx=start_idx, train=True,
            time_limit=time_limit, n_games_test=n_games_test,
            play_checkpoint_usr=play_checkpoint_usr, verbose=verbose)
        assert game_over, 'Game not over but new game beginning during training'
        if winner in [0, 1]:
            scores[winner] += 1

        # Save test games of agent1 against a Random Agent
        if bool(n_games_test):
            assert len(test_results) != 0, \
                'Agent1 has been tested but there is no result of that.'
            learning_results.append(
                [epoch, test_results[2], test_results[0], test_results[1]])

        # Next round
        start_idx = 1 - start_idx

    # Save Q-function of agent1
    np.savetxt(str('Models/' + filename + '.csv'), agent1.Q, delimiter=',')
    # Save stats for learning rate of agent1
    np.savetxt(str('Models/data/count_' + filename + '.csv'),
               agent1.count_state_action, delimiter=',')
    return learning_results
if __name__ == "__main__":
    train(n_epochs=5000, epsilon=0.6, gamma=1.0, load_model=None,
          filename='greedy0_6_vsSelf_test', random_opponent=False,
          n_games_test=0, freq_test=-1, n_skip_games=-1, verbose=False)

    agent1 = RLAgent()
    agent1.load_model('greedy0_2_vsRandomvsSelf')
    agent2 = RLAgent()
    agent2.load_model('greedy0_6_vsSelf_test')
    results = compare_agents(agent1, agent2, n_games=10, time_limit=None,
                             verbose=False)
    print(results)
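# Note on reading `results`: in the code above (delete_temp and tournament),
# results[2] and results[3] are used as the scores of agent1 and agent2
# respectively, so the final print shows how the pre-trained
# 'greedy0_2_vsRandomvsSelf' model fares against the freshly trained one.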