Example #1
    def delete_temp(self, old_model, temp_model):
        """
    Delete temporary files when two versions of the same model exist 
    (trained for different amounts of time). Keep the best model and 
    delete the other.

    Parameters
    ----------
    old_model: string
      Model filename before training.
    temp_model: string
      Temporary model filename (after training).

    Return
    ------
    use_training: boolean
      False only if the old model wins against the new temporary model.
      True otherwise.
    """

        use_training = True
        # Several versions of same model
        if (old_model is not None) and (temp_model == old_model + '_temp'):
            # Confront them
            agent1 = RLAgent()
            agent1.load_model(old_model)
            agent2 = RLAgent()
            agent2.load_model(temp_model)
            results = compare_agents(agent1,
                                     agent2,
                                     n_games=10,
                                     time_limit=100,
                                     verbose=False)

            # Keep best
            if results[3] >= results[2]:
                # More trained agent is the best
                os.remove('Models/' + old_model + '.csv')
                os.remove('Models/data/count_' + old_model + '.csv')
                os.rename(r'Models/' + temp_model + '.csv',
                          r'Models/' + old_model + '.csv')
                os.rename(r'Models/data/count_' + temp_model + '.csv',
                          r'Models/data/count_' + old_model + '.csv')
            else:
                # Less trained agent is the best
                os.remove('Models/' + temp_model + '.csv')
                os.remove('Models/data/count_' + temp_model + '.csv')
                use_training = False
        return use_training
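
A minimal usage sketch of this method (hedged: the class that owns delete_temp is not shown in this snippet, so the Trainer name below is an assumption; the model filenames reuse names that appear later on this page):

    # Hypothetical call: after retraining 'greedy0_6_vsSelf' into
    # 'greedy0_6_vsSelf_temp', keep only the stronger of the two models
    # and report whether the new training run should be kept.
    trainer = Trainer()  # assumed owner class of delete_temp
    use_training = trainer.delete_temp(old_model='greedy0_6_vsSelf',
                                       temp_model='greedy0_6_vsSelf_temp')
    if not use_training:
        print('Old model won: temporary files were discarded.')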
Example #2
    def tournament(self, change_opp=False):
        """
    Rank the different models obtained after training. For each value 
    of the epsilon factor of an RL Agent declared in the init method, 
    this method enters the corresponding model into a tournament. 
    Each model plays 10 games against every other model, and the score 
    of each model against each opponent is stored in a CSV file. A TXT 
    file is also generated from the CSV file: it ranks the models and 
    displays each model's total score against all the others.

    Parameter
    ---------
    change_opp: boolean
      Set to True to also include agents trained against mixed 
      opponents in the tournament.

    Outputs
    -------
    Tournament report: CSV file
      Located at: 'Models/results/(self.tournament_name).csv'.
      File storing the results of each confrontation between 2 
      agents.
    Tournament ranking: TXT file
      Located at: 'Models/results/(self.tournament_name).txt'.
      File ranking the agents using the results of the tournament,
      with total score of each agent displayed.
    """

        n_players = len(self.epsilon_values) * (
            (int(self.random_training) + int(self.self_training)) *
            (1 + int(change_opp)))

        print('-----------------------------')
        print('TOURNAMENT with {} agents'.format(n_players))
        print('-----------------------------\n')

        # Initialization of scores: some rows and columns are
        # only used for saving configurations of models
        # (epsilon, opponents, change of opponent).
        scores = -np.ones((n_players + 3, n_players + 3))

        # List of opponent kinds
        training_ways = []
        if self.random_training:
            training_ways.append('Random')
            if change_opp:
                training_ways.append('RandomvsSelf')
        if self.self_training:
            training_ways.append('Self')
            if change_opp:
                training_ways.append('SelfvsRandom')

        # List of players
        players = [[epsilon, training_way] for epsilon in self.epsilon_values
                   for training_way in training_ways]

        for idx1, player1 in enumerate(players):
            epsilon1 = player1[0]
            training_way1 = player1[1]
            filename = ('greedy' + str(epsilon1)[0] + '_' + str(epsilon1)[2:] +
                        '_vs' + training_way1)

            # Load first agent
            agent1 = RLAgent()
            agent1.load_model(filename)

            # Save config of agent1
            scores[idx1 + 3, 0] = epsilon1
            # 0: RANDOM | 1: SELF
            scores[idx1 + 3, 1] = (int(training_way1 == 'Self') +
                                   int(training_way1 == 'SelfvsRandom'))
            # -1: nothing | 0: Random vs Self | 1: Self vs Random
            scores[idx1 + 3,
                   2] = -1 + (2 * int(training_way1 == 'SelfvsRandom') +
                              int(training_way1 == 'RandomvsSelf'))

            for idx2, player2 in enumerate(players):
                epsilon2 = player2[0]
                training_way2 = player2[1]
                filename = ('greedy' + str(epsilon2)[0] + '_' +
                            str(epsilon2)[2:] + '_vs' + training_way2)

                # Load second agent
                agent2 = RLAgent()
                agent2.load_model(filename)

                # Save config of agent2
                scores[0, idx2 + 3] = epsilon2
                scores[1, idx2 + 3] = (int(training_way2 == 'Self') +
                                       int(training_way2 == 'SelfvsRandom'))
                scores[2, idx2 +
                       3] = -1 + (2 * int(training_way2 == 'SelfvsRandom') +
                                  int(training_way2 == 'RandomvsSelf'))

                print('Current match:')
                print('Player1: epsilon = {}, trained vs {}'.format(
                    epsilon1, training_way1))
                print('Player2: epsilon = {}, trained vs {}'.format(
                    epsilon2, training_way2))

                results = compare_agents(agent1,
                                         agent2,
                                         n_games=10,
                                         time_limit=100,
                                         verbose=False)

                # Score of agent1
                scores[idx1 + 3, idx2 + 3] = results[2]
                # Score of agent2
                scores[idx2 + 3, idx1 + 3] = results[3]

                print('------')

        # Update tournament file name
        name = self.tournament_name[:-1]
        nbr = int(self.tournament_name[-1])
        nbr += 1
        self.tournament_name = name + str(nbr)

        # Save tournament
        np.savetxt(str('Models/results/' + self.tournament_name + '.csv'),
                   scores,
                   delimiter=',')

        # Rank players
        self.tournament_ranking(self.tournament_name, self.tournament_name)

        print('Results of tournament are stored in {}.csv and {}.txt\n'.format(
            self.tournament_name, self.tournament_name))
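
A hedged call sketch, assuming a hypothetical Trainer-like object whose __init__ sets epsilon_values, random_training, self_training and a tournament_name ending in a digit (everything this method reads from self):

    # Hypothetical driver: run one tournament without the mixed-opponent
    # agents, then a second one that also includes them. The digit at the
    # end of tournament_name is incremented on each call, so the two runs
    # write to different CSV/TXT files under Models/results/.
    trainer = Trainer()  # assumed to define epsilon_values, tournament_name, etc.
    trainer.tournament(change_opp=False)
    trainer.tournament(change_opp=True)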
Example #3
def train(n_epochs,
          epsilon,
          gamma,
          load_model,
          filename,
          random_opponent,
          n_games_test,
          freq_test,
          n_skip_games=int(0),
          verbose=False):
    """
  Train 2 agents by making them play and learn together, and save the
  learned Q-function to a CSV file. It is possible to confront 1 of 
  the agents (against either the user or a Random Agent) during 
  training, as often as one wants. It is also possible to further 
  train an already trained model.

  Parameters
  ----------
  n_epochs: int
    Number of games used for training.
  epsilon: float (in [0,1])
    Fraction of greedy decisions during training of the 2 RL Agents.
  gamma: float (in [0,1])
    Factor of significance of first actions over last ones for the 
    2 RL Agents.
  load_model: string
    CSV filename in which the learned Q-function of an agent is 
    stored. If load_model = 'model', the function loads the model 
    './Models/model.csv'. If load_model is not None, the epsilon and 
    gamma parameters above are used for a second round of training.
  filename: string
    Name of the CSV file that will store the learned Q-function 
    of one of the agents. The path to the CSV file is 
    then ./Models/filename.csv. The counter of state-action
    pairs is also stored at ./Models/data/count_filename.csv for
    future training.
  random_opponent: boolean
    If set to True, the function trains 1 RL Agent by making it 
    play against a Random Agent. Otherwise, the RL agent is
    trained by playing against another version of itself.
  n_games_test: int
    Number of games one of the RL Agents plays against a Random Agent
    for testing. If set to 0, the RL Agents are not tested against a 
    Random Agent.
  freq_test: int
    Number of epochs after which one of the RL Agents plays n_games_test
    games against a Random Agent. If set to 1000, one of the RL Agents is
    tested against a Random Agent every 1000 training epochs.
    If set to 0, the test occurs at the last epoch of training only.
    If set to -1, no agent is tested during training.
  n_skip_games: int 
    Number of epochs after which the user can choose to play 
    against one of the learning agents. If set to 1000, the user can 
    choose to play against one agent every 1000 games. If set to 0, 
    the user can choose to play against one agent at the last epoch 
    only. If set to -1, no choice is offered and the user cannot test 
    any agent.
  verbose: boolean
    If set to True, each game action during training has a 
    written explanation.

  Return
  ------
  learning_results: list
    Only meaningful when n_games_test > 0 (otherwise an empty list is 
    returned). List of the test results against a Random Agent, one per 
    tested epoch (n_epochs // freq_test of them). Each test result is a 
    list: [current epoch, score of RL Agent, number of finished games, 
    n_games_test].
  """

    # Learning agent
    agent1 = RLAgent(epsilon, gamma)
    if load_model is not None:
        agent1.load_model(load_model)

    # Choose opponent
    if random_opponent:
        agent2 = RandomAgent()
        time_limit = None
        print('Training vs Random')
    else:
        agent2 = RLAgent(epsilon, gamma)
        if load_model is not None:
            agent2.load_model(load_model)
        time_limit = None
        print('Training vs Self')

    start_idx = 0
    scores = [0, 0]

    # If the user only confronts the agent at the last epoch
    # or if no confrontation
    if n_skip_games in [-1, 0]:
        n_skip_games = n_epochs - n_skip_games

    # Boolean for game between the user and agent1 preceding a game
    # between agent1 and agent2
    play_checkpoint_usr = False

    # If there is a test of agent1 at the last epoch only or no test
    if freq_test in [-1, 0]:
        freq_test = n_epochs - freq_test

    # Number of games between agent1 and a Random Agent for testing
    n_games_test_mem = n_games_test
    learning_results = []

    # Start training
    print('Training epoch:')
    for epoch in range(1, n_epochs + 1):

        if epoch % (n_epochs // 10) == 0:
            print(epoch, '/', n_epochs)

        # Update boolean for playing with user
        play_checkpoint_usr = bool(epoch % n_skip_games == 0)
        if play_checkpoint_usr:
            # Print training status
            print('Number of games: ', epoch)
            print('Scores: ', scores)
            # Ask user to play
            play = int(input('Play ? (1 Yes | 0 No)\n'))
            play_checkpoint_usr = bool(play)

        # Update boolean for test
        n_games_test = int(epoch % freq_test == 0) * n_games_test_mem

        # Start game
        game_over, winner, test_results = game_2Agents(
            agent1,
            agent2,
            start_idx=start_idx,
            train=True,
            time_limit=time_limit,
            n_games_test=n_games_test,
            play_checkpoint_usr=play_checkpoint_usr,
            verbose=verbose)

        assert game_over, str('Game not over but new game' +
                              ' beginning during training')

        if winner in [0, 1]:
            scores[winner] += 1

        # Save test games of agent1 against a Random Agent
        if bool(n_games_test):
            assert len(test_results) != 0, \
            'Agent1 has been tested but there is no result of that.'
            learning_results.append(
                [epoch, test_results[2], test_results[0], test_results[1]])

        # Next round
        start_idx = 1 - start_idx

    # Save Q-function of agent1
    np.savetxt(str('Models/' + filename + '.csv'), agent1.Q, delimiter=',')
    # Save stats for learning rate of agent1
    np.savetxt(str('Models/data/count_' + filename + '.csv'),
               agent1.count_state_action,
               delimiter=',')

    return learning_results
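
A hedged sketch of a run with periodic testing enabled, showing how the returned learning_results could be inspected (the filename and parameter values below are illustrative, not taken from the original):

    # Hypothetical run: every 500 epochs, agent1 plays 20 test games against
    # a Random Agent, and each test is appended to learning_results as
    # [epoch, score, number of finished games, n_games_test].
    curve = train(n_epochs=2000,
                  epsilon=0.4,
                  gamma=1.0,
                  load_model=None,
                  filename='greedy0_4_vsSelf_demo',
                  random_opponent=False,
                  n_games_test=20,
                  freq_test=500,
                  n_skip_games=-1,
                  verbose=False)
    for epoch, score, finished, n_games in curve:
        print('Epoch {}: score {} over {} test games ({} finished)'.format(
            epoch, score, n_games, finished))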
Example #4
    np.savetxt(str('Models/data/count_' + filename + '.csv'),
               agent1.count_state_action,
               delimiter=',')

    return learning_results


if __name__ == "__main__":

    train(n_epochs=5000,
          epsilon=0.6,
          gamma=1.0,
          load_model=None,
          filename='greedy0_6_vsSelf_test',
          random_opponent=False,
          n_games_test=0,
          freq_test=-1,
          n_skip_games=-1,
          verbose=False)

    agent1 = RLAgent()
    agent1.load_model('greedy0_2_vsRandomvsSelf')
    agent2 = RLAgent()
    agent2.load_model('greedy0_6_vsSelf_test')
    results = compare_agents(agent1,
                             agent2,
                             n_games=10,
                             time_limit=None,
                             verbose=False)
    print(results)
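
The layout of results is not spelled out in this snippet; a hedged way to print it, assuming the same indexing used above (indices 2 and 3 hold the scores of agent1 and agent2, and indices 0 and 1 are taken to be the number of finished games and the number of games played):

    # Hedged interpretation of the compare_agents output, inferred from how
    # indices 0-3 are used elsewhere on this page.
    finished, played, score1, score2 = results[:4]
    print('Finished games: {}/{}'.format(finished, played))
    print('greedy0_2_vsRandomvsSelf score: {}'.format(score1))
    print('greedy0_6_vsSelf_test score: {}'.format(score2))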