Exemplo n.º 1
0
 def render(self, mode='human', close=False):
     """
     Announce rendering and, outside of validation, save and plot the run.

     "Rendering" is always printed. When ``self.validation_process`` is
     falsy, ``self.memory`` is written with ``np.save`` to
     ``<test_folder>/memory_<test_starts_index>.npy`` and
     ``plt.plot_actions`` is called with the memory, the long and short
     actions, the test folder and the title
     ``/test_actions_<test_starts_index>``.

     ``mode`` and ``close`` are accepted but not used by this method.
     """
     print("Rendering")
     if self.validation_process:
         # Validation runs neither save the memory nor produce a plot.
         return

     run_tag = str(self.test_starts_index)

     memory_path = self.test_folder + '/memory_' + run_tag + '.npy'
     np.save(memory_path, self.memory)

     plt.plot_actions(
         self.memory,
         self.long_actions,
         self.short_actions,
         self.test_folder,
         '/test_actions_' + run_tag,
     )
Exemplo n.º 2
0
 def plot_actions(self):
     """
     Plot the actions recorded on this object.

     Forwards ``self.folder``, ``self.memory``, ``self.long_actions`` and
     ``self.short_actions``, in that order, to ``plt.plot_actions``.
     Nothing is returned.
     """
     draw = plt.plot_actions
     draw(
         self.folder,
         self.memory,
         self.long_actions,
         self.short_actions,
     )
Exemplo n.º 3
0
def main():
    """
    Parse command-line flags, run training, plot the results and report rewards.

    Flags are read from ``sys.argv[1:]`` as ``--name=value`` long options and
    override the defaults below; each value is converted to the type of its
    default. Any positional argument makes the script exit with a usage
    message. The ``random`` module is seeded with the ``seed`` flag.

    A ``Runner`` built from the flags is run for ``num_episodes`` episodes,
    after which per-agent learning rewards and actions plus a reward
    histogram are plotted with a ``-train`` suffix. When ``agent_class`` is
    ``'QAgent'``, a ``GreedyAgent`` is built from each trained agent's ``Q``
    and run for ``num_test_episodes`` episodes, and the results are plotted
    with a ``-test`` suffix; in that case the rewards printed at the end are
    those of the test run.

    Finally the overall rewards and the rewards of every agent present in
    ``agents_rewards`` are printed.
    """
    flags = {
        'players': 2,
        'num_episodes': 1,
        'num_test_episodes': 1,
        'agent_class': 'SimpleAgent',
        'alpha': 0.1,
        'gamma': 0.9,
        'seed': 12345
    }
    options, arguments = getopt.getopt(sys.argv[1:], '', [
        'players=', 'num_episodes=', 'num_test_episodes=', 'agent_class=',
        'seed=', 'alpha=', 'gamma='
    ])
    if arguments:
        # Only --flag=value options are supported; positional arguments
        # indicate a malformed command line.
        sys.exit('usage: rl_env_example.py [options]\n'
                 '--players       number of players in the game.\n'
                 '--num_episodes  number of game episodes to run.\n'
                 '--num_test_episodes  number of test game episodes to run.\n'
                 '--alpha         step size for Q-learning. \n'
                 '--gamma         discount rate for Q-learning. \n'
                 '--seed          random generator seed. \n'
                 '--agent_class   {}'.format(' or '.join(
                     AGENT_CLASSES.keys())))
    for flag, value in options:
        flag = flag[2:]  # Strip leading --.
        # Convert the raw string using the type of the flag's default value.
        flags[flag] = type(flags[flag])(value)

    random.seed(flags['seed'])

    # Suffix passed to the plotting helpers; it encodes the run configuration.
    form_str = 'players_{}-episodes_{}-test_episodes_{}-agent_{}-alpha_{}-gamma_{}-seed_{}'
    suffix = form_str.format(flags['players'], flags['num_episodes'],
                             flags['num_test_episodes'], flags['agent_class'],
                             flags['alpha'], flags['gamma'], flags['seed'])
    train_suffix = suffix + '-train'
    test_suffix = suffix + '-test'

    runner = Runner(flags)

    print('*' * 100)
    print('TRAINING')
    agents, rewards, agents_rewards, actions = runner.run(
        flags['num_episodes'])
    print('*' * 100)

    for ar in agents_rewards:
        plot_learning_rewards(agents_rewards[ar], ar, train_suffix)
        plot_actions(actions[ar], ar, train_suffix)
    plot_hist(rewards, -1, train_suffix, flags['players'])

    if flags['agent_class'] == 'QAgent':
        # Evaluate the learned Q values with greedy agents.
        greedy_agents = [
            GreedyAgent(runner.agent_config, agent.Q) for agent in agents
        ]
        print('*' * 100)
        print('TESTING')
        _, rewards, agents_rewards, actions = runner.run(
            flags['num_test_episodes'], greedy_agents)
        print('*' * 100)
        plot_hist(rewards, -1, test_suffix, flags['players'])
        for ar in agents_rewards:
            plot_actions(actions[ar], ar, test_suffix)

    print('rewards:\n{}'.format(rewards))
    # Report every agent rather than assuming exactly two, since the number
    # of players is configurable via --players.
    for ar in agents_rewards:
        print('agent {} rewards:\n{}'.format(ar, agents_rewards[ar]))