Example #1
# Requires os, shutil, and the Logger class under test to be imported
def test_log(self):
    log_dir = "./newtest/test_log.txt"
    # Remove any logs left over from a previous run
    if os.path.exists(log_dir):
        shutil.rmtree(log_dir)
    logger = Logger(log_dir)
    logger.log("test text")
    # Record (timestep, reward) points for the learning curve
    logger.log_performance(1, 1)
    logger.log_performance(2, 2)
    logger.log_performance(3, 3)
    logger.close_files()
    logger.plot('aaa')
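
For orientation, below is a minimal stand-in that satisfies the same call surface exercised by this test (log, log_performance, close_files, plot). It is an illustrative sketch, not rlcard's actual Logger; the file names and CSV columns are assumptions.

import csv
import os

class MinimalLogger:
    # Illustrative stand-in: same call surface as the Logger tested above
    def __init__(self, log_dir):
        os.makedirs(log_dir, exist_ok=True)
        self.txt_file = open(os.path.join(log_dir, 'log.txt'), 'w')
        self.csv_file = open(os.path.join(log_dir, 'performance.csv'), 'w', newline='')
        self.writer = csv.writer(self.csv_file)
        self.writer.writerow(['timestep', 'reward'])

    def log(self, text):
        # Mirror the message to stdout and to the log file
        print(text)
        self.txt_file.write(text + '\n')

    def log_performance(self, timestep, reward):
        # One row per evaluation point on the learning curve
        self.writer.writerow([timestep, reward])

    def close_files(self):
        self.txt_file.close()
        self.csv_file.close()

    def plot(self, algorithm):
        # The real Logger renders the curve from performance.csv here;
        # omitted in this sketch
        pass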
Example #2
random_agent = RandomAgent(action_num=eval_env.action_num)

# Other agents
env.model.create_agents({"mocsar_min": 4})
env_agent_list = [env.model.rule_agents[i] for i in range(1, 4)]
env_agent_list.insert(0, agent)
env.set_agents(env_agent_list)

# Evaluation agent
eval_env.model.create_agents({"mocsar_random": 4})
eval_agent_list = [eval_env.model.rule_agents[i] for i in range(1, 4)]
eval_agent_list.insert(0, agent)
eval_env.set_agents(eval_agent_list)

# Init a Logger to plot the learning curve
logger = Logger(log_dir)

# Log Game info
logger.log('\n########## Game information ##########')
logger.log('\nNumPlayers: {}, NumCards: {}, Episodes: {}'.format(
    env.game.num_players, env.game.num_cards, episode_num))

# logger.log(f'\nTrain Agents:{get_agent_str(env_agent_list)}')
# logger.log(f'\nEval Agents:{get_agent_str(eval_agent_list)}')
for episode in range(episode_num):

    # Generate data from the environment
    trajectories, _ = env.run(is_training=True)

    # Feed transitions into agent memory, and train the agent
    for ts in trajectories[0]:
        agent.feed(ts)
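
The excerpt stops inside the training loop. In the usual rlcard training scripts, a periodic evaluation step follows; the sketch below is a hypothetical continuation in that style (evaluate_every, evaluate_num, and the payoff indexing are assumptions, not part of this excerpt):

    # Hypothetical continuation: periodically evaluate against the
    # rule-based opponents and log the learning agent's average reward
    if episode % evaluate_every == 0:
        reward = 0
        for _ in range(evaluate_num):
            _, payoffs = eval_env.run(is_training=False)
            reward += payoffs[0]
        logger.log('\n########## Evaluation ##########')
        logger.log('Episode: {} Average reward is {}'.format(
            episode, float(reward) / evaluate_num))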
Example #3
    with tf.Session() as sess:

        # Set agents
        global_step = tf.Variable(0, name='global_step', trainable=False)
        agent = DQNAgent(sess,
                         scope='dqn',
                         action_num=env.action_num,
                         replay_memory_init_size=memory_init_size,
                         state_shape=env.state_shape,
                         mlp_layers=[10, 10])
        env.set_agents([agent])
        eval_env.set_agents([agent])
        sess.run(tf.global_variables_initializer())

        # Init a Logger to plot the learning curve
        logger = Logger(xlabel='timestep', ylabel='reward',
                        legend='DQN on Blackjack', log_path=log_path, csv_path=csv_path)

        for episode in range(episode_num // evaluate_every):

            # Generate data from the environment
            # Distribute this round's episodes across the worker
            # processes via the shared task queue
            tasks = assign_task(evaluate_every, PROCESS_NUM)
            for task in tasks:
                INPUT_QUEUE.put((task, True, None, None))
            for _ in range(evaluate_every):
                trajectories = OUTPUT_QUEUE.get()

                # Feed transitions into agent memory, and train
                for ts in trajectories[0]:
                    agent.feed(ts)
            # Evaluate the performance
            reward = 0
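
The snippet ends just as the evaluation accumulator is initialized. Going by the older rlcard example scripts that use this Logger constructor, the evaluation block typically finishes along these lines (evaluate_num, add_point, and the payoff indexing are assumptions based on that pattern, not part of this excerpt):

            # Hypothetical continuation of the evaluation block
            for _ in range(evaluate_num):
                _, payoffs = eval_env.run(is_training=False)
                reward += payoffs[0]

            logger.log('\n########## Evaluation ##########')
            logger.log('Timestep: {} Average reward is {}'.format(
                env.timestep, float(reward) / evaluate_num))

            # Add a point to the learning curve
            logger.add_point(x=env.timestep, y=float(reward) / evaluate_num)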