def test_log(self):
    """Smoke-test Logger: write text, record performance points, plot.

    Removes any leftover log location from a previous run, then drives
    the Logger through its full lifecycle (log -> log_performance ->
    close_files -> plot).
    """
    log_path = "./newtest/test_log.txt"
    # Clean slate: drop the artifact left by an earlier test run, if any.
    if os.path.exists(log_path):
        shutil.rmtree(log_path)
    logger = Logger(log_path)
    logger.log("test text")
    # Three (timestep, reward) points give the plot a small curve.
    for point in (1, 2, 3):
        logger.log_performance(point, point)
    logger.close_files()
    logger.plot('aaa')
random_agent = RandomAgent(action_num=eval_env.action_num) # Other agents env.model.create_agents({"mocsar_min": 4}) env_agent_list = [env.model.rule_agents[i] for i in range(1, 4)] env_agent_list.insert(0, agent) env.set_agents(env_agent_list) # Evaluation agent eval_env.model.create_agents({"mocsar_random": 4}) eval_agent_list = [eval_env.model.rule_agents[i] for i in range(1, 4)] eval_agent_list.insert(0, agent) eval_env.set_agents(eval_agent_list) # Init a Logger to plot the learning curve logger = Logger(log_dir) # Log Game info logger.log('\n########## Game information ##########') logger.log('\nNumPlayers: {}, NumCards: {}, Episodes: {}'.format( env.game.num_players, env.game.num_cards, episode_num)) # logger.log(f'\nTrain Agents:{get_agent_str(env_agent_list)}') # logger.log(f'\nEval Agents:{get_agent_str(eval_agent_list)}') for episode in range(episode_num): # Generate data from the environment trajectories, _ = env.run(is_training=True) # Feed transitions into agent memory, and train the agent for ts in trajectories[0]:
# NOTE(review): excerpt of a multiprocess DQN training script (TF1-style
# graph/session API). `env`, `eval_env`, `memory_init_size`, `episode_num`,
# `evaluate_every`, `log_path`, `csv_path`, `assign_task`, `PROCESS_NUM`,
# `INPUT_QUEUE`, and `OUTPUT_QUEUE` are defined outside this view, and the
# code continues past `reward = 0`. Formatting reconstructed; tokens unchanged.
with tf.Session() as sess:
    # Set agents
    global_step = tf.Variable(0, name='global_step', trainable=False)
    agent = DQNAgent(sess,
                     scope='dqn',
                     action_num=env.action_num,
                     replay_memory_init_size=memory_init_size,
                     state_shape=env.state_shape,
                     mlp_layers=[10, 10])
    env.set_agents([agent])
    eval_env.set_agents([agent])
    sess.run(tf.global_variables_initializer())

    # Init a Logger to plot the learning curve
    logger = Logger(xlabel='timestep',
                    ylabel='reward',
                    legend='DQN on Blackjack',
                    log_path=log_path,
                    csv_path=csv_path)

    for episode in range(episode_num // evaluate_every):
        # Generate data from the environment
        # Fan episodes out to worker processes via the input queue, then
        # collect the resulting trajectories from the output queue.
        tasks = assign_task(evaluate_every, PROCESS_NUM)
        for task in tasks:
            INPUT_QUEUE.put((task, True, None, None))
        for _ in range(evaluate_every):
            trajectories = OUTPUT_QUEUE.get()
            # Feed transitions into agent memory, and train
            for ts in trajectories[0]:
                agent.feed(ts)

        # Evaluate the performance
        # NOTE(review): assumed to sit at the per-evaluation-cycle level
        # (outside the collection loops) — confirm against the full file.
        reward = 0