def test_log(self):
    # Remove any leftover log directory from a previous test run
    log_dir = "./newtest/test_log.txt"
    if os.path.exists(log_dir):
        shutil.rmtree(log_dir)
    # Write a text line and three (x, y) performance points, then plot
    logger = Logger(log_dir)
    logger.log("test text")
    logger.log_performance(1, 1)
    logger.log_performance(2, 2)
    logger.log_performance(3, 3)
    logger.close_files()
    logger.plot('aaa')
env.model.create_agents({"mocsar_min": 4})
env_agent_list = [env.model.rule_agents[i] for i in range(1, 4)]
env_agent_list.insert(0, agent)
env.set_agents(env_agent_list)

# Evaluation agent
eval_env.model.create_agents({"mocsar_random": 4})
eval_agent_list = [eval_env.model.rule_agents[i] for i in range(1, 4)]
eval_agent_list.insert(0, agent)
eval_env.set_agents(eval_agent_list)

# Init a Logger to plot the learning curve
logger = Logger(log_dir)

# Log Game info
logger.log('\n########## Game information ##########')
logger.log('\nNumPlayers: {}, NumCards: {}, Episodes: {}'.format(
    env.game.num_players, env.game.num_cards, episode_num))
# logger.log(f'\nTrain Agents:{get_agent_str(env_agent_list)}')
# logger.log(f'\nEval Agents:{get_agent_str(eval_agent_list)}')

for episode in range(episode_num):
    # Generate data from the environment
    trajectories, _ = env.run(is_training=True)

    # Feed transitions into agent memory, and train the agent
    for ts in trajectories[0]:
        agent.feed(ts)

    # Evaluate the performance. Play with random agents.
    if episode % evaluate_every == 0:
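        # --- Illustrative sketch, not part of the original script ---
        # The evaluation branch is truncated above. In RLCard-style training
        # loops it typically plays `evaluate_num` games (an assumed variable
        # defined alongside `evaluate_every`) in the evaluation environment
        # and logs the average reward of the trained agent at seat 0.
        reward = 0
        for _ in range(evaluate_num):
            _, payoffs = eval_env.run(is_training=False)
            reward += payoffs[0]
        logger.log('\n########## Evaluation ##########')
        logger.log('Episode: {}, Average reward: {}'.format(
            episode, float(reward) / evaluate_num))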
trajectories = OUTPUT_QUEUE.get()
# Feed transitions into agent memory, and train
for ts in trajectories[0]:
    agent.feed(ts)

# Evaluate the performance
reward = 0
tasks = assign_task(evaluate_num, PROCESS_NUM)
variables = tf.contrib.slim.get_variables(
    scope="dqn", collection=tf.GraphKeys.TRAINABLE_VARIABLES)
variables = [var.eval() for var in variables]
for task in tasks:
    INPUT_QUEUE.put((task, False, variables, agent.total_t))
for _ in range(evaluate_num):
    payoffs = OUTPUT_QUEUE.get()
    reward += payoffs[0]
logger.log('\n########## Evaluation ##########')
logger.log('Average reward is {}'.format(float(reward) / evaluate_num))

# Close files in the logger
logger.close_files()

# Plot the learning curve
logger.plot('DQN')

# Save model
save_dir = 'models/blackjack_dqn'
if not os.path.exists(save_dir):
    os.makedirs(save_dir)
saver = tf.train.Saver()
saver.save(sess, os.path.join(save_dir, 'model'))
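
# --- Illustrative sketch, not part of the original script ---
# One way to reload the checkpoint saved above, using the same TF1 Saver
# API. The graph (and the "dqn" variables) must be built before restoring,
# e.g. by constructing the agent again in a fresh session.
with tf.Session() as restore_sess:
    restore_saver = tf.train.Saver()
    restore_saver.restore(restore_sess, os.path.join(save_dir, 'model'))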
env.model.create_agents({"mocsar_min": 4})
env_agent_list = [env.model.rule_agents[i] for i in range(1, 4)]
env_agent_list.insert(0, agent)
env.set_agents(env_agent_list)

# Evaluation agent
eval_env.model.create_agents({"mocsar_random": 4})
eval_agent_list = [eval_env.model.rule_agents[i] for i in range(1, 4)]
eval_agent_list.insert(0, agent)
eval_env.set_agents(eval_agent_list)

# Init a Logger to plot the learning curve
logger = Logger(log_dir)

# Log Game info
logger.log('\n########## Game information, NFSP, RuleAgents, Pytorch ##########')
logger.log('\nNumPlayers: {}, NumCards: {}, Episodes: {}'.format(
    env.game.num_players, env.game.num_cards, conf.get_int('episode_num')))

for episode in range(conf.get_int('episode_num')):
    # First sample a policy for the episode
    agent.sample_episode_policy()

    # Generate data from the environment
    trajectories, _ = env.run(is_training=True)

    # Feed transitions into agent memory, and train the agent
    for ts in trajectories[0]:
        agent.feed(ts)
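
# --- Illustrative sketch, not part of the original script ---
# Saving the trained PyTorch NFSP agent after the episode loop. The
# get_state_dict() helper is assumed here (RLCard's PyTorch NFSP agent
# provides it in its bundled examples); the save path is made up for
# illustration, and `torch`/`os` are assumed to be imported.
save_dir = 'models/mocsar_nfsp_pytorch'
if not os.path.exists(save_dir):
    os.makedirs(save_dir)
torch.save(agent.get_state_dict(), os.path.join(save_dir, 'model.pth'))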