def test_add_point(self):
    """A logged (x, y) pair is stored, and missing coordinates are rejected."""
    csv_logger = Logger(
        xlabel="x",
        ylabel="y",
        legend="test",
        csv_path="./newtest/test_csv.csv",
    )
    csv_logger.add_point(x=1, y=1)
    self.assertEqual(csv_logger.xs[0], 1)
    self.assertEqual(csv_logger.ys[0], 1)
    # add_point must refuse a point with no coordinates at all.
    with self.assertRaises(ValueError):
        csv_logger.add_point(None, None)
def test_add_point(self):
    """Adding one point makes it the first entry of both xs and ys."""
    point_logger = Logger(
        xlabel="x",
        ylabel="y",
        legend="test",
        csv_path="./newtest/test_csv.csv",
    )
    point_logger.add_point(x=1, y=1)
    self.assertEqual(point_logger.xs[0], 1)
    self.assertEqual(point_logger.ys[0], 1)
def test_make_plot(self):
    """make_plot renders logged points to a PNG in a freshly created directory."""
    plot_logger = Logger(xlabel="x", ylabel="y", legend="test")
    for step in range(10):
        plot_logger.add_point(x=step, y=step * step)
    self.assertEqual(9 * 9, plot_logger.ys[9])
    save_path = './newtest/test.png'
    save_dir = os.path.dirname(save_path)
    # Start from a clean slate so make_plot must create the directory itself.
    if os.path.exists(save_dir):
        shutil.rmtree(save_dir)
    plot_logger.make_plot(save_path=save_path)
    # Remove the artefacts the test produced.
    shutil.rmtree(save_dir)
# Periodic evaluation: play `evaluate_num` games in the evaluation
# environment, accumulate the first player's payoffs, then log and plot
# the running average.
# NOTE(review): reformatted from a whitespace-collapsed source line; the
# loop/branch nesting below is reconstructed — confirm against the
# original file before relying on it.
reward = 0
reward_list = []
for eval_episode in range(evaluate_num):
    # In-place progress line ('\r' prefix, no trailing newline).
    print('\rEPISODE {} - Eval {} over {} - Number of game played {} - {}'.format(episode, eval_episode, evaluate_num, total_game_played, time_difference_good_format( seconds, time.time())), end='')
    _, payoffs = eval_env.run(is_training=False)
    total_game_played += 1
    # payoffs[0] — presumably the learning agent's payoff; verify against eval_env.run.
    reward_list.append(payoffs[0])
    reward += payoffs[0]
logger.log('\n########## Evaluation - Episode {} ##########'.format(episode))
logger.log('Timestep: {} Average reward is {}'.format(env.timestep, float(reward) / evaluate_num))
# Record the average reward for this evaluation round.
logger.add_point(x=env.timestep, y=float(reward) / evaluate_num)
# Intermediate plots, only every `save_plot_every` episodes (skip episode 0).
if episode % save_plot_every == 0 and episode > 0:
    logger.make_plot(save_path=figure_path + str(episode) + '.png')
    logger.make_plot_hist(save_path_1=figure_path + str(episode) + '_hist.png', save_path_2=figure_path + str(episode) + '_freq.png', reward_list=reward_list)
# Final plot — presumably emitted after the training loop ends; confirm placement.
logger.make_plot(save_path=figure_path + 'final_' + str(episode) + '.png')
logger.make_plot_hist(save_path_1=figure_path + str(episode) + '_hist.png', save_path_2=figure_path + str(episode) + '_freq.png', reward_list=reward_list)
# NOTE(review): truncated fragment from a whitespace-collapsed source — it
# begins mid-evaluation-loop and ends MID-EXPRESSION ("[opponent_agent] *"),
# so it cannot be reformatted into valid Python from this view. It appears
# to: tally payoffs against a random opponent, log/plot the averages, then
# start setting up evaluation against the previous agent. Left byte-identical;
# restore the surrounding lines from the original file before editing.
_, payoffs = eval_env.run(is_training=False) total_game_played += 1 reward_random_list.append(payoffs[0]) reward_random += payoffs[0] taking_list.append(eval_env.game.players[0].taking) logger_random.log( '\n########## Evaluation Against Random - Episode {} ##########' .format(episode)) logger_random.log( 'Timestep: {} Average reward against random is {}'.format( env.timestep, float(reward_random) / evaluate_num)) # Add point to logger logger_random.add_point(x=env.timestep, y=float(reward_random) / evaluate_num) # Make plot logger_random.make_plot(save_path=figure_path_random + str(episode) + '.png') logger_random.make_plot_hist( save_path_1=figure_path_random + str(episode) + '_hist.png', save_path_2=figure_path_random + str(episode) + '_freq.png', reward_list=reward_random_list, taking_list=taking_list) # Eval against last agent reward_opponent = 0 reward_opponent_list = [] taking_list = [] eval_env.set_agents([agent] + [opponent_agent] *
# NOTE(review): truncated fragment from a whitespace-collapsed source — it
# begins MID-CALL ("seconds, time.time())), end='')" is the tail of a print),
# so it cannot be reformatted into valid Python from this view. It appears
# to: finish a progress print, tally rewards against a random opponent,
# log/plot the averages (note: x=episode here, unlike the env.timestep used
# in the sibling fragment — confirm which is intended), then generate
# training trajectories. Left byte-identical; restore from the original file
# before editing.
seconds, time.time())), end='') _, payoffs = eval_env.run(is_training=False) total_game_played += 1 reward_random_list.append(payoffs[0]) reward_random += payoffs[0] taking_list.append(eval_env.game.players[0].taking) logger_random.log('\n########## Evaluation Against Random - Episode {} ##########'.format(episode)) logger_random.log( 'Timestep: {} Average reward against random is {}'.format(env.timestep, float(reward_random) / evaluate_num)) # Add point to logger logger_random.add_point(x=episode, y=float(reward_random) / evaluate_num) # Make plot logger_random.make_plot(save_path=figure_path_random + str(episode) + '.png') logger_random.make_plot_hist(save_path_1=figure_path_random + str(episode) + '_hist.png', save_path_2=figure_path_random + str(episode) + '_freq.png', reward_list=reward_random_list, taking_list=taking_list) print('\rEPISODE {} - Number of game played {} - {}'.format(episode, total_game_played, time_difference_good_format(seconds, time.time())), end='') # Generate data from the environment trajectories, _ = env.run(is_training=True) total_game_played += 1
# Train the i-th agent one step (RL + supervised losses) and report them
# in place, then periodically evaluate against the evaluation environment.
# NOTE(review): reformatted from a whitespace-collapsed source line; this
# run of statements begins mid-loop (the enclosing `for i ...` is outside
# this view) and the nesting is reconstructed — confirm against the
# original file.
rl_loss = agents[i].train_rl()
sl_loss = agents[i].train_sl()
# In-place progress line ('\r' prefix, no trailing newline).
print(
    '\rINFO - Agent {}, step {}, rl-loss: {}, sl-loss: {}'.
    format(i, step_counters[i], rl_loss, sl_loss), end='')
# Evaluate the performance. Play with random agents.
if episode % evaluate_every == 0:
    reward = 0
    eval_episode = 0
    for eval_episode in range(evaluate_num):
        _, payoffs = eval_env.run(is_training=False)
        # payoffs[0] — presumably the learning agent's payoff; confirm.
        reward += payoffs[0]
    logger.log('\n########## Evaluation ##########')
    logger.log('episode: {} Average reward is {}'.format(
        episode / evaluate_every, float(reward) / evaluate_num))
    # Record the evaluation-round average (x is the evaluation index).
    logger.add_point(x=episode / evaluate_every,
                     y=float(reward) / evaluate_num)
    # Intermediate plot, only every `save_plot_every` episodes (skip 0).
    if episode % save_plot_every == 0 and episode > 0:
        logger.make_plot(save_path=figure_path + str(episode) + '.png')
# Final plot — presumably emitted after the training loop ends; confirm placement.
logger.make_plot(save_path=figure_path + 'final_' + str(episode) + '.png')
sess.run(tf.compat.v1.global_variables_initializer()) # STATS ON TAKING BID FOR FIRST PLAYER TO SPEAK print('\n------------------------') print('---- Stats on Bids -----') print('------------------------') for i in range(num_tests): if i * 100 % num_tests == 0: print('\rProgress Bids: {}%'.format(int(i * 100 / num_tests)), end='') state, player_id = env.init_game() points_in_hand = get_hand_value(env.game.players[player_id].hand) bouts_in_hand = get_nb_bouts(env.game.players[player_id].hand) action = env.decode_action(agent.eval_step(state)) logger_taking.add_point(x=points_in_hand, y=bouts_in_hand, z=action.get_bid_order()) # Showing usual results against himself for this agent print('\n------------------------') print('---- Stats on Games ----') print('------------------------') # Make environment env = rlcard.make('tarot') global_step = tf.Variable(0, name='global_step', trainable=False) sess.run(tf.compat.v1.global_variables_initializer()) for i in range(num_games): hand_value = dict() nb_bouts = dict() initial_hand = dict() # PRINTS HERE TO FORCE THE CODE TO CONTINUE (WEIRD PROBLEM IS NOTHING IS ASKED TO BE PRINTED HERE)
def test_make_plot(self):
    """Ten quadratic points can be logged and rendered to a PNG."""
    quad_logger = Logger(xlabel="x", ylabel="y", legend="test")
    for step in range(10):
        quad_logger.add_point(x=step, y=step * step)
    self.assertEqual(9 * 9, quad_logger.ys[9])
    quad_logger.make_plot(save_path='./newtest/test.png')
print('\rIteration {}'.format(episode), end='\n') # Evaluate the performance. Play with NFSP agents. if episode % evaluate_every == 0: #agent.save() # Save model reward = 0 for eval_episode in range(evaluate_num): his, payoffs = eval_env.run(is_training=False) reward += payoffs[0] logger_reward.log('\n########## Evaluation ##########') logger_reward.log('Iteration: {} Average reward is {}'.format( episode, float(reward) / evaluate_num)) # Add point to logger logger_reward.add_point(x=episode, y=float(reward) / evaluate_num) import time start = time.perf_counter() exploitability = agent.compute_exploitability(evaluate_num) end = time.perf_counter() logger.log('episode: {} cost {:10}s ,exploitability is {}'.format( episode, end - start, exploitability)) logger.add_point(x=episode, y=exploitability) print("\n") # Make plot if episode % save_plot_every == 0 and episode > 0: logger.make_plot(save_path=figure_path + str(episode) + '.png') logger_reward.make_plot(save_path=figure_path + str(episode) + 'reward' + '.png') # Make the final plot
# NOTE(review): truncated fragment from a whitespace-collapsed source — it
# begins MID-CALL ("is_training=False)" is the tail of an eval_env.run-style
# invocation), so it cannot be reformatted into valid Python from this view.
# It appears to: accumulate separate bet/change rewards, log and plot each
# average on its own logger, and checkpoint the agent every
# `checkpoint_every` episodes. Left byte-identical; restore the surrounding
# lines from the original file before editing.
is_training=False) bet_reward += bet_reward_sum change_reward += change_reward_sum bet_logger.log('\n########## Evaluation ##########') bet_logger.log( 'Timestep: {} Average bet reward is {}. Average change reward is {}' .format(env.timestep, float(bet_reward) / evaluate_num, float(change_reward) / evaluate_num)) # send_slack('Episode: {} Average bet reward is {}. Average change reward is {}'.format(episode, float(bet_reward)/evaluate_num, float(change_reward)/evaluate_num)) # Add point to logger bet_logger.add_point(x=env.timestep, y=float(bet_reward) / evaluate_num) change_logger.add_point(x=env.timestep, y=float(change_reward) / evaluate_num) # Make plot if episode % save_plot_every == 0 and episode > 0: bet_logger.make_plot(save_path=figure_path + 'bet/' + str(episode) + '.png') change_logger.make_plot(save_path=figure_path + 'change/' + str(episode) + '.png') if episode % checkpoint_every == 0 and episode > 0: bet_path, change_path = agent.save(checkpoint_path, episode) print('Saved to {}, {}'.format(bet_path, change_path)) # Make the final plot