import matplotlib.pyplot as plt

# RandomPlayer and evaluate_players are assumed to come from this repo's
# tic_tac_toe package and util module; adjust the paths if they live elsewhere.
from tic_tac_toe.RandomPlayer import RandomPlayer
from util import evaluate_players


def play_n_plot():
    # Pit two random players against each other and plot the outcome rates.
    r1 = RandomPlayer()
    r2 = RandomPlayer()
    game_number, p1_wins, p2_wins, draws = evaluate_players(r1, r2, num_battles=100)

    # Red: draws, green: player 1 wins, blue: player 2 wins.
    plt.plot(game_number, draws, 'r-',
             game_number, p1_wins, 'g-',
             game_number, p2_wins, 'b-')
    plt.show()
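# For reference, a minimal, self-contained sketch of an evaluate_players helper
# consistent with how it is called throughout these snippets: it plays a number
# of battles and returns per-battle outcome percentages. battle_stub() and the
# percentage scaling are illustrative assumptions, not the repo's actual code.
import random as _random


def battle_stub(p1, p2, num_games):
    # Stand-in for a real battle() helper: returns (p1_wins, p2_wins, draws)
    # counts. Here it fabricates random outcomes so the sketch runs on its own.
    outcomes = [_random.choice(('p1', 'p2', 'draw')) for _ in range(num_games)]
    return outcomes.count('p1'), outcomes.count('p2'), outcomes.count('draw')


def evaluate_players_sketch(p1, p2, games_per_battle=100, num_battles=100):
    game_number, p1_wins, p2_wins, draws = [], [], [], []
    for i in range(num_battles):
        p1_win, p2_win, draw = battle_stub(p1, p2, games_per_battle)
        game_number.append(i * games_per_battle)
        p1_wins.append(p1_win * 100.0 / games_per_battle)
        p2_wins.append(p2_win * 100.0 / games_per_battle)
        draws.append(draw * 100.0 / games_per_battle)
    return game_number, p1_wins, p2_wins, draws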
# Inner loop of a reward-shaping sweep; an enclosing
# "for loss_reward in range(min_reward, max_reward + 1):" loop is assumed,
# along with min_reward, max_reward, num_reward_steps and the rewards array.
for draw_reward in range(loss_reward + 1, max_reward + 1):
    tf.reset_default_graph()
    TFSessionManager.set_session(tf.Session())
    sess = TFSessionManager.get_session()

    nnplayer = DirectPolicyAgent("PolicyLearner1",
                                 loss_value=loss_reward, draw_value=draw_reward)
    rm_player = RndMinMaxAgent()

    sess.run(tf.global_variables_initializer())
    game_number, p1_wins, p2_wins, draws = evaluate_players(
        nnplayer, rm_player, num_battles=1000, silent=True)

    print("With loss reward {} and draw reward {} we get draws: {}".format(
        loss_reward, draw_reward, draws[-1]))
    rewards[loss_reward - min_reward, draw_reward - min_reward] = draws[-1]

    TFSessionManager.set_session(None)

# Plot the final draw rate for each (loss reward, draw reward) pair as a heatmap.
fig, ax = plt.subplots()
im = ax.imshow(rewards)

reward_range = np.arange(num_reward_steps + 1)
# We want to show all ticks...
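# The tick-setting code appears to have been cut off above; the following is a
# hedged completion in the standard matplotlib heatmap idiom. Labelling the
# ticks with reward_range + min_reward and the axis names are assumptions
# (rewards is indexed [loss - min_reward, draw - min_reward], so rows are loss
# rewards and columns are draw rewards).
ax.set_xticks(reward_range)
ax.set_yticks(reward_range)
ax.set_xticklabels(reward_range + min_reward)
ax.set_yticklabels(reward_range + min_reward)
ax.set_xlabel('draw reward')
ax.set_ylabel('loss reward')
fig.colorbar(im)
plt.show()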
# The code below needs an nnplayer; restoring the first commented-out
# constructor so the snippet runs:
nnplayer = EGreedyNNQPlayer("QLearner1")  # learning_rate=0.001, win_value=10.0, loss_value=-10.0
# nn2player = EGreedyNNQPlayer("QLearner2")  # learning_rate=0.001, win_value=10.0, loss_value=-10.0
mm_player = MinMaxAgent()
rndplayer = RandomPlayer()
rm_player = RndMinMaxAgent()

TFSessionManager.set_session(tf.Session())
sess = TFSessionManager.get_session()

# TENSORLOG_DIR is assumed to be defined elsewhere, pointing at a writable
# directory for TensorBoard logs.
writer = tf.summary.FileWriter(TENSORLOG_DIR, sess.graph)
nnplayer.writer = writer

sess.run(tf.global_variables_initializer())

# Alternative matchups, kept for reference:
# game_number, p1_wins, p2_wins, draws = evaluate_players(rndplayer, nnplayer, num_battles=10000)
# game_number, p1_wins, p2_wins, draws = evaluate_players(rndplayer, nnplayer)
# game_number, p1_wins, p2_wins, draws = evaluate_players(mm_player, nnplayer, num_battles=300)
game_number, p1_wins, p2_wins, draws = evaluate_players(
    nnplayer, rm_player, num_battles=1000, writer=writer)
# game_number, p1_wins, p2_wins, draws = evaluate_players(nnplayer, rndplayer, num_battles=100)
# game_number, p1_wins, p2_wins, draws = evaluate_players(mm_player, nn2player, num_battles=100)

writer.close()

# Red: draws, green: player 1 wins, blue: player 2 wins.
plt.plot(game_number, draws, 'r-',
         game_number, p1_wins, 'g-',
         game_number, p2_wins, 'b-')
plt.show()

TFSessionManager.set_session(None)
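# To inspect the summaries written above, TensorBoard can be started from a
# shell and pointed at the same log directory (whatever TENSORLOG_DIR was set
# to):
#
#   tensorboard --logdir <TENSORLOG_DIR>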
# 'train', 'dddplayer' and 'rmmplayer' are assumed to be set up earlier in the
# script (a train/evaluate flag and the two players being compared).
if not train:
    TFSessionManager.load_session('models/SimpleNNQPlayer')
sess = TFSessionManager.get_session()
if train:
    sess.run(tf.global_variables_initializer())

nb = 500   # number of battles
gpb = 100  # games per battle
game_number, p1_wins, p2_wins, draws = evaluate_players(
    dddplayer, rmmplayer, num_battles=nb, games_per_battle=gpb)

if train:
    TFSessionManager.save_session('models/models_session2')

plt.plot(game_number, draws, color=(0.7, 0.7, 0.7), label='draws')
plt.plot(game_number, p1_wins, 'r-', label='player 1')
plt.plot(game_number, p2_wins, 'y-', label='player 2')
plt.xlabel('battle iterations ({} games per battle)'.format(gpb))
plt.ylabel('battle winning ratio (%)')
plt.legend(loc='best')
plt.show()

TFSessionManager.set_session(None)
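# For context, a minimal sketch of what a session manager with this interface
# could look like in TF1, built on tf.train.Saver. This is an illustrative
# stand-in, not the repo's actual TFSessionManager.
import tensorflow as tf


class TFSessionManagerSketch:
    session = None

    @classmethod
    def set_session(cls, sess):
        cls.session = sess

    @classmethod
    def get_session(cls):
        return cls.session

    @classmethod
    def save_session(cls, path):
        # Persist all variables of the current graph to 'path'.
        tf.train.Saver().save(cls.session, path)

    @classmethod
    def load_session(cls, path):
        # Restore variables into a fresh session for the current graph.
        cls.session = tf.Session()
        tf.train.Saver().restore(cls.session, path)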
import os
import pickle  # needed for pickle.load below
import random

print('start')

with open('player1.pickle', 'rb') as pickle_in:
    qvals = pickle.load(pickle_in)

# File size in MB and a sample entry.
print(os.stat('player1.pickle').st_size / (1024 * 1024))
print(qvals[(4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4)])

# Average number of zero-valued Q entries per stored position.
total = 0
for key in qvals.keys():
    total += qvals[key].count(0)
print(total / len(qvals))

discreetPlayer = DiscreetQValuesPlayer(training=False, qValues=qvals)
rndplayer = RandomPlayer()
game_number, p1_wins, p2_wins, draws, allPos = evaluate_players(
    discreetPlayer, rndplayer, games_per_battle=100000, num_battles=1)

# How many of the positions seen during evaluation are covered by the Q table?
total = 0
numSeen = 0
numNotSeen = 0
for key in allPos:
    if key in qvals:
        total += qvals[key].count(0)
        numSeen += 1
    else:
        numNotSeen += 1
print(total / numSeen)
print(numSeen)
print(numNotSeen)
print(numSeen / (numNotSeen + numSeen))

# Show a few randomly chosen positions that came up.
for i in range(3):
    print(random.choice(tuple(allPos)))
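# Counterpart to the load above: a minimal sketch of writing a Q-value table
# back out with pickle (the output file name here is an assumption).
with open('player1_updated.pickle', 'wb') as pickle_out:
    pickle.dump(qvals, pickle_out)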
from tic_tac_toe.ExpDoubleDuelQPlayer import ExpDoubleDuelQPlayer

# The other players and TFSessionManager are assumed to be imported earlier.
tf.reset_default_graph()

nnplayer = ExpDoubleDuelQPlayer("QLearner1")  # win_value=100.0, loss_value=-100.0
# nn2player = EGreedyNNQPlayer("QLearner2", win_value=100.0, loss_value=-100.0)
mm_player = MinMaxAgent()
rndplayer = RandomPlayer()
rm_player = RndMinMaxAgent()

TFSessionManager.set_session(tf.Session())
TFSessionManager.get_session().run(tf.global_variables_initializer())

# Alternative matchups, kept for reference:
# game_number, p1_wins, p2_wins, draws = evaluate_players(rndplayer, nnplayer, num_battles=10000)
# game_number, p1_wins, p2_wins, draws = evaluate_players(rndplayer, nnplayer)
# game_number, p1_wins, p2_wins, draws = evaluate_players(mm_player, nnplayer, num_battles=300)
game_number, p1_wins, p2_wins, draws = evaluate_players(
    rm_player, nnplayer, num_battles=300)
# game_number, p1_wins, p2_wins, draws = evaluate_players(nnplayer, rndplayer, num_battles=100)
# game_number, p1_wins, p2_wins, draws = evaluate_players(mm_player, nn2player, num_battles=100)

# Red: draws, green: player 1 wins, blue: player 2 wins.
plt.plot(game_number, draws, 'r-',
         game_number, p1_wins, 'g-',
         game_number, p2_wins, 'b-')
plt.show()

TFSessionManager.set_session(None)
# Fragment from inside the player class (end-of-game bookkeeping): decay the
# exploration rate, log it, then sync the target network.
self.random_move_prob *= self.random_move_decrease

if self.writer is not None:
    # 'summary' from the preceding training step is assumed to already exist.
    self.writer.add_summary(summary, self.game_counter)
    summary = tf.Summary(value=[tf.Summary.Value(
        tag='Random_Move_Probability', simple_value=self.random_move_prob)])
    self.writer.add_summary(summary, self.game_counter)

# Copy the online network's weights to the target network.
TFSN.get_session().run(self.graph_copy_op)


# Driver script: train the deep player against a random player and plot wins.
tf.reset_default_graph()

nnplayer = deepPlayer("QLearner1")
rndplayer = RandomPlayer()

TFSN.set_session(tf.Session())
TFSN.get_session().run(tf.global_variables_initializer())

game_number, p1_wins, p2_wins, draws = evaluate_players(
    rndplayer, nnplayer, games_per_battle=1000, num_battles=5)

# Green: player 1 wins, blue: player 2 wins.
plt.plot(game_number, p1_wins, 'g-',
         game_number, p2_wins, 'b-')
plt.show()

TFSN.set_session(None)
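# For intuition: the multiplicative decay above gives an exploration rate of
# p0 * decay ** n after n games. A standalone check with illustrative values
# (0.95 for both is an assumption, not the repo's configuration):
p0, decay = 0.95, 0.95
for n in (0, 10, 100):
    print('after {} games: random_move_prob = {:.4f}'.format(n, p0 * decay ** n))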