def evaluate_new_neural_network(p_v_network_old, p_v_network_new,
                                number_of_battles=4,
                                plane_size=config.PLANE_SIZE):
    root1 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=plane_size),
                                     father_edge=None,
                                     p_v_network=p_v_network_new)
    root2 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=plane_size),
                                     father_edge=None,
                                     p_v_network=p_v_network_old)
    player1 = p_v_mcts_player.MCTSPlayer(root=root1,
                                         p_v_network=p_v_network_new,
                                         max_simulation=80)
    player2 = p_v_mcts_player.MCTSPlayer(root=root2,
                                         p_v_network=p_v_network_old,
                                         max_simulation=80)
    new_pure_win = 0
    print("------ new net as black, old net as white ------")
    for i in range(number_of_battles):
        player1.refresh()
        player2.refresh()
        winner, plane_record, action_list, turn = play.PlayLogic().play(
            player1, player2)
        new_pure_win += winner
    print("------ new net as white, old net as black ------")
    for i in range(number_of_battles):
        player1.refresh()
        player2.refresh()
        winner, plane_record, action_list, turn = play.PlayLogic().play(
            player2, player1)
        new_pure_win -= winner
    return new_pure_win
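# Minimal usage sketch for the score-returning variant above (not part of the
# original code). The checkpoint indices 0 and 1 are placeholders; restore()
# is assumed to load weights saved earlier with P_V_Network.save(), as in the
# training loop below. A positive score means the candidate network won more
# games than it lost over both colour assignments.
def _example_evaluate_candidate():
    old_net = p_v_network.P_V_Network()
    new_net = p_v_network.P_V_Network()
    old_net.restore(0)   # previously accepted weights
    new_net.restore(1)   # candidate weights
    return evaluate_new_neural_network(old_net, new_net, number_of_battles=4)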
def evaluate_new_neural_network(
        self,
        p_v_network_old,
        p_v_network_new,
        number_of_battles=config.NUMBER_of_BATTLES_WHEN_EVALUATING,
        plane_size=config.PLANE_SIZE):
    # return True  # for testing only
    root1 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=plane_size),
                                     father_edge=None,
                                     p_v_network=p_v_network_new)
    root2 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=plane_size),
                                     father_edge=None,
                                     p_v_network=p_v_network_old)
    player1 = p_v_mcts_player.MCTSPlayer(
        root=root1,
        p_v_network=p_v_network_new,
        max_simulation=config.MAX_SIMULATION_WHEN_EVALUATING)
    player2 = p_v_mcts_player.MCTSPlayer(
        root=root2,
        p_v_network=p_v_network_old,
        max_simulation=config.MAX_SIMULATION_WHEN_EVALUATING)
    new_pure_win = 0
    logging.info("new net as white, old net as black")
    for i in range(number_of_battles):
        player1.refresh()
        player2.refresh()
        winner, plane_record, action_list, turn = play.PlayLogic(
            plane_size=config.PLANE_SIZE).play(player2, player1)
        new_pure_win -= winner
    if new_pure_win >= 0:
        new_pure_win = 0
        logging.info("new net as black, old net as white")
        for i in range(number_of_battles):
            player1.refresh()
            player2.refresh()
            winner, plane_record, action_list, turn = play.PlayLogic(
                plane_size=config.PLANE_SIZE).play(player1, player2)
            new_pure_win += winner
        if new_pure_win >= 0:
            return True
        else:
            # return True  # for testing only
            return False
    else:
        # return True  # for testing only
        return False
def evaluate_new_neural_network(self, p_v_network_old, p_v_network_new,
                                number_of_battles=11, plane_size=15):
    root1 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=plane_size),
                                     father_edge=None,
                                     p_v_network=p_v_network_new)
    root2 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=plane_size),
                                     father_edge=None,
                                     p_v_network=p_v_network_old)
    player1 = p_v_mcts_player.MCTSPlayer(root=root1,
                                         p_v_network=p_v_network_new,
                                         max_simulation=50)
    player2 = p_v_mcts_player.MCTSPlayer(root=root2,
                                         p_v_network=p_v_network_old,
                                         max_simulation=50)
    new_pure_win = 0
    for i in range(number_of_battles):
        player1.refresh()
        player2.refresh()
        winner, plane_record, action_list, turn = play.PlayLogic().play(
            player1, player2)
        new_pure_win += winner
    if new_pure_win > 2:
        new_pure_win = 0
        for i in range(number_of_battles):
            player1.refresh()
            player2.refresh()
            winner, plane_record, action_list, turn = play.PlayLogic(
            ).play(player2, player1)
            # colours are swapped here, so negate the winner value to keep
            # counting the new network's net wins (as in the other variants)
            new_pure_win -= winner
        if new_pure_win > 2:
            return True
        else:
            return False
    else:
        return False
def evaluate_new_network_with_random_player(self, p_v_network_new,
                                            number_of_battles=25,
                                            plane_size=config.PLANE_SIZE,
                                            u=1, max_simulation=1):
    root1 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=plane_size),
                                     father_edge=None,
                                     p_v_network=p_v_network_new)
    player1 = p_v_mcts_player.MCTSPlayer(root=root1,
                                         p_v_network=p_v_network_new,
                                         max_simulation=max_simulation)
    player2 = random_player.RandomPlayer(
        gl.GameLogic(plane_size=plane_size))
    new_pure_win = 0
    print("------ network as black, random player as white ------")
    for i in range(number_of_battles):
        player1.refresh()
        player2.refresh()
        winner, plane_record, action_list, turn = play.PlayLogic().play(
            player1, player2)
        new_pure_win += winner
    print("------ network as white, random player as black ------")
    for i in range(number_of_battles):
        player1.refresh()
        player2.refresh()
        winner, plane_record, action_list, turn = play.PlayLogic().play(
            player2, player1)
        new_pure_win -= winner
    # map new_pure_win from [-2 * number_of_battles, 2 * number_of_battles]
    # onto a win rate in [0, 1]
    win_rate = (new_pure_win + number_of_battles * 2.0) / (
        2 * 2 * number_of_battles)
    with open(
            'network/win_rate_max_simulation' + str(max_simulation) + '.txt',
            'a+') as f:
        f.write(str(u) + "," + str(win_rate) + "\n")
    return new_pure_win
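# Worked example of the win-rate normalization used above (illustrative values,
# not part of the original code; assumes `winner` is +1 / -1 / 0 for a win by
# the first player passed to play(), a loss, or a draw, so new_pure_win ranges
# over [-2 * number_of_battles, 2 * number_of_battles]).
_example_battles = 25                      # 2 * 25 = 50 games in total
_example_pure_win = 30                     # e.g. 40 wins, 10 losses, no draws
_example_win_rate = (_example_pure_win + _example_battles * 2.0) / (
    2 * 2 * _example_battles)              # -> 0.8, i.e. 40 / 50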
import config
import random_player

game_logic = gl.GameLogic(plane_size=config.PLANE_SIZE)

# p_v_network_1 = p_v_network.P_V_Network()
# p_v_network_1.restore(0)
# root1 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=plane_size), father_edge=None, p_v_network=p_v_network_new)
# player1 = p_v_mcts_player.MCTSPlayer(root=root1, p_v_network=p_v_network_new, max_simulation=2)
player1 = random_player.RandomPlayer(
    gl.GameLogic(plane_size=config.PLANE_SIZE))

p_v_network_2 = p_v_network.P_V_Network()
p_v_network_2.restore(0)
root2 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=config.PLANE_SIZE),
                                 father_edge=None,
                                 p_v_network=p_v_network_2)
player2 = p_v_mcts_player.MCTSPlayer(root=root2,
                                     p_v_network=p_v_network_2,
                                     max_simulation=2)


def evaluate_new_neural_network(player1, player2, number_of_battles=1):
    new_pure_win = 0
    print("------ new net as black, old net as white ------")
    for i in range(number_of_battles):
        player1.refresh()
        player2.refresh()
        winner, plane_record_1, action_list, turn = play.PlayLogic().play(
            player1, player2)
        new_pure_win += winner
def train_and_update(self, plane_size=15, number_of_epoch=1,
                     number_of_update_network=200, number_of_games=200,
                     numbuer_of_samples_in_each_game=9, min_batch=100,
                     max_simulation=3):
    '''
    :param number_of_epoch:
    :param number_of_update_network:
    :param number_of_games:
    :param numbuer_of_samples_in_each_game:
    :param min_batch: must be a divisor of number_of_games * numbuer_of_samples_in_each_game
    :return:
    '''
    p_v_network_new = p_v_network.P_V_Network()
    p_v_network_old = p_v_network.P_V_Network()
    path = "./network"
    if not os.path.exists(path):
        os.makedirs(path)
    for u in range(number_of_update_network):
        print("the %dth update" % (u))
        p_v_network_new.save(u)
        self_play_game = play.PlayLogic(plane_size=plane_size)
        data_generator = generate_self_play_data.GenerateSelfPlayData(
            self_play_game)
        root1 = p_v_mcts_player.MCTSNode(
            gl.GameLogic(plane_size=plane_size),
            father_edge=None,
            p_v_network=p_v_network_new)
        root2 = p_v_mcts_player.MCTSNode(
            gl.GameLogic(plane_size=plane_size),
            father_edge=None,
            p_v_network=p_v_network_new)
        player1 = p_v_mcts_player.MCTSPlayer(root=root1,
                                             p_v_network=p_v_network_new,
                                             max_simulation=max_simulation)
        player2 = p_v_mcts_player.MCTSPlayer(root=root2,
                                             p_v_network=p_v_network_new,
                                             max_simulation=max_simulation)
        plane_records, game_result_, y_ = data_generator.generate_self_play_data(
            player1,
            player2,
            number_of_games=number_of_games,
            numbuer_of_samples_in_each_game=numbuer_of_samples_in_each_game)
        for e in range(number_of_epoch):
            for i in range(
                    int(number_of_games * numbuer_of_samples_in_each_game /
                        min_batch)):
                # mini-batch of 100; with only 1000 position samples this
                # inner loop runs just 10 times
                batch = [
                    plane_records[i * min_batch:(i + 1) * min_batch],
                    game_result_[i * min_batch:(i + 1) * min_batch],
                    y_[i * min_batch:(i + 1) * min_batch]
                ]
                if e % 10 == 0:
                    # loss = p_v_network_new.loss.eval(feed_dict={p_v_network_new.x_plane: batch[0], p_v_network_new.game_result: batch[1], p_v_network_new.y_: batch[2], p_v_network_new.is_training: False})
                    # p_v_network_new.sess.run([p_v_network_new.loss.eval], feed_dict={p_v_network_new.x_plane: batch[0], p_v_network_new.game_result: batch[1], p_v_network_new.y_: batch[2], p_v_network_new.is_training: False})
                    # print("step %d, loss %g" % (i, loss))
                    pass
                p_v_network_new.sess.run(
                    [p_v_network_new.train_step],
                    feed_dict={
                        p_v_network_new.x_plane: batch[0],
                        p_v_network_new.game_result: batch[1],
                        p_v_network_new.y_: batch[2],
                        p_v_network_new.is_training: True
                    })
        if self.evaluate_new_neural_network(p_v_network_old,
                                            p_v_network_new,
                                            plane_size=plane_size,
                                            number_of_battles=5):
            print("old_network changed")
            p_v_network_old.restore(u)
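# Hypothetical driver for the training loop above; `Trainer` is a placeholder
# name for whichever class in this repository actually owns train_and_update()
# and evaluate_new_neural_network(). The argument values simply restate the
# defaults of train_and_update().
if __name__ == "__main__":
    trainer = Trainer()
    trainer.train_and_update(plane_size=15,
                             number_of_epoch=1,
                             number_of_update_network=200,
                             number_of_games=200,
                             numbuer_of_samples_in_each_game=9,
                             min_batch=100,
                             max_simulation=3)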
    arr_data_augment_board = arr_data_augment_board.swapaxes(0, 1)
    arr_data_augment_board = arr_data_augment_board.swapaxes(1, 2)
    arr_data_augment_act = np.fliplr(arr_data_augment_act)
    board = np.concatenate((board, np.array([arr_data_augment_board])))
    action_probability_distribution = np.concatenate(
        (action_probability_distribution,
         np.array([arr_data_augment_act.reshape(size ** 2)])))
    result = np.concatenate((result, np.array([[z]])))
    return result, board, action_probability_distribution


if __name__ == "__main__":
    import p_v_network
    import play
    self_play_game = play.PlayLogic(plane_size=15)
    data_generator = GenerateSelfPlayData(self_play_game)
    p_v_network = p_v_network.P_V_Network()
    root1 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=15),
                                     father_edge=None,
                                     p_v_network=p_v_network)
    root2 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=15),
                                     father_edge=None,
                                     p_v_network=p_v_network)
    player1 = p_v_mcts_player.MCTSPlayer(root=root1,
                                         p_v_network=p_v_network,
                                         max_simulation=5)
    player2 = p_v_mcts_player.MCTSPlayer(root=root2,
                                         p_v_network=p_v_network,
                                         max_simulation=5)
    arr, result, y_ = data_generator.generate_self_play_data(
        player1, player2, number_of_games=2,
        numbuer_of_samples_in_each_game=8)
    print(arr.shape, result.shape, y_.shape)
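# Standalone sketch of the mirroring idea used in the augmentation above
# (hypothetical helper, not part of the original code): the board plane and
# the move-probability grid must be flipped by the same symmetry so that the
# flattened policy target still points at the right squares.
def _mirror_sample(board_plane, policy_grid, size):
    board_mirrored = np.fliplr(board_plane)        # mirror the board left-right
    policy_mirrored = np.fliplr(policy_grid)       # mirror the policy the same way
    return board_mirrored, policy_mirrored.reshape(size ** 2)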