def evaluate_new_neural_network(p_v_network_old,
                                p_v_network_new,
                                number_of_battles=4,
                                plane_size=config.PLANE_SIZE):
    """Pit the new network against the old one over two series of games.

    The new network plays black for ``number_of_battles`` games, then white
    for another ``number_of_battles`` games.  Each raw game result is +1 for
    a black win, -1 for a white win, 0 for a draw; results are accumulated
    from the new network's point of view, so a positive return value means
    the new network scored better overall.
    """
    root1 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=plane_size),
                                     father_edge=None,
                                     p_v_network=p_v_network_new)
    root2 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=plane_size),
                                     father_edge=None,
                                     p_v_network=p_v_network_old)
    player1 = p_v_mcts_player.MCTSPlayer(root=root1,
                                         p_v_network=p_v_network_new,
                                         max_simulation=80)
    player2 = p_v_mcts_player.MCTSPlayer(root=root2,
                                         p_v_network=p_v_network_old,
                                         max_simulation=80)

    def run_series(black, white):
        # Play one series and return the summed raw results (+1 = black won).
        total = 0
        for _ in range(number_of_battles):
            player1.refresh()
            player2.refresh()
            winner, _plane, _actions, _turn = play.PlayLogic().play(black, white)
            total += winner
        return total

    print("------新黑旧白------")
    new_pure_win = run_series(player1, player2)
    print("------新白旧黑------")
    # New network is white here, so subtract: a white win (-1) helps it.
    new_pure_win -= run_series(player2, player1)
    return new_pure_win
def evaluate_new_neural_network(
        self,
        p_v_network_old,
        p_v_network_new,
        number_of_battles=config.NUMBER_of_BATTLES_WHEN_EVALUATING,
        plane_size=config.PLANE_SIZE):
    """Decide whether the new network is good enough to replace the old one.

    Stage 1 plays the new network as white; if it loses the series on
    balance the evaluation stops early with False.  Otherwise stage 2
    plays it as black, and True is returned only when that series is also
    at least even.  Raw results are +1 black win / -1 white win / 0 draw.
    """
    root1 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=plane_size),
                                     father_edge=None,
                                     p_v_network=p_v_network_new)
    root2 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=plane_size),
                                     father_edge=None,
                                     p_v_network=p_v_network_old)
    player1 = p_v_mcts_player.MCTSPlayer(
        root=root1,
        p_v_network=p_v_network_new,
        max_simulation=config.MAX_SIMULATION_WHEN_EVALUATING)
    player2 = p_v_mcts_player.MCTSPlayer(
        root=root2,
        p_v_network=p_v_network_old,
        max_simulation=config.MAX_SIMULATION_WHEN_EVALUATING)

    # Stage 1: new network as white (old network moves first as black).
    logging.info("新白旧黑")
    score = 0
    for _ in range(number_of_battles):
        player1.refresh()
        player2.refresh()
        winner, _plane, _actions, _turn = play.PlayLogic(
            plane_size=config.PLANE_SIZE).play(player2, player1)
        score -= winner
    if score < 0:
        return False

    # Stage 2: new network as black.
    logging.info("新黑旧白")
    score = 0
    for _ in range(number_of_battles):
        player1.refresh()
        player2.refresh()
        winner, _plane, _actions, _turn = play.PlayLogic(
            plane_size=config.PLANE_SIZE).play(player1, player2)
        score += winner
    return score >= 0
def __init__(self, deck_cards, attributes):
    """Create a new two-player game session.

    :param deck_cards: forwarded to ``GameLogic`` as the deck to play with
    :param attributes: forwarded to ``GameLogic`` (card attributes)
    """
    # Session id; generate_id() is presumably what fills self.id in —
    # TODO(review): confirm against the method's definition.
    self.id = None
    self.generate_id()
    # Timestamps (seconds since epoch) for lifetime / inactivity tracking.
    self.begin = time()
    self.last_interaction = time()
    self.active = True
    # Exactly two players per session.
    self.players = [Player(), Player()]
    self.game_state = game_logic.GameLogic(deck_cards=deck_cards,
                                           attributes=attributes)
def evaluate_new_neural_network(self,
                                p_v_network_old,
                                p_v_network_new,
                                number_of_battles=11,
                                plane_size=15):
    """Decide whether the new network clearly beats the old one.

    Two series are played: first with the new network as black, then
    (only if the first series was won by a margin above 2) with the new
    network as white.  Raw game results are +1 for a black win, -1 for a
    white win, 0 for a draw.  Returns True only when the new network wins
    both series with a margin above 2.
    """
    root1 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=plane_size),
                                     father_edge=None,
                                     p_v_network=p_v_network_new)
    root2 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=plane_size),
                                     father_edge=None,
                                     p_v_network=p_v_network_old)
    player1 = p_v_mcts_player.MCTSPlayer(root=root1,
                                         p_v_network=p_v_network_new,
                                         max_simulation=50)
    player2 = p_v_mcts_player.MCTSPlayer(root=root2,
                                         p_v_network=p_v_network_old,
                                         max_simulation=50)
    # Series 1: new network plays black, so +1 results count in its favour.
    new_pure_win = 0
    for i in range(number_of_battles):
        player1.refresh()
        player2.refresh()
        winner, plane_record, action_list, turn = play.PlayLogic().play(
            player1, player2)
        new_pure_win += winner
    if new_pure_win <= 2:
        return False
    # Series 2: new network plays white.  BUG FIX: a new-network win is now
    # winner == -1, so the score must be SUBTRACTED (the original `+=`
    # rewarded the old network; the sibling evaluators in this project all
    # negate the winner when the new network plays white).
    new_pure_win = 0
    for i in range(number_of_battles):
        player1.refresh()
        player2.refresh()
        winner, plane_record, action_list, turn = play.PlayLogic(
        ).play(player2, player1)
        new_pure_win -= winner
    return new_pure_win > 2
def evaluate_new_network_with_random_player(self,
                                            p_v_network_new,
                                            number_of_battles=25,
                                            plane_size=config.PLANE_SIZE,
                                            u=1,
                                            max_simulation=1):
    """Benchmark the network-guided MCTS player against a random player.

    Plays ``number_of_battles`` games with the network as black, then the
    same number with it as white, accumulating raw results (+1 black win,
    -1 white win) from the network's point of view.  The derived win rate
    is appended to ``network/win_rate_max_simulation<N>.txt`` tagged with
    the update counter ``u``; the raw score margin is returned.
    """
    root1 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=plane_size),
                                     father_edge=None,
                                     p_v_network=p_v_network_new)
    player1 = p_v_mcts_player.MCTSPlayer(root=root1,
                                         p_v_network=p_v_network_new,
                                         max_simulation=max_simulation)
    player2 = random_player.RandomPlayer(
        gl.GameLogic(plane_size=plane_size))

    margin = 0
    print("------神黑随白------")
    for _ in range(number_of_battles):
        player1.refresh()
        player2.refresh()
        outcome = play.PlayLogic().play(player1, player2)[0]
        margin += outcome
    print("------神白随黑------")
    for _ in range(number_of_battles):
        player1.refresh()
        player2.refresh()
        outcome = play.PlayLogic().play(player2, player1)[0]
        margin -= outcome

    # Map the margin in [-2N, +2N] onto a win rate in [0, 1].
    total_games = number_of_battles * 2
    win_rate = (margin + total_games * 1.0) / (2 * total_games)
    out_path = ('network/win_rate_max_simulation' + str(max_simulation)
                + '.txt')
    with open(out_path, 'a+') as f:
        f.write(str(u) + "," + str(win_rate) + "\n")
    return margin
def play(self, player1, player2):
    """Run one full game between player1 (black) and player2 (white).

    Returns a 4-tuple ``(result, plane, distributions, turns)`` where
    ``result`` is 1 / -1 / 0 for black win / white win / draw, ``plane``
    is the final board, ``distributions`` is the per-move action
    probability list, and ``turns`` is the number of moves played.
    """
    self.game_logic = gl.GameLogic(plane_size=self.plane_size)
    distributions = []

    def step(mover, watcher):
        # Ask `mover` for a move, record its probability distribution,
        # apply the move, and inform `watcher` of it.
        x, y, dist = mover.get_action_and_probability()
        distributions.append(dist)
        self.game_logic.play(x, y)
        watcher.get_opponents_action(x, y)
        return x, y

    # Black always opens.
    x, y = step(player1, player2)
    result = self.game_logic.game_result_fast_version(x, y)
    while result == 2:  # 2 == game still in progress
        if self.game_logic.current_player == 1:
            x, y = step(player1, player2)
        else:
            x, y = step(player2, player1)
        result = self.game_logic.game_result_fast_version(x, y)

    messages = {1: "黑胜", -1: "白胜", 0: "和棋"}
    if result in messages:
        self.play_record.append(self.game_logic.plane.copy())
        logging.info(messages[result])
        print(messages[result])
        return (result, self.game_logic.plane, distributions,
                self.game_logic.current_turn - 1)
    # Any other result code is unexpected — bail out loudly.
    logging.warning("程序出错了,3秒后退出...")
    print("程序出错了,3秒后退出...")
    time.sleep(3)
    exit()
def __init__(self):
    """Initialise app state, window size, game logic and colour palette."""
    self._running = True
    self._size = (450, 900)
    self._game = game_logic.GameLogic()
    # Named RGB colour constants, exposed as uppercase attributes.
    palette = (
        ('BLACK', (0, 0, 0)),
        ('GREY', (110, 110, 110)),
        ('WHITE', (255, 255, 255)),
        ('RED', (255, 0, 0)),
        ('GREEN', (0, 255, 0)),
        ('BLUE', (0, 0, 255)),
        ('YELLOW', (255, 255, 0)),
        ('PURPLE', (255, 0, 255)),
        ('SKY', (0, 255, 255)),
        ('ORANGE', (255, 127, 0)),
        ('PINK', (255, 0, 127)),
        ('LIME', (0, 255, 127)),
        ('BROWN', (165, 42, 42)),
    )
    for name, rgb in palette:
        setattr(self, name, rgb)
import tkinter as tk import time import p_v_network import p_v_mcts_player import game_logic as gl game_logic = gl.GameLogic(plane_size=15) p_v_network_1 = p_v_network.P_V_Network() p_v_network_1.restore(0) p_v_network_2 = p_v_network.P_V_Network() p_v_network_2.restore(5) root1 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=15), father_edge=None, p_v_network=p_v_network_1) root2 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=15), father_edge=None, p_v_network=p_v_network_2) player1 = p_v_mcts_player.MCTSPlayer(root=root1, p_v_network=p_v_network_1, max_simulation=160) player2 = p_v_mcts_player.MCTSPlayer(root=root2, p_v_network=p_v_network_2, max_simulation=160) def click_callback(event): x = event.x y = event.y print(x, y) if x > game_logic.plane_size * 30 + 15 or x < 15 or y > game_logic.plane_size * 30 + 15 or y < 15:
arr_data_augment_board = arr_data_augment_board.swapaxes(0, 1) arr_data_augment_board = arr_data_augment_board.swapaxes(1, 2) arr_data_augment_act = np.fliplr(arr_data_augment_act) board = np.concatenate((board, np.array([arr_data_augment_board]))) action_probability_distribution = np.concatenate((action_probability_distribution, np.array([arr_data_augment_act.reshape(size ** 2)]))) result = np.concatenate((result, np.array([[z]]))) return result, board, action_probability_distribution if __name__ == "__main__": import p_v_network import play self_play_game = play.PlayLogic(plane_size=15) data_generator = GenerateSelfPlayData(self_play_game) p_v_network = p_v_network.P_V_Network() root1 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=15), father_edge=None, p_v_network=p_v_network) root2 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=15), father_edge=None, p_v_network=p_v_network) player1 = p_v_mcts_player.MCTSPlayer(root=root1, p_v_network=p_v_network, max_simulation=5) player2 = p_v_mcts_player.MCTSPlayer(root=root2, p_v_network=p_v_network, max_simulation=5) arr, result, y_ = data_generator.generate_self_play_data(player1, player2, number_of_games=2, numbuer_of_samples_in_each_game=8) print(arr.shape, result.shape, y_.shape)
def refresh(self):
    """Discard the current search tree and restart from an empty board."""
    size = self.root.state.plane_size
    fresh_state = game_logic.GameLogic(plane_size=size)
    self.root = MCTSNode(fresh_state,
                         father_edge=None,
                         p_v_network=self.p_v_network)
def __init__(self, plane_size=config.PLANE_SIZE):
    """Set up a game runner for boards of ``plane_size`` x ``plane_size``."""
    self.plane_size = plane_size
    # Final board of every finished game gets appended here.
    self.play_record = []
    self.game_logic = gl.GameLogic(plane_size=plane_size)
import tkinter as tk
import time
import game_logic as gl

# Module-level game state shared with the click handler below.
game_logic = gl.GameLogic(plane_size=8)


def click_callback(event):
    """Handle a canvas click: map pixels to board coordinates, play the
    move, and draw the stone for the player who just moved."""
    print("clicked at", event.x, event.y)
    x = event.x
    y = event.y
    # Ignore clicks outside the board (15 px margin, 30 px per cell).
    if x > game_logic.plane_size * 30 + 15 or x < 15 or y > game_logic.plane_size * 30 + 15 or y < 15:
        return
    result_x = (x - 15) // 30
    result_y = (y - 15) // 30
    print(result_x, result_y)
    if game_logic.play(result_x, result_y):
        # NOTE(review): current_player == 1 draws WHITE here, which suggests
        # play() flips current_player before this check — confirm against
        # GameLogic.play().
        if game_logic.current_player == 1:
            canvas.create_oval(30 + result_x * 30 - 11,
                               30 + result_y * 30 - 11,
                               30 + result_x * 30 + 10,
                               30 + result_y * 30 + 10,
                               fill='white')
        elif game_logic.current_player == -1:
            canvas.create_oval(30 + result_x * 30 - 11,
                               30 + result_y * 30 - 11,
                               30 + result_x * 30 + 10,
                               30 + result_y * 30 + 10,
                               fill='black')
return 0, self.game_logic.plane, action_probability_distribution_list, self.game_logic.current_turn - 1 else: logging.warning("程序出错了,3秒后退出...") print("程序出错了,3秒后退出...") time.sleep(3) exit() if __name__ == "__main__": import p_v_mcts_player_v2 import p_v_network import game_logic as gl pl = PlayLogic() p_v_network = p_v_network.P_V_Network() state1 = gl.GameLogic(plane_size=15) state2 = gl.GameLogic(plane_size=15) temp_player = p_v_mcts_player_v2.MCTSPlayer(root=None, p_v_network=p_v_network, max_simulation=5) action_probability_distribution, value = temp_player.get_current_action_probability_distribution_and_value_by_neural_network( p_v_network=p_v_network, state=state1) root1 = p_v_mcts_player_v2.MCTSNode(state1, None, action_probability_distribution, value) root2 = p_v_mcts_player_v2.MCTSNode(state2, None, action_probability_distribution, value) player1 = p_v_mcts_player_v2.MCTSPlayer(root=root1, p_v_network=p_v_network, max_simulation=5) player2 = p_v_mcts_player_v2.MCTSPlayer(root=root2,
def play_level(screen, player):
    """Run the main laundry-level loop until quit, failure, or completion.

    :param screen: pygame display surface to draw on
    :param player: the player sprite, added to the master sprite group
    :returns: a ``c.GameState`` value (QUIT or GAME_OVER)
    """
    background = pygame.Surface((c.SCREEN_WIDTH, c.SCREEN_HEIGHT))
    clock = pygame.time.Clock()
    # Make washers and dryers
    washer_group, dryer_group = level_utils.make_washers_and_dryers((0, 0),
                                                                    2, 2)
    # Images and sprites for player and laundry piles
    pile_images = image_utils.load_laundry_images('images/laundry/in_pile')
    pile_in = pile.Pile(15, 7, pile_images, c.LaundryState.UNWASHED)
    pile_out = pile.Pile(c.SCREEN_WIDTH - 105, 7, pile_images,
                         c.LaundryState.DRIED)
    # Labels for laundry piles
    # TODO: make more dynamic/adjustable labels based on position of piles
    pile_in_label, pile_in_rect = level_utils.make_label(WHITE, 'inbox')
    pile_in_rect.bottomleft = (10, c.SCREEN_HEIGHT)
    pile_out_label, pile_out_rect = level_utils.make_label(WHITE, 'outbox')
    pile_out_rect.bottomright = (c.SCREEN_WIDTH - PADDING, c.SCREEN_HEIGHT)
    daily_clock = level_utils.DailyClock()
    # Generating orders
    orders = level_utils.generate_orders(order_num_min=8, order_num_max=8,
                                         load_num_min=1, load_num_max=1)
    customers = level_utils.generate_customers(orders)
    inactive_customers = pygame.sprite.Group(
        customers)  # all customers start off inactive
    # Storing all sprites to master group
    all_sprites = pygame.sprite.Group(washer_group, dryer_group, pile_in,
                                      pile_out, player)
    logic = game_logic.GameLogic(orders, pile_in, pile_out, player)
    running = True
    while running:
        # Event id forwarded to sprite update; 0 means "no user event".
        id = 0
        time_delta = clock.tick(FPS) / 1000.0
        mouse_up = False
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False
                return c.GameState.QUIT
            if event.type == pygame.MOUSEBUTTONUP and event.button == 1:
                print("click!!!")
                print("the current time is: " + str(pygame.time.get_ticks()))
                mouse_up = True
            if event.type > pygame.USEREVENT:
                # User-defined events encode an id as an offset past USEREVENT.
                id = event.type - pygame.USEREVENT
                print(id)
            if event.type == c.FAIL_STATE:
                return c.GameState.GAME_OVER
            if event.type == c.GAME_LOGIC_EVENT:
                logic.handle_event(event.type)
            if event.type == c.NOON_EVENT:
                # NOTE(review): removing from inactive_customers while
                # iterating it — pygame Groups tolerate this, but confirm
                # every customer really gets activated.
                for customer in inactive_customers:
                    all_sprites.add(customer)
                    inactive_customers.remove(customer)
        if not customers:
            print("final score: " + str(logic.score))
            return c.GameState.GAME_OVER  # TODO: update/change
        # Updating objects
        all_sprites.update(time_delta, pygame.mouse.get_pos(), mouse_up,
                           logic, id)
        pile_in.update_y_pos()
        pile_out.update_y_pos()
        clock_text = daily_clock.get_updated_text(time_delta)
        clock_label, clock_rect = level_utils.make_label(WHITE, clock_text)
        clock_rect.topright = (c.SCREEN_WIDTH - PADDING, PADDING)
        # Drawing background, sprites, and labels
        screen.blit(background, (0, 0))
        screen.blit(pile_in_label, pile_in_rect)
        screen.blit(pile_out_label, pile_out_rect)
        screen.blit(clock_label, clock_rect)
        all_sprites.draw(screen)
        # Updating display with the latest
        pygame.display.update()
import tkinter as tk
import time
import numpy as np
import game_logic as gl
import config

# Replay viewer: loads a recorded final board and draws the Gomoku grid
# on a tkinter canvas (30 px per cell, 15 px margin).
game_logic = gl.GameLogic(plane_size=config.PLANE_SIZE)
plane_record = np.load("plane_record/plane_record66.npy")

root = tk.Tk()
root.title("Gomoku")
root.resizable(0, 0)
root.wm_attributes("-topmost", 1)
canvas = tk.Canvas(root,
                   width=game_logic.plane_size * 30 + 30,
                   height=game_logic.plane_size * 30 + 30,
                   bd=0,
                   bg='khaki',
                   highlightthickness=0)
# Vertical grid lines.
for i in range(1, game_logic.plane_size + 1):
    canvas.create_line(i * 30, 30, i * 30, game_logic.plane_size * 30,
                       width=2)
# Horizontal grid lines.
for i in range(1, game_logic.plane_size + 1):
    canvas.create_line(30, i * 30, game_logic.plane_size * 30, i * 30,
                       width=2)
# 123 (not 124) because create_line is 2 pixels wide, so 124 would not
# line up with the grid.
if game_logic.plane_size == 15:
    canvas.create_oval(116, 116, 123, 123, fill='black')
    canvas.create_oval(116, 356, 123, 363, fill='black')
    canvas.create_oval(356, 116, 363, 123, fill='black')
import numpy as np import p_v_network_v2 as p_v_network import p_v_mcts_player import game_logic as gl import play import config import random_player game_logic = gl.GameLogic(plane_size=config.PLANE_SIZE) # p_v_network_1 = p_v_network.P_V_Network() # p_v_network_1.restore(0) # root1 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=plane_size), father_edge=None, p_v_network=p_v_network_new) # player1 = p_v_mcts_player.MCTSPlayer(root=root1, p_v_network=p_v_network_new, max_simulation=2) player1 = random_player.RandomPlayer( gl.GameLogic(plane_size=config.PLANE_SIZE)) p_v_network_2 = p_v_network.P_V_Network() p_v_network_2.restore(0) root2 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=config.PLANE_SIZE), father_edge=None, p_v_network=p_v_network_2) player2 = p_v_mcts_player.MCTSPlayer(root=root2, p_v_network=p_v_network_2, max_simulation=2) def evaluate_new_neural_network(player1, player2, number_of_battles=1): new_pure_win = 0 print("------新黑旧白------")
surface=window, grid_dim=constants.GRID_DIM) player1 = util.Player(window, constants.PLAYER_1_COLOR, board, constants.PLAYER_1_CHECKER_POSITIONS, name="Player Red") player2 = util.Player(window, constants.PLAYER_2_COLOR, board, constants.PLAYER_2_CHECKER_POSITIONS, name="Player Blue") select = util.Selector(surface=window, des_color=constants.DESTINATION_COLOR, loc_color=constants.LOCATION_COLOR) logic = game_logic.GameLogic(player1=player1, player2=player2, board=board) # Main Game Loop while run: # If not paused, draw all board and selector if not pause: draw(board, select) pygame.display.flip() for event in pygame.event.get(): if event.type == pygame.QUIT: run = False if event.type == pygame.KEYDOWN: if event.key == pygame.K_SPACE: if pause:
def train_and_update(self,
                     plane_size=15,
                     number_of_epoch=1,
                     number_of_update_network=200,
                     number_of_games=200,
                     numbuer_of_samples_in_each_game=9,
                     min_batch=100,
                     max_simulation=3):
    '''Self-play training loop: generate games, train the new network,
    and (when it beats the old one) advance the old network's checkpoint.

    :param number_of_epoch: training epochs per update round
    :param number_of_update_network: number of generate/train/evaluate rounds
    :param number_of_games: self-play games generated per round
    :param numbuer_of_samples_in_each_game: positions sampled from each game
    :param min_batch: must divide number_of_games *
        numbuer_of_samples_in_each_game evenly (mini-batch size)
    :param max_simulation: MCTS simulations per move during self-play
    :return: None
    '''
    p_v_network_new = p_v_network.P_V_Network()
    p_v_network_old = p_v_network.P_V_Network()
    path = "./network"
    if not os.path.exists(path):
        os.makedirs(path)
    for u in range(number_of_update_network):
        print("the %dth update" % (u))
        # Checkpoint the current network before this round's training.
        p_v_network_new.save(u)
        self_play_game = play.PlayLogic(plane_size=plane_size)
        data_generator = generate_self_play_data.GenerateSelfPlayData(
            self_play_game)
        # Both players share the same (new) network during self-play.
        root1 = p_v_mcts_player.MCTSNode(
            gl.GameLogic(plane_size=plane_size),
            father_edge=None,
            p_v_network=p_v_network_new)
        root2 = p_v_mcts_player.MCTSNode(
            gl.GameLogic(plane_size=plane_size),
            father_edge=None,
            p_v_network=p_v_network_new)
        player1 = p_v_mcts_player.MCTSPlayer(root=root1,
                                             p_v_network=p_v_network_new,
                                             max_simulation=max_simulation)
        player2 = p_v_mcts_player.MCTSPlayer(root=root2,
                                             p_v_network=p_v_network_new,
                                             max_simulation=max_simulation)
        plane_records, game_result_, y_ = data_generator.generate_self_play_data(
            player1,
            player2,
            number_of_games=number_of_games,
            numbuer_of_samples_in_each_game=numbuer_of_samples_in_each_game
        )
        for e in range(number_of_epoch):
            for i in range(
                    int(number_of_games * numbuer_of_samples_in_each_game /
                        min_batch)):
                # With a mini-batch of 100 and 1000 position samples this
                # inner loop runs 10 times.
                batch = [
                    plane_records[i * min_batch:(i + 1) * min_batch],
                    game_result_[i * min_batch:(i + 1) * min_batch],
                    y_[i * min_batch:(i + 1) * min_batch]
                ]
                if e % 10 == 0:
                    # Disabled periodic loss reporting, kept for reference:
                    # loss = p_v_network_new.loss.eval(feed_dict={p_v_network_new.x_plane: batch[0], p_v_network_new.game_result: batch[1], p_v_network_new.y_: batch[2], p_v_network_new.is_training: False})
                    # p_v_network_new.sess.run([p_v_network_new.loss.eval],
                    #                          feed_dict={p_v_network_new.x_plane: batch[0],
                    #                                     p_v_network_new.game_result: batch[1],
                    #                                     p_v_network_new.y_: batch[2],
                    #                                     p_v_network_new.is_training: False})
                    # print("step %d, loss %g" % (i, loss))
                    pass
                p_v_network_new.sess.run(
                    [p_v_network_new.train_step],
                    feed_dict={
                        p_v_network_new.x_plane: batch[0],
                        p_v_network_new.game_result: batch[1],
                        p_v_network_new.y_: batch[2],
                        p_v_network_new.is_training: True
                    })
        # If the freshly trained network wins the evaluation match, move
        # the "old" network forward to this round's checkpoint.
        if self.evaluate_new_neural_network(p_v_network_old,
                                            p_v_network_new,
                                            plane_size=plane_size,
                                            number_of_battles=5):
            print("old_network changed")
            p_v_network_old.restore(u)