def playMatchesBetweenVersions(env, run_version, player1version, player2version, EPISODES, logger, turns_until_tau0, goes_first=0):
    """Pit two model versions (or a human, for version -1) against each other.

    Returns the ``(scores, memory, points, sp_scores)`` tuple produced by
    ``playMatches``. Moves are printed whenever a human takes part.
    """

    def build_player(label, version):
        # Version -1 selects an interactive human player.
        if version == -1:
            return User(label, env.state_size, env.action_size)
        net = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                           env.input_shape, env.action_size,
                           config.HIDDEN_CNN_LAYERS)
        if version > 0:
            # Version 0 keeps the freshly initialised (untrained) weights.
            trained = net.read(env.name, run_version, version)
            net.model.set_weights(trained.get_weights())
        return Agent(label, env.state_size, env.action_size,
                     config.MCTS_SIMS, config.CPUCT, net)

    player1 = build_player('player1', player1version)
    player2 = build_player('player2', player2version)

    # Show the moves on screen whenever a human is involved.
    printmoves = player1version == -1 or player2version == -1
    scores, memory, points, sp_scores = playMatches(
        player1, player2, EPISODES, logger, turns_until_tau0,
        None, goes_first, printmoves)
    return (scores, memory, points, sp_scores)
def playMatchesBetweenVersions(env, run_version, player1version, player2version, EPISODES, logger, turns_until_tau0, goes_first=0, board_size=3):
    """Run EPISODES games between two network versions on a tic-tac-toe board.

    A version of -1 selects a human (``User``) player; otherwise an
    ``AlphaTicTacToeAgent`` backed by a residual CNN is built, loading stored
    weights when the version is positive. Returns
    ``(scores, memory, points, sp_scores)`` from ``playMatches``.
    """
    if player1version == -1:
        player1 = User('player1', env.state_size, env.action_size)
    else:
        # Input planes: 3 channels stacked on the board grid — TODO confirm
        # the channel count matches the training pipeline.
        player1_NN = ResidualCNN(config.REG_CONST, config.LEARNING_RATE,
                                 (3,) + env.grid_shape, env.action_size,
                                 config.HIDDEN_CNN_LAYERS, config.MOMENTUM)
        if player1version > 0:
            # Restore the stored weights for the requested version.
            stored = player1_NN.read(env.name, run_version, player1version)
            player1_NN.model.set_weights(stored.get_weights())
        player1 = AlphaTicTacToeAgent('player1', env.state_size,
                                      config.MCTS_SIMS, config.CPUCT,
                                      player1_NN)

    if player2version == -1:
        player2 = User('player2', env.state_size, env.action_size)
    else:
        player2_NN = ResidualCNN(config.REG_CONST, config.LEARNING_RATE,
                                 (3,) + env.grid_shape, env.action_size,
                                 config.HIDDEN_CNN_LAYERS, config.MOMENTUM)
        if player2version > 0:
            stored = player2_NN.read(env.name, run_version, player2version)
            player2_NN.model.set_weights(stored.get_weights())
        player2 = AlphaTicTacToeAgent('player2', env.state_size,
                                      config.MCTS_SIMS, config.CPUCT,
                                      player2_NN)

    scores, memory, points, sp_scores = playMatches(
        player1, player2, EPISODES, logger, turns_until_tau0,
        None, goes_first, board_size)
    return scores, memory, points, sp_scores
def playMatchesBetweenVersions(env, run_version, player1version, player2version, EPISODES, logger, turns_until_tau0, goes_first=0):
    """Play EPISODES matches between two versions (or registered humans).

    A version of -1 selects a human player; player 2's human is looked up by
    name via ``Provider``. Returns ``(scores, memory, points, sp_scores)``
    from ``playMatches``, or ``None`` when a requested network version is not
    registered with ``Provider``.
    """
    # NOTE(review): the passed-in env is ignored and replaced by a fresh Game
    # — looks intentional, but verify against the callers.
    env = Game()

    if player1version == -1:
        player1 = User("user1", env.state_size, env.action_size)
    else:
        player1_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                                  env.input_shape, env.action_size,
                                  config.HIDDEN_CNN_LAYERS)
        # Registry name, e.g. "<env.name>0007" (version zero-padded to 4).
        netName = env.name + "{0:0>4}".format(player1version)
        if player1version > 0:
            # Abort when the requested network is unknown to the registry.
            if Provider.getNetByName(netName) is None:
                return
            player1_network = player1_NN.read(env.name, run_version, player1version)
            player1_NN.model.set_weights(player1_network.get_weights())
        player1 = Agent(netName, env.state_size, env.action_size,
                        config.MCTS_SIMS, config.CPUCT, player1_NN)

    if player2version == -1:
        # Human opponent: resolve the registered person by typed-in name.
        name = input('enter username: ')
        user2 = Provider.getPersonByName(name)
        player2 = User(user2.name, env.state_size, env.action_size)
    else:
        player2_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                                  env.input_shape, env.action_size,
                                  config.HIDDEN_CNN_LAYERS)
        net2Name = env.name + "{0:0>4}".format(player2version)
        if player2version > 0:
            if Provider.getNetByName(net2Name) is None:
                return
            player2_network = player2_NN.read(env.name, run_version, player2version)
            player2_NN.model.set_weights(player2_network.get_weights())
        player2 = Agent(net2Name, env.state_size, env.action_size,
                        config.MCTS_SIMS, config.CPUCT, player2_NN)

    scores, memory, points, sp_scores = playMatches(
        player1, player2, EPISODES, logger, turns_until_tau0,
        None, goes_first)
    return (scores, memory, points, sp_scores)
def __init__(self, env, run_version, player1version, player2version, EPISODES, logger, turns_until_tau0, goes_first=0):
    """Store match settings and build both players.

    A version of -1 makes that side an interactive ``User``; otherwise a
    residual-CNN ``Agent`` is built, loading stored weights when the version
    is positive (version 0 keeps the freshly initialised network).
    """
    self.EPISODES = EPISODES
    self.turns_until_tau0 = turns_until_tau0
    self.logger = logger
    self.goes_first = goes_first

    if player1version == -1:
        self.player1 = User('player1', env.state_size, env.action_size)
    else:
        self.player1_NN = Residual_CNN(
            config.REG_CONST, config.LEARNING_RATE, env.input_shape,
            env.action_size, config.HIDDEN_CNN_LAYERS)
        if player1version > 0:
            # Restore the stored weights for the requested version.
            self.player1_network = self.player1_NN.read(
                env.name, run_version, player1version)
            self.player1_NN.model.set_weights(
                self.player1_network.get_weights())
        self.player1 = Agent('player1', env.state_size, env.action_size,
                             config.MCTS_SIMS, config.CPUCT, self.player1_NN)

    if player2version == -1:
        self.player2 = User('player2', env.state_size, env.action_size)
    else:
        self.player2_NN = Residual_CNN(
            config.REG_CONST, config.LEARNING_RATE, env.input_shape,
            env.action_size, config.HIDDEN_CNN_LAYERS)
        if player2version > 0:
            self.player2_network = self.player2_NN.read(
                env.name, run_version, player2version)
            self.player2_NN.model.set_weights(
                self.player2_network.get_weights())
        self.player2 = Agent('player2', env.state_size, env.action_size,
                             config.MCTS_SIMS, config.CPUCT, self.player2_NN)
def playMatchesBetweenVersions(env, run_version, player1version, player2version, EPISODES, logger, turns_until_tau0, goes_first=0):
    """Play EPISODES matches between two model versions.

    A version of -1 selects an interactive human player; otherwise an MCTS
    ``Agent`` backed by a residual CNN is built, loading trained weights when
    the version is positive. Returns ``(scores, memory, points, sp_scores)``
    from ``playMatches``.
    """
    # -1 means a human player.
    if player1version == -1:
        player1 = User('player1', env.state_size, env.action_size)
    else:
        # Residual_CNN builds the policy/value network.
        player1_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                                  env.input_shape, env.action_size,
                                  config.HIDDEN_CNN_LAYERS)
        if player1version > 0:
            # Not a human: load the trained version and its weights.
            trained = player1_NN.read(env.name, run_version, player1version)
            player1_NN.model.set_weights(trained.get_weights())
        # Agent runs the simulations and builds the MCTS tree.
        player1 = Agent('player1', env.state_size, env.action_size,
                        config.MCTS_SIMS, config.CPUCT, player1_NN)

    if player2version == -1:
        player2 = User('player2', env.state_size, env.action_size)
    else:
        player2_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                                  env.input_shape, env.action_size,
                                  config.HIDDEN_CNN_LAYERS)
        if player2version > 0:
            trained = player2_NN.read(env.name, run_version, player2version)
            player2_NN.model.set_weights(trained.get_weights())
        player2 = Agent('player2', env.state_size, env.action_size,
                        config.MCTS_SIMS, config.CPUCT, player2_NN)

    scores, memory, points, sp_scores = playMatches(
        player1, player2, EPISODES, logger, turns_until_tau0,
        None, goes_first)
    return (scores, memory, points, sp_scores)
def _make_player(game, version, label):
    """Build one player for *label* ('player1'/'player2') from a version code.

    Version codes: -1 human user, -2 random heuristic, -3 'box' heuristic,
    -4 'avoid' heuristic; any other value builds a residual-CNN agent,
    loading stored weights when the version is positive (version 0 keeps the
    freshly initialised network).
    """
    if version == -1:
        return User('user_' + label, game.state_size, game.move_size)
    if version == -2:
        return Heuristic('random_' + label, game.state_size, game.move_size, 'random')
    if version == -3:
        return Heuristic('box_' + label, game.state_size, game.move_size, 'box')
    if version == -4:
        return Heuristic('avoid_' + label, game.state_size, game.move_size, 'avoid')
    nn = residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                      game.input_shape, game.move_size,
                      config.HIDDEN_CNN_LAYERS)
    if version > 0:
        network = nn.read(version)
        nn.model.set_weights(network.get_weights())
    return Agent('NN_' + label, game.state_size, game.move_size,
                 config.MCTS_SIMS, config.CPUCT, nn)


def play_matches_between_networks(game, p1_version, p2_version, EPISODES, turns_to_tau0, goes_first=0):
    """Play EPISODES matches between two players selected by version code.

    See ``_make_player`` for the meaning of the version codes. Returns the
    ``(scores, memory, points, sp_scores)`` tuple from ``play_matches``.
    """
    # The two sides were previously built by duplicated if/elif chains;
    # both now go through the shared helper.
    player1 = _make_player(game, p1_version, 'player1')
    player2 = _make_player(game, p2_version, 'player2')

    scores, memory, points, sp_scores = play_matches(
        player1, player2, EPISODES, turns_to_tau0, goes_first=goes_first)
    return scores, memory, points, sp_scores
best_player_version) best_NN.model.set_weights(m_tmp.get_weights()) #otherwise just ensure the weights on the two players are the same else: best_player_version = 0 print('\n') last_time = time.time() winname = "AI_VISION" cv2.namedWindow(winname, cv2.WINDOW_NORMAL) # Create a named window cv2.resizeWindow(winname, WINDOW_SIZE, WINDOW_SIZE) cv2.moveWindow(winname, 950, 50) # Move window right corner of screen board = init_board() player1 = User('player1', env.state_size, env.action_size) #print('creating AI') player2 = Agent('best_player', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, best_NN) player2_tokens = BLACK_TOKEN action_i, action_j, show_action, player2_before_act_tokens_count = -1, -1, False, 0 #display_board(board) print('AI ready to play') while (True): screen = ImageGrab.grab(bbox=(SCREEN_BOX)) numpy_screen = np.array(screen) new_screen, board = drawGreen_circels(numpy_screen, board) if (show_action): # print((int(action_i*CELL_SIZE+BOARD_PADDING), int(action_j*CELL_SIZE+BOARD_PADDING))) new_screen = draw_action( new_screen, (int(action_i * (CELL_SIZE + 3) + BOARD_PADDING),
def test():
    """Play one interactive 3x3 game between the AI agent and a human user."""
    from agent import User, AI
    # Don't shadow the imported class names with instances — it made the
    # original confusing and blocked creating a second instance.
    ai_player = AI()
    human_player = User()
    # NOTE(review): `board` is not imported here — presumably a module-level
    # name; confirm it is in scope.
    game_board = board(3)
    game_board.play(ai_player, human_player)
# Initialize the network that will drive the computer player.
# Input planes: 2 channels stacked on the board grid.
best_NN = residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                       (2,) + game.grid_shape, game.move_size,
                       config.HIDDEN_CNN_LAYERS)

# Load the configured starting version's weights into the network.
best_version = config.INITIAL_MODEL_VERSION
print('Loading model ' + str(best_version) + '...')
model_temp = best_NN.read(best_version)
best_NN.model.set_weights(model_temp.get_weights())
print('\n')

# Create the two players: the trained agent and an interactive human.
best_player = Agent('best_player', game.state_size, game.move_size,
                    config.MCTS_SIMS, config.CPUCT, best_NN)
user_player = User('player1', game.state_size, game.move_size)
iteration = 0

# Keep playing single games until the user answers 'no'.
play_again = 'yes'
while play_again != 'no':
    print('\n')
    scores, _, points, sp_scores = play_matches_between_networks(
        game, -1, best_version, 1, turns_to_tau0=0, goes_first=0)
    print('\nScores: ')
    print(scores)
    print('\nFirst PLAYER / Second PLAYER SCORES')
    print(sp_scores)
    print('Play again?')
    play_again = input()