def test_update_info():
    """update_info() must set gc.is_black to True and clear is_control."""
    game = Othello(400, 4, "Name", "black")

    # Start from the opposite values so the assertions prove a real change.
    game.gc.is_black = False
    game.is_control = True

    game.update_info()

    assert game.gc.is_black
    assert not game.is_control
def playGame(me, thehost, sess):
    """Play Othello against a remote server, choosing the moves for player `me`.

    Connects through ``initClient`` and then loops: read a status message,
    reset the local game from it and, when ``status[0]`` equals `me`, send a
    move back as two newline-terminated numbers.

    :param me: identifier of this player; compared with ``status[0]``
    :param thehost: host handed to ``initClient``
    :param sess: session object whose ``run`` evaluates ``out`` for the
        network-driven move choice

    Relies on the module-level names ``state`` and ``out``.
    """
    sock = initClient(me, thehost)
    othello = Othello(0, state, me)
    while (True):
        print "Read"
        status = readMessage(sock)
        othello.reset(status[1], state, me)
        if (status[0] == me):
            # presumably status[1] is the round number -- TODO confirm.
            # Below 30 the built-in smartMove() is used, afterwards the network.
            if status[1] < 30:
                action_index = othello.smartMove()
            else:
                _state = get_state(me)
                _moves = valid_moves(getValidMoves(status[1], me))
                q = sess.run(out, feed_dict={
                    "train/inputs:0": [_state],
                    "train/moves:0": [_moves]
                })[0]
                action = nonzero_max(q)
                # Convert the flat action index into a pair on an 8-wide board.
                action_index = (action//8, action % 8)
            # Wire format: both numbers, each followed by a newline.
            sel = str(action_index[0]) + "\n" + str(action_index[1]) + "\n"
            print "<" + sel + ">"
            sock.send(sel)
            print "sent the message"
        else:
            print "It isn't my turn"
    # NOTE(review): the original indentation was lost; placed here this return
    # is unreachable because the loop above never breaks -- confirm placement.
    return
def __init__(self, board, move, own_turn):
    """Initialise the Othello base state, then store the given position data.

    :param board: board stored on the instance
    :param move: move stored on the instance
    :param own_turn: stored as ``own_turn``
    """
    Othello.__init__(self)

    # Values handed in by the caller.
    self.board = board
    self.move = move
    self.own_turn = own_turn

    # Counters always start at zero.
    self.turn = 0
    self.pass_times = 0
def get_available_moves_of_start_tables(self, game: Othello):
    """
    Collect the next moves of every stored opening that starts with the moves
    already played in game.

    The start tables are initialised on first use if they are still empty.

    :param game: game whose turn number and played moves are matched
    :return: list of distinct candidate moves in first-seen order
    """
    if len(self._start_tables) == 0:
        self._init_start_tables()

    played_count = game.get_turn_nr()
    played_moves = game.get_taken_mvs_text()

    candidates = []
    for sequence in self._start_tables:
        for index, entry in enumerate(sequence):
            if index < played_count:
                # Still inside the played prefix: the opening must match it.
                if played_moves[index] != entry:
                    break
                continue
            # Past the prefix: take the first real entry, skipping "nan".
            if entry != "nan":
                candidates.append(entry)
                break

    # Remove duplicates while keeping the order of first appearance.
    unique_moves = list(dict.fromkeys(candidates))
    if "nan" in unique_moves:
        unique_moves.remove("nan")
    return unique_moves
def test_init(self):
    """A new game owns a board of the requested size and black moves first."""
    for size in (4, 8):
        game = Othello(size)
        self.assertEqual(game.board, Board(size))
        self.assertEqual(game.active_color, 'black')
def get_move(self, game_state: Othello):
    """
    Select the best move according to the alpha-beta value of the resulting
    game state, optionally using Monte Carlo evaluation in the leaves.

    While the start library is enabled and fewer than 21 turns were played, a
    random matching library move is returned instead of searching.

    :param game_state: current game state
    :return: one of the available moves with the best value, chosen at random
    """
    # Use start library if it is selected and still included
    if self._use_start_lib and game_state.get_turn_nr() < 21:
        moves = self._start_tables.get_available_moves_of_start_tables(
            game_state)
        if len(moves) > 0:
            return util.translate_move_to_pair(
                moves[random.randrange(len(moves))])

    # According to experience the number of moves to consider decreases
    # relevantly after reaching a certain turn number, so the search depth is
    # increased dynamically after turn 40.
    search_depth = self._search_depth
    turn_number = game_state.get_turn_nr()
    if turn_number > 40:
        search_depth += turn_number // 10

    # Maps a value to the list of moves leading to a state with that value.
    best_moves = dict()
    for move in game_state.get_available_moves():
        # Play the move to get the resulting state
        next_state = game_state.deepcopy()
        next_state.play_position(move)

        # Negated because the value is computed for the opponent, who moves next.
        if self._use_monte_carlo:
            result = -AlphaBetaPruning.value_monte_carlo(
                next_state,
                search_depth - 1,
                self._heuristic,
                mc_count=self._mc_count)
        else:
            # Fix: use the dynamically adjusted depth here as well; previously
            # this branch ignored it and always used self._search_depth.
            result = -AlphaBetaPruning.value(
                next_state, search_depth - 1, self._heuristic)

        best_moves.setdefault(result, []).append(move)

    # Determine the best result
    best_result = max(best_moves.keys())

    # Report and reset the cache of the evaluation function that was used.
    if self._use_monte_carlo:
        print(AlphaBetaPruning.value_monte_carlo.cache_info())
        AlphaBetaPruning.value_monte_carlo.cache_clear()
    else:
        print(AlphaBetaPruning.value.cache_info())
        AlphaBetaPruning.value.cache_clear()

    # Play one random move with the best possible result
    return best_moves[best_result][random.randrange(
        len(best_moves[best_result]))]
class OthelloGame:
    """Console Othello match between a human and an AI player."""

    def __init__(self, net, ai_side, Tau=0, mcts_times=100):
        """Create the AI player and a fresh game.

        :param net: network handed to the AI player
        :param ai_side: side value played by the AI
        :param Tau: forwarded to ``AiPlayer``
        :param mcts_times: forwarded to ``AiPlayer``
        """
        self.ai_player = AiPlayer(net, ai_side, Tau, mcts_times)
        self.game = Othello()
        self.ai_side = ai_side

    def _read_human_move(self, side):
        """Keep prompting until the human enters a legal move, then play it."""
        while True:
            try:
                x, y = input("输入落子位置:").split()
                print(x, y)
                x, y = int(x), int(y)
                if (x, y) in self.game.possible_moves(side):
                    self.game.play_move(x, y, side)
                    break
            except Exception as e:
                print("输入错误, 重试", e)

    def playgame(self):
        """Alternate turns, starting with side -1, until the game is over."""
        side = -1
        while not self.game.game_over():
            self.game.print_board(side)
            print('score: ', self.game.getScore())
            if len(self.game.possible_moves(side)) == 0:
                # The side to move has no legal move and is skipped.
                print("No where todo")
            elif side == self.ai_side:
                self.ai_player.get_move(self.game)
            else:
                self._read_human_move(side)
            side = -side
        print(self.game.getScore())
def minimax(board, depth, player, alpha=-np.inf, beta=np.inf,
            eval_func='pos_score', king_version=False):
    """Evaluate `board` with depth-limited minimax and alpha-beta pruning.

    BLACK is the maximizing player and WHITE the minimizing player.

    :param board: board to evaluate
    :param depth: remaining search depth; at 0 the board is scored directly
    :param player: player to move (BLACK or WHITE)
    :param alpha: best value the maximizing player can already guarantee
    :param beta: best value the minimizing player can already guarantee
    :param eval_func: leaf evaluation: 'pos_score', 'mobi', 'pos_mobi' or
        'king_pos_score'
    :param king_version: search with KingOthello instead of Othello
    :return: minimax value of the board
    :raises ValueError: if the depth limit is reached with an unknown eval_func
    """
    if depth == 0:
        if eval_func == 'pos_score':
            return pos_score_sum(board)
        elif eval_func == 'mobi':
            return mobility(board)
        elif eval_func == 'pos_mobi':
            return pos_plus_mobi(board)
        elif eval_func == 'king_pos_score':  # this is for King Othello
            return king_pos_score_sum(board)
        # Previously an unknown name fell through and kept searching below
        # depth 0; fail loudly instead.
        raise ValueError("unknown eval_func: %r" % (eval_func,))

    game = KingOthello() if king_version else Othello()
    game.board = board
    game.current_player = player
    possible_moves = game.find_all_valid_moves()

    if possible_moves:
        if player == BLACK:  # maximizing player
            max_eval = -np.inf
            for move in possible_moves:
                game_copy = deepcopy(game)
                game_copy.take_move(move[0], move[1])
                # Fix: forward eval_func and king_version so deeper levels use
                # the same evaluation and rules as the root call.
                score = minimax(game_copy.board, depth - 1, opposite(player),
                                alpha, beta, eval_func, king_version)
                max_eval = max(max_eval, score)
                alpha = max(alpha, score)
                if beta <= alpha:
                    break
            return max_eval
        else:  # WHITE, minimizing player
            min_eval = np.inf
            for move in possible_moves:
                game_copy = deepcopy(game)
                game_copy.take_move(move[0], move[1])
                score = minimax(game_copy.board, depth - 1, opposite(player),
                                alpha, beta, eval_func, king_version)
                min_eval = min(min_eval, score)
                beta = min(beta, score)
                if beta <= alpha:
                    break
            return min_eval
    else:  # no possible move for current player
        game.switch_turn()
        possible_moves = game.find_all_valid_moves()  # check whether opponent has moves
        if possible_moves:
            # hand over to opponent, nothing changed
            return minimax(game.board, depth - 1, opposite(player),
                           alpha, beta, eval_func, king_version)
        else:
            # the opponent has no moves either, game over
            return pos_score_sum(game.board)
def heuristic(current_player, game_state: Othello):
    """
    Value of game_state for current_player according to the stored Monte Carlo
    heuristic: the largest database value over all available moves.

    current_player is coded as one of the constants EMPTY_CELL, PLAYER_ONE and
    PLAYER_TWO from constants.py, so it is an integer value.
    """
    candidate_moves = game_state.get_available_moves()
    current_turn = game_state.get_turn_nr()
    # Ask the database once per move and keep the best value.
    return max(
        database.db.get_change_of_winning(candidate, current_turn, current_player)
        for candidate in candidate_moves
    )
def win_count(board, x, y):
    """Count how many of N random playouts are won by the side to move.

    A stone for ``board.turn`` is first put at (x, y) on a virtual board;
    every playout then runs on a fresh copy of that virtual board.

    :param board: current board; only ``board.turn`` is read here
    :param x: first coordinate of the stone put before the playouts
    :param y: second coordinate of the stone put before the playouts
    :return: number of playouts whose result equals ``board.turn``
    """
    N = 100
    win = 0
    # vboard holds the position after the candidate move; vvboard is the
    # scratch copy each playout runs on.
    # NOTE(review): vboard starts from a fresh Ot() rather than a copy of
    # `board` -- confirm whether the current position should be copied first.
    vboard = Ot()
    vboard.put(x, y, board.turn)
    # Fix: vvboard was used without ever being created.
    vvboard = Ot()
    for _ in range(N):
        vvboard.copy(vboard)
        # Fix: play on the scratch copy so vboard stays untouched between runs.
        if random_play(vvboard) == board.turn:
            win += 1
    return win
def __call__(self):
    """Main loop of the simulator: play up to ``nombre_parties`` games.

    Stops early once a game's window is no longer open; each completed game's
    winner is appended to ``gagnants``.
    """
    for _ in range(self.nombre_parties):
        # Hand the window to the game only when the simulator has one.
        jeu = (Othello(self.joueurs, self.fenetre)
               if self.fenetre else Othello(self.joueurs))
        jeu()
        if not jeu.fenetre.open:
            break
        self.gagnants.append(jeu.gagnant)
    if self.display:
        print(self)
def run_games(config):
    """Generate self-play games with the newest model and save them.

    Each iteration loads the newest ``*.h5`` model if it changed, plays
    ``config.nb_game_in_file`` games and saves the AI's buffer.

    :param config: configuration object; reads ``iterations``,
        ``data.model_location``, ``buffer_size``, ``nb_game_in_file`` and the
        ``game`` settings ``simulation_num_per_move``, ``tau_1``, ``tau_2``
        and ``tau_swap``
    """
    game = Othello()
    model = ""
    x = config.iterations
    # Counts down to zero; a negative start value never reaches it.
    while x != 0:
        x -= 1
        models = sorted(glob.glob(config.data.model_location+"*.h5"))
        if model == "":
            # First pass: create the player from the newest model.
            model = models[-1]
            print("Loading new model: %s" % util.getPlayerName(model))
            ai = AIPlayer(config.buffer_size,
                          config.game.simulation_num_per_move, model=model)
        elif models[-1] != model:
            # A newer model appeared: reload it into the existing player.
            model = models[-1]
            print("Loading new model: %s" % util.getPlayerName(model))
            ai.load(model)
        start = time()
        for j in range(config.nb_game_in_file):
            util.print_progress_bar(j, config.nb_game_in_file, start=start)
            side = -1
            turn = 1
            while not game.game_over():
                # Switch from tau_1 to tau_2 once turn exceeds tau_swap.
                ai.tau = config.game.tau_1
                if config.game.tau_swap < turn:
                    ai.tau = config.game.tau_2
                move = ai.pick_move(game, side)
                game.play_move(move[0], move[1], side)
                side *= -1
                turn += 1
            ai.update_buffer(game.get_winner())
            game.reset_board()
        util.print_progress_bar(config.nb_game_in_file,
                                config.nb_game_in_file, start=start)
        save_games(config, ai.buffer)
    # Fix: the old trailing `t.join()` was called on the last move returned by
    # pick_move (not a thread) and failed after all the work was done.
def test_get_tile_start_pos(self):
    """Invalid squares give (); valid squares give the expected tuples."""

    def check(game, invalid_squares, valid_squares, expected_results):
        for square in invalid_squares:
            self.assertEqual(game.get_tile_start_pos(square), ())
        for square, expected in zip(valid_squares, expected_results):
            self.assertEqual(game.get_tile_start_pos(square), expected)

    # Board of size 4x4
    check(
        Othello(4),
        [(), (4, 1), (4, 4), (0, 4), (-2, 4), (5, 9),
         (-6, -3), (4, -3), (8, 3), (2, -4), (1, 20)],
        [(0, 0), (1, 0), (2, 3), (3, 3), (1, 2), (3, 1)],
        [((-95.0, 75.0), -20), ((-95.0, 25.0), -20),
         ((95.0, -25.0), 20), ((95.0, -75.0), 20),
         ((45.0, 25.0), 20), ((-45.0, -75.0), -20)],
    )

    # Board of size 8x8
    check(
        Othello(),
        [(), (8, 1), (8, 8), (0, 8), (-2, 8), (5, 9),
         (-6, -3), (8, -3), (12, 3), (2, -4), (1, 20)],
        [(0, 0), (1, 0), (5, 3), (3, 7), (1, 2), (6, 6)],
        [((-195.0, 175.0), -20), ((-195.0, 125.0), -20),
         ((-45.0, -75.0), -20), ((195.0, 25.0), 20),
         ((-95.0, 125.0), -20), ((145.0, -125.0), 20)],
    )
def test_is_on_line(self):
    """is_on_line accepts the listed on-line points and rejects the others."""

    def check(game, on_line, not_on_line):
        for x, y in on_line:
            self.assertTrue(game.is_on_line(x, y))
        for x, y in not_on_line:
            self.assertFalse(game.is_on_line(x, y))

    # Board of size 4x4
    check(
        Othello(4),
        [(0.0, 0.0), (-10.12, 0.0), (50.0, 33.11),
         (58.62, -50.0), (-50.0, -50.0), (88.26, 50.0)],
        [(10.0, 10.0), (-52.3, 28.1), (40.1, 39.5), (88.77, 100.0),
         (75.0, -77.2), (-22.78, -11.16), (-61.99, 5.0), (-100.0, 33.11)],
    )

    # Board of size 8x8
    check(
        Othello(),
        [(0.0, 0.0), (-10.12, 0.0), (-100.0, 33.11), (58.62, -150.0),
         (-50.0, -50.0), (88.26, 100.0), (199.0, 50.0)],
        [(10.0, 10.0), (-52.3, 28.1), (140.1, 39.5), (88.88, 200.0),
         (75.0, -77.2), (-22.78, -111.16), (-61.99, 105.0), (200.0, 50.0)],
    )
def test_is_on_board(self):
    '''
    Valid input: every point on the board (including points on a line but
    excluding points on the bound).
    '''

    def check(game, on_board, not_on_board):
        for x, y in on_board:
            self.assertTrue(game.is_on_board(x, y))
        for x, y in not_on_board:
            self.assertFalse(game.is_on_board(x, y))

    # Board of size 4x4
    check(
        Othello(4),
        [(0.0, 0.0), (40.1, 39.5), (75.0, -77.2), (-22.78, -11.16),
         (-61.99, 5.0), (50.0, -50.0), (-71.23, 0.0)],
        [(-100.0, 100.0), (100.0, 100.0), (150.23, 77.0),
         (5.0, -100.10), (-68.98, 177.54), (-1.5, 200.2)],
    )

    # Board of size 8x8
    check(
        Othello(),
        [(0.0, 0.0), (40.1, 39.5), (175.0, -77.2), (-22.78, -11.16),
         (-61.99, 195.0), (50.0, -50.0), (-71.23, 0.0), (99.0, 188.0)],
        [(-200.0, 200.0), (200.0, 200.0), (250.23, 77.0), (5.0, -200.10),
         (-368.98, 177.54), (-31.5, 200.2), (320.56, 201.11),
         (-278.9, -150.3)],
    )
def value_monte_carlo(game_state: Othello, depth, heuristic, alpha=-1, beta=1, mc_count=100):
    """
    Score a game state with alpha-beta pruning, evaluating the states at the
    depth limit with a Monte Carlo player.

    :param game_state: actual game state
    :param depth: remaining alpha-beta depth
    :param heuristic: heuristic handed to the Monte Carlo player
    :param alpha: value of alpha
    :param beta: value of beta
    :param mc_count: number of games played in each node at the depth limit
    :return: score of the state for the player to move
    Compare https://github.com/karlstroetmann/Artificial-Intelligence/blob/master/SetlX/game-alpha-beta.stlx
    """
    # A finished game is scored exactly.
    if game_state.game_is_over():
        return game_state.utility(game_state.get_current_player())

    # Depth limit: let a Monte Carlo player pick its best move and report the
    # probability it assigns to that move.
    if depth == 0:
        rollout_player = MonteCarlo(big_number=mc_count,
                                    use_start_libs=False,
                                    preprocessor_n=-1,
                                    heuristic=heuristic,
                                    use_multiprocessing=True)
        chosen = rollout_player.get_move(game_state)
        return rollout_player.get_move_probability(chosen)

    best = alpha
    for candidate in game_state.get_available_moves():
        child = game_state.deepcopy()
        child.play_position(candidate)
        # Negamax step: the child's value is from the opponent's point of view.
        child_value = -1 * AlphaBetaPruning.value_monte_carlo(
            child, depth - 1, heuristic, -beta, -alpha, mc_count=mc_count)
        best = max(best, child_value)
        if best >= beta:
            # Beta cut-off.
            return best
        alpha = max(best, alpha)
    return best
def test__init__():
    """A new Othello matches a hand-built board, controller and tile stack."""
    othello = Othello(400, 4, "Name", "black")

    expected_board = Board(400, 4)
    expected_gc = GameController(400, 400, "Name", "black")
    remaining_tiles = [Tile(100) for _ in range(16)]

    # Place the four centre tiles: (row, column, is_white).
    for row, col, is_white in ((1, 1, True), (1, 2, False),
                               (2, 1, False), (2, 2, True)):
        tile = remaining_tiles.pop()
        if is_white:
            tile.set_white()
        else:
            tile.set_black()
        expected_board.grids[row][col] = tile

    assert othello.board == expected_board
    assert othello.gc == expected_gc
    assert othello.tiles == remaining_tiles
    assert othello.valid_grids == set()
    assert othello.is_black
    assert not othello.is_control
def test_get_valid_flips(self):
    """get_valid_flips returns the flipped points, or [] for bad input."""
    # Set up a testable game
    game = Othello(8)
    game.active_color = 'white'
    placements = [
        (Point(2, 2), 'white'),
        (Point(5, 4), 'black'),
        (Point(6, 5), 'black'),
        (Point(7, 5), 'white'),
        (Point(6, 6), 'white'),
        (Point(5, 6), 'black'),
        (Point(5, 7), 'black'),
        (Point(4, 6), 'black'),
    ]
    for point, color in placements:
        game.board.record_tile(point, color)

    # Good input: a move that results in a variety of flips, or lack thereof,
    # in each direction.
    flips = game.get_valid_flips(Point(5, 5))
    self.assertEqual(flips, [Point(6, 5), Point(4, 4), Point(3, 3)])

    # Bad inputs:
    #   1. Move outside the board
    #   2. Move that targets an occupied square
    #   3. Argument is None instead of a point
    outside_result = game.get_valid_flips(Point(100, 100))
    occupied_result = game.get_valid_flips(Point(2, 2))
    none_result = game.get_valid_flips(None)
    for result in (outside_result, occupied_result, none_result):
        self.assertEqual(result, [])
def __call__(self, number_of_games):
    """Play `number_of_games` games, teaching and training each
    NeuralNetwork player after every game.

    :param number_of_games: how many games to play
    """
    for _ in range(number_of_games):
        game = Othello(self.window, self.players, self.affichage)
        game()
        self.games.append(game)
        for id_player, player in enumerate(self.players):
            # Only neural-network players learn from the finished game.
            if not isinstance(player, NeuralNetwork):
                continue
            self.teach(id_player, game)
            self.players[id_player].train()
def main():
    """Run one verbose episode between two random-action agents."""
    rng = random.Random()
    white = OthelloAgent("W", RandomAction(rng))
    black = OthelloAgent("B", RandomAction(rng))
    run_episode(Othello(white, black), OthelloVerboseEpisode())
def mobility(board):
    """Return the number of valid moves for BLACK minus the number for WHITE."""
    probe = Othello()
    probe.board = board

    def count_moves(colour):
        # Count the moves available when `colour` is the player to move.
        probe.current_player = colour
        return len(probe.find_all_valid_moves())

    black_moves = count_moves(BLACK)
    white_moves = count_moves(WHITE)
    return black_moves - white_moves
def main():
    """Time 10 episodes of an AlphaBetaPruning(2) agent (W) against a random agent (B)."""
    episodes = 10
    rng = random.Random()
    white = OthelloAgent("W", AlphaBetaPruning(2))
    black = OthelloAgent("B", RandomAction(rng))
    run_timed_episodes(Othello(white, black), episodes)
def main():
    """Time 10 episodes of a MonteCarloTreeSearch agent (W) against a random agent (B)."""
    episodes = 10
    rng = random.Random()
    white = OthelloAgent("W", MonteCarloTreeSearch())
    black = OthelloAgent("B", RandomAction(rng))
    run_timed_episodes(Othello(white, black), episodes)
def test_control():
    """control() ignores an illegal move and applies a legal one."""
    game = Othello(400, 4, "Name", "black")

    # An illegal move must leave the board and the turn state untouched.
    game.update_info()
    game.control(0, 0)
    assert not game.board.grids[0][0]
    assert game.is_black
    assert not game.is_control

    # A legal move places a black tile and turns the tile at (1, 1) black.
    game.update_info()
    game.control(0, 1)
    assert not game.is_black
    assert game.is_control
    for row, col in ((0, 1), (1, 1)):
        assert isinstance(game.board.grids[row][col], Tile)
        assert game.board.grids[row][col].is_black
def value(game_state: Othello, depth, heuristic, alpha=-1, beta=1):
    """
    Value of game_state for the player to move, computed with alpha-beta
    pruning.

    :param game_state: the state to evaluate
    :param depth: remaining search depth
    :param heuristic: function reference used to score a state once the
        maximum search depth is reached; called as heuristic(player, state)
    :param alpha: value of alpha
    :param beta: value of beta
    :return: value of the state
    Compare https://github.com/karlstroetmann/Artificial-Intelligence/blob/master/SetlX/game-alpha-beta.stlx
    """
    # A finished game is scored exactly.
    if game_state.game_is_over():
        return game_state.utility(game_state.get_current_player())

    # Depth limit reached: fall back to the heuristic.
    if depth == 0:
        return heuristic(game_state.get_current_player(), game_state)

    best = alpha
    for candidate in game_state.get_available_moves():
        child = game_state.deepcopy()
        child.play_position(candidate)
        # Negamax step: the child's value is from the opponent's point of view.
        child_value = -1 * AlphaBetaPruning.value(
            child, depth - 1, heuristic, -beta, -alpha)
        best = max(best, child_value)
        if best >= beta:
            # Beta cut-off.
            return best
        alpha = max(best, alpha)
    return best
def get_move(game_state: Othello):
    """
    interface function of all players
    :param game_state: actual game state
    :return: random move in available moves
    """
    # The available moves are materialised as a list first because
    # rnd.choice needs a sequence that supports indexing.
    return rnd.choice(list(game_state.get_available_moves()))
def choix_coup(self, coups, jplateau):
    """Select a move for this player.

    Picks the move with the largest ``Othello.Nb_transforme`` value; on a tie
    the first such move is returned.

    :param coups: the possible moves for the player
    :param jplateau: the game board
    :returns: the move chosen by the player
    """
    gains = []
    for coup in coups:
        gains.append(Othello.Nb_transforme(jplateau, self, coup))
    meilleur = numpy.argmax(gains)
    return coups[meilleur]
def test_init(self):
    """Constructor sets the board for each size and the shared defaults."""

    def check_defaults(game):
        # Attributes expected to be the same for the 4x4 and 8x8 boards.
        self.assertEqual(game.square_size, 50)
        self.assertEqual(game.board_color, 'forest green')
        self.assertEqual(game.line_color, 'black')
        self.assertEqual(game.tile_size, 20)
        self.assertEqual(game.tile_colors, ['black', 'white'])
        self.assertEqual(game.move, ())
        self.assertEqual(game.current_player, 0)
        self.assertEqual(game.num_tiles, [2, 2])

    # Board of size 0
    game = Othello(0)
    self.assertEqual(game.n, 0)
    self.assertEqual(game.board, [])

    # Board of size 2x2
    game = Othello(2)
    self.assertEqual(game.n, 2)
    self.assertEqual(game.board, [[0, 0], [0, 0]])

    # Board of size 4x4
    game = Othello(4)
    self.assertEqual(game.n, 4)
    self.assertEqual(game.board, [[0, 0, 0, 0], [0, 0, 0, 0],
                                  [0, 0, 0, 0], [0, 0, 0, 0]])
    check_defaults(game)

    # Board of size 8x8
    game = Othello()
    self.assertEqual(game.n, 8)
    self.assertEqual(game.board, EMPTY_BOARD)
    check_defaults(game)
def test_put_tile_on():
    """put_tile_on places a tile matching the current colour at the grid."""
    game = Othello(400, 4, "Name", "black")
    reference = Tile(100)

    # With the initial colour the placed tile equals a fresh Tile.
    game.put_tile_on(0, 0)
    assert game.board.grids[0][0] == reference

    # After switching colour the placed tile equals the flipped reference.
    game.is_black = False
    game.put_tile_on(0, 0)
    reference.flip()
    assert game.board.grids[0][0] == reference
def test_eq(self):
    """Games compare equal only with equal size, board and current player."""
    small_a = Othello(4)
    small_b = Othello(4)
    large = Othello(8)

    # Same size is equal, different size is not.
    self.assertTrue(small_a == small_b)
    self.assertFalse(small_a == large)
    self.assertFalse(small_b == large)

    # A board difference breaks equality until it is mirrored.
    small_a.board[0][0] = 1
    self.assertFalse(small_a == small_b)
    small_b.board[0][0] = 1
    self.assertTrue(small_a == small_b)

    # The same holds for the current player.
    small_a.current_player = 1
    self.assertFalse(small_a == small_b)
    small_b.current_player = 1
    self.assertTrue(small_a == small_b)
    small_b.current_player = 2
    self.assertFalse(small_a == small_b)
def get_sign(current_player, field_value):
    """
    Tell whether field_value marks a field as owned by current_player.

    Returns
      1: the field is owned by current_player
     -1: the field is owned by the opponent of current_player
      0: neither player owns the field

    Both current_player and field_value are coded as the constants EMPTY_CELL,
    PLAYER_ONE and PLAYER_TWO from constants.py, so both are integer values.
    """
    if field_value == current_player:
        return 1
    if field_value == Othello.other_player(current_player):
        return -1
    return 0
def get_move(game_state: Othello):
    """
    interface function of all players
    Asks the user for a move and returns the selection
    :param game_state: actual game state
    :return: the move selected by the user
    """
    # util.select_one takes (description, value) pairs; each move is labelled
    # with its column name followed by the one-based row number.
    possibilities = [
        (f"({COLUMN_NAMES[col]}{row + 1})", (row, col))
        for (row, col) in game_state.get_available_moves()
    ]
    return util.select_one(possibilities, "Select your move:")
def pick_move(self, game, side):
    """Sample a move for `side` from the Monte Carlo search distribution.

    When training, the preprocessed position and the normalised search result
    are appended to ``temp_state``. The result is then raised to the power
    ``1/tau``, renormalised and sampled.

    :param game: game to pick a move for
    :param side: side to move
    :return: the sampled move; (-1, -1) stands in when there is no legal move
    """
    candidates = game.possible_moves(side)
    if len(candidates) == 0:
        candidates.append((-1,-1))

    search_result = self.monte_carlo(game, side)
    if self.train:
        features = self.preprocess_input(game.board, side)
        target = np.divide(search_result, np.sum(search_result))
        self.temp_state.append((features, target))

    sharpened = np.float_power(search_result, 1/self.tau)
    probabilities = np.divide(sharpened, np.sum(sharpened))

    # Walk through the moves until the accumulated probability covers the draw.
    remaining = random()
    for candidate in candidates:
        remaining -= probabilities[Othello.move_id(candidate)]
        if remaining <= 0:
            return candidate
    return candidates[-1]
def pick_move(self, game, side):
    """Ask the human player for a legal move on the console.

    :param game: game providing ``possible_moves`` and ``print_board``
    :param side: side the human is playing
    :return: the chosen (row, col), or (-1, -1) when there is no legal move
    """
    print("You are playing", Othello.piece_map(side))
    legal_moves = game.possible_moves(side)
    if len(legal_moves) == 0:
        game.print_board()
        print("No moves availible. Turn skipped.")
        return (-1, -1)

    choice = (-1, -1)
    while choice not in legal_moves:
        try:
            row = int(input("Please input row: "))
            col = int(input("Please input col: "))
            choice = (row, col)
            if choice not in legal_moves:
                game.print_board()
                print("Please input a valid move")
        except Exception:
            # Any failure while reading is treated as an invalid move.
            game.print_board()
            print("Please input a valid move")
    print()
    return choice
def __init__(self, state:othello.Othello):
    '''Initializes an Othello application: game state, window and canvas.'''
    self.state = state
    self.board = state.start()

    self.root_window = tkinter.Tk()
    window = self.root_window
    window.title("Othello")

    self.canvas = tkinter.Canvas(
        master=window, width=500, height=500, background="#808080")
    canvas = self.canvas
    canvas.grid(
        row=1, column=0, columnspan=2, padx=10, pady=10,
        sticky=tkinter.N + tkinter.S + tkinter.E + tkinter.W)
    canvas.bind("<Configure>", self.on_canvas_resized)
    canvas.bind("<Button-1>", self.on_canvas_clicked)

    # Row/column 1 gets twice the weight of row/column 0.
    for index, weight in ((0, 1), (1, 2)):
        window.rowconfigure(index, weight=weight)
    for index, weight in ((0, 1), (1, 2)):
        window.columnconfigure(index, weight=weight)
def monte_carlo(self, game, side):
    """Run ``self.sim_count`` network-guided simulations from `game` for `side`.

    Statistics are kept per (state id, move id) edge:
    N = visit count, W = accumulated value, Q = W / N, P = prior from the
    network's policy output, normalised over the legal moves.

    :param game: game to search from; every simulation works on a deepcopy
    :param side: side to move at the root
    :return: array of length 65. With no legal move only index 64 is set;
        with exactly one legal move only that move's index is set; otherwise
        each legal move's entry holds its visit count at the root state.
    """
    N = defaultdict(lambda: 0)
    W = defaultdict(lambda: 0)
    Q = defaultdict(lambda: 0)
    P = defaultdict(lambda: 0)
    possible_moves = game.possible_moves(side)
    # Shortcuts: nothing to search with zero or one legal move.
    if len(possible_moves) == 0:
        policy = np.zeros((65))
        policy[64] = 1
        return policy
    elif len(possible_moves) == 1:
        policy = np.zeros((65))
        policy[Othello.move_id(possible_moves[0])] = 1
        return policy
    # Priors for the root state, normalised over the legal moves
    # (total starts at 1e-10 so the division cannot hit zero).
    current_input = self.preprocess_input(game.board, side)
    sid = Othello.state_id(game.board)
    pred = self.network.predict(current_input[np.newaxis,:])
    policy = pred[0][0]
    total = 1e-10
    for i, move in enumerate(possible_moves):
        total += policy[Othello.move_id(move)]
    for move in possible_moves:
        P[(sid, Othello.move_id(move))] = policy[Othello.move_id(move)]/total
    for i in range(self.sim_count):
        #print("Sim #%d"% i)
        clone = deepcopy(game)
        current_side = side
        # Edges taken during this simulation, updated together at the end.
        visited = deque()
        while True:
            possible_moves = clone.possible_moves(current_side)
            if len(possible_moves) == 0:
                # (-1, -1) stands in as the move when nothing is legal.
                possible_moves.append((-1,-1))
            best_move = None
            best_move_value = -2
            sid = Othello.state_id(clone.board)
            # Pick the move maximising Q + P / (N + 1).
            for move in possible_moves:
                mid = Othello.move_id(move)
                qu_val = Q[(sid, mid)] + P[(sid, mid)]/(N[(sid, mid)]+1)
                if qu_val > best_move_value:
                    best_move_value = qu_val
                    best_move = move
            #print(best_move)
            if N[(sid, Othello.move_id(best_move))] == 0:
                # Unvisited edge: take it, evaluate the new state and stop.
                visited.append((sid, Othello.move_id(best_move)))
                clone.play_move(best_move[0], best_move[1], current_side)
                current_side *= -1
                if clone.game_over():
                    # Terminal state: back up the real result, signed by `side`.
                    for node in visited:
                        N[node] += 1
                        W[node] += clone.get_winner()*side
                        Q[node] = W[node]/N[node]
                    break
                current_input = self.preprocess_input(clone.board, current_side)
                sid = Othello.state_id(clone.board)
                pred = self.network.predict(current_input[np.newaxis,:])
                policy = pred[0][0]
                value = pred[1][0]
                possible_moves = clone.possible_moves(current_side)
                if len(possible_moves) == 0:
                    possible_moves.append((-1,-1))
                # Store normalised priors for the newly reached state.
                total = 1e-10
                for i, move in enumerate(possible_moves):
                    total += policy[Othello.move_id(move)]
                for move in possible_moves:
                    P[(sid, Othello.move_id(move))] = policy[Othello.move_id(move)]/total
                # Back up the network's value estimate, signed by `side`.
                for node in visited:
                    N[node] += 1
                    W[node] += value*side
                    Q[node] = W[node]/N[node]
                #print()
                break
            else:
                # Already visited edge: follow it and keep descending.
                visited.append((sid, Othello.move_id(best_move)))
                clone.play_move(best_move[0], best_move[1], current_side)
                current_side *= -1
                if clone.game_over():
                    for node in visited:
                        N[node] += 1
                        W[node] += clone.get_winner()*side
                        Q[node] = W[node]/N[node]
                    break
    # The result is the visit count of every legal root move.
    policy = np.zeros((65))
    possible_moves = game.possible_moves(side)
    sid = Othello.state_id(game.board)
    for move in possible_moves:
        mid = Othello.move_id(move)
        policy[mid] = N[(sid,mid)]
    return policy
def run_games(config):
    """Play evaluation matches between the players named by config.model_1
    and config.model_2, printing and saving the results.

    Each iteration plays ``config.game_num`` games and reports wins, ties and
    losses from player 1's point of view, plus a rolling average of the win
    percentage. The loop repeats only while ``config.repeat_with_new_model``
    is set and at least one side is "newest".

    :param config: configuration object; reads ``model_1``, ``model_2``,
        ``data.model_location``, ``game_num``, ``rolling_avg_amount``,
        ``repeat_with_new_model`` and the ``game`` settings
        ``simulation_num_per_move``, ``tau_1``, ``tau_2`` and ``tau_swap``
    """
    game = Othello()
    model_1 = ""
    model_2 = ""
    p1, new_1 = create_player(config.model_1, model_1, config)
    p2, new_2 = create_player(config.model_2, model_2, config)
    # The iteration counter starts from the number of stored models when a
    # "newest" player takes part, otherwise from zero.
    if config.model_1 == "newest" or config.model_2 == "newest":
        i = len(glob.glob(config.data.model_location+"*.h5"))-1
    else:
        i = 0
    avg_wins = []
    while True:
        i += 1
        new_1 = load_player(p1, config.model_1, model_1, config)
        new_2 = load_player(p2, config.model_2, model_2, config)
        # Poll once a minute while a "newest" player still has the old model.
        while((config.model_1 == "newest" and new_1 == model_1) or (config.model_2 == "newest" and new_2 == model_2)):
            #print("Waiting on new model. Sleeping for 1 minute.")
            sleep(60)
            new_1 = load_player(p1, config.model_1, model_1, config)
            new_2 = load_player(p2, config.model_2, model_2, config)
        model_1 = new_1
        model_2 = new_2
        wins = 0
        losses = 0
        ties = 0
        print("Iteration %04d"%i)
        print("Playing games between %s and %s" % (config.model_1, config.model_2))
        print("Playing %d games with %d simulations per move" % (config.game_num, config.game.simulation_num_per_move))
        start=time()
        for j in range(config.game_num):
            util.print_progress_bar(j, config.game_num, start=start)
            side = -1
            turn = 1
            while not game.game_over():
                # Switch from tau_1 to tau_2 once turn exceeds tau_swap.
                tau = config.game.tau_1
                if config.game.tau_swap < turn:
                    tau = config.game.tau_2
                # tau is only set on players that are not "random".
                if config.model_1 != "random":
                    p1.tau =tau
                if config.model_2 != "random":
                    p2.tau = tau
                # Sides alternate per game: p1 plays side -1 in even games
                # and side 1 in odd games.
                if j % 2 == 0:
                    if side == -1:
                        t = p1.pick_move(game, side)
                    else:
                        t = p2.pick_move(game, side)
                else:
                    if side == 1:
                        t = p1.pick_move(game, side)
                    else:
                        t = p2.pick_move(game, side)
                game.play_move(t[0], t[1], side)
                side *= -1
                turn += 1
            # Score the game from p1's point of view.
            if game.get_winner() == 0:
                ties += 1
                savePerformance(config, model_1, model_2, 0, 1, 0)
            elif (j % 2 == 0 and game.get_winner() == -1) or (j % 2 == 1 and game.get_winner() == 1):
                wins += 1
                savePerformance(config, model_1, model_2, 1, 0, 0)
            else:
                losses += 1
                savePerformance(config, model_1, model_2, 0, 0, 1)
            game.reset_board()
        util.print_progress_bar(config.game_num, config.game_num, start=start)
        print("%s vs %s: (%0.2f%% wins|%0.2f%% ties|%0.2f%% losses) of %d games" % (config.model_1, config.model_2, 100*wins/config.game_num, 100*ties/config.game_num, 100*losses/config.game_num, config.game_num))
        # Keep only the most recent rolling_avg_amount win percentages.
        avg_wins.append(100*wins/config.game_num)
        if len(avg_wins) > config.rolling_avg_amount:
            avg_wins = avg_wins[-1*config.rolling_avg_amount:]
        print("Average Win Percent: %0.2f%%" % (sum(avg_wins)/float(len(avg_wins))))
        if not (config.repeat_with_new_model and (config.model_1 == "newest" or config.model_2 == "newest")):
            break
def calc_ranking(config):
    """Rank the stored models by letting them play each other.

    Every ``config.model_skip``-th model file, plus the newest one, becomes a
    player. Games are played until roughly ``config.game_num_per_model`` games
    per player are reached; the win matrix is then printed and passed to
    ``choix.ilsr_pairwise_dense`` to obtain ratings.

    :param config: configuration object; reads ``data.model_location``,
        ``model_skip``, ``game_num_per_model``, ``print_best`` and the
        ``game`` settings ``simulation_num_per_move``, ``tau_1``, ``tau_2``
        and ``tau_swap``
    """
    models = sorted(glob.glob(config.data.model_location+"*.h5"))
    players = []
    newest_index = len(models) - 1
    for i, model in enumerate(models):
        # Fix: the old test `i == len(models)` could never be true, so the
        # newest model was only included when its index was a multiple of
        # model_skip.
        if i % config.model_skip == 0 or i == newest_index:
            players.append(model)
    # wtl[a, b] counts wins / ties / losses of a against b since the last save.
    wtl = np.zeros((len(players), len(players), 3))
    win_matrix = np.zeros((len(players), len(players)))
    game = Othello()
    challenger1 = AIPlayer(0, config.game.simulation_num_per_move, train=False,
                           model=players[-1], tau=config.game.tau_1)
    challenger2 = AIPlayer(0, config.game.simulation_num_per_move, train=False,
                           model=players[0], tau=config.game.tau_1)
    total_games = (config.game_num_per_model * (len(players)))//2
    played_games = 0
    finished = False
    start = time()
    print("Ranking with %d players and %d games per player" % (len(players), config.game_num_per_model))
    if config.game_num_per_model < len(players):
        print("We suggest that you increase games per player to be greater than players")

    for i in itertools.count():
        ranks = getRankings(win_matrix)
        if len(ranks) == 0:
            msg = "No Clear Best Yet"
        else:
            msg = "Current Best is "+util.getPlayerName(players[ranks[-1]])
        if config.print_best:
            print(msg.ljust(90))
        for j in range(len(players)):
            util.print_progress_bar(played_games, total_games, start=start)
            # Player j meets the opponent chosen by getLeastPlayed.
            challenger1_index = getLeastPlayed(win_matrix, j)
            AIPlayer.clear()
            challenger1.load(players[challenger1_index])
            challenger2.load(players[j])
            # Randomly decide which challenger plays side -1 (p1 moves first).
            if random.random() < 0.5:
                challenger1_side = -1
                p1 = challenger1
                p2 = challenger2
            else:
                challenger1_side = 1
                p1 = challenger2
                p2 = challenger1
            side = -1
            turn = 1
            while not game.game_over():
                # Switch from tau_1 to tau_2 once turn exceeds tau_swap.
                tau = config.game.tau_1
                if config.game.tau_swap < turn:
                    tau = config.game.tau_2
                p1.tau = tau
                p2.tau = tau
                if side == -1:
                    t = p1.pick_move(game, side)
                else:
                    t = p2.pick_move(game, side)
                game.play_move(t[0], t[1], side)
                side *= -1
                turn += 1
            # Record the result; a tie counts half a win for both players.
            if game.get_winner() == challenger1_side:
                win_matrix[challenger1_index, j] += 1
                wtl[challenger1_index, j, 0] += 1
            elif game.get_winner() == -1*challenger1_side:
                win_matrix[j, challenger1_index] += 1
                wtl[challenger1_index, j, 2] += 1
            else:
                win_matrix[challenger1_index, j] += 0.5
                win_matrix[j, challenger1_index] += 0.5
                wtl[challenger1_index, j, 1] += 1
            game.reset_board()
            played_games += 1
            if played_games >= total_games:
                finished = True
                break
        # Save this round's counts and start the next round from zero.
        saveWTL(config, players, wtl)
        wtl = np.zeros((len(players), len(players), 3))
        if finished:
            break

    util.print_progress_bar(total_games, total_games, start=start)
    print("\n", [util.getPlayerName(player) for player in players])
    print("\nWin Matrix(row beat column):")
    print(win_matrix)
    try:
        with np.errstate(divide='ignore', invalid='ignore'):
            params = choix.ilsr_pairwise_dense(win_matrix)
        print("\nRankings:")
        for i, player in enumerate(np.argsort(params)[::-1]):
            print("%d. %s (expected %d) with %0.2f rating"% (i+1, util.getPlayerName(players[player]), len(players)-player, params[player]))
        print("\n(Rating Diff, Winrate) -> (0.5, 62%), (1, 73%), (2, 88%), (3, 95%), (5, 99%)")
    except Exception:
        # Best effort: the rating step can fail when too few games were played.
        print("\nNot Enough data to calculate rankings")
def __init__(self, own_turn):
    """Initialise the Othello base state and store `own_turn` as both the
    current turn and this player's own turn.

    :param own_turn: stored as ``turn`` and ``own_turn``
    """
    Othello.__init__(self)
    self.own_turn = own_turn
    self.turn = own_turn
def run(self):
    """Build the board GUI and run a human-versus-AI game until it ends or
    ``self.running`` is cleared.

    The human's side is chosen at random. The window title shows the AI's
    current evaluation during play and the result once the loop ends.
    """
    # 600x600 canvas divided into an 8x8 grid of 75-pixel squares.
    self.game_gui = Canvas(self.root, width=600, height=600, background='green')
    self.game_gui.bind("<Button-1>", self.click)
    self.game_gui.focus_set()
    self.game_gui.bind("<Key>", self.key)
    self.game_gui.pack()
    for i in range(1, 8):
        self.game_gui.create_line(0, i*75, 600, i*75)
        self.game_gui.create_line(i*75, 0, i*75, 600)
    # One oval per square, created in the background colour.
    self.pieces = []
    for i in range(8):
        self.pieces.append([])
        for j in range(8):
            self.pieces[i].append(self.game_gui.create_oval(i*75+5, j*75+5, (i+1)*75-5, (j+1)*75-5, fill="green", outline="green"))
    self.root.protocol("WM_DELETE_WINDOW", self.on_closing)
    self.root.resizable(0,0)
    self.running = True
    config = EvaluateConfig()
    tf_util.update_memory(config.gpu_mem_fraction)
    AIPlayer.create_if_nonexistant(config)
    self.game = Othello()
    # Randomly assign the human to side 1 or -1.
    if(random() > 0.5):
        self.human = 1
    else:
        self.human = -1
    ai = create_player(config.model_1, config)
    #print("You are playing against", config.model_1)
    #print("Playing games with %d simulations per move" % config.game.simulation_num_per_move)
    self.side = -1
    self.draw_board()
    self.value = ai.evaluate(self.game, self.side)
    while self.running and not self.game.game_over():
        #play move
        if self.side != self.human:
            # AI turn: show a busy cursor while the move is computed.
            self.value = ai.evaluate(self.game, self.side)
            self.root.title("Othello (Thinking of Move) Current Value: %0.2f (1 white wins, -1 black wins)" % self.value)
            self.root.config(cursor="wait")
            t = ai.pick_move(self.game, self.side)
            self.game.play_move(t[0], t[1], self.side)
            self.draw_board()
            self.side *= -1
            self.value = ai.evaluate(self.game, self.side)
        else:
            # Human turn: skip it when there is no legal move.
            if len(self.game.possible_moves(self.side)) == 0:
                self.side *= -1
                continue
            if self.side == -1:
                color = "black"
            else:
                color = "white"
            self.root.title("Othello (Play as %s) Current Value: %0.2f (1 white wins, -1 black wins)" % (color, self.value))
            self.root.config(cursor="")
            # presumably self.update, self.x and self.y are set by the click
            # handler -- TODO confirm.
            if self.update:
                self.update = False
                if (self.x, self.y) in self.game.possible_moves(self.side):
                    self.game.play_move(self.x, self.y, self.side)
                    self.draw_board()
                    self.side *= -1
            # NOTE(review): the original indentation was lost; the sleep is
            # placed in the human branch so waiting for input does not spin.
            time.sleep(0.01)
    self.root.config(cursor="")
    # Report the result from the human's point of view.
    if self.human == self.game.get_winner():
        self.root.title("Othello (You Win!)")
    elif self.game.get_winner() == 0:
        self.root.title("Othello (Its a draw!)")
    else:
        self.root.title("Othello (You Lose!)")
import matplotlib.pyplot as plt

# Interactive TensorFlow session used by the agent below.
sess = tf.InteractiveSession()

# Set hyper parameters and variables
M = 1000; '''Number of episodes'''
LEARNING_RATE = 0.001; '''Base learning rate'''
GAMMA = 0.99; '''Discount factor'''
BUFFER_SIZE = 100000
BATCH_SIZE = 64
RANDOM_SEED = 1234
dim = 8

# Set up environment
env = Othello(dim)
state = state_prime = env.reset()
# One action slot per entry of the state returned by env.reset().
action = np.zeros(len(state))

# create deep q network
agent = DeepQNetwork(sess, state, action, LEARNING_RATE, 0.001, GAMMA)
sess.run(tf.initialize_all_variables())
agent.update_target_network()

# Initialize replay buffer Replay
Replay = ReplayBuffer(BUFFER_SIZE, random_seed=RANDOM_SEED, prioritized=False)

# NOTE(review): only the first two statements of nonzero_max are visible in
# this excerpt; the rest of the definition appears to be cut off.
def nonzero_max(actions):
    indices = np.nonzero(actions)[0]
    mapping = []
from othello import Othello from random_agent import RandomAgent import time # Global Variables board_size = 8 number_of_games = 100 player_1_wins = 0 player_2_wins = 0 draw_games = 0 total_start_time = time.time() for i in range(number_of_games): othello = Othello(board_size) player = 1 start_time = time.time() while True: move = RandomAgent.pick_move(othello, player) if move != -1: othello.put(move, player) game_over = othello.is_game_over(player) player *= -1 if game_over != None: if game_over == 0: draw_games += 1 else: if game_over == 1: player_1_wins += 1 else: