def update(self, state, action):
    """Apply the agent's move and, if the game is still open, the opponent's reply.

    Args:
        state: Board before the agent moves; handed to ``board.add_move``.
        action: Mapping read through the keys ``'player'`` and ``'cell'``.

    Returns:
        A ``(next_state, reward)`` tuple. ``reward`` is ``1.0`` when the
        agent's move wins, ``-1.0`` when the opponent's reply wins, and
        ``0.0`` in every other case.

    Side effects:
        Prints a trace line. On a win or a cat game, sets
        ``self.is_complete`` to ``True`` and ``self.outcome`` to
        ``"<player>_WINNER"`` or ``"CAT_GAME"``.
    """
    print("ENVIRONMENT UPDATE: ", state, action)

    agent_id = action['player']
    reward = 0.0

    # Agent's half of the turn.
    next_state = board.add_move(agent_id, action['cell'], state)
    if play.is_winner(next_state, agent_id):
        reward = 1.0
        self.is_complete = True
        self.outcome = agent_id + "_WINNER"
    elif play.is_cat_game(next_state):
        self.is_complete = True
        self.outcome = "CAT_GAME"

    # Nothing more to do once the game is flagged complete (either just now
    # or already before this call).
    if self.is_complete:
        return next_state, reward

    # Opponent's half of the turn.
    opponent_id, opponent_cell = self.opponent.pick_next_move(next_state)
    next_state = board.add_move(opponent_id, opponent_cell, next_state)
    if play.is_winner(next_state, opponent_id):
        reward = -1.0
        self.is_complete = True
        self.outcome = opponent_id + "_WINNER"
    elif play.is_cat_game(next_state):
        self.is_complete = True
        self.outcome = "CAT_GAME"

    return next_state, reward
def model_environment(opponent, state, action):
    """Simulate one environment step: the agent plays 'X', then the opponent replies.

    The board after each move is echoed to stdout and written to the ``file``
    object resolved from the enclosing scope (it is not a parameter here).

    Args:
        opponent: Object whose ``pick_next_move(board)`` returns an
            ``(opponent_id, opponent_move)`` pair.
        state: Board before the agent moves.
        action: The agent's move; passed to ``p.add_move`` and
            ``board.to_state``.

    Returns:
        A ``(current_board, reward, game_complete)`` tuple. ``reward`` is
        always a float: ``1.0`` if the agent's move wins, ``-1.0`` if the
        opponent's reply wins, ``0.0`` otherwise (including cat games).
    """

    def _trace(title, shown_board):
        # Same content goes to stdout and to the log file.
        rendered = game.to_display_string(shown_board)
        print(title)
        print(rendered)
        file.write(title + "\n")
        file.write(rendered)

    file.write("AGENT MAKING MOVE: " + str(action) + str(board.to_state(action)) + "\n")

    current_board = p.add_move('X', action, state)
    _trace("AFTER AGENT MOVE:", current_board)

    # Reward stays 0.0 unless somebody wins; cat games only end the episode.
    reward = 0.0
    game_complete = False
    if p.is_winner(current_board, 'X'):
        game_complete = True
        reward = 1.0
    elif p.is_cat_game(current_board):
        game_complete = True

    if not game_complete:
        # let the opponent make a move ...
        opponent_id, opponent_move = opponent.pick_next_move(current_board)
        current_board = p.add_move(opponent_id, opponent_move, current_board)
        _trace("AFTER OPPONENT MOVE", current_board)

        if p.is_winner(current_board, opponent_id):
            game_complete = True
            reward = -1.0
        elif p.is_cat_game(current_board):
            # Previously this branch produced the int 0; keep the float
            # default so every path returns the same type.
            game_complete = True

    return current_board, reward, game_complete
def play_game(p1, p2, file=None):
    """Play one full game between two players, alternating turns from ``p1``.

    Args:
        p1: First player; must expose ``.player`` and ``pick_next_move(board)``.
        p2: Second player, same interface as ``p1``.
        file: Optional writable object. When given, the board before every
            move and the final result are written to it.

    Returns:
        The winning player object, or ``None`` when the game ends as a cat
        game.
    """
    seats = [p1, p2]
    grid = p.empty_board()
    turn = 0
    moves_made = 0
    winner = None
    log_enabled = file is not None

    while True:
        mover = seats[turn]
        print("Current move is for player: ", mover.player)
        if log_enabled:
            file.write("PRIOR TO MOVE " + str(moves_made) + " ------------\n")
            file.write(to_display_string(grid))

        # A cat game is detected before asking anyone for another move.
        if p.is_cat_game(grid):
            if log_enabled:
                file.write("RESULT IS CAT GAME")
            break

        move = mover.pick_next_move(grid)
        grid = p.add_move(move[0], move[1], grid)
        p.display_board(grid)
        moves_made += 1

        if p.is_winner(grid, mover.player):
            winner = mover
            if log_enabled:
                file.write("FINAL BOARD AFTER MOVE " + str(moves_made) + " WINNER IS: " + winner.player + "\n")
                file.write(to_display_string(grid))
            break

        # alternate players
        if turn != 0:
            print("Switching to player 0...")
            turn = 0
        else:
            print("Switching to player 1...")
            turn = 1

    if winner is not None:
        print("WINNER IS PLAYER: ", winner.player)
    else:
        print("CAT GAME")
    return winner
def get_reward(self, board, action):
    """Score the board that ``action`` leads to, from ``self.player``'s view.

    Args:
        board: Not read by this method.
        action: Mapping whose ``'board'`` entry is the board after the move.

    Returns:
        ``1.0`` if ``self.player`` has won on the resulting board, ``-0.5``
        if ``p.is_potential_loser_on_next_move`` flags it, else ``0.0``.

    Side effects:
        Displays the resulting board; for a flagged board it also writes the
        board to ``self.log`` and displays it a second time.
    """
    resulting_board = action['board']
    print("In REWARD given next board -----")
    p.display_board(resulting_board)

    if p.is_winner(resulting_board, self.player):
        return 1.0

    if not p.is_potential_loser_on_next_move(resulting_board, self.player):
        return 0.0

    # The board is flagged as a potential loss on the next move: record it.
    print("POTENTIAL LOSER BOARD: ")
    self.log.write("POTENTIAL LOSING BOARD -----\n")
    self.log.write(self.to_string(resulting_board))
    p.display_board(resulting_board)
    return -0.5
def pick_next_move(self, current_board):
    """Choose a move: an immediately winning cell if any, else a random free cell.

    Args:
        current_board: Board to move on. It needs at least one unoccupied
            cell; with none, ``np.random.randint(0)`` raises ``ValueError``.

    Returns:
        A ``(self.player, cell)`` tuple, where ``cell`` is a ``(row, col)``
        pair.
    """
    # Every (row, col) that is still free, in row-major scan order.
    open_cells = [
        (row, col)
        for row in p.get_rows(current_board)
        for col in p.get_cols(current_board)
        if not p.is_occupied(current_board, (row, col))
    ]

    # Take the first cell that wins outright.
    for candidate in open_cells:
        trial_board = board.add_move(self.player, candidate, current_board)
        if p.is_winner(trial_board, self.player):
            return self.player, candidate

    # otherwise, pick move at random
    chosen_index = np.random.randint(len(open_cells))
    return (self.player, open_cells[chosen_index])