def main():
    """Run the game loop, redrawing the board each turn until a final state.

    White (``state[1] == 1``) is played by plain minimax, Black by the
    alpha-beta variant; per-move timings are appended to ``log``.
    The end-of-game message is dispatched on ``g.is_final_state``'s code
    (±1 win, ±2 blocked/draw).
    """
    state, msg = g.initialize(), ''
    while g.is_final_state(state) == 0:
        # display
        clear()
        print(msg)
        msg = ''
        g.display_board(state)
        # game logic
        if state[1] == 1:
            start = time()
            t = s.minimax(state)[0]
            state = g.transition(state, t[0], t[1])
            # padded so timings line up with the 'Alpha beta' entries
            log.write(f'   Minimax: {round((time() - start) * 1000, 2)} ms\n')
        else:
            start = time()
            t = s.minimum_value(state)[0]
            state = g.transition(state, t[0], t[1])
            log.write(f'Alpha beta: {round((time() - start) * 1000, 2)} ms\n\n')
    # end game
    clear()
    g.display_board(state)
    switch = {
        1: lambda: print(colored('⬤ ', 'white') + 'White won!'),
        -1: lambda: print(colored('⬤ ', 'grey') + 'Black won!'),
        2: lambda: print(
            colored('⬤ ', 'white') + 'White is blocked. It\'s a draw!'),
        -2: lambda: print(
            colored('⬤ ', 'grey') + 'Black is blocked. It\'s a draw!')
    }
    switch[g.is_final_state(state)]()
def play(agentBlack, agentWhite, vizualize=False):
    """Play one full game between two agents and collect training data.

    Returns ``(experience, z)`` where ``experience`` is a list of
    ``[state, mcts_policy]`` pairs (one per move, recorded before the
    move is applied) and ``z`` is the terminal result of the game.
    """
    state = game.State.init()
    agents = [agentBlack, agentWhite]
    # When one agent plays both sides the two entries alias a single
    # shared search tree; otherwise each player searches its own tree.
    root = MCTSNode(state)
    if agentBlack is agentWhite:
        mctsNodes = [root, root]
    else:
        mctsNodes = [root, MCTSNode(state)]
    experience = []
    move_number = 0
    while True:
        turn = move_number % 2
        node = mctsNodes[turn]
        action = agents[turn].get_action(node)
        mcts_policy = agents[turn].get_mcts_policy(node)
        # Advance both trees past the chosen action.
        mctsNodes = [n.next(action) for n in mctsNodes]
        experience.append([state, mcts_policy])
        state = game.transition(state, action)
        if vizualize:
            print_board(state)
        if state.isEnd:
            break
        move_number += 1
    return experience, state.endResult
def exploit(state):
    """Greedily play the best-valued action from *state*.

    Reads the column of ``MATRIX`` for this state (one row per action,
    presumably learned action values — TODO confirm), picks the action
    with the highest value, and returns the resulting next state.
    """
    state_id = state_index(state)
    # Value of every action in this state; no need to wrap each value in
    # a one-element list as the original did — argmax over the raw
    # numbers is identical.
    action_values = [row[state_id] for row in MATRIX]
    best_action_index = action_values.index(max(action_values))
    action_to_play = game.ACTIONS[best_action_index]
    return game.transition(state, action_to_play)
def minimum_value(state, alpha=-inf, beta=inf, depth=0, max_depth=4):
    """Alpha-beta MIN node: choose the successor minimising the heuristic.

    Returns ``(action, value)``; ``action`` is ``None`` at a terminal
    state or when the depth cutoff is reached.
    """
    if depth >= max_depth or g.is_final_state(state) != 0:
        return (None, g.heuristic(state))
    best_action, best_value = None, inf
    for move in g.possible_transitions(state):
        successor = g.transition(state, move[0], move[1])
        value = maximum_value(successor, alpha, beta, depth + 1, max_depth)[1]
        if value < best_value:
            best_value, best_action = value, move
        # Prune: MAX already has an option at least as good as alpha,
        # so it will never let play reach this node.
        if best_value <= alpha:
            return (best_action, best_value)
        beta = min(beta, best_value)
    return (best_action, best_value)
def minimax(state, maximise=False, depth=0, max_depth=4):
    """Plain depth-limited minimax.

    Returns ``(transition, heuristic_value)`` for the best move from
    *state*; ``transition`` is ``None`` at a leaf or terminal state.
    """
    if depth >= max_depth or g.is_final_state(state) != 0:
        return (None, g.heuristic(state))
    # Score every legal move by the value the opponent's reply yields.
    scored = [
        (move,
         minimax(g.transition(state, move[0], move[1]),
                 not maximise, depth + 1, max_depth)[1])
        for move in g.possible_transitions(state)
    ]
    choose = max if maximise else min
    return choose(scored, key=lambda pair: pair[1])
def explore(state):
    """Take a uniformly random action from *state* and record it.

    Updates the value matrix via ``update_Matrix`` and returns the
    resulting next state.
    """
    # random.choice stays uniform over however many actions exist,
    # instead of hard-coding the 0..7 index range.
    action = random.choice(game.ACTIONS)
    next_state = game.transition(state, action)
    update_Matrix(state, action, next_state)
    return next_state
def next(self, move):
    """Return the child node reached by *move*, creating and caching it
    on first use. A terminal node returns itself unchanged.
    """
    if self.state.isEnd:
        return self
    if move not in self.children:
        child_state = game.transition(self.state, move)
        self.children[move] = MCTSNode(child_state)
    return self.children[move]