def expectiminimax(move: MoveNode, ply, color, heuristic, dice=None):
    """Score the position reached by ``move`` from ``color``'s point of view.

    The node type is chosen by ``dice``:

    * ``dice`` truthy -- treated as ``color``'s own turn with that roll; the
      child with the strictly highest value is selected.
    * ``dice`` falsy -- treated as the other colour's turn with an unknown
      roll; for each roll the lowest-valued reply is taken and those minima
      are summed, weighted by ``probability[roll]``.

    Recursion stops when ``ply`` reaches 0 or the board already has a winner,
    in which case ``heuristic.apply(board, color)`` is the value.

    Returns:
        A ``(value, move)`` tuple.  At a leaf the move is ``move`` itself, at
        an own-turn node it is the best child (``None`` if no child beat
        ``-inf``), and at an unknown-roll node it is always ``None``.

    Raises:
        Exception: if ``ply`` is greater than 2.
    """
    if ply > 2:
        raise Exception("don't do more than 2")

    position = move.board_after
    if ply == 0 or position.getWinner() != NONE:
        return heuristic.apply(position, color), move

    if not dice:
        # Not color's move: average over every roll, assuming the opponent
        # answers each roll with the reply that is worst for `color`.
        replies_by_roll = get_board_children(position, getOtherColor(color))
        expected = 0
        for roll in replies_by_roll:
            worst = math.inf
            for reply in replies_by_roll[roll]:
                score = expectiminimax(reply, ply - 1, color, heuristic)[0]
                worst = min(worst, score)
            expected = expected + worst * probability[roll]
        return expected, None

    # color's move with a known roll: keep the first child with the highest
    # value (later ties do not replace it).
    best_score = -math.inf
    best_move = None
    for candidate in get_board_children(position, color, dice=dice):
        score = expectiminimax(candidate, ply - 1, color, heuristic)[0]
        if score > best_score:
            best_move = candidate
            best_score = score
    return best_score, best_move
def apply(self, board: Board):
    """Bring one of ``self.color``'s checkers in from the bar onto ``self.end``.

    All mutating calls are made on a copy obtained from
    ``board.__deepcopy__()``; that copy is what gets returned.  If exactly one
    opposing checker sits on ``self.end`` it is removed from the point and
    moved to the bar, and ``self.hit`` is set to ``True``.

    Raises:
        IllegalMoveException: if the other colour has more than one checker
            on ``self.end``.
    """
    opponent = getOtherColor(self.color)
    target = self.end

    if board.numAt(opponent, target) > 1:
        raise IllegalMoveException("Other player occupies the location " + str(target))

    result = board.__deepcopy__()

    # The move is valid; a lone opposing checker on the target point is hit.
    if result.numAt(opponent, target) == 1:
        result.removeFromLocation(opponent, target)
        result.moveToBar(opponent)
        self.hit = True

    result.moveFromBar(self.color)
    result.moveToLocation(self.color, target)
    return result
def alpha_beta(move: MoveNode, ply, color, heuristic,
               alpha=-math.inf, beta=math.inf, dice=None):
    """Score the position reached by ``move`` for ``color``, with pruning.

    Same node convention as a plain expectiminimax search: a truthy ``dice``
    means it is ``color``'s turn with that roll (maximise over children); a
    falsy ``dice`` means the other colour is about to roll (probability
    weighted sum, over rolls, of the lowest-valued reply).

    ``alpha`` and ``beta`` are the current search window and are passed
    unchanged in meaning to the recursive calls.

    Returns:
        A ``(value, move)`` tuple.  At a leaf the move is ``move`` itself, at
        an own-turn node it is the best child found before any cutoff, and at
        an unknown-roll node it is always ``None``.  After a cutoff at an
        unknown-roll node the value is the partial sum accumulated so far.

    Raises:
        Exception: if ``ply`` is greater than 2.
    """
    if ply > 2:
        raise Exception("don't do more than 2")

    position = move.board_after
    if ply == 0 or position.getWinner() != NONE:
        return heuristic.apply(position, color), move

    if not dice:
        # Not color's move: accumulate the expectation roll by roll.
        replies_by_roll = get_board_children(position, getOtherColor(color))
        expected = 0
        seen_prob = 0
        # Different rolls can lead to the same resulting board; score each
        # distinct board only once.
        scored = {}
        for roll in replies_by_roll:
            worst = math.inf
            for reply in replies_by_roll[roll]:
                outcome = reply.board_after
                if outcome not in scored:
                    scored[outcome] = alpha_beta(
                        reply, ply - 1, color, heuristic, alpha, beta)[0]
                worst = min(worst, scored[outcome])
            expected = expected + worst * probability[roll]
            seen_prob += probability[roll]
            # Best case for the rolls not yet examined: every one of them
            # scores heuristic.MAX (assumed to be the heuristic's largest
            # possible value -- confirm against the heuristic classes).
            ceiling = expected + heuristic.MAX * (1 - seen_prob)
            beta = min(beta, ceiling)
            if beta <= alpha:
                break
        return expected, None

    # color's move with a known roll: maximise, tightening alpha as we go.
    best_score = -math.inf
    best_move = None
    for candidate in get_board_children(position, color, dice=dice):
        score = alpha_beta(candidate, ply - 1, color, heuristic, alpha, beta)[0]
        if score > best_score:
            best_move = candidate
            best_score = score
        alpha = max(alpha, best_score)
        if alpha >= beta:
            break
    return best_score, best_move
def won(self, board, value):
    """Train the network on the final position of a game this player won.

    Does nothing unless ``self.learning`` is truthy.  Otherwise a target
    tensor is built from ``self.color`` and ``value``, ``self.evaluate`` is
    run on ``board`` and the target is handed to ``self.network.backprop``.

    Targets:
        * 1 output:  ``[[1.]]`` for BLACK, ``[[0.]]`` for WHITE.
        * 4 outputs: one-hot; BLACK uses index 0 when ``value == 1`` and
          index 1 otherwise, WHITE uses index 2 when ``value == 1`` and
          index 3 otherwise.  (Presumably ``value == 1`` is a plain win and
          anything else a larger win -- confirm against the caller.)

    Raises:
        ValueError: if the network has neither 1 nor 4 outputs, or if
            ``self.color`` is neither BLACK nor WHITE.  Previously these
            cases left the target unbound and failed with an
            ``UnboundLocalError`` only after ``self.evaluate`` had run.
    """
    if not self.learning:
        return

    # Validate before doing any network work so a misconfiguration fails
    # early and with a message that names the actual problem.
    num_outputs = self.network.num_outputs
    if num_outputs not in (1, 4):
        raise ValueError(
            "unsupported number of network outputs: " + str(num_outputs))
    if self.color != BLACK and self.color != WHITE:
        raise ValueError("unsupported color: " + str(self.color))

    if num_outputs == 1:
        target = [[1.]] if self.color == BLACK else [[0.]]
    elif self.color == WHITE:
        target = [[0., 0., 1., 0.]] if value == 1 else [[0., 0., 0., 1.]]
    else:
        target = [[1., 0., 0., 0.]] if value == 1 else [[0., 1., 0., 0.]]
    expected = tf.constant(target)

    # The result is discarded; presumably this forward pass leaves the state
    # in the network that backprop then uses -- confirm in evaluate/backprop.
    self.evaluate(board, getOtherColor(self.color), self.color)
    self.network.backprop(expected)
def get_move(self, backgammon):
    """Choose the legal move whose resulting board evaluates highest.

    Every move from ``generate_moves`` for the current board and dice is
    scored with ``self.evaluate`` on its ``board_after`` (with the other
    colour to play next).  The first move with the strictly highest score
    above ``-1000000`` wins.

    When ``self.learning`` is truthy, the current board is evaluated once
    more and the network output of the chosen move is passed to
    ``self.network.backprop``.

    Returns:
        The chosen move, or ``None`` if no move scored above the sentinel
        (for example when there are no moves at all).
    """
    # pa4 specs
    me = self.color
    opponent = getOtherColor(self.color)
    current = backgammon.board

    chosen = None
    top_score = -1000000
    top_output = None
    for candidate in generate_moves(current, me, backgammon.dice):
        score, net_output = self.evaluate(candidate.board_after, opponent, me)
        if score > top_score:
            chosen, top_score, top_output = candidate, score, net_output

    if not self.learning:
        return chosen

    # NOTE(review): if no move was selected, top_output is still None here
    # and is passed to backprop as-is -- confirm backprop tolerates that.
    self.evaluate(current, me, me)
    self.network.backprop(top_output)
    return chosen
def get_moves(color, distance_dict, starting_loc, root):
    """Grow the move tree under ``root`` with ``color``'s next single-die moves.

    Each legal move found is wrapped in a ``MoveNode`` appended to
    ``root.children``, and the search recurses from that node with the die
    removed via ``update_distance_dict``.  Nothing is returned; the result is
    the mutated tree.

    Args:
        color: the colour to move.
        distance_dict: the dice still to be played; its largest and smallest
            keys are used as the two dice.  (Presumably maps die value to
            remaining uses -- confirm against ``update_distance_dict``.)
        starting_loc: location to start from; ``False`` means there is
            nothing left to try.
        root: node whose ``board_after`` is the position to move from.

    Three situations are distinguished:

    * checkers on the bar -- only bar entries are tried, with the larger die
      and, if different, the smaller die;
    * all checkers in the home board -- a bear-off with the larger die is
      tried, then normal moves;
    * otherwise -- normal moves only.

    Moves that raise ``IllegalMoveException`` are skipped.
    """
    # TODO: add doubles and single piece move printing

    # Base cases: no dice left, nowhere to start from, or the game is over.
    if not distance_dict or starting_loc is False:
        return
    board = root.board_after
    if board.getWinner() != NONE:
        return

    high_die = max(distance_dict)
    low_die = min(distance_dict)

    # Pieces on the bar must come in before anything else can move.
    if board.numBar(color) > 0:
        dice_to_try = [high_die]
        if high_die != low_die:
            dice_to_try.append(low_die)
        for die in dice_to_try:
            try:
                entry = BarMovement(
                    color, getRelativePointLocation(getOtherColor(color), die))
                after = entry.apply(board)
                child = MoveNode(root.name + " " + str(entry), after,
                                 die=die, deep=root.deep + 1)
                root.children.append(child)
                remaining = update_distance_dict(die, distance_dict)
                get_moves(color, remaining, after.farthestBack(color), child)
            except IllegalMoveException:
                pass
        return

    # Able to bear off: try taking a checker off with the larger die first.
    if board.allInHome(color):
        try:
            take_off = TakeOffMovement(color, high_die, starting_loc)
            after = take_off.apply(board)
            child = MoveNode(root.name + " " + str(take_off), after,
                             die=high_die, deep=root.deep + 1)
            root.children.append(child)
            remaining = update_distance_dict(high_die, distance_dict)
            next_start = get_next_location(
                after.getCheckers(color), starting_loc, color)
            get_moves(color, remaining, next_start, child)
        except IllegalMoveException:
            pass

    # Normal moves apply both when bearing off is possible and when it is not.
    do_normal_move(color, distance_dict, starting_loc, root, board,
                   high_die, low_die)
def apply(board, color):
    """Rate ``board`` for ``color`` as opponent pips divided by own pips.

    A board already won by ``color`` short-circuits to ``1000000000``.
    Otherwise the result grows as the opponent's pip count rises relative to
    ``color``'s.
    """
    # TODO: not bounded
    if board.getWinner() != color:
        opponent_pips = board.pips(getOtherColor(color))
        own_pips = board.pips(color)
        return opponent_pips / own_pips
    return 1000000000
def apply(board, color):
    """Rate ``board`` for ``color`` as the opponent's pip count over 375.

    A higher opponent pip count gives a higher score.
    """
    # Presumably 375 is the largest possible pip count (15 checkers x 25),
    # which would keep the result within [0, 1] -- confirm.
    scale = 375
    return board.pips(getOtherColor(color)) / scale