def play_games(num_games):
    """Run num_games of MinimaxAgent self-play and export training data.

    For each game, basis-vector features of every board position are
    recorded from both players' perspectives; decided (non-draw) games are
    converted to labelled rows via episode_to_data and exported at the end.

    Args:
        num_games: number of self-play games to run.
    """
    player_1_agent = MinimaxAgent("MinimaxAgent1")
    player_2_agent = MinimaxAgent("MinimaxAgent2")
    next_player = {
        Player.PLAYER_1: Player.PLAYER_2,
        Player.PLAYER_2: Player.PLAYER_1
    }
    player_map = {
        Player.PLAYER_1: player_1_agent,
        Player.PLAYER_2: player_2_agent
    }

    # Feature columns come from the basis vector of an empty board; the two
    # reward columns are filled in later by episode_to_data.
    data_columns = sorted(basis_vector(Connect4Board(), Player.PLAYER_1).keys())
    data_columns.append("reward")
    data_columns.append("only_end_reward")

    # Collect one frame per decided episode and concatenate once at the end:
    # DataFrame.append was deprecated and removed in pandas 2.0, and repeated
    # appends are quadratic in the number of games.
    episode_frames = []

    # loop through games
    for i in range(num_games):
        print("Game %d!" % (i))
        game = Connect4Board()
        curr_player = Player.PLAYER_1
        player_1_episode = [basis_vector(game, Player.PLAYER_1)]
        player_2_episode = [basis_vector(game, Player.PLAYER_2)]

        winner = None
        while True:
            game = game.add_piece(
                curr_player,
                choose_action(player_map[curr_player], curr_player, game))
            player_1_episode.append(basis_vector(game, Player.PLAYER_1))
            player_2_episode.append(basis_vector(game, Player.PLAYER_2))

            game_state = game.check_game_state(curr_player)
            if game_state == GameState.DRAW:
                print("DRAW")
                break
            if game_state == GameState.PLAYER_1_WIN:
                winner = Player.PLAYER_1
                print("PLAYER 1 WON!")
                break
            if game_state == GameState.PLAYER_2_WIN:
                winner = Player.PLAYER_2
                print("PLAYER 2 WON!")
                break
            curr_player = next_player[curr_player]

        # Draws carry no reward signal, so only decided games are recorded.
        if winner is not None:
            episode_data = episode_to_data(player_1_episode, player_2_episode,
                                           winner, data_columns)
            print("Shape of episode data:", episode_data.shape)
            episode_frames.append(episode_data)

    if episode_frames:
        data = pd.concat(episode_frames)
    else:
        data = pd.DataFrame(columns=data_columns)
    export_data(data)
# ----- Example 2 -----
def run_game(print_board=False):
  """Play one interactive Connect-4 game: human (player 1) vs. MCTS (player 2).

  Alternates turns until the game reaches a terminal state; the final board
  is always printed, and when print_board is set the board is also shown
  before every move.
  """
  game = Connect4Board()
  next_player = {Player.PLAYER_1: Player.PLAYER_2, Player.PLAYER_2: Player.PLAYER_1}
  curr_player = Player.PLAYER_1

  # Swap these constructors to pit different agent types against each other
  # (HumanAgent, MinimaxAgent, QLearningAgent, ForwardSearchAgent, MCTSAgent).
  agents = {
      Player.PLAYER_1: HumanAgent("The Human"),
      Player.PLAYER_2: MCTSAgent("The AI"),
  }

  # Message to show for each terminal game state.
  terminal_messages = {
      GameState.DRAW: "DRAW!!!",
      GameState.PLAYER_1_WIN: "PLAYER 1 WINS!!!",
      GameState.PLAYER_2_WIN: "PLAYER 2 WINS!!!",
  }

  while True:
    if print_board:
      game.print_board()
      print("================================")

    move = agents[curr_player].get_action(curr_player, game)
    game = game.add_piece(curr_player, move)

    outcome = game.check_game_state(curr_player)
    if outcome in terminal_messages:
      print(terminal_messages[outcome])
      game.print_board()
      return
    curr_player = next_player[curr_player]
def test_win_condition():
  """Exhaustively verify Connect4Board.check_win on the 6x7 board.

  Places every possible vertical, horizontal, and both-diagonal
  four-in-a-row for each player on an otherwise empty board and checks
  that check_win detects it.

  Returns:
      (True, None, None) when every case passes, otherwise
      (False, failure_description, failing_board).
  """

  def _check(cells, direction):
    # Place `cells` ((row, col) pairs) for each player in turn on a fresh
    # board and confirm check_win fires; returns an error tuple on the
    # first failure, else None.
    for player, label in ((Player.PLAYER_1, "1"), (Player.PLAYER_2, "2")):
      game = Connect4Board()
      for r, c in cells:
        game.board[r][c] = player
      if not game.check_win(player):
        return (False,
                "Failed %s win condition for player %s" % (direction, label),
                game)
    return None

  # check vertical
  for row in range(6 - 3):
    for col in range(7):
      error = _check([(r, col) for r in range(row, row + 4)], "vertical")
      if error:
        return error

  # check horizontal
  for row in range(6):
    for col in range(7 - 3):
      error = _check([(row, c) for c in range(col, col + 4)], "horizontal")
      if error:
        return error

  # check rising diagonal (/)
  for row in range(6 - 3):
    for col in range(7 - 3):
      error = _check([(row + i, col + i) for i in range(4)], "diagonal /")
      if error:
        return error

  # check falling diagonal (\); the backslash is escaped properly here --
  # the original literal used a bare "\ ", an invalid escape sequence that
  # raises a SyntaxWarning (and will become a SyntaxError).
  for row in range(6 - 3):
    for col in range(3, 7):
      error = _check([(row + i, col - i) for i in range(4)], "diagonal \\")
      if error:
        return error

  return True, None, None
# ----- Example 4 -----
def start_connect4(width, height, connect):
    """Start a Connect-4 game for the requesting client's lobby.

    Args:
        width: board width (arrives stringly-typed; coerced to int).
        height: board height (same coercion).
        connect: pieces in a row required to win (same coercion).
    """
    requester = request.remote_addr
    lobby = game_manager.get_game(requester)

    # Route parameters arrive as strings; coerce before building the board.
    board_width = int(width)
    board_height = int(height)
    win_length = int(connect)
    print('starting connect4', lobby.get_name())
    board = Connect4Board(lobby.get_players(), board_width, board_height,
                          win_length)
    lobby.start_game(board)
    # Redirect every lobby member to the game page.
    emit('goto_game', '/connect4', room=lobby.get_name())
# ----- Example 5 -----
def run_game(print_board=False):
    """Interactive game loop: human (player 1) vs. minimax (player 2).

    Loops until the game reaches a terminal state, always printing the
    final board; when print_board is set, the board is also printed
    before each move.
    """
    game = Connect4Board()
    opponent_of = {
        Player.PLAYER_1: Player.PLAYER_2,
        Player.PLAYER_2: Player.PLAYER_1
    }
    active = Player.PLAYER_1

    agents = {
        Player.PLAYER_1: HumanAgent("Player 1"),
        Player.PLAYER_2: MinimaxAgent(Player.PLAYER_2)
    }

    while True:
        if print_board:
            game.print_board()
            print("================================")

        chosen_column = agents[active].get_action(game)
        game = game.add_piece(active, chosen_column)

        state = game.check_game_state(active)
        if state == GameState.DRAW:
            print("DRAW!!!")
        elif state == GameState.PLAYER_1_WIN:
            print("PLAYER 1 WINS!!!")
        elif state == GameState.PLAYER_2_WIN:
            print("PLAYER 2 WINS!!!")
        else:
            # Game still in progress: hand the move to the other player.
            active = opponent_of[active]
            continue
        game.print_board()
        return
def play_games(num_games=6):
  """Round-robin tournament between every pair of agents.

  Each pair plays num_games games, half with each agent moving first so
  the first-move advantage is shared evenly, then the aggregated
  win/loss/draw scoreboard is printed.

  Args:
      num_games: games per pairing (split evenly across seating orders).
  """
  agent_list = instantiate_agents()
  # win/loss/draw counts, indexed scoreboard[agent][opponent].
  scoreboard = {
      agent.get_name(): {
          other.get_name(): {"win": 0, "loss": 0, "draw": 0}
          for other in agent_list if other.get_name() != agent.get_name()
      }
      for agent in agent_list
  }

  for player_1_idx in range(len(agent_list)):
    for player_2_idx in range(player_1_idx + 1, len(agent_list)):
      # Home-and-away: play half the games with each seating order.
      _play_series(agent_list[player_1_idx], agent_list[player_2_idx],
                   num_games // 2, scoreboard)
      _play_series(agent_list[player_2_idx], agent_list[player_1_idx],
                   num_games // 2, scoreboard)

  # output counts
  print_scoreboard(scoreboard)


def _play_series(player_1_agent, player_2_agent, games, scoreboard):
  """Play `games` games with the given seating order, tallying results.

  Args:
      player_1_agent: agent moving first (Player.PLAYER_1).
      player_2_agent: agent moving second (Player.PLAYER_2).
      games: number of games to play with this seating.
      scoreboard: nested win/loss/draw counts, mutated in place.
  """
  next_player = {
      Player.PLAYER_1: Player.PLAYER_2,
      Player.PLAYER_2: Player.PLAYER_1
  }
  print("%s VS %s:" % (player_1_agent.get_name(), player_2_agent.get_name()))
  player_map = {
      Player.PLAYER_1: player_1_agent,
      Player.PLAYER_2: player_2_agent
  }
  name_1 = player_1_agent.get_name()
  name_2 = player_2_agent.get_name()

  for i in range(games):
    print("Game %d" % (i + 1))
    game = Connect4Board()
    curr_player = Player.PLAYER_1
    winner = None

    while True:
      game = game.add_piece(
          curr_player, player_map[curr_player].get_action(curr_player, game))

      game_state = game.check_game_state(curr_player)
      if game_state == GameState.DRAW:
        scoreboard[name_1][name_2]["draw"] += 1
        scoreboard[name_2][name_1]["draw"] += 1
        break
      if game_state == GameState.PLAYER_1_WIN:
        scoreboard[name_1][name_2]["win"] += 1
        scoreboard[name_2][name_1]["loss"] += 1
        winner = name_1
        break
      if game_state == GameState.PLAYER_2_WIN:
        # BUG FIX: the original credited this win to player 1's scoreboard
        # row and announced player 1 as the winner.
        scoreboard[name_2][name_1]["win"] += 1
        scoreboard[name_1][name_2]["loss"] += 1
        winner = name_2
        break
      curr_player = next_player[curr_player]

    if winner is not None:
      print(winner, "won!")
    else:
      print("Draw!")
def training(num_iterations, discount_factor=0.8):
    """Learn linear Q-function weights by epsilon-greedy self-play.

    Alternates which side the learning agent plays each iteration, and
    updates theta after every move with a TD(0)-style rule using a 1/N(s)
    learning rate and TD-error clipping to [-1, 1].

    Args:
        num_iterations: number of self-play games to train on.
        discount_factor: gamma for the TD target.

    Returns:
        dict mapping basis-feature name -> learned weight.
    """
    next_player = {
        Player.PLAYER_1: Player.PLAYER_2,
        Player.PLAYER_2: Player.PLAYER_1
    }

    # Initialize theta: small hand-set priors on the pattern features; the
    # 28 per-column features are built in a loop instead of 28 near-identical
    # literals (resulting dict is identical to the original).
    theta = {
        "player_2_out_of_4": 0,
        "opponent_2_out_of_4": 0,
        "player_3_out_of_4": 0.01,
        "opponent_3_out_of_4": -0.01,
        "player_3_out_of_5": 0.05,
        "opponent_3_out_of_5": -0.05,
        "player_win": 10,
        "opponent_win": -10
    }
    for col in range(7):
        theta["player_num_possible_wins_in_col_%d" % col] = 0
        theta["opponent_num_possible_wins_in_col_%d" % col] = 0
        theta["player_num_consecutive_possible_wins_in_col_%d" % col] = 0.05
        theta["opponent_num_consecutive_possible_wins_in_col_%d" % col] = -0.05

    N = {}  # visit count per serialized board; drives the 1/N learning rate

    # loop through games
    for i in range(num_iterations):
        print("Iteration %d!" % (i + 1))
        game = Connect4Board()
        # Alternate which side the learning agent plays each iteration.
        agent_player = Player.PLAYER_1 if i % 2 == 0 else Player.PLAYER_2
        opp_player = Player.PLAYER_1 if i % 2 != 0 else Player.PLAYER_2
        curr_player = Player.PLAYER_1
        game_end = False
        winner = None

        while True:
            # Choose action based on theta^T * basis + some exploration
            action = epsilon_greedy(game, curr_player, theta)

            # Observe new next state and reward
            game = game.add_piece(curr_player, action)
            reward = 0
            if game.check_draw():
                reward = 0
                game_end = True
            elif game.check_win(agent_player):
                reward = 1
                game_end = True
                winner = agent_player
            elif game.check_win(opp_player):
                reward = -1
                game_end = True
                winner = opp_player

            # Find the action that maximizes q for the next player
            valid_next_actions = [
                a for a in range(game.NUM_COLS) if game.valid_action(a)
            ]
            next_q = float("-inf")
            next_action = None
            for candidate in valid_next_actions:
                temp_basis = basis_vector(
                    game.add_piece(next_player[curr_player], candidate),
                    next_player[curr_player])
                val = calculate_q(theta, temp_basis)
                if val > next_q:
                    next_q = val
                    next_action = candidate

            # Update theta with a 1/N(s) learning rate.
            board_string = game.serialize_board()
            N[board_string] = N.get(board_string, 0) + 1
            alpha = 1.0 / N[board_string]
            basis = basis_vector(game, agent_player)
            if len(valid_next_actions) != 0 and not game_end:
                td_error = (
                    reward
                    + discount_factor * calculate_q(
                        theta,
                        basis_vector(
                            game.add_piece(next_player[curr_player],
                                           next_action), agent_player))
                    - calculate_q(theta, basis))
                # Clip the TD error to [-1, 1] to prevent exploding Q-values.
                # BUG FIX: the original wrote max(1.0, min(-1.0, x)), which
                # always evaluates to 1.0 and discards the TD error entirely.
                coefficient = alpha * max(-1.0, min(1.0, td_error))
            else:
                coefficient = alpha * reward
            for key in theta:
                theta[key] += coefficient * basis[key]

            if game_end:
                if winner is None:
                    print("DRAW!")
                elif winner == agent_player:
                    print("WON!")
                elif winner == opp_player:
                    print("LOST!")
                break
            curr_player = next_player[curr_player]
    return theta