Example #1
def get_reward(board, state, action, new_state, immediate_reward):
  # Terminal positions already carry their reward; pass it through unchanged.
  if immediate_reward != 0: return immediate_reward
  try:
    # Assume the opponent replies with its best-known action.
    opponant_action_index = np.nanargmax(Q[new_state,:])
  except ValueError:
    # np.nanargmax raises ValueError when the whole row is NaN,
    # i.e. no legal action remains recorded for this state.
    print("EXCEPTION: opponent has no legal moves. num_moves: " + str(len(board.move_stack)))
    print("white's move" if board.turn == chess.WHITE else "black's move")
    print(serializers.unicode(board))
    for _ in range(len(board.move_stack)):
      board.pop()
      print(serializers.unicode(board))
    raise

  opponant_move = generate_move_from_action(board, opponant_action_index)
  if opponant_move is None:
    # The chosen action doesn't map to a legal move: mark it invalid
    # and retry with the next-best action.
    invalid_action(new_state, opponant_action_index)
    return get_reward(board, state, action, new_state, immediate_reward)

  # Peek one ply ahead to value the state the opponent's reply leads to.
  board.push(opponant_move)
  resulting_state = q_lookup[satg.board_key(board)]
  board.pop()

  learned = gamma * np.nanmax(Q[resulting_state,:])

  # Standard Q-learning blend of the old estimate and the learned value.
  return (1 - alpha) * Q[state,action] + alpha * learned
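
These examples lean on module-level state defined elsewhere in the project (Q, q_lookup, alpha, gamma, and friends). A minimal sketch of plausible definitions, purely to make the context concrete; all of the sizes and constants below are assumptions, not values taken from the project:

import numpy as np

# Hypothetical setup (the real shapes and constants live elsewhere):
state_count = 16 * 16 * 16 * 2   # e.g. K, R, k squares plus side to move on a 4x4 board
action_count = 16 * 16           # e.g. one action per (from, to) square pair
Q = np.zeros((state_count, action_count))  # state-action value table;
                                           # invalid actions get NaN'd out by invalid_action()
q_lookup = {}                    # maps satg.board_key(board) -> row index into Q
alpha, gamma = 0.1, 0.9          # learning rate and discount (typical defaults)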
Example #2
def get_valid_move(board, state, i):
  try:
    # If every action for this state has been marked invalid (all NaN),
    # there is no move to pick.
    if np.all(np.isnan(Q[state,:])): return None

    if use_book_exploration_policy:
      # Softmax-style exploration: weight each action proportionally to
      # probability_constant ** value, after shifting values non-negative.
      k_raised_by_a = lambda a: 0 if np.isnan(a) else probability_constant ** a
      array = Q[state,:]
      minimum = np.nanmin(array)  # nanmin, so NaN entries don't mask a negative minimum
      if minimum < 0:
        array = array - minimum

      numerators = list(map(k_raised_by_a, array))
      action_index = probability_choice(numerators)
    else:
      # Greedy selection with random noise that decays as the episode index i grows.
      action_index = np.nanargmax(Q[state,:] + np.random.randn(action_count)*(np.float64(2 * num_episodes) / np.float64(i+1)))
  except Exception:
    print("EXCEPTION: num_moves: " + str(len(board.move_stack)))
    print("iteration: " + str(i))
    print("white's move" if board.turn == chess.WHITE else "black's move")
    print(serializers.unicode(board))
    for _ in range(len(board.move_stack)):
      board.pop()
      print(serializers.unicode(board))
    raise
  move = generate_move_from_action(board, action_index)

  if move is None:
    # Illegal action: mark it invalid in the Q table and pick again.
    invalid_action(state, action_index)
    return get_valid_move(board, state, i)

  return move, action_index
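
probability_choice is not shown in these examples. Given the softmax-style numerators built above, the natural reading is a weighted random draw over action indices; a minimal sketch under that assumption (the real helper may differ):

import numpy as np

def probability_choice(numerators):
  # Hypothetical helper: draw an action index with probability
  # proportional to its (non-negative) numerator.
  weights = np.asarray(numerators, dtype=np.float64)
  probs = weights / weights.sum()
  return np.random.choice(len(probs), p=probs)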
Example #3
def print_status_update(board, immediate_reward, before_reward, reward,
                        old_state, action):
    last_board_unicode = serializers.unicode(board)
    # Pop once so the penultimate position can be rendered as well; note
    # this leaves the caller's board one ply shorter.
    board.pop()
    penultimate_board_unicode = serializers.unicode(board)
    print(penultimate_board_unicode)
    print(last_board_unicode)
    # After the pop, board.turn is the side that played the final move,
    # i.e. the winner.
    print("winner: " + ("black" if board.turn == chess.BLACK else "white"))
    print("immediate_reward: " + str(immediate_reward))
    print("reward: (" + str(reward) + ") " + str(before_reward) + " => " +
          str(Q[old_state, action]))
    print(Q[old_state, :])
    print("reached destination!")
Example #4
def start():
  print("Begin!")
  for i in range(num_episodes):
    board = generators.random_krk_board(is_4x4_game)
    state = q_lookup[satg.board_key(board)]
    # A non-zero immediate reward means the random start is already terminal.
    is_destination = get_immediate_reward(board, state, i) != 0
    if is_destination: continue
    winner = 0
    for j in range(1000):
      move, action = get_valid_move(board, state, i)
      board.push(move)

      old_state = state
      state = q_lookup[satg.board_key(board)]

      immediate_reward = get_immediate_reward(board, state, i)
      is_destination = immediate_reward != 0

      if is_destination:
        winning_boards.append(board.copy())
        # board.turn is the side to move next, so a positive reward
        # belongs to the side that just moved.
        if board.turn == chess.BLACK:
          if immediate_reward > 0: winner = chess.WHITE
          else: winner = chess.BLACK
        else:
          if immediate_reward > 0: winner = chess.BLACK
          else: winner = chess.WHITE

      reward = get_reward(board, old_state, action, state, immediate_reward)
      Q[old_state, action] = reward

      if is_destination:
        Q[state, :] = -reward  # should be an impossible state since the game is over, but helps with training
        break
    # Track outcomes in a rolling window of the last 100 episodes:
    # 1 = white win, 2 = black win, 0 = no result within 1000 plies.
    if is_destination:
      if winner == chess.WHITE:
        results[i % 100] = 1
      else:
        results[i % 100] = 2
    else:
      results[i % 100] = 0
    if i % 100 == 0:
      unique, counts = np.unique(results, return_counts=True)
      result = dict(zip(unique, counts))
      total_wins_last_100 = result.get(1, 0) + result.get(2, 0)
      if total_wins_last_100 == 0:
        print("No wins in the past 100 games.")
      else:
        if board_logging_enabled:
          for b in winning_boards: print(serializers.unicode(b))
        white_wins_last_100 = 100 * result.get(1, 0) / total_wins_last_100
        black_wins_last_100 = 100 * result.get(2, 0) / total_wins_last_100
        print("White wins: " + str(white_wins_last_100) + "% || Black wins: " + str(black_wins_last_100) + "% || Total wins: " + str(total_wins_last_100))
        print("(" + str(i) + "/" + str(num_episodes) + ")")
        winning_boards.clear()
    # Periodically anneal the exploration constant and checkpoint the table.
    if (i + 1) % 10000 == 0: upgrade_probability_constant()
    if (i + 1) % 100000 == 0: satg.serialize("state_action_table_" + str(i) + ".bin", Q)
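
get_immediate_reward is also project code that isn't shown. The loop treats any non-zero return as terminal, and the winner bookkeeping implies a positive value means the side that just moved won. A sketch of a terminal check consistent with that, using standard python-chess predicates; the actual reward values here are assumptions:

def get_immediate_reward(board, state, i):
  # Hypothetical sketch (state and i unused here): non-zero only at
  # game over, positive when the side that just moved delivered mate.
  if board.is_checkmate():
    return 1
  if board.is_stalemate() or board.is_insufficient_material():
    return -1  # draws also end the episode in this scheme (an assumption)
  return 0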
Example #5
def get_valid_move(board, state, i):
    try:
        # Greedy selection with decaying random noise; i is the episode
        # index (it was referenced but missing from the signature).
        action_index = np.nanargmax(
            Q[state, :] + np.random.randn(action_count) *
            (np.float64(2 * num_episodes) / np.float64(i + 1)))
    except ValueError:
        # np.nanargmax raises ValueError when the whole row is NaN.
        print("EXCEPTION: num_moves: " + str(len(board.move_stack)))
        print("white's move" if board.turn == chess.WHITE else "black's move")
        print(serializers.unicode(board))
        for _ in range(len(board.move_stack)):
            board.pop()
            print(serializers.unicode(board))
        raise
    move = generate_move_from_action(board, action_index)

    if move is None:
        invalid_action(state, action_index)
        return get_valid_move(board, state, i)

    return move, action_index
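
Every example funnels the chosen action_index through generate_move_from_action, which returns None for actions that don't correspond to a legal move. One plausible encoding is a flat (from-square, to-square) index; a sketch under that assumption (the project's real encoding, especially for the 4x4 variant, may differ):

import chess

def generate_move_from_action(board, action_index, square_count=64):
  # Hypothetical decoding: treat the flat index as a (from, to) square
  # pair and accept it only if it names a legal move in this position.
  from_square, to_square = divmod(int(action_index), square_count)
  move = chess.Move(from_square, to_square)
  return move if move in board.legal_moves else None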