Пример #1
0
def play(model):
    """Play one game to the end and return the collected training records.

    Starting from a fresh ``State()``, every turn asks ``pv_mcts_scores`` for
    a probability distribution over the current legal actions, records the
    position together with that distribution, and samples the next action
    from it.

    Args:
        model: Passed through unchanged to ``pv_mcts_scores``.

    Returns:
        A list with one ``[pieces_array, policies, value]`` entry per move
        played. ``policies`` has ``DN_OUTPUT_SIZE`` slots and is zero
        everywhere except at the legal-action indices. ``value`` equals
        ``first_player_value(final_state)`` for the first entry and flips
        sign on each following entry.
    """
    # Training data
    history = []

    # Create the initial state
    state = State()

    # Keep playing until the game is over
    while not state.is_done():
        # Probability distribution over the legal actions
        scores = pv_mcts_scores(model, state, SP_TEMPERATURE)

        # Fetch the legal actions once; they are needed both for the policy
        # vector and for sampling the move.
        legal_actions = state.legal_actions()

        # Add the state and policy to the training data; the value slot is
        # filled in after the game has finished.
        policies = [0] * DN_OUTPUT_SIZE
        for action, policy in zip(legal_actions, scores):
            policies[action] = policy
        history.append([state.pieces_array(), policies, None])

        # Choose the action to play
        action = np.random.choice(legal_actions, p=scores)

        # Advance to the next state
        state = state.next(action)

    # Add the value to the training data, negating it from one entry to the
    # next (presumably because the side to move alternates - confirm against
    # first_player_value).
    value = first_player_value(state)
    for record in history:
        record[2] = value
        value = -value
    return history
Пример #2
0
def play(model):
  """Run a single game and return its per-move training records.

  The game starts at ``State()`` and continues until ``state.is_done()``.
  On each turn ``pv_mcts_scores(model, state, SP_TEMPERATURE)`` supplies the
  probabilities used both as the recorded policy and as the sampling weights
  for the move that is actually played.

  Args:
    model: Forwarded as-is to ``pv_mcts_scores``.

  Returns:
    A list of ``[pieces_array, policies, value]`` lists, one per move.
    ``policies`` is a ``DN_OUTPUT_SIZE``-long list holding the score of each
    legal action at that action's index and 0 elsewhere. ``value`` is
    ``first_player_value(final_state)`` on the first record and alternates
    in sign on the records after it.
  """
  history = []

  state = State()

  while not state.is_done():
    scores = pv_mcts_scores(model, state, SP_TEMPERATURE)

    # Looked up once per turn and reused for the policy vector and sampling.
    legal_actions = state.legal_actions()

    # Scatter the per-legal-action scores into a fixed-size policy vector.
    policies = [0] * DN_OUTPUT_SIZE
    for action, policy in zip(legal_actions, scores):
      policies[action] = policy
    # The value slot stays None until the final result is known.
    history.append([state.pieces_array(), policies, None])

    # Sample the move to play using the scores as probabilities.
    action = np.random.choice(legal_actions, p=scores)

    state = state.next(action)

  # Back-fill the value of every record, flipping the sign record by record.
  value = first_player_value(state)
  for record in history:
    record[2] = value
    value = -value

  return history
Пример #3
0
def play(model):
    """Play one complete game and return the training data gathered from it.

    Each turn records the current position and the move probabilities given
    by ``pv_mcts_scores``, then samples a move from those probabilities and
    advances the state. Once the game is done, the outcome is written back
    into every record.

    Args:
        model: Handed to ``pv_mcts_scores`` without modification.

    Returns:
        A list of ``[pieces_array, policies, value]`` entries in move order.
        ``policies`` is a list of length ``DN_OUTPUT_SIZE`` that carries the
        score of each legal action at its own index and 0 at all other
        indices. ``value`` starts at ``first_player_value(final_state)`` for
        the first entry and is negated for each subsequent entry.
    """
    # Training data
    history = []

    # Create the state
    state = State()

    # Loop until the game has ended
    while not state.is_done():
        # Get the probability distribution over the legal moves
        scores = pv_mcts_scores(model, state, SP_TEMPERATURE)

        # The legal moves are needed twice below, so fetch them only once
        legal_actions = state.legal_actions()

        # Add the state and policy to the training data
        policies = [0] * DN_OUTPUT_SIZE
        for action, policy in zip(legal_actions, scores):
            policies[action] = policy
        history.append([state.pieces_array(), policies, None])

        # Get the action
        action = np.random.choice(legal_actions, p=scores)

        # Get the next state
        state = state.next(action)

    # Add the value to the training data; the sign alternates entry by entry
    value = first_player_value(state)
    for record in history:
        record[2] = value
        value = -value
    return history