Example #1
    def test_inference(self):
        with tempfile.TemporaryDirectory() as working_dir, \
            tempfile.TemporaryDirectory() as export_dir:
            # Create random initial weights and export them as a model.
            dualnet.bootstrap(working_dir, model_params.DummyMiniGoParams())
            exported_model = os.path.join(export_dir, 'bootstrap-model')
            dualnet.export_model(working_dir, exported_model)

            # Two independent runners should be able to load the same
            # exported model and run inference on an empty board.
            n1 = dualnet.DualNetRunner(exported_model,
                                       model_params.DummyMiniGoParams())
            n1.run(go.Position(utils_test.BOARD_SIZE))

            n2 = dualnet.DualNetRunner(exported_model,
                                       model_params.DummyMiniGoParams())
            n2.run(go.Position(utils_test.BOARD_SIZE))
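For context, this test method would normally live inside a tf.test.TestCase subclass. Below is a minimal sketch of the surrounding harness, assuming the module layout implied by the snippet's imports; the class name DualNetTest is an assumption, not shown in the original:

# Sketch only: the class name and module layout are assumptions.
import os
import tempfile

import tensorflow as tf

import dualnet
import go
import model_params
import utils_test

class DualNetTest(tf.test.TestCase):
    pass  # test_inference from above would be defined here

if __name__ == '__main__':
    tf.test.main()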
Example #2
def evaluate(trained_models_dir, black_model_name, white_model_name,
             evaluate_dir, params):
    """Evaluate with two models.

  With the model names, construct two DualNetRunners to play as black and white
  in a Go match. The two models play several games, and the model that wins by
  a margin of 55% is declared the winner.

  Args:
    trained_models_dir: Directories where the completed generations/models are.
    black_model_name: The name of the model playing black.
    white_model_name: The name of the model playing white.
    evaluate_dir: Where to write the evaluation results. Set as
      'base_dir/sgf/evaluate/'.
    params: An object of hyperparameters for the model.

  Returns:
    The model name of the winner.

  Raises:
    ValueError: if neither `WHITE` nor `BLACK` is returned.
  """

    black_model = os.path.join(trained_models_dir, black_model_name)
    white_model = os.path.join(trained_models_dir, white_model_name)

    print('Evaluate models between {} and {}'.format(black_model_name,
                                                     white_model_name))

    _ensure_dir_exists(evaluate_dir)

    with utils.logged_timer('Loading weights'):
        black_net = dualnet.DualNetRunner(black_model, params)
        white_net = dualnet.DualNetRunner(white_model, params)

    with utils.logged_timer('{} games'.format(params.eval_games)):
        winner = evaluation.play_match(params, black_net, white_net,
                                       params.eval_games, params.eval_readouts,
                                       evaluate_dir, params.eval_verbose)

    if winner != go.WHITE_NAME and winner != go.BLACK_NAME:
        raise ValueError('Winner should be either White or Black!')

    return black_model_name if winner == go.BLACK_NAME else white_model_name
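A hedged usage sketch of calling evaluate after a training iteration. The directory layout and model names below are illustrative assumptions; model_params.MiniGoParams is the full-size parameter class the docstrings mention, and DummyMiniGoParams from Example #1 would work the same way:

# Illustrative call; paths and model names are assumptions.
params = model_params.MiniGoParams()
winner = evaluate(
    trained_models_dir='base_dir/models',
    black_model_name='000001-model',
    white_model_name='000002-model',
    evaluate_dir='base_dir/sgf/evaluate',
    params=params)
print('Winner of the evaluation: {}'.format(winner))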
Example #3
def _prepare_selfplay(
    model_name, trained_models_dir, selfplay_dir, holdout_dir, sgf_dir, params):
  """Set directories and load the network for selfplay.

  Args:
    model_name: The name of the model for self-play
    trained_models_dir: Directories where the completed generations/models are.
    selfplay_dir: Where to write the games. Set as 'base_dir/data/selfplay/'.
    holdout_dir: Where to write the holdout data. Set as
      'base_dir/data/holdout/'.
    sgf_dir: Where to write the sgf (Smart Game Format) files. Set as
      'base_dir/sgf/'.
    params: A MiniGoParams instance of hyperparameters for the model.

  Returns:
    The directories and network model for selfplay.
  """
  # Set paths for the model with 'model_name'
  model_path = os.path.join(trained_models_dir, model_name)
  output_dir = os.path.join(selfplay_dir, model_name)
  holdout_dir = os.path.join(holdout_dir, model_name)
  # clean_sgf holds sgf files written without move comments.
  # full_sgf holds sgf files written with move comments.
  clean_sgf = os.path.join(sgf_dir, model_name, 'clean')
  full_sgf = os.path.join(sgf_dir, model_name, 'full')

  _ensure_dir_exists(output_dir)
  _ensure_dir_exists(holdout_dir)
  _ensure_dir_exists(clean_sgf)
  _ensure_dir_exists(full_sgf)
  selfplay_dirs = {
      'output_dir': output_dir,
      'holdout_dir': holdout_dir,
      'clean_sgf': clean_sgf,
      'full_sgf': full_sgf
  }
  # cache the network model for self-play
  with utils.logged_timer('Loading weights from {} ... '.format(model_path)):
    network = dualnet.DualNetRunner(model_path, params)
  return selfplay_dirs, network
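_prepare_selfplay (and the other examples on this page) call an _ensure_dir_exists helper that is not shown here. A minimal sketch of what it plausibly does, assuming the tf.gfile API already used in these snippets; the body is an assumption, not the original implementation:

import tensorflow as tf

def _ensure_dir_exists(directory):
  """Create `directory` (and any parents) if it does not exist yet."""
  if not tf.gfile.Exists(directory):
    tf.gfile.MakeDirs(directory)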
Example #4
def main(_):
  """Run the reinforcement learning loop."""
  tf.logging.set_verbosity(tf.logging.INFO)

  params = _set_params(FLAGS)

  # A dummy model for debug/testing purposes, with fewer games and iterations
  if FLAGS.test:
    params = model_params.DummyMiniGoParams()
    base_dir = FLAGS.base_dir + str(FLAGS.board_size) + '_size_dummy/'
  else:
    # Set directories for models and datasets
    base_dir = FLAGS.base_dir + str(FLAGS.board_size) + '_size/'

  dirs = utils.MiniGoDirectory(base_dir)

  # Run selfplay only if user specifies the argument.
  if FLAGS.selfplay:
    selfplay_model_name = FLAGS.selfplay_model_name or utils.get_latest_model(
        dirs.trained_models_dir)[1]
    max_games = FLAGS.selfplay_max_games or params.max_games_per_generation
    run_selfplay(selfplay_model_name, max_games, dirs, params)
    return

  # Run the RL pipeline.
  # If no models have been trained yet, start from the bootstrap model.

  if not os.path.isdir(dirs.trained_models_dir):
    print('No trained model exists! Starting from Bootstrap...')
    print('Creating random initial weights...')
    bootstrap(dirs.estimator_model_dir, dirs.trained_models_dir, params)
  else:
    print('A MiniGo base directory has been found!')
    print('Starting from the last checkpoint...')

  _, best_model_so_far = utils.get_latest_model(dirs.trained_models_dir)
  for rl_iter in range(params.max_iters_per_pipeline):
    print('RL_iteration: {}'.format(rl_iter))
    # Self-play with the best model to generate training data
    run_selfplay(
        best_model_so_far, params.max_games_per_generation, dirs, params)

    # gather selfplay data for training
    print('Gathering game output...')
    gather(dirs.selfplay_dir, dirs.training_chunk_dir, params)

    # train the next generation model
    model_num, _ = utils.get_latest_model(dirs.trained_models_dir)
    print('Training on gathered game data...')
    train(dirs.trained_models_dir, dirs.estimator_model_dir,
          dirs.training_chunk_dir, model_num + 1, params)

    # validate the latest model if needed
    if FLAGS.validation:
      print('Validating on the holdout game data...')
      validate(dirs.trained_models_dir, dirs.holdout_dir,
               dirs.estimator_model_dir, params)

    _, current_model = utils.get_latest_model(dirs.trained_models_dir)

    if FLAGS.evaluation:  # Perform evaluation if needed
      print('Evaluate models between {} and {}'.format(
          best_model_so_far, current_model))
      black_model = os.path.join(dirs.trained_models_dir, best_model_so_far)
      white_model = os.path.join(dirs.trained_models_dir, current_model)
      _ensure_dir_exists(dirs.evaluate_dir)
      with utils.logged_timer('Loading weights'):
        black_net = dualnet.DualNetRunner(black_model, params)
        white_net = dualnet.DualNetRunner(white_model, params)

      best_model_so_far = evaluate(
          best_model_so_far, black_net, current_model, white_net,
          dirs.evaluate_dir, params)
      print('Winner of evaluation: {}!'.format(best_model_so_far))
    else:
      best_model_so_far = current_model
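main() reads several command-line flags: FLAGS.base_dir, FLAGS.board_size, FLAGS.test, FLAGS.selfplay, FLAGS.selfplay_model_name, FLAGS.selfplay_max_games, FLAGS.validation, and FLAGS.evaluation. A hedged sketch of how they might be declared with tf.app.flags; only the flag names come from main() above, while the defaults and help strings are assumptions:

import tensorflow as tf

flags = tf.app.flags
FLAGS = flags.FLAGS

# Defaults and help strings are assumptions; only the flag names appear
# in main() above.
flags.DEFINE_string('base_dir', '/tmp/minigo/', 'Base directory for data.')
flags.DEFINE_integer('board_size', 9, 'Go board size, e.g. 9 or 19.')
flags.DEFINE_bool('test', False, 'Run with dummy parameters for testing.')
flags.DEFINE_bool('selfplay', False, 'Run selfplay only, then exit.')
flags.DEFINE_string('selfplay_model_name', None,
                    'Model to use for selfplay; defaults to the latest one.')
flags.DEFINE_integer('selfplay_max_games', None,
                     'Max selfplay games; defaults to the value in params.')
flags.DEFINE_bool('validation', False, 'Validate on the holdout game data.')
flags.DEFINE_bool('evaluation', False, 'Evaluate new models against the best.')

if __name__ == '__main__':
  tf.app.run(main)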
Example #5
def selfplay(model_name, trained_models_dir, selfplay_dir, holdout_dir,
             sgf_dir, params):
    """Perform selfplay with a specific model.

  Args:
    model_name: The name of the model used for selfplay.
    trained_models_dir: The path to the model files.
    selfplay_dir: Where to write the games. Set as 'base_dir/data/selfplay/'.
    holdout_dir: Where to write the holdout data. Set as
      'base_dir/data/holdout/'.
    sgf_dir: Where to write the sgf (Smart Game Format) files. Set as
      'base_dir/sgf/'.
    params: An object of hyperparameters for the model.
  """
    print('Playing a game with model {}'.format(model_name))
    # Set paths for the model with 'model_name'
    model_path = os.path.join(trained_models_dir, model_name)
    output_dir = os.path.join(selfplay_dir, model_name)
    holdout_dir = os.path.join(holdout_dir, model_name)
    # clean_sgf holds sgf files written without move comments.
    # full_sgf holds sgf files written with move comments.
    clean_sgf = os.path.join(sgf_dir, model_name, 'clean')
    full_sgf = os.path.join(sgf_dir, model_name, 'full')

    _ensure_dir_exists(output_dir)
    _ensure_dir_exists(holdout_dir)
    _ensure_dir_exists(clean_sgf)
    _ensure_dir_exists(full_sgf)

    with utils.logged_timer('Loading weights from {} ... '.format(model_path)):
        network = dualnet.DualNetRunner(model_path, params)

    with utils.logged_timer('Playing game'):
        player = selfplay_mcts.play(params.board_size, network,
                                    params.selfplay_readouts,
                                    params.selfplay_resign_threshold,
                                    params.simultaneous_leaves,
                                    params.selfplay_verbose)

    output_name = '{}-{}'.format(int(time.time()), socket.gethostname())

    def _write_sgf_data(dir_sgf, use_comments):
        with tf.gfile.GFile(
                os.path.join(dir_sgf, '{}.sgf'.format(output_name)), 'w') as f:
            f.write(player.to_sgf(use_comments=use_comments))

    _write_sgf_data(clean_sgf, use_comments=False)
    _write_sgf_data(full_sgf, use_comments=True)

    game_data = player.extract_data()
    tf_examples = preprocessing.make_dataset_from_selfplay(game_data, params)

    # Hold out a fraction (params.holdout_pct) of games for validation.
    if random.random() < params.holdout_pct:
        fname = os.path.join(holdout_dir,
                             ('{}' + _TF_RECORD_SUFFIX).format(output_name))
    else:
        fname = os.path.join(output_dir,
                             ('{}' + _TF_RECORD_SUFFIX).format(output_name))

    preprocessing.write_tf_examples(fname, tf_examples)
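selfplay also references a module-level _TF_RECORD_SUFFIX constant that is not shown on this page. A hedged sketch of the constant and of one selfplay invocation; the suffix value, paths, and model name are assumptions for illustration:

# Module-level constant referenced above; the exact value is an assumption
# (MiniGo-style pipelines typically use zlib-compressed TFRecords).
_TF_RECORD_SUFFIX = '.tfrecord.zz'

# Illustrative invocation; all paths and the model name are assumptions.
params = model_params.DummyMiniGoParams()
selfplay(
    model_name='000001-model',
    trained_models_dir='base_dir/models',
    selfplay_dir='base_dir/data/selfplay',
    holdout_dir='base_dir/data/holdout',
    sgf_dir='base_dir/sgf',
    params=params)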