Example #1
    def test_inference(self):
        with tempfile.TemporaryDirectory() as working_dir, \
                tempfile.TemporaryDirectory() as export_dir:
            # Bootstrap a model with random initial weights and export it.
            dualnet.bootstrap(working_dir, model_params.DummyMiniGoParams())
            exported_model = os.path.join(export_dir, 'bootstrap-model')
            dualnet.export_model(working_dir, exported_model)

            # Two independent runners should be able to load the same
            # exported model and run inference on an empty board position.
            n1 = dualnet.DualNetRunner(exported_model,
                                       model_params.DummyMiniGoParams())
            n1.run(go.Position(utils_test.BOARD_SIZE))

            n2 = dualnet.DualNetRunner(exported_model,
                                       model_params.DummyMiniGoParams())
            n2.run(go.Position(utils_test.BOARD_SIZE))
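
    # A minimal follow-up sketch (not part of the original test), reusing the
    # same imports and assuming DualNetRunner.run() returns a (policy, value)
    # pair as in MiniGo's dualnet module: one policy entry per board point
    # plus a pass move, and a value estimate in [-1, 1].
    def test_inference_outputs(self):
        with tempfile.TemporaryDirectory() as working_dir, \
                tempfile.TemporaryDirectory() as export_dir:
            dualnet.bootstrap(working_dir, model_params.DummyMiniGoParams())
            exported_model = os.path.join(export_dir, 'bootstrap-model')
            dualnet.export_model(working_dir, exported_model)

            runner = dualnet.DualNetRunner(exported_model,
                                           model_params.DummyMiniGoParams())
            probs, value = runner.run(go.Position(utils_test.BOARD_SIZE))
            self.assertEqual(
                len(probs),
                utils_test.BOARD_SIZE * utils_test.BOARD_SIZE + 1)
            self.assertTrue(-1.0 <= value <= 1.0)
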
Example #2
  def test_train(self):
    with tempfile.TemporaryDirectory() as working_dir, \
        tempfile.NamedTemporaryFile() as tf_record:
      # Build a training dataset from a sample SGF game, then train on it.
      preprocessing.make_dataset_from_sgf(
          utils_test.BOARD_SIZE, 'example_game.sgf', tf_record.name)
      dualnet.train(
          working_dir, [tf_record.name], 1, model_params.DummyMiniGoParams())

  def extract_data(self, tf_record, filter_amount=1):
    """Reads a tf_record back into (position, pi, value) tuples."""
    pos_tensor, label_tensors = preprocessing.get_input_tensors(
        model_params.DummyMiniGoParams(),
        1, [tf_record],
        num_repeats=1,
        shuffle_records=False,
        shuffle_examples=False,
        filter_amount=filter_amount)
    recovered_data = []
    with tf.Session() as sess:
      # Drain the input pipeline until it is exhausted.
      while True:
        try:
          pos_value, label_values = sess.run(
              [pos_tensor, label_tensors])
          recovered_data.append(
              (pos_value, label_values['pi_tensor'],
               label_values['value_tensor']))
        except tf.errors.OutOfRangeError:
          break
    return recovered_data
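
  # A minimal usage sketch (not part of the original snippet), assuming
  # extract_data above belongs to the same test case as test_train and that
  # the policy label has one entry per board point plus a pass move, as in
  # MiniGo. It only illustrates how the recovered tuples might be checked.
  def test_dataset_round_trip(self):
    with tempfile.NamedTemporaryFile() as tf_record:
      preprocessing.make_dataset_from_sgf(
          utils_test.BOARD_SIZE, 'example_game.sgf', tf_record.name)
      recovered = self.extract_data(tf_record.name)
      self.assertTrue(recovered)  # At least one (position, pi, value) tuple.
      for _, pi, value in recovered:
        self.assertEqual(
            pi.shape[-1],
            utils_test.BOARD_SIZE * utils_test.BOARD_SIZE + 1)
        self.assertLessEqual(abs(float(value.flatten()[0])), 1.0)
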
def main(_):
  """Run the reinforcement learning loop."""
  tf.logging.set_verbosity(tf.logging.INFO)

  params = _set_params(FLAGS)

  # Use dummy parameters for debugging/testing: fewer games and iterations.
  if FLAGS.test:
    params = model_params.DummyMiniGoParams()
    base_dir = FLAGS.base_dir + str(FLAGS.board_size) + '_size_dummy/'
  else:
    # Set directories for models and datasets
    base_dir = FLAGS.base_dir + str(FLAGS.board_size) + '_size/'

  dirs = utils.MiniGoDirectory(base_dir)

  # Run self-play only if the user passes the --selfplay flag.
  if FLAGS.selfplay:
    selfplay_model_name = FLAGS.selfplay_model_name or utils.get_latest_model(
        dirs.trained_models_dir)[1]
    max_games = FLAGS.selfplay_max_games or params.max_games_per_generation
    run_selfplay(selfplay_model_name, max_games, dirs, params)
    return

  # Run the RL pipeline.
  # If no model has been trained yet, start from the bootstrap model.

  if not os.path.isdir(dirs.trained_models_dir):
    print('No trained model exists! Starting from Bootstrap...')
    print('Creating random initial weights...')
    bootstrap(dirs.estimator_model_dir, dirs.trained_models_dir, params)
  else:
    print('A MiniGo base directory has been found! ')
    print('Start from the last checkpoint...')

  _, best_model_so_far = utils.get_latest_model(dirs.trained_models_dir)
  for rl_iter in range(params.max_iters_per_pipeline):
    print('RL_iteration: {}'.format(rl_iter))
    # Self-play with the best model to generate training data
    run_selfplay(
        best_model_so_far, params.max_games_per_generation, dirs, params)

    # Gather self-play data for training.
    print('Gathering game output...')
    gather(dirs.selfplay_dir, dirs.training_chunk_dir, params)

    # Train the next-generation model.
    model_num, _ = utils.get_latest_model(dirs.trained_models_dir)
    print('Training on gathered game data...')
    train(dirs.trained_models_dir, dirs.estimator_model_dir,
          dirs.training_chunk_dir, model_num + 1, params)

    # Validate the latest model if requested.
    if FLAGS.validation:
      print('Validating on the holdout game data...')
      validate(dirs.trained_models_dir, dirs.holdout_dir,
               dirs.estimator_model_dir, params)

    _, current_model = utils.get_latest_model(dirs.trained_models_dir)

    if FLAGS.evaluation:  # Perform evaluation if needed
      print('Evaluate models between {} and {}'.format(
          best_model_so_far, current_model))
      black_model = os.path.join(dirs.trained_models_dir, best_model_so_far)
      white_model = os.path.join(dirs.trained_models_dir, current_model)
      _ensure_dir_exists(dirs.evaluate_dir)
      with utils.logged_timer('Loading weights'):
        black_net = dualnet.DualNetRunner(black_model, params)
        white_net = dualnet.DualNetRunner(white_model, params)

      best_model_so_far = evaluate(
          best_model_so_far, black_net, current_model, white_net,
          dirs.evaluate_dir, params)
      print('Winner of evaluation: {}!'.format(best_model_so_far))
    else:
      best_model_so_far = current_model
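
# A minimal sketch (an assumption, not the original file) of how this main()
# is typically wired up as a TF 1.x entry point. The flag names below only
# mirror the FLAGS.* attributes referenced above; defaults and help strings
# are illustrative.
flags = tf.app.flags
flags.DEFINE_string('base_dir', '/tmp/minigo/', 'Base directory for runs.')
flags.DEFINE_integer('board_size', 9, 'Go board size.')
flags.DEFINE_boolean('test', False, 'Use dummy debugging/testing parameters.')
flags.DEFINE_boolean('selfplay', False, 'Run self-play only, then exit.')
flags.DEFINE_string('selfplay_model_name', None, 'Model to self-play with.')
flags.DEFINE_integer('selfplay_max_games', None,
                     'Self-play games to generate.')
flags.DEFINE_boolean('validation', False, 'Validate on holdout data.')
flags.DEFINE_boolean('evaluation', False,
                     'Evaluate the new model against the best so far.')
FLAGS = flags.FLAGS

if __name__ == '__main__':
  tf.app.run(main)
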
Example #5
def main(_):
    """Run the reinforcement learning loop."""
    tf.logging.set_verbosity(tf.logging.INFO)

    params = _set_params_from_board_size(FLAGS.board_size)

    # Use dummy parameters for debugging/testing: fewer games and iterations.
    if FLAGS.debug:
        params = model_params.DummyMiniGoParams()

    # Set directories for models and datasets
    base_dir = FLAGS.base_dir + str(FLAGS.board_size) + '_board_size/'
    dirs = utils.MiniGoDirectory(base_dir)
    # If no model has been trained yet, start from the bootstrap model.
    if not os.path.isdir(base_dir):
        print('No trained model exists! Starting from Bootstrap...')
        print('Creating random initial weights...')
        bootstrap(dirs.estimator_model_dir, dirs.trained_models_dir, params)
    else:
        print('A MiniGo base directory has been found! ')
        print('Start from the last checkpoint...')

    _, best_model_so_far = utils.get_latest_model(dirs.trained_models_dir)

    for rl_iter in range(params.max_iters_per_pipeline):
        print('RL_iteration: {}'.format(rl_iter))

        # Self-play to generate at least params.max_games_per_generation games
        selfplay(best_model_so_far, dirs.trained_models_dir, dirs.selfplay_dir,
                 dirs.holdout_dir, dirs.sgf_dir, params)
        games = tf.gfile.Glob(
            os.path.join(dirs.selfplay_dir, best_model_so_far, '*.zz'))
        # Keep generating games until this generation has enough self-play
        # games; with --validation, also run self-play with validation params.
        while len(games) < params.max_games_per_generation:
            selfplay(best_model_so_far, dirs.trained_models_dir,
                     dirs.selfplay_dir, dirs.holdout_dir, dirs.sgf_dir, params)
            if FLAGS.validation:
                params = model_params.DummyValidationParams()
                selfplay(best_model_so_far, dirs.trained_models_dir,
                         dirs.selfplay_dir, dirs.holdout_dir, dirs.sgf_dir,
                         params)
            games = tf.gfile.Glob(
                os.path.join(dirs.selfplay_dir, best_model_so_far, '*.zz'))

        print('Gathering game output...')
        gather(dirs.selfplay_dir, dirs.training_chunk_dir, params)

        print('Training on gathered game data...')
        train(dirs.trained_models_dir, dirs.estimator_model_dir,
              dirs.training_chunk_dir, params)

        if FLAGS.validation:
            print('Validating on the holdout game data...')
            validate(dirs.trained_models_dir, dirs.holdout_dir,
                     dirs.estimator_model_dir, params)

        _, current_model = utils.get_latest_model(dirs.trained_models_dir)
        if FLAGS.evaluation:  # Perform evaluation if needed
            print('Evaluating the latest model...')
            best_model_so_far = evaluate(dirs.trained_models_dir,
                                         best_model_so_far, current_model,
                                         dirs.evaluate_dir, params)
            print('Winner: {}!'.format(best_model_so_far))
        else:
            best_model_so_far = current_model