Example #1
    def action(self):
        """计算落子并返回坐标"""

        # 1. Simulate complete-information board positions.
        with utils.logged_timer("simulation"):
            self.board_sims = []
            self.simOppLatest()

        print('num_sims: ', len(self.board_sims))

        #print('one of sim:\n',self.board_sims[-1])

        if len(self.board_sims) == 0:
            # If simulating the opponent's board failed, feed only our own stones.
            tmpGo = Position(n=9, board=self.board_selfNow, to_play=self.color)
            self.board_sims.append(tmpGo)

        # 2. Compute the total score of every feasible position.
        with utils.logged_timer("calculation"):
            pbs, vs = self.scoreNet.run_many(self.board_sims)
            scoreBoard = np.sum(pbs, axis=0)

        # Zero the scores at our own stones' positions.
        selfPlaces = np.transpose(np.nonzero(self.board_selfNow))
        for sp in selfPlaces:
            scoreBoard[sp[0] * 9 + sp[1]] = 0

        # Never play in our own internal liberties.
        board_innerQi = self.findInnerQi()
        scoreBoard.flat[[
            i for (i, x) in enumerate(board_innerQi.flat) if x == 1
        ]] = 0

        # Zero the scores at illegal positions.
        scoreBoard.flat[[
            i for (i, x) in enumerate(self.illegalBoard.flat) if x == 1
        ]] = 0

        # Never pass proactively: drop the pass entry from the score vector.
        scoreBoard = scoreBoard[:81]

        #print('scoreBoard:\n',scoreBoard)

        # Pass when no position scores above zero.
        if scoreBoard.sum() == 0:
            action = [-1, -1]
            self.tryAction = action
        else:
            flatMaxIdx = np.argmax(scoreBoard)
            action = [int(flatMaxIdx / 9), int(flatMaxIdx % 9)]
            self.tryAction = action

        with closing(shelve.open('buffer', 'c')) as shelf:
            shelf['color'] = self.color
            shelf['board_selfNow'] = self.board_selfNow
            shelf['board_opp_known'] = self.board_opp_known
            shelf['num_oppStones'] = self.num_oppStones

        return action
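Every example on this page wraps a unit of work in utils.logged_timer. The helper itself is not shown; a minimal sketch of a logged_timer-style context manager (an assumption; the real utils.logged_timer may format its output differently):

import time
from contextlib import contextmanager

@contextmanager
def logged_timer(message):
    """Print how long the wrapped block took, labeled with `message`."""
    start = time.time()
    try:
        yield
    finally:
        print('{}: {:.3f} seconds'.format(message, time.time() - start))

# Usage mirrors the examples on this page:
# with logged_timer('simulation'):
#     do_work()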
Example #2
def validate(
        *tf_record_dirs: 'Directories where holdout data are',
        validate_name: 'Name for validation set (i.e., selfplay or human)'=None):
    tf_records = []
    with utils.logged_timer("Building lists of holdout files"):
        for record_dir in tf_record_dirs:
            tf_records.extend(gfile.Glob(os.path.join(record_dir, '*.zz')))

    first_record = os.path.basename(tf_records[0])
    last_record = os.path.basename(tf_records[-1])
    with utils.logged_timer("Validating from {} to {}".format(first_record, last_record)):
        dual_net.validate(tf_records, validate_name=validate_name)
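The string annotations on validate's parameters read like command-line help text, which suggests dispatch through an annotation-aware CLI library; argh is one such library, though that mapping is an assumption here. A hypothetical dispatch:

import argh

if __name__ == '__main__':
    # Hypothetical: expose validate() on the command line, using the
    # parameter annotations as help strings.
    argh.dispatch_command(validate)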
Example #3
def evaluate(
        black_model: 'The path to the model to play black',
        white_model: 'The path to the model to play white',
        output_dir: 'Where to write the evaluation results' = 'sgf/evaluate',
        games: 'the number of games to play' = 16,
        verbose: 'How verbose the players should be (see selfplay)' = 1):
    utils.ensure_dir_exists(output_dir)

    with utils.logged_timer("Loading weights"):
        black_net = dual_net.DualNetwork(black_model)
        white_net = dual_net.DualNetwork(white_model)

    with utils.logged_timer("Playing game"):
        evaluation.play_match(black_net, white_net, games, output_dir, verbose)
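A hypothetical direct call to the evaluate() above; both model paths are placeholders:

evaluate('models/000100-black',   # placeholder path to the black model
         'models/000101-white',   # placeholder path to the white model
         output_dir='sgf/evaluate',
         games=16,
         verbose=1)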
Example #4
def main(unused_argv):
    """Run the reinforcement learning loop."""

    print('Wiping dir %s' % FLAGS.base_dir, flush=True)
    shutil.rmtree(FLAGS.base_dir, ignore_errors=True)

    utils.ensure_dir_exists(fsdb.models_dir())
    utils.ensure_dir_exists(fsdb.selfplay_dir())
    utils.ensure_dir_exists(fsdb.holdout_dir())
    utils.ensure_dir_exists(fsdb.eval_dir())
    utils.ensure_dir_exists(fsdb.golden_chunk_dir())
    utils.ensure_dir_exists(fsdb.working_dir())

    # Copy the flag files so there's no chance of them getting accidentally
    # overwritten while the RL loop is running.
    flags_dir = os.path.join(FLAGS.base_dir, 'flags')
    shutil.copytree(FLAGS.flags_dir, flags_dir)
    FLAGS.flags_dir = flags_dir

    # Copy the target model to the models directory so we can find it easily.
    shutil.copy('ml_perf/target.pb', fsdb.models_dir())

    logging.getLogger().addHandler(
        logging.FileHandler(os.path.join(FLAGS.base_dir, 'rl_loop.log')))
    formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                  '%Y-%m-%d %H:%M:%S')
    for handler in logging.getLogger().handlers:
        handler.setFormatter(formatter)

    with utils.logged_timer('Total time'):
        try:
            rl_loop()
        finally:
            asyncio.get_event_loop().close()
Example #5
def main(argv):
    """Train on examples and export the updated model weights."""
    tf_records = argv[1:]
    logging.info("Training on %s records: %s to %s", len(tf_records),
                 tf_records[0], tf_records[-1])

    if FLAGS.dist_train:
        hvd.init()

    mllogger = mllog.get_mllogger()
    mllog.config(filename="train.log")

    mllog.config(default_namespace="worker1",
                 default_stack_offset=1,
                 default_clear_line=False)

    with utils.logged_timer("Training"):
        train(*tf_records)
    if (not FLAGS.dist_train) or hvd.rank() == 0:
        if FLAGS.export_path:
            dual_net.export_model(FLAGS.export_path)
            epoch = int(os.path.basename(FLAGS.export_path))
            mllogger.event(key="save_model", value={"Iteration": epoch})
        if FLAGS.freeze:
            dual_net.freeze_graph(FLAGS.export_path, FLAGS.use_trt,
                                  FLAGS.trt_max_batch_size,
                                  FLAGS.trt_precision,
                                  FLAGS.selfplay_precision)
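Only one worker should export when distributed training is enabled, hence the `(not FLAGS.dist_train) or hvd.rank() == 0` guard. A minimal sketch of the rank-0 gating pattern, assuming Horovod's standard init/rank API:

import horovod.tensorflow as hvd

hvd.init()
if hvd.rank() == 0:
    # Only rank 0 exports, so parallel workers don't clobber each other.
    print('rank 0: exporting model')  # placeholder for dual_net.export_model(...)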
Example #6
def validate(*tf_records):
    """Validate a model's performance on a set of holdout data."""
    if FLAGS.use_tpu:

        def _input_fn(params):
            return preprocessing.get_tpu_input_tensors(
                params['train_batch_size'],
                params['input_layout'],
                tf_records,
                filter_amount=1.0)
    else:

        def _input_fn():
            return preprocessing.get_input_tensors(FLAGS.train_batch_size,
                                                   FLAGS.input_layout,
                                                   tf_records,
                                                   filter_amount=1.0,
                                                   shuffle_examples=False)

    steps = FLAGS.examples_to_validate // FLAGS.train_batch_size
    if FLAGS.use_tpu:
        steps //= FLAGS.num_tpu_cores

    estimator = dual_net.get_estimator()
    with utils.logged_timer("Validating"):
        estimator.evaluate(_input_fn, steps=steps, name=FLAGS.validate_name)
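The step count divides the validation example budget by the batch size, and again by the number of TPU cores when running on TPU. A worked sketch with hypothetical values:

examples_to_validate = 1000000  # hypothetical value
train_batch_size = 256          # hypothetical value
num_tpu_cores = 8               # hypothetical value

steps = examples_to_validate // train_batch_size  # 3906
steps //= num_tpu_cores                           # 488 on TPU
print(steps)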
Example #7
def validate(
        working_dir: 'tf.estimator working directory',
        *tf_record_dirs: 'Directories where holdout data are',
        checkpoint_name: 'Which checkpoint to evaluate (None=latest)'=None,
        validate_name: 'Name for validation set (i.e., selfplay or human)'=None):
    tf_records = []
    with utils.logged_timer("Building lists of holdout files"):
        for record_dir in tf_record_dirs:
            tf_records.extend(gfile.Glob(os.path.join(record_dir, '*.zz')))

    first_record = os.path.basename(tf_records[0])
    last_record = os.path.basename(tf_records[-1])
    with utils.logged_timer("Validating from {} to {}".format(first_record, last_record)):
        dual_net.validate(
            working_dir, tf_records, checkpoint_name=checkpoint_name,
            validate_name=validate_name)
Example #8
def main(unused_argv):
    """Run the reinforcement learning loop."""

    print('Wiping dir %s' % FLAGS.base_dir, flush=True)
    shutil.rmtree(FLAGS.base_dir, ignore_errors=True)

    utils.ensure_dir_exists(fsdb.models_dir())
    utils.ensure_dir_exists(fsdb.selfplay_dir())
    utils.ensure_dir_exists(fsdb.holdout_dir())
    utils.ensure_dir_exists(fsdb.eval_dir())
    utils.ensure_dir_exists(fsdb.golden_chunk_dir())
    utils.ensure_dir_exists(fsdb.working_dir())

    # Copy the target model to the models directory so we can find it easily.
    shutil.copy('ml_perf/target.pb', fsdb.models_dir())

    logging.getLogger().addHandler(
        logging.FileHandler(os.path.join(FLAGS.base_dir, 'reinforcement.log')))
    formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                  '%Y-%m-%d %H:%M:%S')
    for handler in logging.getLogger().handlers:
        handler.setFormatter(formatter)

    with utils.logged_timer('Total time'):
        for target_win_rate in rl_loop():
            if target_win_rate > 0.5:
                return logging.info('Passed exit criteria.')
        logging.info('Failed to converge.')
Example #9
def train(trained_models_dir, estimator_model_dir, training_chunk_dir, params):
    """Train the latest model from gathered data.

  Args:
    trained_models_dir: Where to export the completed generation.
    estimator_model_dir: tf.estimator model directory.
    training_chunk_dir: Directory where gathered training chunks are.
    params: An object of hyperparameters for the model.
  """
    model_num, model_name = utils.get_latest_model(trained_models_dir)
    print('Initializing from model {}'.format(model_name))

    new_model_name = utils.generate_model_name(model_num + 1)
    print('New model will be {}'.format(new_model_name))
    save_file = os.path.join(trained_models_dir, new_model_name)

    tf_records = sorted(
        tf.gfile.Glob(os.path.join(training_chunk_dir,
                                   '*' + _TF_RECORD_SUFFIX)))
    tf_records = tf_records[-(params.train_window_size //
                              params.examples_per_chunk):]

    print('Training from: {} to {}'.format(tf_records[0], tf_records[-1]))
    with utils.logged_timer('Training'):
        dualnet.train(estimator_model_dir, tf_records, model_num + 1, params)
        dualnet.export_model(estimator_model_dir, save_file)
Example #10
def train(trained_models_dir, estimator_model_dir, training_chunk_dir,
          generation, params):
  """Train the latest model from gathered data.

  Args:
    trained_models_dir: Where to export the completed generation.
    estimator_model_dir: tf.estimator model directory.
    training_chunk_dir: Directory where gathered training chunks are.
    generation: Which generation you are training.
    params: A MiniGoParams instance of hyperparameters for the model.
  """
  new_model_name = utils.generate_model_name(generation)
  print('New model will be {}'.format(new_model_name))
  new_model = os.path.join(trained_models_dir, new_model_name)

  print('Training on gathered game data...')
  tf_records = sorted(
      tf.gfile.Glob(os.path.join(training_chunk_dir, '*'+_TF_RECORD_SUFFIX)))
  tf_records = tf_records[
      -(params.train_window_size // params.examples_per_chunk):]

  print('Training from: {} to {}'.format(tf_records[0], tf_records[-1]))
  with utils.logged_timer('Training'):
    dualnet.train(estimator_model_dir, tf_records, generation, params)
    dualnet.export_model(estimator_model_dir, new_model)
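Both train() variants slice the sorted chunk list down to a recent window: -(params.train_window_size // params.examples_per_chunk) keeps just enough chunks to cover roughly train_window_size examples. A worked sketch with hypothetical numbers:

train_window_size = 500000   # hypothetical value
examples_per_chunk = 10000   # hypothetical value

chunks = ['chunk-{:03d}.tfrecord.zz'.format(i) for i in range(100)]
window = chunks[-(train_window_size // examples_per_chunk):]
assert len(window) == 50     # the 50 most recent chunks survive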
Example #11
def load_player(model_path):
    print("Loading weights from %s ... " % model_path)
    with logged_timer("Loading weights from %s ... " % model_path):
        network = dual_net.DualNetwork(model_path)
        network.name = os.path.basename(model_path)
    player = MCTSPlayer(network, verbosity=2)
    return player
Example #12
def evaluate(black_model_name, black_net, white_model_name, white_net,
             evaluate_dir, params):
  """Evaluate with two models.

  Uses two DualNetRunners to play as black and white in a Go match. The two
  models play several games, and the model that wins by a margin of at least
  55% is declared the winner.

  Args:
    black_model_name: The name of the model playing black.
    black_net: The DualNetRunner model for black.
    white_model_name: The name of the model playing white.
    white_net: The DualNetRunner model for white.
    evaluate_dir: Where to write the evaluation results. Set as
      'base_dir/sgf/evaluate/'.
    params: A MiniGoParams instance of hyperparameters for the model.

  Returns:
    The model name of the winner.

  Raises:
      ValueError: if neither `WHITE` nor `BLACK` is returned.
  """
  with utils.logged_timer('{} games'.format(params.eval_games)):
    winner = evaluation.play_match(
        params, black_net, white_net, params.eval_games,
        params.eval_readouts, evaluate_dir, params.eval_verbose)

  if winner != go.WHITE_NAME and winner != go.BLACK_NAME:
    raise ValueError('Winner should be either White or Black!')

  return black_model_name if winner == go.BLACK_NAME else white_model_name
Example #13
def evaluate(black_model_name, black_net, white_model_name, white_net,
             evaluate_dir, params):
  """Evaluate with two models.

  Uses two DualNetRunners to play as black and white in a Go match. The two
  models play several games, and the model that wins by a margin of at least
  55% is declared the winner.

  Args:
    black_model_name: The name of the model playing black.
    black_net: The DualNetRunner model for black.
    white_model_name: The name of the model playing white.
    white_net: The DualNetRunner model for white.
    evaluate_dir: Where to write the evaluation results. Set as
      'base_dir/sgf/evaluate/'.
    params: A MiniGoParams instance of hyperparameters for the model.

  Returns:
    The model name of the winner.

  Raises:
      ValueError: if neither `WHITE` nor `BLACK` is returned.
  """
  with utils.logged_timer('{} games'.format(params.eval_games)):
    winner = evaluation.play_match(
        params, black_net, white_net, params.eval_games,
        params.eval_readouts, evaluate_dir, params.eval_verbose)

  if winner != go.WHITE_NAME and winner != go.BLACK_NAME:
    raise ValueError('Winner should be either White or Black!')

  return black_model_name if winner == go.BLACK_NAME else white_model_name
Example #14
def train(trained_models_dir, estimator_model_dir, training_chunk_dir,
          generation, params):
  """Train the latest model from gathered data.

  Args:
    trained_models_dir: Where to export the completed generation.
    estimator_model_dir: tf.estimator model directory.
    training_chunk_dir: Directory where gathered training chunks are.
    generation: Which generation you are training.
    params: A MiniGoParams instance of hyperparameters for the model.
  """
  new_model_name = utils.generate_model_name(generation)
  print('New model will be {}'.format(new_model_name))
  new_model = os.path.join(trained_models_dir, new_model_name)

  print('Training on gathered game data...')
  tf_records = sorted(
      tf.gfile.Glob(os.path.join(training_chunk_dir, '*'+_TF_RECORD_SUFFIX)))
  tf_records = tf_records[
      -(params.train_window_size // params.examples_per_chunk):]

  print('Training from: {} to {}'.format(tf_records[0], tf_records[-1]))
  with utils.logged_timer('Training'):
    dualnet.train(estimator_model_dir, tf_records, generation, params)
    dualnet.export_model(estimator_model_dir, new_model)
Example #15
def train(tf_records: 'list of files of tf_records to train on',
          model_save_path: 'Where to export the completed generation.'):
    print("Training on:", tf_records[0], "to", tf_records[-1])
    with utils.logged_timer("Training"):
        dual_net.train(*tf_records)
    print("== Training done.  Exporting model to ", model_save_path)
    dual_net.export_model(flags.FLAGS.model_dir, model_save_path)
    freeze_graph(model_save_path)
Example #16
def run_game(load_file,
             selfplay_dir=None,
             holdout_dir=None,
             sgf_dir=None,
             holdout_pct=0.05):
    '''Plays a game and records the results and game data.'''
    if sgf_dir is not None:
        minimal_sgf_dir = os.path.join(sgf_dir, 'clean')
        full_sgf_dir = os.path.join(sgf_dir, 'full')
        utils.ensure_dir_exists(minimal_sgf_dir)
        utils.ensure_dir_exists(full_sgf_dir)
    if selfplay_dir is not None:
        utils.ensure_dir_exists(selfplay_dir)
        utils.ensure_dir_exists(holdout_dir)

    with utils.logged_timer("Loading weights from %s ... " % load_file):
        network = dual_net.DualNetwork(load_file)

    with utils.logged_timer("Playing game"):
        player = play(network)

    output_name = '{}-{}'.format(int(time.time()), socket.gethostname())
    game_data = player.extract_data()
    if sgf_dir is not None:
        with gfile.GFile(
                os.path.join(minimal_sgf_dir, '{}.sgf'.format(output_name)),
                'w') as f:
            f.write(player.to_sgf(use_comments=False))
        with gfile.GFile(
                os.path.join(full_sgf_dir, '{}.sgf'.format(output_name)),
                'w') as f:
            f.write(player.to_sgf())

    tf_examples = preprocessing.make_dataset_from_selfplay(game_data)

    if selfplay_dir is not None:
        # Hold out 5% of games for validation.
        if random.random() < holdout_pct:
            fname = os.path.join(holdout_dir,
                                 "{}.tfrecord.zz".format(output_name))
        else:
            fname = os.path.join(selfplay_dir,
                                 "{}.tfrecord.zz".format(output_name))

        preprocessing.write_tf_examples(fname, tf_examples)
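The holdout split is a Bernoulli draw per game: each finished game goes to holdout_dir with probability holdout_pct, otherwise to selfplay_dir. The same routing as a self-contained helper (a sketch, not the repository's code):

import os
import random

def routed_output_path(output_name, selfplay_dir, holdout_dir,
                       holdout_pct=0.05):
    """Send roughly holdout_pct of games to the holdout directory."""
    target = holdout_dir if random.random() < holdout_pct else selfplay_dir
    return os.path.join(target, '{}.tfrecord.zz'.format(output_name))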
Example #17
def train(working_dir: 'tf.estimator working directory.',
          tf_records: 'list of files of tf_records to train on',
          model_save_path: 'Where to export the completed generation.'):
    print("Training on:", tf_records[0], "to", tf_records[-1])
    with utils.logged_timer("Training"):
        dual_net.train(working_dir, tf_records)
    print("== Training done.  Exporting model to ", model_save_path)
    dual_net.export_model(working_dir, model_save_path)
    freeze_graph(model_save_path)
Example #18
def main(argv):
    """Train on examples and export the updated model weights."""
    tf_records = argv[1:]
    logging.info("Training on %s records: %s to %s",
                 len(tf_records), tf_records[0], tf_records[-1])
    with utils.logged_timer("Training"):
        train(*tf_records)
    if FLAGS.export_path:
        dual_net.export_model(FLAGS.export_path)
Example #19
def gather(selfplay_dir, training_chunk_dir, params):
    """Gather selfplay data into large training chunk.

  Args:
    selfplay_dir: Where to look for games. Set as 'base_dir/data/selfplay/'.
    training_chunk_dir: where to put collected games. Set as
      'base_dir/data/training_chunks/'.
    params: An object of hyperparameters for the model.
  """
    # Check the selfplay data from the most recent 50 models.
    _ensure_dir_exists(training_chunk_dir)
    sorted_model_dirs = sorted(tf.gfile.ListDirectory(selfplay_dir))
    models = [
        model_dir.strip('/')
        for model_dir in sorted_model_dirs[-params.gather_generation:]
    ]

    with utils.logged_timer('Finding existing tfrecords...'):
        model_gamedata = {
            model: tf.gfile.Glob(
                os.path.join(selfplay_dir, model, '*' + _TF_RECORD_SUFFIX))
            for model in models
        }
    print('Found {} models'.format(len(models)))
    for model_name, record_files in sorted(model_gamedata.items()):
        print('    {}: {} files'.format(model_name, len(record_files)))

    meta_file = os.path.join(training_chunk_dir, 'meta.txt')
    try:
        with tf.gfile.GFile(meta_file, 'r') as f:
            already_processed = set(f.read().split())
    except tf.errors.NotFoundError:
        already_processed = set()

    num_already_processed = len(already_processed)

    for model_name, record_files in sorted(model_gamedata.items()):
        if set(record_files) <= already_processed:
            continue
        print('Gathering files from {}:'.format(model_name))
        tf_examples = preprocessing.shuffle_tf_examples(
            params.shuffle_buffer_size, params.examples_per_chunk,
            record_files)
        # tqdm to make the loops show a smart progress meter
        for i, example_batch in enumerate(tf_examples):
            output_record = os.path.join(training_chunk_dir,
                                         ('{}-{}' + _TF_RECORD_SUFFIX).format(
                                             model_name, str(i)))
            preprocessing.write_tf_examples(output_record,
                                            example_batch,
                                            serialize=False)
        already_processed.update(record_files)

    print('Processed {} new files'.format(
        len(already_processed) - num_already_processed))
    with tf.gfile.GFile(meta_file, 'w') as f:
        f.write('\n'.join(sorted(already_processed)))
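gather() deduplicates across runs with a plain-text manifest (meta.txt) listing every record file it has already chunked, so a rerun only processes new selfplay output. The same bookkeeping with the standard library in place of tf.gfile (a sketch under that substitution):

def read_processed(meta_file):
    """Return the set of already-gathered record paths (empty on first run)."""
    try:
        with open(meta_file) as f:
            return set(f.read().split())
    except FileNotFoundError:
        return set()

def write_processed(meta_file, processed):
    """Persist the manifest, one record path per line."""
    with open(meta_file, 'w') as f:
        f.write('\n'.join(sorted(processed)))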
Example #20
def evaluate(trained_models_dir, black_model_name, white_model_name,
             evaluate_dir, params):
    """Evaluate with two models.

  With the model names, construct two DualNetRunners to play as black and white
  in a Go match. The two models play several games, and the model that wins by
  a margin of at least 55% is declared the winner.

  Args:
    trained_models_dir: Directory where the completed generations/models are.
    black_model_name: The name of the model playing black.
    white_model_name: The name of the model playing white.
    evaluate_dir: Where to write the evaluation results. Set as
      'base_dir/sgf/evaluate/'.
    params: An object of hyperparameters for the model.

  Returns:
    The model name of the winner.

  Raises:
      ValueError: if neither `WHITE` nor `BLACK` is returned.
  """

    black_model = os.path.join(trained_models_dir, black_model_name)
    white_model = os.path.join(trained_models_dir, white_model_name)

    print('Evaluate models between {} and {}'.format(black_model_name,
                                                     white_model_name))

    _ensure_dir_exists(evaluate_dir)

    with utils.logged_timer('Loading weights'):
        black_net = dualnet.DualNetRunner(black_model, params)
        white_net = dualnet.DualNetRunner(white_model, params)

    with utils.logged_timer('{} games'.format(params.eval_games)):
        winner = evaluation.play_match(params, black_net, white_net,
                                       params.eval_games, params.eval_readouts,
                                       evaluate_dir, params.eval_verbose)

    if winner != go.WHITE_NAME and winner != go.BLACK_NAME:
        raise ValueError('Winner should be either White or Black!')

    return black_model_name if winner == go.BLACK_NAME else white_model_name
Example #21
def train(working_dir: 'tf.estimator working directory.',
          tf_records: 'list of files of tf_records to train on',
          model_save_path: 'Where to export the completed generation.',
          generation_num: 'Which generation you are training.' = 0):
    print("Training on:", tf_records[0], "to", tf_records[-1])
    with utils.logged_timer("Training"):
        dual_net.train(working_dir, tf_records, generation_num)
    print("Saving to", model_save_path)
    dual_net.export_model(working_dir, model_save_path)
    freeze_graph(model_save_path)
Example #22
def checked_run(cmd, name):
  logging.info('Running %s:\n  %s', name, '\n  '.join(cmd))
  with utils.logged_timer('%s finished' % name.capitalize()):
    completed_process = subprocess.run(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    if completed_process.returncode:
      logging.error('Error running %s: %s', name,
                    completed_process.stdout.decode())
      raise RuntimeError('Non-zero return code executing %s' % ' '.join(cmd))
  return completed_process
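A hypothetical call to this checked_run; the command is a placeholder:

proc = checked_run(
    ['python', 'train.py', '--flagfile=flags/train.flags'],  # placeholder
    'training')
print(proc.stdout.decode())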
Example #23
def validate(trained_models_dir, holdout_dir, estimator_model_dir, params):
    """Validate the latest model on the holdout dataset.

  Args:
    trained_models_dir: Directory where the completed generations/models are.
    holdout_dir: Directory where holdout data are.
    estimator_model_dir: tf.estimator model directory.
    params: A MiniGoParams instance of hyperparameters for the model.
  """
    model_num, _ = utils.get_latest_model(trained_models_dir)

    # Get the holdout game data
    nums_names = utils.get_models(trained_models_dir)

    # Model N was trained on games up through model N-1, so the validation set
    # should only be for models through N-1 as well, thus the (model_num) term.
    models = [num_name for num_name in nums_names if num_name[0] < model_num]

    # pair is a tuple of (model_num, model_name), like (13, 000013-modelname)
    holdout_dirs = [
        os.path.join(holdout_dir, pair[1])
        for pair in models[-params.holdout_generation:]
    ]
    tf_records = []
    with utils.logged_timer('Building lists of holdout files'):
        for record_dir in holdout_dirs:
            if os.path.exists(record_dir):  # make sure holdout dir exists
                tf_records.extend(
                    tf.gfile.Glob(
                        os.path.join(record_dir, '*' + _TF_RECORD_SUFFIX)))

    if not tf_records:
        print('No holdout dataset for validation! '
              'Please check your holdout directory: {}'.format(holdout_dir))
        return

    print('The length of tf_records is {}.'.format(len(tf_records)))
    first_tf_record = os.path.basename(tf_records[0])
    last_tf_record = os.path.basename(tf_records[-1])
    with utils.logged_timer('Validating from {} to {}'.format(
            first_tf_record, last_tf_record)):
        dualnet.validate(estimator_model_dir, tf_records, params)
Example #24
def gather(selfplay_dir, training_chunk_dir, params):
  """Gather selfplay data into large training chunk.

  Args:
    selfplay_dir: Where to look for games. Set as 'base_dir/data/selfplay/'.
    training_chunk_dir: where to put collected games. Set as
      'base_dir/data/training_chunks/'.
    params: A MiniGoParams instance of hyperparameters for the model.
  """
  # Check the selfplay data from the most recent 50 models.
  _ensure_dir_exists(training_chunk_dir)
  sorted_model_dirs = sorted(tf.gfile.ListDirectory(selfplay_dir))
  models = [model_dir.strip('/')
            for model_dir in sorted_model_dirs[-params.gather_generation:]]

  with utils.logged_timer('Finding existing tfrecords...'):
    model_gamedata = {
        model: tf.gfile.Glob(
            os.path.join(selfplay_dir, model, '*'+_TF_RECORD_SUFFIX))
        for model in models
    }
  print('Found {} models'.format(len(models)))
  for model_name, record_files in sorted(model_gamedata.items()):
    print('    {}: {} files'.format(model_name, len(record_files)))

  meta_file = os.path.join(training_chunk_dir, 'meta.txt')
  try:
    with tf.gfile.GFile(meta_file, 'r') as f:
      already_processed = set(f.read().split())
  except tf.errors.NotFoundError:
    already_processed = set()

  num_already_processed = len(already_processed)

  for model_name, record_files in sorted(model_gamedata.items()):
    if set(record_files) <= already_processed:
      continue
    print('Gathering files from {}:'.format(model_name))
    tf_examples = preprocessing.shuffle_tf_examples(
        params.shuffle_buffer_size, params.examples_per_chunk, record_files)
    # tqdm to make the loops show a smart progress meter
    for i, example_batch in enumerate(tf_examples):
      output_record = os.path.join(
          training_chunk_dir,
          ('{}-{}'+_TF_RECORD_SUFFIX).format(model_name, str(i)))
      preprocessing.write_tf_examples(
          output_record, example_batch, serialize=False)
    already_processed.update(record_files)

  print('Processed {} new files'.format(
      len(already_processed) - num_already_processed))
  with tf.gfile.GFile(meta_file, 'w') as f:
    f.write('\n'.join(sorted(already_processed)))
Example #25
def validate(trained_models_dir, holdout_dir, estimator_model_dir, params):
  """Validate the latest model on the holdout dataset.

  Args:
    trained_models_dir: Directory where the completed generations/models are.
    holdout_dir: Directory where holdout data are.
    estimator_model_dir: tf.estimator model directory.
    params: A MiniGoParams instance of hyperparameters for the model.
  """
  model_num, _ = utils.get_latest_model(trained_models_dir)

  # Get the holdout game data
  nums_names = utils.get_models(trained_models_dir)

  # Model N was trained on games up through model N-1, so the validation set
  # should only be for models through N-1 as well, thus the (model_num) term.
  models = [num_name for num_name in nums_names if num_name[0] < model_num]

  # pair is a tuple of (model_num, model_name), like (13, 000013-modelname)
  holdout_dirs = [os.path.join(holdout_dir, pair[1])
                  for pair in models[-params.holdout_generation:]]
  tf_records = []
  with utils.logged_timer('Building lists of holdout files'):
    for record_dir in holdout_dirs:
      if os.path.exists(record_dir):  # make sure holdout dir exists
        tf_records.extend(
            tf.gfile.Glob(os.path.join(record_dir, '*'+_TF_RECORD_SUFFIX)))

  if not tf_records:
    print('No holdout dataset for validation! '
          'Please check your holdout directory: {}'.format(holdout_dir))
    return

  print('The length of tf_records is {}.'.format(len(tf_records)))
  first_tf_record = os.path.basename(tf_records[0])
  last_tf_record = os.path.basename(tf_records[-1])
  with utils.logged_timer('Validating from {} to {}'.format(
      first_tf_record, last_tf_record)):
    dualnet.validate(estimator_model_dir, tf_records, params)
Example #26
def main(argv):
    """Validate a model's performance on a set of holdout data."""
    _, *validation_paths = argv
    if FLAGS.expand_validation_dirs:
        tf_records = []
        with utils.logged_timer("Building lists of holdout files"):
            for record_dir in validation_paths:
                tf_records.extend(gfile.Glob(os.path.join(record_dir, '*.zz')))
    else:
        tf_records = validation_paths

    if not tf_records:
        raise RuntimeError("Did not find any holdout files for validating!")
    validate(*tf_records)
Example #27
def selfplay(load_file: "The path to the network model files",
             output_dir: "Where to write the games" = "data/selfplay",
             holdout_dir: "Where to write the games" = "data/holdout",
             output_sgf: "Where to write the sgfs" = "sgf/",
             verbose: '>=2 will print debug info, >=3 will print boards' = 1,
             holdout_pct: 'how many games to hold out for validation' = 0.05):
    clean_sgf = os.path.join(output_sgf, 'clean')
    full_sgf = os.path.join(output_sgf, 'full')
    utils.ensure_dir_exists(clean_sgf)
    utils.ensure_dir_exists(full_sgf)
    utils.ensure_dir_exists(output_dir)
    utils.ensure_dir_exists(holdout_dir)

    with utils.logged_timer("Loading weights from %s ... " % load_file):
        network = dual_net.DualNetwork(load_file)

    with utils.logged_timer("Playing game"):
        player = selfplay_mcts.play(network, verbose)

    output_name = '{}-{}'.format(int(time.time()), socket.gethostname())
    game_data = player.extract_data()
    with gfile.GFile(os.path.join(clean_sgf, '{}.sgf'.format(output_name)),
                     'w') as f:
        f.write(player.to_sgf(use_comments=False))
    with gfile.GFile(os.path.join(full_sgf, '{}.sgf'.format(output_name)),
                     'w') as f:
        f.write(player.to_sgf())

    tf_examples = preprocessing.make_dataset_from_selfplay(game_data)

    # Hold out 5% of games for validation.
    if random.random() < holdout_pct:
        fname = os.path.join(holdout_dir, "{}.tfrecord.zz".format(output_name))
    else:
        fname = os.path.join(output_dir, "{}.tfrecord.zz".format(output_name))

    preprocessing.write_tf_examples(fname, tf_examples)
Example #28
def main(argv):
    """Train on examples and export the updated model weights."""
    tf_records = argv[1:]
    logging.info("Training on %s records: %s to %s",
                 len(tf_records), tf_records[0], tf_records[-1])
    with utils.logged_timer("Training"):
        train(*tf_records)
    if FLAGS.export_path:
        dual_net.export_model(FLAGS.export_path)
    if FLAGS.freeze:
        if FLAGS.use_tpu:
            dual_net.freeze_graph_tpu(FLAGS.export_path)
        else:
            dual_net.freeze_graph(FLAGS.export_path, FLAGS.use_trt,
                                  FLAGS.trt_max_batch_size, FLAGS.trt_precision)
Example #29
def checked_run(name, *cmd):
    # Read & expand any flagfiles specified on the commandline so we can know
    # exactly what's going on.
    expanded = flags.FlagValues().read_flags_from_files(cmd)
    logging.info('Running %s:\n  %s', name, '  '.join(expanded))

    with utils.logged_timer('%s finished' % name.capitalize()):
        completed_process = subprocess.run(cmd,
                                           stdout=subprocess.PIPE,
                                           stderr=subprocess.STDOUT)
        if completed_process.returncode:
            logging.error('Error running %s: %s', name,
                          completed_process.stdout.decode())
            raise RuntimeError('Non-zero return code executing %s' %
                               ' '.join(cmd))
    return completed_process
Example #30
def checked_run(name, *cmd):
  # Log the expanded & deduped list of command line arguments, so we can know
  # exactly what's going on. Note that we don't pass the expanded list of
  # arguments to the actual subprocess because of a quirk in how unknown flags
  # are handled: unknown flags in flagfiles are silently ignored, while unknown
  # flags on the command line will cause the subprocess to abort.
  logging.info(
      'Running %s:\n  %s  %s', name, cmd[0], '  '.join(expand_flags(*cmd)))

  with utils.logged_timer('%s finished' % name.capitalize()):
    completed_process = subprocess.run(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    if completed_process.returncode:
      logging.error('Error running %s: %s', name,
                    completed_process.stdout.decode())
      raise RuntimeError('Non-zero return code executing %s' % ' '.join(cmd))
  return completed_process
Example #31
async def checked_run(*cmd):
    """Run the given subprocess command in a coroutine.

  Args:
    *cmd: the command to run and its arguments.

  Returns:
    The output that the command wrote to stdout as a list of strings, one line
    per element (stderr output is piped to stdout).

  Raises:
    RuntimeError: if the command returns a non-zero result.
  """

    # Start the subprocess.
    logging.info('Running: %s', expand_cmd_str(cmd))
    with utils.logged_timer('{} finished'.format(get_cmd_name(cmd))):
        p = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.STDOUT)

        # Stream output from the process stdout.
        chunks = []
        while True:
            chunk = await p.stdout.read(16 * 1024)
            if not chunk:
                break
            chunks.append(chunk)

        # Wait for the process to finish, check it was successful & build stdout.
        await p.wait()
        stdout = b''.join(chunks).decode()[:-1]
        if p.returncode:
            raise RuntimeError('Return code {} from process: {}\n{}'.format(
                p.returncode, expand_cmd_str(cmd), stdout))

        log_path = os.path.join(FLAGS.base_dir, get_cmd_name(cmd) + '.log')
        with gfile.Open(log_path, 'a') as f:
            f.write(expand_cmd_str(cmd))
            f.write('\n')
            f.write(stdout)
            f.write('\n')

        # Split stdout into lines.
        return stdout.split('\n')
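Because this variant is a coroutine, callers must drive it from an event loop (the main() in Example #4 closes asyncio.get_event_loop() after running the loop). A hypothetical invocation; the command is a placeholder:

import asyncio

lines = asyncio.get_event_loop().run_until_complete(
    checked_run('python', 'selfplay.py', '--flagfile=flags/selfplay.flags'))
for line in lines:
    print(line)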
Example #32
def selfplay(selfplay_dirs, selfplay_model, params):
    """Perform selfplay with a specific model.

  Args:
    selfplay_dirs: A dict to specify the directories used in selfplay.
      selfplay_dirs = {
          'output_dir': output_dir,
          'holdout_dir': holdout_dir,
          'clean_sgf': clean_sgf,
          'full_sgf': full_sgf
      }
    selfplay_model: The actual Dualnet runner for selfplay.
    params: A MiniGoParams instance of hyperparameters for the model.
  """
    with utils.logged_timer('Playing game'):
        player = selfplay_mcts.play(params.board_size, selfplay_model,
                                    params.selfplay_readouts,
                                    params.selfplay_resign_threshold,
                                    params.simultaneous_leaves,
                                    params.selfplay_verbose)

    output_name = '{}-{}'.format(int(time.time()), socket.gethostname())

    def _write_sgf_data(dir_sgf, use_comments):
        with tf.gfile.GFile(
                os.path.join(dir_sgf, '{}.sgf'.format(output_name)), 'w') as f:
            f.write(player.to_sgf(use_comments=use_comments))

    _write_sgf_data(selfplay_dirs['clean_sgf'], use_comments=False)
    _write_sgf_data(selfplay_dirs['full_sgf'], use_comments=True)

    game_data = player.extract_data()
    tf_examples = preprocessing.make_dataset_from_selfplay(game_data, params)

    # Hold out 5% of games for validation.
    if random.random() < params.holdout_pct:
        fname = os.path.join(selfplay_dirs['holdout_dir'],
                             output_name + _TF_RECORD_SUFFIX)
    else:
        fname = os.path.join(selfplay_dirs['output_dir'],
                             output_name + _TF_RECORD_SUFFIX)

    preprocessing.write_tf_examples(fname, tf_examples)
Example #33
def main(argv):
    """Train on examples and export the updated model weights."""
    tf_records = argv[1:]
    logging.info("Training on %s records: %s to %s", len(tf_records),
                 tf_records[0], tf_records[-1])
    with utils.logged_timer("Training"):
        estimator = train(*tf_records)
    if FLAGS.export_path:
        dual_net.export_model(FLAGS.export_path)
        estimator.export_saved_model(FLAGS.export_path,
                                     serving_input_receiver_fn())
    else:
        estimator.export_saved_model('saved_model',
                                     serving_input_receiver_fn())
    if FLAGS.freeze:
        if FLAGS.use_tpu:
            dual_net.freeze_graph_tpu(FLAGS.export_path)
        else:
            dual_net.freeze_graph(FLAGS.export_path)
Example #34
def run_game(network, args, device=None, sgf_dir=None, holdout_pct=0.05):
    '''Plays a game and records the results and game data.'''
    selfplay_dir = os.path.join(args.selfplay_dir, args.model_name)
    utils.ensure_dir_exists(selfplay_dir)
    holdout_dir = os.path.join(args.holdout_dir, args.model_name)
    utils.ensure_dir_exists(holdout_dir)
    if args.sgf_dir:
        sgf_dir = os.path.join(args.sgf_dir, args.model_name)
        utils.ensure_dir_exists(sgf_dir)
    if sgf_dir is not None:
        minimal_sgf_dir = os.path.join(sgf_dir, 'clean')
        full_sgf_dir = os.path.join(sgf_dir, 'full')
        utils.ensure_dir_exists(minimal_sgf_dir)
        utils.ensure_dir_exists(full_sgf_dir)
    if selfplay_dir is not None:
        utils.ensure_dir_exists(selfplay_dir)
        utils.ensure_dir_exists(holdout_dir)

    with utils.logged_timer("Playing game"):
        player = play(network, args, device=device)

    features, pis, values = player.extract_data(return_features=True)
    features = np.array(features)
    pis = np.array(pis)
    values = np.array(values)
    assert features.shape[0] == pis.shape[0] == values.shape[0]
    output_name = '{}-{}'.format(int(time.time()), features.shape[0])
    if sgf_dir is not None:
        with open(os.path.join(minimal_sgf_dir, '{}.sgf'.format(output_name)),
                  'w') as f:
            f.write(player.to_sgf(use_comments=False))
        with open(os.path.join(full_sgf_dir, '{}.sgf'.format(output_name)),
                  'w') as f:
            f.write(player.to_sgf())

    if selfplay_dir is not None:
        # Hold out 5% of games for validation.
        if random.random() < holdout_pct:
            fname = os.path.join(holdout_dir, "{}.hdf5".format(output_name))
        else:
            fname = os.path.join(selfplay_dir, "{}.hdf5".format(output_name))

        preprocessing.save_h5_examples(fname, features, pis, values)
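This variant stores training examples as HDF5 rather than tfrecords. preprocessing.save_h5_examples is not shown; one plausible shape for it, assuming h5py (the repository's implementation may differ):

import h5py

def save_h5_examples(fname, features, pis, values):
    """Write parallel feature/policy/value arrays into a single HDF5 file."""
    with h5py.File(fname, 'w') as f:
        f.create_dataset('features', data=features, compression='gzip')
        f.create_dataset('pi', data=pis, compression='gzip')
        f.create_dataset('value', data=values, compression='gzip')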
Example #35
def selfplay(selfplay_dirs, selfplay_model, params):
  """Perform selfplay with a specific model.

  Args:
    selfplay_dirs: A dict to specify the directories used in selfplay.
      selfplay_dirs = {
          'output_dir': output_dir,
          'holdout_dir': holdout_dir,
          'clean_sgf': clean_sgf,
          'full_sgf': full_sgf
      }
    selfplay_model: The actual Dualnet runner for selfplay.
    params: A MiniGoParams instance of hyperparameters for the model.
  """
  with utils.logged_timer('Playing game'):
    player = selfplay_mcts.play(
        params.board_size, selfplay_model, params.selfplay_readouts,
        params.selfplay_resign_threshold, params.simultaneous_leaves,
        params.selfplay_verbose)

  output_name = '{}-{}'.format(int(time.time()), socket.gethostname())

  def _write_sgf_data(dir_sgf, use_comments):
    with tf.gfile.GFile(
        os.path.join(dir_sgf, '{}.sgf'.format(output_name)), 'w') as f:
      f.write(player.to_sgf(use_comments=use_comments))

  _write_sgf_data(selfplay_dirs['clean_sgf'], use_comments=False)
  _write_sgf_data(selfplay_dirs['full_sgf'], use_comments=True)

  game_data = player.extract_data()
  tf_examples = preprocessing.make_dataset_from_selfplay(game_data, params)

  # Hold out 5% of games for validation.
  if random.random() < params.holdout_pct:
    fname = os.path.join(
        selfplay_dirs['holdout_dir'], output_name + _TF_RECORD_SUFFIX)
  else:
    fname = os.path.join(
        selfplay_dirs['output_dir'], output_name + _TF_RECORD_SUFFIX)

  preprocessing.write_tf_examples(fname, tf_examples)
Example #36
def _prepare_selfplay(
    model_name, trained_models_dir, selfplay_dir, holdout_dir, sgf_dir, params):
  """Set directories and load the network for selfplay.

  Args:
    model_name: The name of the model for self-play
    trained_models_dir: Directories where the completed generations/models are.
    selfplay_dir: Where to write the games. Set as 'base_dir/data/selfplay/'.
    holdout_dir: Where to write the holdout data. Set as
      'base_dir/data/holdout/'.
    sgf_dir: Where to write the sgf (Smart Game Format) files. Set as
      'base_dir/sgf/'.
    params: A MiniGoParams instance of hyperparameters for the model.

  Returns:
    The directories and network model for selfplay.
  """
  # Set paths for the model with 'model_name'
  model_path = os.path.join(trained_models_dir, model_name)
  output_dir = os.path.join(selfplay_dir, model_name)
  holdout_dir = os.path.join(holdout_dir, model_name)
  # clean_sgf is to write sgf file without comments.
  # full_sgf is to write sgf file with comments.
  clean_sgf = os.path.join(sgf_dir, model_name, 'clean')
  full_sgf = os.path.join(sgf_dir, model_name, 'full')

  _ensure_dir_exists(output_dir)
  _ensure_dir_exists(holdout_dir)
  _ensure_dir_exists(clean_sgf)
  _ensure_dir_exists(full_sgf)
  selfplay_dirs = {
      'output_dir': output_dir,
      'holdout_dir': holdout_dir,
      'clean_sgf': clean_sgf,
      'full_sgf': full_sgf
  }
  # cache the network model for self-play
  with utils.logged_timer('Loading weights from {} ... '.format(model_path)):
    network = dualnet.DualNetRunner(model_path, params)
  return selfplay_dirs, network
Example #37
def main(_):
  """Run the reinforcement learning loop."""
  tf.logging.set_verbosity(tf.logging.INFO)

  params = _set_params(FLAGS)

  # A dummy model for debug/testing purpose with fewer games and iterations
  if FLAGS.test:
    params = model_params.DummyMiniGoParams()
    base_dir = FLAGS.base_dir + str(FLAGS.board_size) + '_size_dummy/'
  else:
    # Set directories for models and datasets
    base_dir = FLAGS.base_dir + str(FLAGS.board_size) + '_size/'

  dirs = utils.MiniGoDirectory(base_dir)

  # Run selfplay only if user specifies the argument.
  if FLAGS.selfplay:
    selfplay_model_name = FLAGS.selfplay_model_name or utils.get_latest_model(
        dirs.trained_models_dir)[1]
    max_games = FLAGS.selfplay_max_games or params.max_games_per_generation
    run_selfplay(selfplay_model_name, max_games, dirs, params)
    return

  # Run the RL pipeline
  # if no models have been trained, start from bootstrap model

  if not os.path.isdir(dirs.trained_models_dir):
    print('No trained model exists! Starting from Bootstrap...')
    print('Creating random initial weights...')
    bootstrap(dirs.estimator_model_dir, dirs.trained_models_dir, params)
  else:
    print('A MiniGo base directory has been found! ')
    print('Start from the last checkpoint...')

  _, best_model_so_far = utils.get_latest_model(dirs.trained_models_dir)
  for rl_iter in range(params.max_iters_per_pipeline):
    print('RL_iteration: {}'.format(rl_iter))
    # Self-play with the best model to generate training data
    run_selfplay(
        best_model_so_far, params.max_games_per_generation, dirs, params)

    # gather selfplay data for training
    print('Gathering game output...')
    gather(dirs.selfplay_dir, dirs.training_chunk_dir, params)

    # train the next generation model
    model_num, _ = utils.get_latest_model(dirs.trained_models_dir)
    print('Training on gathered game data...')
    train(dirs.trained_models_dir, dirs.estimator_model_dir,
          dirs.training_chunk_dir, model_num + 1, params)

    # validate the latest model if needed
    if FLAGS.validation:
      print('Validating on the holdout game data...')
      validate(dirs.trained_models_dir, dirs.holdout_dir,
               dirs.estimator_model_dir, params)

    _, current_model = utils.get_latest_model(dirs.trained_models_dir)

    if FLAGS.evaluation:  # Perform evaluation if needed
      print('Evaluate models between {} and {}'.format(
          best_model_so_far, current_model))
      black_model = os.path.join(dirs.trained_models_dir, best_model_so_far)
      white_model = os.path.join(dirs.trained_models_dir, current_model)
      _ensure_dir_exists(dirs.evaluate_dir)
      with utils.logged_timer('Loading weights'):
        black_net = dualnet.DualNetRunner(black_model, params)
        white_net = dualnet.DualNetRunner(white_model, params)

      best_model_so_far = evaluate(
          best_model_so_far, black_net, current_model, white_net,
          dirs.evaluate_dir, params)
      print('Winner of evaluation: {}!'.format(best_model_so_far))
    else:
      best_model_so_far = current_model