Example #1
async def sample_training_examples(state):
    """Sample training examples from recent selfplay games.

    Args:
        state: the RL loop State instance.

    Returns:
        A list of golden chunk paths (one per write thread), sorted by path.
    """

    dirs = [x.path for x in os.scandir(fsdb.selfplay_dir()) if x.is_dir()]
    src_patterns = []
    for d in sorted(dirs, reverse=True)[:FLAGS.window_size]:
        src_patterns.append(os.path.join(d, '*', '*', '*.tfrecord.zz'))

    dst_path = os.path.join(fsdb.golden_chunk_dir(),
                            '{}.tfrecord.zz'.format(state.train_model_name))

    logging.info('Writing training chunks to %s', dst_path)
    lines = await sample_records(src_patterns, dst_path,
                                 num_read_threads=8,
                                 num_write_threads=8,
                                 sample_frac=FLAGS.train_filter)
    logging.info('\n'.join(lines))

    chunk_pattern = os.path.join(
        fsdb.golden_chunk_dir(),
        '{}-*-of-*.tfrecord.zz'.format(state.train_model_name))
    chunk_paths = sorted(tf.gfile.Glob(chunk_pattern))
    assert len(chunk_paths) == 8

    return chunk_paths
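# A minimal usage sketch for the coroutine above; `state` and
# sample_training_examples are assumed to come from the surrounding RL loop,
# and run_training_round is a hypothetical helper, not part of the original.
import asyncio
import logging

async def run_training_round(state):
    # Sample a fresh set of golden chunks, then hand them to training.
    chunk_paths = await sample_training_examples(state)
    logging.info('Sampled %d golden chunks', len(chunk_paths))
    return chunk_paths

# chunk_paths = asyncio.get_event_loop().run_until_complete(run_training_round(state))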
Example #2
def initialize_from_checkpoint(state):
  """Initialize the reinforcement learning loop from a checkpoint."""
  # The checkpoint's work_dir should contain the most recently trained model.
  model_paths = glob.glob(os.path.join(FLAGS.checkpoint_dir,
                                       'work_dir/model.ckpt-*.pb'))
  if len(model_paths) != 1:
    raise RuntimeError('Expected exactly one model in the checkpoint work_dir, '
                       'got [{}]'.format(', '.join(model_paths)))
  start_model_path = model_paths[0]

  # Copy the training chunks.
  golden_chunks_dir = os.path.join(FLAGS.checkpoint_dir, 'golden_chunks')
  for basename in os.listdir(golden_chunks_dir):
    path = os.path.join(golden_chunks_dir, basename)
    shutil.copy(path, fsdb.golden_chunk_dir())

  # Copy the latest trained model into the models directory and use it on the
  # first round of selfplay.
  state.best_model_name = 'checkpoint'
  best_model_path = os.path.join(fsdb.models_dir(), state.best_model_name)

  dual_net.optimize_graph(start_model_path, best_model_path, FLAGS.quantization,
                          fsdb.golden_chunk_dir() + '/*.zz',
                          FLAGS.eval_min_max_every_epoch)

  # Copy the training files.
  work_dir = os.path.join(FLAGS.checkpoint_dir, 'work_dir')
  for basename in os.listdir(work_dir):
    path = os.path.join(work_dir, basename)
    shutil.copy(path, fsdb.working_dir())
Example #3
def fill_and_wait_models(bufsize=EXAMPLES_PER_GENERATION,
                         write_dir=None,
                         threads=8,
                         model_window=100,
                         skip_first_rsync=False):
    """ Fills a ringbuffer with positions from the most recent games, then
    continually rsync's and updates the buffer until a new model is promoted.
    Once it detects a new model, iit then dumps its contents for training to
    immediately begin on the next model.
    """
    write_dir = write_dir or fsdb.golden_chunk_dir()
    buf = ExampleBuffer(bufsize)
    models = fsdb.get_models()[-model_window:]
    if not skip_first_rsync:
        with timer("Rsync"):
            smart_rsync(models[-1][0] - 6)
    files = tqdm(map(files_for_model, models), total=len(models))
    buf.parallel_fill(list(itertools.chain(*files)), threads=threads)

    print("Filled buffer, watching for new games")
    while fsdb.get_latest_model()[0] == models[-1][0]:
        with timer("Rsync"):
            smart_rsync(models[-1][0] - 2)
        new_files = tqdm(map(files_for_model, models[-2:]), total=len(models[-2:]))
        buf.update(list(itertools.chain(*new_files)))
        time.sleep(60)
    latest = fsdb.get_latest_model()

    print("New model!", latest[1], "!=", models[-1][1])
    print(buf)
    buf.flush(os.path.join(write_dir, str(latest[0] + 1) + '.tfrecord.zz'))
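# The ExampleBuffer used above is defined elsewhere; conceptually it is a
# fixed-capacity ring buffer of training positions. A tiny illustrative sketch
# of that idea follows (not the real ExampleBuffer API):
import collections
import random

class TinyRingBuffer:
    """Fixed-capacity buffer: adding past capacity drops the oldest entries."""

    def __init__(self, capacity):
        self.examples = collections.deque(maxlen=capacity)

    def update(self, new_examples):
        # extend() past maxlen silently evicts from the left (oldest first).
        self.examples.extend(new_examples)

    def flush(self, sample_size):
        # Return a shuffled sample for training and clear the buffer.
        sample = random.sample(list(self.examples),
                               min(sample_size, len(self.examples)))
        self.examples.clear()
        return sample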
Example #4
def get_files_exchange(state, mpi_rank):
    # The train rank fetches selfplay chunks; selfplay ranks fetch the eval model.
    if mpi_rank == FLAGS.train_rank:
        selfplay_files = glob.glob(
            os.path.join(FLAGS.shared_dir_exchange,
                         state.output_model_name + '-mpirank-*.zz*'))
        for filename in selfplay_files:
            print('Rank = {}, Getting file={} iter={} from SharedFS'.format(
                mpi_rank, filename, state.iter_num))
            shutil.copy(filename, fsdb.golden_chunk_dir())
    else:
        # Selfplay ranks need the trained eval model.
        dst_dir = fsdb.models_dir()

        src_file = os.path.join(FLAGS.shared_dir_exchange,
                                state.train_model_name + '.pb')
        print('Rank = {}, Getting file={} iter={} from SharedFS'.format(
            mpi_rank, src_file, state.iter_num))
        shutil.copy(src_file, dst_dir)

        src_file = os.path.join(FLAGS.shared_dir_exchange,
                                state.train_model_name + '.pb' + '.og')
        print('Rank = {}, Getting file={} iter={} from SharedFS'.format(
            mpi_rank, src_file, state.iter_num))
        shutil.copy(src_file, dst_dir)
Example #5
def main(unused_argv):
  """Run the reinforcement learning loop."""

  print('Wiping dir %s' % FLAGS.base_dir, flush=True)
  shutil.rmtree(FLAGS.base_dir, ignore_errors=True)
  dirs = [fsdb.models_dir(), fsdb.selfplay_dir(), fsdb.holdout_dir(),
          fsdb.eval_dir(), fsdb.golden_chunk_dir(), fsdb.working_dir()]
  for d in dirs:
    ensure_dir_exists(d)

  # Copy the flag files so there's no chance of them getting accidentally
  # overwritten while the RL loop is running.
  flags_dir = os.path.join(FLAGS.base_dir, 'flags')
  shutil.copytree(FLAGS.flags_dir, flags_dir)
  FLAGS.flags_dir = flags_dir

  # Copy the target model to the models directory so we can find it easily.
  for file_name in [
        "target.pb", "target_raw.ckpt.data-00000-of-00001",
        "target_raw.ckpt.index", "target_raw.ckpt.meta"]:
    shutil.copy(FLAGS.target_path[:-len("target.pb")] + file_name,
                os.path.join(fsdb.models_dir(), file_name))

  logging.getLogger().addHandler(
      logging.FileHandler(os.path.join(FLAGS.base_dir, 'rl_loop.log')))
  formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                '%Y-%m-%d %H:%M:%S')
  for handler in logging.getLogger().handlers:
    handler.setFormatter(formatter)

  with logged_timer('Total time'):
    try:
      rl_loop()
    finally:
      asyncio.get_event_loop().close()
Example #6
def rl_loop():
  state = State()
  bootstrap(state)
  selfplay(state)

  while state.iter_num < 100:
    holdout_dir = os.path.join(fsdb.holdout_dir(), '%06d-*' % state.iter_num)
    tf_records = os.path.join(fsdb.golden_chunk_dir(), '*.zz')
    tf_records = sorted(tensorflow.gfile.Glob(tf_records), reverse=True)[:5]

    state.iter_num += 1

    # Train on shuffled game data of the last 5 selfplay rounds.
    train(state, tf_records)

    # These could run in parallel.
    validate(state, holdout_dir)
    model_win_rate = evaluate_model(state)
    target_win_rate = evaluate_target(state)

    # This could run in parallel to the rest.
    selfplay(state)

    if model_win_rate >= 0.55:
      # Promote the trained model to the play model.
      state.play_model_num = state.train_model_num
      state.play_model_name = state.train_model_name
      state.train_model_num += 1
    elif model_win_rate < 0.4:
      # Bury the selfplay games which produced a significantly worse model.
      logging.info('Burying %s.', tf_records[0])
      shutil.move(tf_records[0], tf_records[0] + '.bury')

    yield target_win_rate
Example #7
def initialize_from_checkpoint(state):
    """Initialize the reinforcement learning loop from a checkpoint."""

    # The checkpoint's work_dir should contain the most recently trained model.
    model_paths = glob.glob(
        os.path.join(FLAGS.checkpoint_dir, 'work_dir/model.ckpt-*.pb'))
    if len(model_paths) != 1:
        raise RuntimeError(
            'Expected exactly one model in the checkpoint work_dir, '
            'got [{}]'.format(', '.join(model_paths)))
    start_model_path = model_paths[0]

    # Copy the latest trained model into the models directory and use it on the
    # first round of selfplay.
    state.best_model_name = 'checkpoint'
    shutil.copy(start_model_path,
                os.path.join(fsdb.models_dir(), state.best_model_name + '.pb'))
    shutil.copy(
        start_model_path + '.og',
        os.path.join(fsdb.models_dir(), state.best_model_name + '.pb.og'))

    # Copy the training chunks.
    golden_chunks_dir = os.path.join(FLAGS.checkpoint_dir, 'golden_chunks')
    for basename in os.listdir(golden_chunks_dir):
        path = os.path.join(golden_chunks_dir, basename)
        out_path = os.path.join(fsdb.golden_chunk_dir(), basename)
        buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)
        buffer.parallel_fill(tf.gfile.Glob(path))
        buffer.flush(out_path, FLAGS.num_gpus_train)

    # Copy the training files.
    work_dir = os.path.join(FLAGS.checkpoint_dir, 'work_dir')
    for basename in os.listdir(work_dir):
        path = os.path.join(work_dir, basename)
        shutil.copy(path, fsdb.working_dir())
Example #8
def selfplay(state):
    output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
    holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)
    model_path = os.path.join(fsdb.models_dir(), state.best_model_name)

    result = checked_run([
        'bazel-bin/cc/selfplay', '--parallel_games=2048', '--num_readouts=100',
        '--model={}.pb'.format(model_path),
        '--output_dir={}'.format(output_dir),
        '--holdout_dir={}'.format(holdout_dir)
    ] + cc_flags(state), 'selfplay')
    logging.info(get_lines(result, make_slice[-2:]))

    # Write examples to a single record.
    pattern = os.path.join(output_dir, '*', '*.zz')
    random.seed(state.seed)
    tf.set_random_seed(state.seed)
    np.random.seed(state.seed)
    # TODO(tommadams): This method of generating one golden chunk per generation
    # is sub-optimal because each chunk gets reused multiple times for training,
    # introducing bias. Instead, a fresh dataset should be uniformly sampled out
    # of *all* games in the training window before the start of each training run.
    buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)

    # TODO(tommadams): parallel_fill is currently non-deterministic. Make it not
    # so.
    logging.info('Writing golden chunk from "{}"'.format(pattern))
    buffer.parallel_fill(tf.gfile.Glob(pattern))
    buffer.flush(
        os.path.join(fsdb.golden_chunk_dir(),
                     state.output_model_name + '.tfrecord.zz'))
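# The TODO above suggests sampling a fresh dataset uniformly from *all* games
# in the training window, rather than reusing one chunk per generation. A rough
# sketch of that idea follows; it reuses the tf/fsdb/example_buffer imports of
# the snippet above, and window_size, sample_frac and the output filename are
# illustrative assumptions, not part of the original code.
def sample_window_uniformly(window_size=5, sample_frac=0.05):
    # Collect every selfplay game from the most recent `window_size` generations.
    model_dirs = sorted(tf.gfile.Glob(os.path.join(fsdb.selfplay_dir(), '*')),
                        reverse=True)[:window_size]
    games = []
    for d in model_dirs:
        games.extend(tf.gfile.Glob(os.path.join(d, '*', '*.zz')))
    # Sample a fraction of positions across the whole window in one pass.
    buffer = example_buffer.ExampleBuffer(sampling_frac=sample_frac)
    buffer.parallel_fill(games)
    buffer.flush(os.path.join(fsdb.golden_chunk_dir(),
                              'window_sample.tfrecord.zz'))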
Example #9
def main(unused_argv):
    """Run the reinforcement learning loop."""

    print('Wiping dir %s' % FLAGS.base_dir, flush=True)
    shutil.rmtree(FLAGS.base_dir, ignore_errors=True)

    utils.ensure_dir_exists(fsdb.models_dir())
    utils.ensure_dir_exists(fsdb.selfplay_dir())
    utils.ensure_dir_exists(fsdb.holdout_dir())
    utils.ensure_dir_exists(fsdb.eval_dir())
    utils.ensure_dir_exists(fsdb.golden_chunk_dir())
    utils.ensure_dir_exists(fsdb.working_dir())

    # Copy the flag files so there's no chance of them getting accidentally
    # overwritten while the RL loop is running.
    flags_dir = os.path.join(FLAGS.base_dir, 'flags')
    shutil.copytree(FLAGS.flags_dir, flags_dir)
    FLAGS.flags_dir = flags_dir

    # Copy the target model to the models directory so we can find it easily.
    shutil.copy('ml_perf/target.pb', fsdb.models_dir())

    logging.getLogger().addHandler(
        logging.FileHandler(os.path.join(FLAGS.base_dir, 'rl_loop.log')))
    formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                  '%Y-%m-%d %H:%M:%S')
    for handler in logging.getLogger().handlers:
        handler.setFormatter(formatter)

    with utils.logged_timer('Total time'):
        try:
            rl_loop()
        finally:
            asyncio.get_event_loop().close()
Example #10
def main(unused_argv):
    """Run the reinforcement learning loop."""

    print('Wiping dir %s' % FLAGS.base_dir, flush=True)
    shutil.rmtree(FLAGS.base_dir, ignore_errors=True)

    utils.ensure_dir_exists(fsdb.models_dir())
    utils.ensure_dir_exists(fsdb.selfplay_dir())
    utils.ensure_dir_exists(fsdb.holdout_dir())
    utils.ensure_dir_exists(fsdb.eval_dir())
    utils.ensure_dir_exists(fsdb.golden_chunk_dir())
    utils.ensure_dir_exists(fsdb.working_dir())

    # Copy the target model to the models directory so we can find it easily.
    shutil.copy('ml_perf/target.pb', fsdb.models_dir())

    logging.getLogger().addHandler(
        logging.FileHandler(os.path.join(FLAGS.base_dir, 'reinforcement.log')))
    formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                  '%Y-%m-%d %H:%M:%S')
    for handler in logging.getLogger().handlers:
        handler.setFormatter(formatter)

    with utils.logged_timer('Total time'):
        for target_win_rate in rl_loop():
            if target_win_rate > 0.5:
                return logging.info('Passed exit criteria.')
        logging.info('Failed to converge.')
Example #11
def gen_golden_chunk(files, state):
    buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)
    buffer.parallel_fill(files[1], threads=1)
    buffer.flush(
        os.path.join(
            fsdb.golden_chunk_dir(),
            state.output_model_name + '-{}.tfrecord.zz'.format(files[0])))
Example #12
def get_golden_chunk_records():
  """Return up to num_records of golden chunks to train on.

  Returns:
    A list of golden chunks up to num_records in length, sorted by path.
  """

  pattern = os.path.join(fsdb.golden_chunk_dir(), '*.zz')
  return sorted(tf.gfile.Glob(pattern), reverse=True)[:FLAGS.window_size]
Example #13
async def sample_training_examples(state):
    """Sample training examples from recent selfplay games.

    Args:
        state: the RL loop State instance.

    Returns:
        A list of golden chunk paths (one per write thread), sorted by path.
    """

    # Training examples are written out to the following directory hierarchy:
    #   selfplay_dir/device_id/model_name/timestamp/
    # Read examples from the most recent `window_size` models.
    device_dirs = [
        x.path for x in os.scandir(fsdb.selfplay_dir()) if x.is_dir()
    ]
    models = set()
    for d in device_dirs:
        models.update([x.name for x in os.scandir(d) if x.is_dir()])
    models = sorted(models, reverse=True)[:FLAGS.window_size]

    src_patterns = []
    for d in device_dirs:
        for model in models:
            src_patterns.append(os.path.join(d, model, '*', '*.tfrecord.zz'))

    dst_path = os.path.join(fsdb.golden_chunk_dir(),
                            '{}.tfrecord.zz'.format(state.train_model_name))

    logging.info('Writing training chunks to %s', dst_path)
    lines = await sample_records(src_patterns,
                                 dst_path,
                                 num_read_threads=8,
                                 num_write_threads=8,
                                 sample_frac=FLAGS.train_filter)
    logging.info('\n'.join(lines))

    chunk_pattern = os.path.join(
        fsdb.golden_chunk_dir(),
        '{}-*-of-*.tfrecord.zz'.format(state.train_model_name))
    chunk_paths = sorted(tf.gfile.Glob(chunk_pattern))
    assert len(chunk_paths) == 8

    return chunk_paths
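# Illustration of the directory hierarchy described above (device and model
# names are hypothetical): with two devices and FLAGS.window_size = 2, the
# source patterns expand to
#   selfplay_dir/device_0/000042-model/*/*.tfrecord.zz
#   selfplay_dir/device_0/000041-model/*/*.tfrecord.zz
#   selfplay_dir/device_1/000042-model/*/*.tfrecord.zz
#   selfplay_dir/device_1/000041-model/*/*.tfrecord.zz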
Example #14
def post_train(state):
  model_path = os.path.join(fsdb.models_dir(), state.train_model_name)
  dual_net.optimize_graph(model_path + '.pb', model_path, FLAGS.quantization,
                          fsdb.golden_chunk_dir() + '/*.zz',
                          FLAGS.eval_min_max_every_epoch)
  mll.save_model(state.iter_num - 1)

  # Append the time elapsed from when the RL was started to when this model
  # was trained.
  elapsed = time.time() - state.start_time
  timestamps_path = os.path.join(fsdb.models_dir(), 'train_times.txt')
  with gfile.Open(timestamps_path, 'a') as f:
    print('{:.3f} {}'.format(elapsed, state.train_model_name), file=f)
Example #15
def get_golden_chunk_records(num_records):
    """Return up to num_records of golden chunks to train on.

  Args:
    num_records: maximum number of records to return.

  Returns:
    A list of golden chunks up to num_records in length, sorted by path.
  """

    pattern = os.path.join(fsdb.golden_chunk_dir(), '*.zz')
    return sorted(tf.gfile.Glob(pattern), reverse=True)[:num_records]
Example #16
def main(unused_argv):
    """Run the reinforcement learning loop."""
    logging.getLogger('mlperf_compliance').propagate = False

    # Multi-node setup.
    if FLAGS.use_multinode:
        mpi_comm = MPI.COMM_WORLD
        mpi_rank = mpi_comm.Get_rank()
        mpi_size = mpi_comm.Get_size()
        print('[MPI Init] MPI rank {}, mpi size is {} host is {}'.format(
            mpi_rank, mpi_size, socket.gethostname()))
    else:
        mpi_comm = None
        mpi_rank = 0
        mpi_size = 1

    print('Wiping dir %s' % FLAGS.base_dir, flush=True)
    shutil.rmtree(FLAGS.base_dir, ignore_errors=True)
    dirs = [
        fsdb.models_dir(),
        fsdb.selfplay_dir(),
        fsdb.holdout_dir(),
        fsdb.eval_dir(),
        fsdb.golden_chunk_dir(),
        fsdb.working_dir()
    ]

    # Shared filesystem for data exchange (temporary solution as of 2019-05-06).
    if FLAGS.use_multinode:
        ensure_dir_exists(FLAGS.shared_dir_exchange)
    for d in dirs:
        ensure_dir_exists(d)

    # Copy the flag files so there's no chance of them getting accidentally
    # overwritten while the RL loop is running.
    flags_dir = os.path.join(FLAGS.base_dir, 'flags')
    shutil.copytree(FLAGS.flags_dir, flags_dir)
    FLAGS.flags_dir = flags_dir

    # Copy the target model to the models directory so we can find it easily.
    shutil.copy(FLAGS.target_path, os.path.join(fsdb.models_dir(),
                                                'target.pb'))
    shutil.copy(FLAGS.target_path + '.og',
                os.path.join(fsdb.models_dir(), 'target.pb.og'))

    with logged_timer('Total time from mpi_rank={}'.format(mpi_rank)):
        try:
            rl_loop(mpi_comm, mpi_rank, mpi_size)
        finally:
            asyncio.get_event_loop().close()
Example #17
async def selfplay(state, flagfile='selfplay', seed_factor=0):
  """Run selfplay and write a training chunk to the fsdb golden_chunk_dir.

  Args:
    state: the RL loop State instance.
    flagfile: the name of the flagfile to use for selfplay, either 'selfplay'
        (the default) or 'bootstrap'.
    seed_factor: Factor to increase seed.
  """
  output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
  holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)

  lines = await run(
      'bazel-bin/cc/selfplay',
      '--flagfile={}.flags'.format(os.path.join(FLAGS.flags_dir, flagfile)),
      '--model={}'.format(get_ckpt_path(state.best_model_path)),
      '--output_dir={}'.format(output_dir),
      '--holdout_dir={}'.format(holdout_dir),
      '--seed={}'.format(state.seed+100*seed_factor))
  result = '\n'.join(lines[-6:])
  logging.info(result)
  result = '\n'.join(lines[-50:])
  try:
    stats = parse_win_stats_table(result, 1)[0]
    num_games = stats.total_wins
    logging.info('Black won %0.3f, white won %0.3f',
                 stats.black_wins.total / num_games,
                 stats.white_wins.total / num_games)
  except AssertionError:
    # Poplar logging output can garble the lines we try to parse.
    logging.error("No results to parse: \n %s" % lines[-50:])

  if not MULTI_SP:
    # Write examples to a single record.
    pattern = os.path.join(output_dir, '*', '*.zz')
    random.seed(state.seed)
    tf.set_random_seed(state.seed)
    np.random.seed(state.seed)
    # TODO(tommadams): This method of generating one golden chunk per generation
    # is sub-optimal because each chunk gets reused multiple times for training,
    # introducing bias. Instead, a fresh dataset should be uniformly sampled out
    # of *all* games in the training window before the start of each training run.
    buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)

    # TODO(tommadams): parallel_fill is currently non-deterministic. Make it not
    # so.
    logging.info('Writing golden chunk from "{}"'.format(pattern))
    buffer.parallel_fill(tf.gfile.Glob(pattern))
    buffer.flush(os.path.join(fsdb.golden_chunk_dir(),
                              state.output_model_name + '.tfrecord.zz'))
Example #18
def divide_record(state, pattern, num_out, rank):
    if rank < 0:
        rank_str = ''
    else:
        rank_str = '-mpirank-' + str(rank)
    buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)
    buffer.parallel_fill(tf.gfile.Glob(pattern))
    output = os.path.join(fsdb.golden_chunk_dir(),
                          state.output_model_name + rank_str + '.tfrecord.zz')
    buffer.flush(output, num_out)

    if rank >= 0:
        # Put the output shards on the shared exchange directory.
        output = output + '*'
        put_files_exchange(state, rank, fileout=output)
    return
Example #19
async def selfplay_multi(state, num_ipus):
  """ Start *num_ipu* selfplay processes """
  output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
  holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)
  flagfile = 'selfplay'

  all_tasks = []
  loop = asyncio.get_event_loop()
  for i in range(num_ipus):
    all_tasks.append(loop.create_task(selfplay_sub(state, output_dir, holdout_dir, flagfile, i)))
  all_lines = await asyncio.gather(*all_tasks, return_exceptions=True)

  black_wins_total = white_wins_total = num_games = 0
  for lines in all_lines:
    if isinstance(lines, (RuntimeError, OSError)):
      raise lines
    result = '\n'.join(lines[-6:])
    logging.info(result)
    stats = parse_win_stats_table(result, 1)[0]
    num_games += stats.total_wins
    black_wins_total += stats.black_wins.total
    white_wins_total += stats.white_wins.total

  logging.info('Black won %0.3f, white won %0.3f',
               black_wins_total / num_games,
               white_wins_total / num_games)

  # Copied from selfplay() to aggregate results; this could potentially be
  # parallelized with training.

  # Write examples to a single record.
  pattern = os.path.join(output_dir, '*', '*.zz')
  random.seed(state.seed)
  tf.set_random_seed(state.seed)
  np.random.seed(state.seed)
  # TODO(tommadams): This method of generating one golden chunk per generation
  # is sub-optimal because each chunk gets reused multiple times for training,
  # introducing bias. Instead, a fresh dataset should be uniformly sampled out
  # of *all* games in the training window before the start of each training run.
  buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)

  # TODO(tommadams): parallel_fill is currently non-deterministic. Make it not
  # so.
  logging.info('Writing golden chunk from "{}"'.format(pattern))
  buffer.parallel_fill(tf.gfile.Glob(pattern))
  buffer.flush(os.path.join(fsdb.golden_chunk_dir(),
                            state.output_model_name + '.tfrecord.zz'))
Example #20
def fill_and_wait_time(bufsize=EXAMPLES_PER_GENERATION,
                       write_dir=None,
                       threads=32,
                       start_from=None):
    start_from = start_from or dt.datetime.utcnow()
    write_dir = write_dir or fsdb.golden_chunk_dir()
    buf = ExampleBuffer(bufsize)
    chunk_to_make, fast_write = _determine_chunk_to_make(write_dir)

    hours = fsdb.get_hour_dirs()
    with timer("Rsync"):
        time_rsync(
            min(dt.datetime.strptime(hours[-1], "%Y-%m-%d-%H/"), start_from))
        start_from = dt.datetime.utcnow()

    hours = fsdb.get_hour_dirs()
    files = (tf.gfile.Glob(os.path.join(LOCAL_DIR, d, "*.zz"))
             for d in reversed(hours)
             if tf.gfile.Exists(os.path.join(LOCAL_DIR, d)))
    files = itertools.islice(files, get_window_size(chunk_to_make))

    models = fsdb.get_models()
    buf.parallel_fill(list(itertools.chain.from_iterable(files)),
                      threads=threads)
    print("Filled buffer, watching for new games")

    while (fsdb.get_latest_model() == models[-1]
           or buf.total_updates < MINIMUM_NEW_GAMES):
        with timer("Rsync"):
            time_rsync(start_from - dt.timedelta(minutes=60))
        start_from = dt.datetime.utcnow()
        hours = sorted(fsdb.get_hour_dirs(LOCAL_DIR))
        new_files = list(
            map(lambda d: tf.gfile.Glob(os.path.join(LOCAL_DIR, d, '*.zz')),
                hours[-2:]))
        buf.update(list(itertools.chain.from_iterable(new_files)))
        if fast_write:
            break
        time.sleep(30)
        if fsdb.get_latest_model() != models[-1]:
            print("New model!  Waiting for games. Got", buf.total_updates,
                  "new games so far")

    latest = fsdb.get_latest_model()
    print("New model!", latest[1], "!=", models[-1][1])
    print(buf)
    buf.flush(chunk_to_make)
Example #21
def get_golden_chunk_records(state, mpi_size=1):
    """Return up to num_records of golden chunks to train on.

  Returns:
    A list of golden chunks up to num_records in length, sorted by path.
  """

    ##how many selfplay nodes, do we fetch data from?
    num_selfplays = 1 if mpi_size == 1 else (mpi_size - 1)
    if state.iter_num <= FLAGS.window_size:
        win_size = (state.iter_num) * num_selfplays + (FLAGS.window_size -
                                                       state.iter_num)
    else:
        win_size = (FLAGS.window_size) * num_selfplays
    print('Train get_golden_chunks at iter = {} has win_size = {}'.format(
        state.iter_num, win_size))

    pattern = os.path.join(fsdb.golden_chunk_dir(), '*.zz*')
    return sorted(tf.gfile.Glob(pattern),
                  reverse=True)[:win_size * FLAGS.num_gpus_train]
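# Worked example of the window arithmetic above (illustrative numbers only):
# with FLAGS.window_size = 10 and mpi_size = 5, num_selfplays = 4, so
#   iter_num = 3  -> win_size = 3 * 4 + (10 - 3) = 19
#   iter_num = 12 -> win_size = 10 * 4          = 40
# i.e. early iterations pad the window with bootstrap-era chunks, and once
# iter_num exceeds window_size the window holds window_size chunks per
# selfplay node.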
Example #22
def initialize_from_checkpoint(state):
  """Initialize the reinforcement learning loop from a checkpoint."""
  # The checkpoint's work_dir should contain the most recently trained model.
  model_paths = glob.glob(os.path.join(FLAGS.checkpoint_dir,
                                       'work_dir/model.ckpt-*.pb'))
  print(os.path.join(FLAGS.checkpoint_dir, 'work_dir/model.ckpt-*.pb'))
  print(os.getcwd())
  if len(model_paths) != 1:
    raise RuntimeError(
      'Expected exactly one model in the checkpoint work_dir'
      '({}), got [{}]'.format(
        os.path.join(FLAGS.checkpoint_dir, 'work_dir'), ', '.join(model_paths)))
  start_model_path = model_paths[0]

  # Copy the latest trained model into the models directory and use it on the
  # first round of selfplay.
  state.best_model_name = 'checkpoint'

  shutil.copy(start_model_path,
              os.path.join(fsdb.models_dir(), state.best_model_name + '.pb'))

  start_model_files = glob.glob(os.path.join(
    FLAGS.checkpoint_dir, 'work_dir/model.ckpt-9383_raw.ckpt*'))

  for file_name in start_model_files:
    shutil.copy(file_name,
        os.path.join(fsdb.models_dir(),
                     state.best_model_name +
                     os.path.basename(file_name)[len("model.ckpt-9383"):]))

  # Copy the training chunks.
  golden_chunks_dir = os.path.join(FLAGS.checkpoint_dir, "..", 'golden_chunks')
  for basename in os.listdir(golden_chunks_dir):
    path = os.path.join(golden_chunks_dir, basename)
    shutil.copy(path, fsdb.golden_chunk_dir())

  # Copy the training files.
  work_dir = os.path.join(FLAGS.checkpoint_dir, 'work_dir')
  for basename in os.listdir(work_dir):
    path = os.path.join(work_dir, basename)
    shutil.copy(path, fsdb.working_dir())
Example #23
def main(unused_argv):
  """Run the reinforcement learning loop."""

  mll.init_start()
  print('Wiping dir %s' % FLAGS.base_dir, flush=True)
  shutil.rmtree(FLAGS.base_dir, ignore_errors=True)
  dirs = [fsdb.models_dir(), fsdb.selfplay_dir(), fsdb.holdout_dir(),
          fsdb.eval_dir(), fsdb.golden_chunk_dir(), fsdb.working_dir(),
          fsdb.mpi_log_dir()]
  for d in dirs:
    ensure_dir_exists(d)

  # Copy the flag files so there's no chance of them getting accidentally
  # overwritten while the RL loop is running.
  flags_dir = os.path.join(FLAGS.base_dir, 'flags')
  shutil.copytree(FLAGS.flags_dir, flags_dir)
  FLAGS.flags_dir = flags_dir

  # Copy the target model to the models directory so we can find it easily.
  shutil.copy(FLAGS.target_path, os.path.join(fsdb.models_dir(), 'target.pb'))

  logging.getLogger().addHandler(
      logging.FileHandler(os.path.join(FLAGS.base_dir, 'rl_loop.log')))
  formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                '%Y-%m-%d %H:%M:%S')
  for handler in logging.getLogger().handlers:
    handler.setFormatter(formatter)

  logging.info('Selfplay nodes = {}'.format(FLAGS.selfplay_node))
  logging.info('Train nodes = {}'.format(FLAGS.train_node))
  logging.info('Eval nodes = {}'.format(FLAGS.eval_node))

  with logged_timer('Total time'):
    try:
      mll.init_stop()
      mll.run_start()
      rl_loop()
    finally:
      asyncio.get_event_loop().close()
Example #24
def selfplay(state):
  play_output_name = state.play_output_name
  play_output_dir = os.path.join(fsdb.selfplay_dir(), play_output_name)
  play_holdout_dir = os.path.join(fsdb.holdout_dir(), play_output_name)

  result = checked_run([
      'external/minigo/cc/main', '--mode=selfplay', '--parallel_games=2048',
      '--num_readouts=100', '--model={}'.format(
          state.play_model_path), '--output_dir={}'.format(play_output_dir),
      '--holdout_dir={}'.format(play_holdout_dir)
  ] + cc_flags(state), 'selfplay')
  logging.info(get_lines(result, make_slice[-2:]))

  # Write examples to a single record.
  logging.info('Extracting examples')
  random.seed(state.seed)
  tensorflow.set_random_seed(state.seed)
  numpy.random.seed(state.seed)
  buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)
  buffer.parallel_fill(
      tensorflow.gfile.Glob(os.path.join(play_output_dir, '*.zz')))
  buffer.flush(
      os.path.join(fsdb.golden_chunk_dir(), play_output_name + '.tfrecord.zz'))
Example #25
def spawn_train_workers(state):
    # TODO: this should eventually be removed.
    tf_records = get_golden_chunk_records(state)
    comm_world = MPI.COMM_WORLD

    # spawn one worker process
    print("Spawning worker processes on {}".format(socket.gethostname()))
    mpi_info = MPI.Info.Create()
    num_workers = FLAGS.num_gpus_train
    # Subtract one core per worker, since oversubscription might not work.
    cores_per_worker = (FLAGS.cores_per_socket *
                        FLAGS.num_socket) // num_workers - 1

    mpi_info.Set("host", socket.gethostname())
    mpi_info.Set(
        "map_by", "ppr:{}:socket,PE={}".format(num_workers // FLAGS.num_socket,
                                               cores_per_worker))
    icomm = MPI.COMM_SELF.Spawn(
        "python3",
        maxprocs=num_workers,
        args=[
            'train.py', *tf_records,
            '--flagfile={}'.format(os.path.join(FLAGS.flags_dir,
                                                'train.flags')),
            '--work_dir={}'.format(fsdb.working_dir()),
            '--export_path={}'.format(
                os.path.join(fsdb.models_dir(),
                             'new_model')), '--training_seed=13337',
            '--num_selfplays={}'.format(comm_world.size - 1),
            '--window_iters={}'.format(FLAGS.window_size),
            '--total_iters={}'.format(FLAGS.iterations),
            '--golden_chunk_pattern={}'.format(
                os.path.join(fsdb.golden_chunk_dir(), '*.zz*')),
            '--freeze=true', '--use_multinode=true', '--use_mgpu_horovod=true'
        ],
        info=mpi_info)
    return icomm
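# Worked example of the spawn geometry above (illustrative numbers only, not
# taken from any flag file): with FLAGS.cores_per_socket = 28,
# FLAGS.num_socket = 2 and FLAGS.num_gpus_train = 8:
#   cores_per_worker = (28 * 2) // 8 - 1 = 6
#   map_by           = 'ppr:4:socket,PE=6'
# i.e. four workers are pinned per socket, each bound to 6 cores, leaving
# roughly one core per worker unbound to avoid oversubscription.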
Example #26
def main(unused_argv):

    for i in range(NUM_LOOP):
        if i == 0:
            src_model_name = shipname.generate(0)
            fsdb.switch_base(os.path.join(base_dir, src_model_name))
            src_model_path = os.path.join(fsdb.models_dir(), src_model_name)
            bootstrap_model_path = os.path.join(fsdb.models_dir(),
                                                src_model_name)
            mask_flags.checked_run([
                'python3', 'bootstrap.py',
                '--export_path={}'.format(bootstrap_model_path),
                '--work_dir={}'.format(fsdb.working_dir()),
                '--flagfile=rl_loop/local_flags'
            ])
            dst_model_name = shipname.generate(1)
            fsdb.switch_base(os.path.join(base_dir, dst_model_name))
        else:
            src_model_name = dst_model_name
            src_model_path = os.path.join(fsdb.models_dir(), src_model_name)
            dst_model_name = shipname.generate(i + 1)
            fsdb.switch_base(os.path.join(base_dir, dst_model_name))

        utils.ensure_dir_exists(fsdb.models_dir())
        utils.ensure_dir_exists(fsdb.selfplay_dir())
        utils.ensure_dir_exists(fsdb.holdout_dir())
        utils.ensure_dir_exists(fsdb.sgf_dir())
        utils.ensure_dir_exists(fsdb.eval_dir())
        utils.ensure_dir_exists(fsdb.golden_chunk_dir())
        utils.ensure_dir_exists(fsdb.working_dir())

        #bootstrap_name = shipname.generate(0)
        #bootstrap_model_path = os.path.join(fsdb.models_dir(), bootstrap_name)

        print(src_model_name)
        print(src_model_path)
        selfplay_cmd = [
            'python3', 'selfplay.py', '--load_file={}'.format(src_model_path),
            '--selfplay_dir={}'.format(
                os.path.join(fsdb.selfplay_dir(),
                             dst_model_name)), '--holdout_dir={}'.format(
                                 os.path.join(fsdb.holdout_dir(),
                                              dst_model_name)),
            '--sgf_dir={}'.format(fsdb.sgf_dir()), '--holdout_pct=0',
            '--flagfile=rl_loop/local_flags'
        ]

        # Selfplay twice
        mask_flags.checked_run(selfplay_cmd)
        mask_flags.checked_run(selfplay_cmd)

        # ...and once more to generate a held-out game for validation. This
        # exploits the flag-parsing behavior: if a flag is passed twice, the
        # second value wins.
        mask_flags.checked_run(selfplay_cmd + ['--holdout_pct=100'])

        # Double check that at least one sgf has been generated.
        assert os.listdir(os.path.join(fsdb.sgf_dir(), 'full'))

        print("Making shuffled golden chunk from selfplay data...")
        # TODO(amj): refactor example_buffer so it can be called the same way
        # as everything else.
        eb.make_chunk_for(output_dir=fsdb.golden_chunk_dir(),
                          local_dir=fsdb.working_dir(),
                          game_dir=fsdb.selfplay_dir(),
                          model_num=1,
                          positions=64,
                          threads=8,
                          sampling_frac=1)

        tf_records = sorted(
            gfile.Glob(os.path.join(fsdb.golden_chunk_dir(), '*.tfrecord.zz')))

        #trained_model_name = shipname.generate(1)
        trained_model_name = dst_model_name
        trained_model_path = os.path.join(fsdb.models_dir(),
                                          trained_model_name)

        # Train on shuffled game data
        mask_flags.checked_run([
            'python3', 'train.py', *tf_records,
            '--work_dir={}'.format(fsdb.working_dir()),
            '--export_path={}'.format(trained_model_path),
            '--flagfile=rl_loop/local_flags'
        ])

    print("Finished!")
Example #27
def get_golden_chunk_records(num_records):
    # Sort the list of chunks so that the most recent ones are first and return
    # the requested prefix.
    pattern = os.path.join(fsdb.golden_chunk_dir(), '*.zz')
    return sorted(tf.gfile.Glob(pattern), reverse=True)[:num_records]
Example #28
async def selfplay(state, flagfile='selfplay'):
    """Run selfplay and write a training chunk to the fsdb golden_chunk_dir.

  Args:
    state: the RL loop State instance.
    flagfile: the name of the flagfile to use for selfplay, either 'selfplay'
        (the default) or 'bootstrap'.
  """
    output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
    holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)
    output_dir = '/tmp/minigo' + output_dir

    multi_instance, num_instance, flag_list = extract_multi_instance([
        '--flagfile={}_mi.flags'.format(os.path.join(FLAGS.flags_dir,
                                                     flagfile))
    ])
    sp_cmd = [
        'bazel-bin/cc/selfplay',
        '--flagfile={}.flags'.format(os.path.join(FLAGS.flags_dir, flagfile)),
        '--model={}'.format(state.best_model_path),
        '--output_dir={}'.format(output_dir),
        '--holdout_dir={}'.format(holdout_dir)
    ]
    if not multi_instance:
        lines = await run(*sp_cmd, '--seed={}'.format(state.seed))
    else:
        if not FLAGS.selfplay_node:
            # run selfplay locally
            lines = await run('python3', 'ml_perf/execute.py',
                              '--num_instance={}'.format(num_instance), '--',
                              *sp_cmd, '--seed={}'.format(state.seed))
        else:
            with logged_timer('selfplay mn'):
                # run one selfplay instance per host
                lines = await run_distributed(
                    ['LD_LIBRARY_PATH=$LD_LIBRARY_PATH:cc/tensorflow'],
                    num_instance, FLAGS.selfplay_node, None, None, state.seed,
                    *sp_cmd)

    #result = '\n'.join(lines)
    #with logged_timer('parse win stats'):
    #  stats = parse_win_stats_table(result, 1)[0]
    #  num_games = stats.total_wins
    #  black_total = stats.black_wins.total
    #  white_total = stats.white_wins.total

    #  logging.info('Black won %0.3f, white won %0.3f',
    #               black_total / num_games,
    #               white_total / num_games)
    #  bias = abs(white_total - black_total)/num_games
    #  logging.info('Black total %d, white total %d, total games %d, bias %0.3f.',
    #               black_total, white_total, num_games, bias)

    with logged_timer('generate golden chunk'):
        # Write examples to a single record.
        hosts = FLAGS.selfplay_node
        if not hosts:
            hosts = ['localhost']
        num_instance = len(hosts)
        numa_per_node = FLAGS.physical_cores // FLAGS.numa_cores
        train_instance_num = FLAGS.train_instance_per_numa * len(
            FLAGS.train_node) * numa_per_node
        selfplay_node_num = len(hosts)
        selfplay_num = selfplay_node_num
        out_files_number = int(train_instance_num /
                               gcd(train_instance_num, selfplay_num))

        cmd = [
            'python3', 'ml_perf/divide_golden_chunk.py',
            '--read_path={}'.format(output_dir + "/*"),
            '--write_path={}'.format(
                os.path.join(fsdb.golden_chunk_dir(),
                             state.output_model_name + '.tfrecord.zz')),
            '--out_files_number={}'.format(out_files_number),
            '--physical_cores={}'.format(FLAGS.physical_cores),
            '--base_dir={}'.format(FLAGS.base_dir)
        ]
        lines = await run_distributed([], 1, hosts, None, None, state.seed,
                                      *cmd)
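# Worked example of the chunk-splitting arithmetic above (illustrative numbers
# only): with FLAGS.train_instance_per_numa = 2, two train nodes, two NUMA
# domains per node (physical_cores // numa_cores = 2) and four selfplay hosts:
#   train_instance_num = 2 * 2 * 2 = 8
#   selfplay_num       = 4
#   out_files_number   = 8 / gcd(8, 4) = 2
# so the four hosts write 4 * 2 = 8 shards in total, one per training instance.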
Example #29
async def train(state, window_size):
    """Run training and write a new model to the fsdb models_dir.

  Args:
    state: the RL loop State instance.
    window_size: the training window size passed through to train.py.
  """
    train_node = FLAGS.train_node
    num_node = len(train_node)
    if num_node == 0:
        dist_train = False
    else:
        dist_train = True

    if dist_train:
        intra_threads = FLAGS.numa_cores // FLAGS.train_instance_per_numa - 1
        numa_per_node = FLAGS.physical_cores // FLAGS.numa_cores
        instance_per_node = numa_per_node * FLAGS.train_instance_per_numa

        mpi_async_progress = ''
        for i in range(numa_per_node):
            for j in range(FLAGS.train_instance_per_numa):
                if (not i == 0) or (not j == 0):
                    mpi_async_progress += ','
                mpi_async_progress += '{}'.format(i * FLAGS.numa_cores + j)
    else:
        intra_threads = FLAGS.physical_cores

    model_path = os.path.join(fsdb.models_dir(), state.train_model_name)
    cmd = [
        'python3', 'train.py',
        '--flagfile={}'.format(os.path.join(FLAGS.flags_dir, 'train.flags')),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--export_path={}'.format(model_path),
        '--window_size={}'.format(window_size),
        '--data_path={}'.format(fsdb.golden_chunk_dir()),
        '--training_seed={}'.format(state.seed), '--freeze=True',
        '--num_inter_threads=1', '--num_intra_threads={}'.format(intra_threads)
    ]

    if dist_train:
        genvs = [
            'HOROVOD_FUSION_THRESHOLD=134217728', 'KMP_BLOCKTIME=0',
            'KMP_HW_SUBSET=1T', 'OMP_BIND_PROC=true',
            'I_MPI_ASYNC_PROGRESS_PIN=' + mpi_async_progress,
            'OMP_NUM_THREADS={}'.format(intra_threads)
        ]
        hosts = []
        proclists = []
        numa_nodes = []
        for node in range(num_node):
            # add all instance to the list
            for numa in range(numa_per_node):
                for instance in range(FLAGS.train_instance_per_numa):
                    hosts += [train_node[node]]
                    proclist = numa * FLAGS.numa_cores + FLAGS.train_instance_per_numa + instance * intra_threads
                    proclists += ['{}'.format(proclist)]
                    numa_nodes += ['{}'.format(numa)]

        lines = await run_distributed(genvs, 1, hosts, proclists, numa_nodes,
                                      None, *cmd, '--dist_train=True')
    else:
        lines = await run(*cmd)
    print('\n'.join(lines), file=sys.stderr)
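# Worked example of the pinning arithmetic above (illustrative numbers only):
# with FLAGS.numa_cores = 28, FLAGS.train_instance_per_numa = 2 and
# FLAGS.physical_cores = 56 (so numa_per_node = 2):
#   intra_threads            = 28 // 2 - 1 = 13
#   I_MPI_ASYNC_PROGRESS_PIN = '0,1,28,29'  (leading cores of each NUMA domain)
#   proclists per node       = ['2', '15', '30', '43']
# i.e. async-progress threads take the first cores of each NUMA domain while
# the training instances are pinned after them, 13 compute threads apiece.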
Example #30
def main(unused_argv):
    """Run the reinforcement learning loop."""
    utils.ensure_dir_exists(fsdb.models_dir())
    utils.ensure_dir_exists(fsdb.selfplay_dir())
    utils.ensure_dir_exists(fsdb.holdout_dir())
    utils.ensure_dir_exists(fsdb.sgf_dir())
    utils.ensure_dir_exists(fsdb.eval_dir())
    utils.ensure_dir_exists(fsdb.golden_chunk_dir())
    utils.ensure_dir_exists(fsdb.working_dir())

    bootstrap_name = shipname.generate(0)
    bootstrap_model_path = os.path.join(fsdb.models_dir(), bootstrap_name)
    mask_flags.checked_run([
        'python3', 'bootstrap.py',
        '--export_path={}'.format(bootstrap_model_path),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--flagfile=rl_loop/local_flags'
    ])

    selfplay_cmd = [
        'python3', 'selfplay.py',
        '--load_file={}'.format(bootstrap_model_path),
        '--selfplay_dir={}'.format(
            os.path.join(fsdb.selfplay_dir(),
                         bootstrap_name)), '--holdout_dir={}'.format(
                             os.path.join(fsdb.holdout_dir(), bootstrap_name)),
        '--sgf_dir={}'.format(fsdb.sgf_dir()), '--holdout_pct=0',
        '--flagfile=rl_loop/local_flags'
    ]

    # Selfplay twice
    mask_flags.checked_run(selfplay_cmd)
    mask_flags.checked_run(selfplay_cmd)
    # ...and once more to generate a held-out game for validation. This
    # exploits the flag-parsing behavior: if a flag is passed twice, the
    # second value wins.
    mask_flags.checked_run(selfplay_cmd + ['--holdout_pct=100'])

    # Double check that at least one sgf has been generated.
    assert os.listdir(os.path.join(fsdb.sgf_dir(), 'full'))

    print("Making shuffled golden chunk from selfplay data...")
    # TODO(amj): refactor example_buffer so it can be called the same way
    # as everything else.
    eb.make_chunk_for(output_dir=fsdb.golden_chunk_dir(),
                      local_dir=fsdb.working_dir(),
                      game_dir=fsdb.selfplay_dir(),
                      model_num=1,
                      positions=64,
                      threads=8,
                      sampling_frac=1)

    tf_records = sorted(
        gfile.Glob(os.path.join(fsdb.golden_chunk_dir(), '*.tfrecord.zz')))

    trained_model_name = shipname.generate(1)
    trained_model_path = os.path.join(fsdb.models_dir(), trained_model_name)

    # Train on shuffled game data
    mask_flags.checked_run([
        'python3', 'train.py', *tf_records,
        '--work_dir={}'.format(fsdb.working_dir()),
        '--export_path={}'.format(trained_model_path),
        '--flagfile=rl_loop/local_flags'
    ])

    # Validate the trained model on held out game
    mask_flags.checked_run([
        'python3', 'validate.py',
        os.path.join(fsdb.holdout_dir(), bootstrap_name),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--flagfile=rl_loop/local_flags'
    ])

    # Verify that trained model works for selfplay
    # exploits flags behavior where if you pass flag twice, second one wins.
    mask_flags.checked_run(selfplay_cmd +
                           ['--load_file={}'.format(trained_model_path)])

    mask_flags.checked_run([
        'python3', 'evaluate.py', bootstrap_model_path, trained_model_path,
        '--games=1', '--eval_sgf_dir={}'.format(fsdb.eval_dir()),
        '--flagfile=rl_loop/local_flags'
    ])
    print("Completed integration test!")