Example no. 1
def rl_loop():
  state = State()
  bootstrap(state)
  selfplay(state)

  while state.iter_num < 100:
    holdout_dir = os.path.join(fsdb.holdout_dir(), '%06d-*' % state.iter_num)
    tf_records = os.path.join(fsdb.golden_chunk_dir(), '*.zz')
    tf_records = sorted(tensorflow.gfile.Glob(tf_records), reverse=True)[:5]

    state.iter_num += 1

    # Train on shuffled game data of the last 5 selfplay rounds.
    train(state, tf_records)

    # These could run in parallel.
    validate(state, holdout_dir)
    model_win_rate = evaluate_model(state)
    target_win_rate = evaluate_target(state)

    # This could run in parallel to the rest.
    selfplay(state)

    if model_win_rate >= 0.55:
      # Promote the trained model to the play model.
      state.play_model_num = state.train_model_num
      state.play_model_name = state.train_model_name
      state.train_model_num += 1
    elif model_win_rate < 0.4:
      # Bury the selfplay games which produced a significantly worse model.
      logging.info('Burying %s.', tf_records[0])
      shutil.move(tf_records[0], tf_records[0] + '.bury')

    yield target_win_rate
Example no. 2
def main(unused_argv):
    """Run the reinforcement learning loop."""

    print('Wiping dir %s' % FLAGS.base_dir, flush=True)
    shutil.rmtree(FLAGS.base_dir, ignore_errors=True)

    utils.ensure_dir_exists(fsdb.models_dir())
    utils.ensure_dir_exists(fsdb.selfplay_dir())
    utils.ensure_dir_exists(fsdb.holdout_dir())
    utils.ensure_dir_exists(fsdb.eval_dir())
    utils.ensure_dir_exists(fsdb.golden_chunk_dir())
    utils.ensure_dir_exists(fsdb.working_dir())

    # Copy the flag files so there's no chance of them getting accidentally
    # overwritten while the RL loop is running.
    flags_dir = os.path.join(FLAGS.base_dir, 'flags')
    shutil.copytree(FLAGS.flags_dir, flags_dir)
    FLAGS.flags_dir = flags_dir

    # Copy the target model to the models directory so we can find it easily.
    shutil.copy('ml_perf/target.pb', fsdb.models_dir())

    logging.getLogger().addHandler(
        logging.FileHandler(os.path.join(FLAGS.base_dir, 'rl_loop.log')))
    formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                  '%Y-%m-%d %H:%M:%S')
    for handler in logging.getLogger().handlers:
        handler.setFormatter(formatter)

    with utils.logged_timer('Total time'):
        try:
            rl_loop()
        finally:
            asyncio.get_event_loop().close()
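
The utils.ensure_dir_exists() helper called above is not shown on this page. A minimal sketch, assuming it simply creates the directory tree if it is missing (the real helper may use tf.gfile.MakeDirs instead), could look like this:

import os

def ensure_dir_exists(path):
    # Create `path`, including any missing parent directories; do nothing if it
    # already exists. This is an assumed implementation, not the original one.
    os.makedirs(path, exist_ok=True)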
Example no. 3
def main(unused_argv):
  """Run the reinforcement learning loop."""

  print('Wiping dir %s' % FLAGS.base_dir, flush=True)
  shutil.rmtree(FLAGS.base_dir, ignore_errors=True)
  dirs = [fsdb.models_dir(), fsdb.selfplay_dir(), fsdb.holdout_dir(),
          fsdb.eval_dir(), fsdb.golden_chunk_dir(), fsdb.working_dir()]
  for d in dirs:
    ensure_dir_exists(d)

  # Copy the flag files so there's no chance of them getting accidentally
  # overwritten while the RL loop is running.
  flags_dir = os.path.join(FLAGS.base_dir, 'flags')
  shutil.copytree(FLAGS.flags_dir, flags_dir)
  FLAGS.flags_dir = flags_dir

  # Copy the target model to the models directory so we can find it easily.
  for file_name in [
        "target.pb", "target_raw.ckpt.data-00000-of-00001",
        "target_raw.ckpt.index", "target_raw.ckpt.meta"]:
    shutil.copy(FLAGS.target_path[:-len("target.pb")] + file_name,
                os.path.join(fsdb.models_dir(), file_name))

  logging.getLogger().addHandler(
      logging.FileHandler(os.path.join(FLAGS.base_dir, 'rl_loop.log')))
  formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                '%Y-%m-%d %H:%M:%S')
  for handler in logging.getLogger().handlers:
    handler.setFormatter(formatter)

  with logged_timer('Total time'):
    try:
      rl_loop()
    finally:
      asyncio.get_event_loop().close()
Example no. 4
def selfplay(state):
    output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
    holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)
    model_path = os.path.join(fsdb.models_dir(), state.best_model_name)

    result = checked_run([
        'bazel-bin/cc/selfplay', '--parallel_games=2048', '--num_readouts=100',
        '--model={}.pb'.format(model_path),
        '--output_dir={}'.format(output_dir),
        '--holdout_dir={}'.format(holdout_dir)
    ] + cc_flags(state), 'selfplay')
    logging.info(get_lines(result, make_slice[-2:]))

    # Write examples to a single record.
    pattern = os.path.join(output_dir, '*', '*.zz')
    random.seed(state.seed)
    tf.set_random_seed(state.seed)
    np.random.seed(state.seed)
    # TODO(tommadams): This method of generating one golden chunk per generation
    # is sub-optimal because each chunk gets reused multiple times for training,
    # introducing bias. Instead, a fresh dataset should be uniformly sampled out
    # of *all* games in the training window before the start of each training run
    # (a hedged sketch of this idea follows this example).
    buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)

    # TODO(tommadams): parallel_fill is currently non-deterministic. Make it not
    # so.
    logging.info('Writing golden chunk from "{}"'.format(pattern))
    buffer.parallel_fill(tf.gfile.Glob(pattern))
    buffer.flush(
        os.path.join(fsdb.golden_chunk_dir(),
                     state.output_model_name + '.tfrecord.zz'))
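
The TODO in the example above argues that a fresh dataset should be uniformly sampled from *all* games in the training window before each training run. A hedged sketch of that idea, reusing the ExampleBuffer API already shown in this example (the helper name resample_window, the import path and the sampling fraction are assumptions):

import os
import random

import numpy as np
import tensorflow as tf
from rl_loop import example_buffer  # assumed import path for ExampleBuffer

def resample_window(window_dirs, out_path, seed, sampling_frac=0.02):
    # Deterministically sample a fraction of positions from every game in the
    # training window and write them to a fresh golden chunk at `out_path`.
    random.seed(seed)
    tf.set_random_seed(seed)
    np.random.seed(seed)
    games = []
    for d in window_dirs:
        games.extend(tf.gfile.Glob(os.path.join(d, '*', '*.zz')))
    buffer = example_buffer.ExampleBuffer(sampling_frac=sampling_frac)
    buffer.parallel_fill(games)
    buffer.flush(out_path)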
Example no. 5
def main(unused_argv):
    """Run the reinforcement learning loop."""

    print('Wiping dir %s' % FLAGS.base_dir, flush=True)
    shutil.rmtree(FLAGS.base_dir, ignore_errors=True)

    utils.ensure_dir_exists(fsdb.models_dir())
    utils.ensure_dir_exists(fsdb.selfplay_dir())
    utils.ensure_dir_exists(fsdb.holdout_dir())
    utils.ensure_dir_exists(fsdb.eval_dir())
    utils.ensure_dir_exists(fsdb.golden_chunk_dir())
    utils.ensure_dir_exists(fsdb.working_dir())

    # Copy the target model to the models directory so we can find it easily.
    shutil.copy('ml_perf/target.pb', fsdb.models_dir())

    logging.getLogger().addHandler(
        logging.FileHandler(os.path.join(FLAGS.base_dir, 'reinforcement.log')))
    formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                  '%Y-%m-%d %H:%M:%S')
    for handler in logging.getLogger().handlers:
        handler.setFormatter(formatter)

    with utils.logged_timer('Total time'):
        for target_win_rate in rl_loop():
            if target_win_rate > 0.5:
                return logging.info('Passed exit criteria.')
        logging.info('Failed to converge.')
Example no. 6
async def start_selfplay():
    output_dir = os.path.join(fsdb.selfplay_dir(), "$MODEL")
    holdout_dir = os.path.join(fsdb.holdout_dir(), "$MODEL")
    model_pattern = os.path.join(fsdb.models_dir(), '%d.pb')

    logs = []
    processes = []
    loop = asyncio.get_event_loop()
    for i, device in enumerate(FLAGS.selfplay_devices):
        cmd = [
            'bazel-bin/cc/concurrent_selfplay',
            '--flagfile={}'.format(os.path.join(FLAGS.flags_dir,
                                                'selfplay.flags')),
            '--run_forever=1',
            '--device={}'.format(device),
            '--model={}'.format(model_pattern),
            '--output_dir={}/{}'.format(output_dir, i),
            '--holdout_dir={}/{}'.format(holdout_dir, i)]

        cmd_str = await expand_cmd_str(cmd)
        f = open(os.path.join(FLAGS.base_dir, 'selfplay_%d.log' % i), 'w')
        f.write(cmd_str + '\n\n')
        f.flush()
        logging.info('Running: %s', cmd_str)

        processes.append(await asyncio.create_subprocess_exec(
            *cmd, stdout=f, stderr=asyncio.subprocess.STDOUT))
        logs.append(f)

    return (processes, logs)
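
start_selfplay() above launches the selfplay workers with --run_forever=1 and returns their process and log-file handles without waiting for them. A minimal companion sketch for shutting those workers down (not part of the original examples) might be:

async def stop_selfplay(processes, logs):
    # Terminate every selfplay subprocess started by start_selfplay(), wait for
    # it to exit, then close the per-process log files.
    for p in processes:
        p.terminate()
        await p.wait()
    for f in logs:
        f.close()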
Example no. 7
def run_tpu(no_resign=False):
    os.environ[
        'GRPC_DEFAULT_SSL_ROOTS_FILE_PATH'] = '/etc/ssl/certs/ca-certificates.crt'
    flagset = [
        'bazel-bin/cc/main', '--mode=selfplay', '--engine=tpu',
        '--model={}'.format(
            os.path.join(fsdb.working_dir(), 'model.ckpt-%d.pb')),
        '--output_dir={}'.format(fsdb.selfplay_dir()),
        '--holdout_dir={}'.format(fsdb.holdout_dir()),
        '--sgf_dir={}'.format(fsdb.sgf_dir()), '--run_forever=true',
        '--output_bigtable={}'.format(FLAGS.output_bigtable)
    ]

    if 'KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS' in os.environ:
        flagset.append('--tpu_name={}'.format(
            os.environ['KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS']))

    if no_resign:
        flagset.extend(['--flagfile=rl_loop/distributed_flags_nr'])
    else:
        flagset.extend([
            '--flags_path={}'.format(fsdb.flags_path()),
            '--flagfile=rl_loop/distributed_flags'
        ])

    mask_flags.checked_run(flagset)
Example no. 8
def validate_holdout_selfplay():
    """Validate on held-out selfplay data."""
    holdout_dirs = (
        os.path.join(fsdb.holdout_dir(), d)
        for d in reversed(gfile.ListDirectory(fsdb.holdout_dir()))
        if gfile.IsDirectory(os.path.join(fsdb.holdout_dir(), d))
        for f in gfile.ListDirectory(os.path.join(fsdb.holdout_dir(), d)))

    # This is a roundabout way of computing how many hourly directories we need
    # to read in order to encompass 20,000 holdout games (an explicit version of
    # this trick is sketched after this example).
    holdout_dirs = set(itertools.islice(holdout_dirs, 20000))
    cmd = ['python3', 'validate.py'] + list(holdout_dirs) + [
        '--use_tpu', '--tpu_name={}'.format(TPU_NAME),
        '--flagfile=rl_loop/distributed_flags', '--expand_validation_dirs'
    ]
    mask_flags.run(cmd)
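
The generator-plus-islice trick above is compact but hard to read. The sketch below spells out the same idea: walk the hourly holdout directories from newest to oldest, counting the game files in each, and stop once roughly 20,000 games are covered. It assumes the same fsdb module the example already uses; the helper name is hypothetical.

import os

from tensorflow import gfile  # TF 1.x gfile, as used elsewhere in these examples

def recent_holdout_dirs(limit=20000):
    # Collect holdout directories, newest first, until `limit` games are covered.
    dirs, num_games = [], 0
    for d in reversed(gfile.ListDirectory(fsdb.holdout_dir())):
        path = os.path.join(fsdb.holdout_dir(), d)
        if not gfile.IsDirectory(path):
            continue
        dirs.append(path)
        num_games += len(gfile.ListDirectory(path))
        if num_games >= limit:
            break
    return dirs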
Example no. 9
def run_tpu():
    mask_flags.checked_run([
        'bazel-bin/cc/main', '--mode=selfplay', '--engine=tpu',
        '--checkpoint_dir={}'.format(fsdb.working_dir()),
        '--output_dir={}'.format(fsdb.selfplay_dir()),
        '--holdout_dir={}'.format(fsdb.holdout_dir()), '--sgf_dir={}'.format(
            fsdb.sgf_dir()), '--flags_path={}'.format(fsdb.flags_path()),
        '--run_forever=true', '--flagfile=rl_loop/distributed_flags'
    ])
Example no. 10
def rl_loop():
    state = State()

    # Play the first round of selfplay games with a fake model that returns
    # random noise. We do this instead of playing multiple games using a single
    # model bootstrapped with random noise to avoid any initial bias.
    # TODO(tommadams): disable holdout games for first round of selfplay.
    selfplay(state)
    state.engine = FLAGS.engine

    # Train a real model from the random selfplay games.
    tf_records = get_golden_chunk_records(1)
    state.iter_num += 1
    train(state, tf_records)

    # Select the newly trained model as the best.
    state.best_model_name = state.train_model_name
    state.gen_num += 1

    # Run selfplay using the new model.
    selfplay(state)

    # Now start the full training loop.
    while state.iter_num <= 100:
        # Build holdout glob before incrementing the iteration number because we
        # want to run validation on the previous generation.
        holdout_glob = os.path.join(fsdb.holdout_dir(),
                                    '%06d-*' % state.iter_num, '*')

        # Train on shuffled game data of the last 5 selfplay rounds, ignoring the
        # random bootstrapping round.
        # TODO(tommadams): potential improvements:
        #   - "slow window": increment number of models in window by 1 every 2
        #     generations.
        #   - uniformly resample the window each iteration (see TODO in selfplay
        #     for more info).
        tf_records = get_golden_chunk_records(min(5, state.iter_num))
        state.iter_num += 1
        train(state, tf_records)

        # These could all run in parallel.
        validate(state, holdout_glob)
        model_win_rate = evaluate(state, state.best_model_name)
        target_win_rate = evaluate(state, 'target')
        selfplay(state)

        # TODO(tommadams): 0.6 is required for 95% confidence at 100 eval games.
        # TODO(tommadams): if a model doesn't get promoted after N iterations,
        # consider deleting the most recent N training checkpoints because training
        # might have gotten stuck in a local minimum.
        if model_win_rate >= 0.55:
            # Promote the trained model to the best model and increment the generation
            # number.
            state.best_model_name = state.train_model_name
            state.gen_num += 1

        yield target_win_rate
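
get_golden_chunk_records(), called here and in several later examples, is not listed on this page. A minimal sketch consistent with the glob-and-sort pattern of Example no. 1, where the window argument and the newest-first ordering are assumptions (in Example no. 12 the helper is called without arguments, so the real implementation presumably derives the window from FLAGS):

import os
import tensorflow as tf  # assumes TF 1.x, where tf.gfile.Glob is available

def get_golden_chunk_records(window):
    # Return the `window` most recent golden-chunk files, newest first.
    # fsdb is the project's file-system helper module, as in the examples above.
    pattern = os.path.join(fsdb.golden_chunk_dir(), '*.zz')
    return sorted(tf.gfile.Glob(pattern), reverse=True)[:window]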
Example no. 11
async def selfplay(state):
    """Run selfplay and write a training chunk to the fsdb golden_chunk_dir.

    Args:
        state: the RL loop State instance.
    """

    output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
    holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)

    commands = []
    num_selfplay_processes = len(FLAGS.selfplay_devices)
    if num_selfplay_processes == 1:
        commands.append([
            'bazel-bin/cc/selfplay', '--flagfile={}'.format(
                os.path.join(FLAGS.flags_dir, 'selfplay.flags')),
            '--num_games={}'.format(FLAGS.selfplay_num_games),
            '--parallel_games={}'.format(FLAGS.selfplay_num_games_per_thread),
            '--model={}:0,{}'.format(FLAGS.engine, state.best_model_path),
            '--output_dir={}/{}'.format(output_dir, 0),
            '--holdout_dir={}/{}'.format(holdout_dir, 0)
        ])
    else:
        for i, device in enumerate(FLAGS.selfplay_devices):
            a = ((i - 1) *
                 FLAGS.selfplay_num_games) // (num_selfplay_processes - 1)
            b = (i * FLAGS.selfplay_num_games) // (num_selfplay_processes - 1)
            num_games = b - a
            parallel_games = (
                (num_games + FLAGS.selfplay_num_games_per_thread - 1) //
                FLAGS.selfplay_num_games_per_thread)

            commands.append([
                'bazel-bin/cc/selfplay', '--flagfile={}'.format(
                    os.path.join(FLAGS.flags_dir, 'selfplay.flags')),
                '--num_games={}'.format(num_games),
                '--parallel_games={}'.format(parallel_games),
                '--model={}:{},{}'.format(FLAGS.engine, device,
                                          state.best_model_path),
                '--output_dir={}/{}'.format(output_dir, i),
                '--holdout_dir={}/{}'.format(holdout_dir, i)
            ])

    all_lines = await run_commands(commands)

    black_wins_total = white_wins_total = num_games = 0
    for lines in all_lines:
        result = '\n'.join(lines[-6:])
        logging.info(result)
        stats = parse_win_stats_table(result, 1)[0]
        num_games += stats.total_wins
        black_wins_total += stats.black_wins.total
        white_wins_total += stats.white_wins.total

    logging.info('Black won %0.3f, white won %0.3f',
                 black_wins_total / num_games, white_wins_total / num_games)
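
parse_win_stats_table() is another helper that does not appear on this page. Judging only from the attribute accesses in this and later examples (stats.total_wins, stats.black_wins.total, stats.white_wins.total), its return value has roughly the shape mocked below; the type names and any other fields are guesses:

import collections

ColorWinStats = collections.namedtuple('ColorWinStats', ['total'])
WinStats = collections.namedtuple('WinStats',
                                  ['total_wins', 'black_wins', 'white_wins'])

# Purely illustrative instance matching how the examples consume the stats.
example_stats = WinStats(total_wins=100,
                         black_wins=ColorWinStats(total=52),
                         white_wins=ColorWinStats(total=48))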
Example no. 12
def rl_loop():
    """The main reinforcement learning (RL) loop."""

    state = State()

    if FLAGS.checkpoint_dir:
        # Start from a partially trained model.
        initialize_from_checkpoint(state)
    else:
        # Play the first round of selfplay games with a fake model that returns
        # random noise. We do this instead of playing multiple games using a single
        # model bootstrapped with random noise to avoid any initial bias.
        wait(selfplay(state, 'bootstrap'))

        # Train a real model from the random selfplay games.
        tf_records = get_golden_chunk_records()
        state.iter_num += 1
        wait(train(state, tf_records))

        # Select the newly trained model as the best.
        state.best_model_name = state.train_model_name
        state.gen_num += 1

        # Run selfplay using the new model.
        wait(selfplay(state))

    # Now start the full training loop.
    while state.iter_num <= FLAGS.iterations:
        # Build holdout glob before incrementing the iteration number because we
        # want to run validation on the previous generation.
        holdout_glob = os.path.join(fsdb.holdout_dir(),
                                    '%06d-*' % state.iter_num, '*')

        # Train on shuffled game data from recent selfplay rounds.
        tf_records = get_golden_chunk_records()
        state.iter_num += 1
        wait(train(state, tf_records))

        if FLAGS.parallel_post_train:
            # Run eval, validation & selfplay in parallel.
            model_win_rate, _, _ = wait([
                evaluate_trained_model(state),
                validate(state, holdout_glob),
                selfplay(state)
            ])
        else:
            # Run eval, validation & selfplay sequentially.
            model_win_rate = wait(evaluate_trained_model(state))
            wait(validate(state, holdout_glob))
            wait(selfplay(state))

        if model_win_rate >= FLAGS.gating_win_rate:
            # Promote the trained model to the best model and increment the generation
            # number.
            state.best_model_name = state.train_model_name
            state.gen_num += 1
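
The wait() helper used throughout this example is not shown on this page. A minimal sketch, assuming it just drives the asyncio event loop until the given awaitable (or list of awaitables) completes and returns the result (or list of results):

import asyncio

def wait(aws):
    # Run a single awaitable, or a list of awaitables, to completion on the
    # current event loop and return the result(s) in order.
    loop = asyncio.get_event_loop()
    if isinstance(aws, list):
        return loop.run_until_complete(asyncio.gather(*aws))
    return loop.run_until_complete(aws)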
Example no. 13
async def bootstrap_selfplay(state):
    output_dir = os.path.join(fsdb.selfplay_dir(), state.train_model_name)
    holdout_dir = os.path.join(fsdb.holdout_dir(), state.train_model_name)

    lines = await run(
        'bazel-bin/cc/concurrent_selfplay',
        '--flagfile={}'.format(os.path.join(FLAGS.flags_dir,
                                            'bootstrap.flags')),
        '--output_dir={}/0'.format(output_dir),
        '--holdout_dir={}/0'.format(holdout_dir))
    logging.info('\n'.join(lines[-6:]))
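
Many examples on this page await a run() coroutine that is not listed here. A hedged sketch, assuming it simply executes the command, captures stdout and stderr together, and returns the output as a list of lines, raising on a non-zero exit code:

import asyncio

async def run(*cmd):
    # Execute `cmd`, capture its combined output, and return it as text lines.
    proc = await asyncio.create_subprocess_exec(
        *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.STDOUT)
    stdout, _ = await proc.communicate()
    lines = stdout.decode().splitlines()
    if proc.returncode:
        raise RuntimeError('Command failed: {}'.format(' '.join(cmd)))
    return lines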
Example no. 14
async def validate(state):
    dirs = [x.path for x in os.scandir(fsdb.holdout_dir()) if x.is_dir()]
    src_dirs = sorted(dirs, reverse=True)[:FLAGS.window_size]

    await run(
        'python3', 'validate.py',
        '--gpu_device_list={}'.format(','.join(FLAGS.train_devices)),
        '--flagfile={}'.format(os.path.join(FLAGS.flags_dir,
                                            'validate.flags')),
        '--work_dir={}'.format(fsdb.working_dir()), '--expand_validation_dirs',
        *src_dirs)
Example no. 15
def rl_loop():
    state = State()

    # Play the first round of selfplay games with a fake model that returns
    # random noise. We do this instead of playing multiple games using a single
    # model bootstrapped with random noise to avoid any initial bias.
    selfplay(state, 'bootstrap')

    # Train a real model from the random selfplay games.
    tf_records = get_golden_chunk_records(1)
    state.iter_num += 1
    train(state, tf_records)

    # Select the newly trained model as the best.
    state.best_model_name = state.train_model_name
    state.gen_num += 1

    # Run selfplay using the new model.
    selfplay(state)

    # Now start the full training loop.
    while state.iter_num <= FLAGS.iterations:
        # Build holdout glob before incrementing the iteration number because we
        # want to run validation on the previous generation.
        holdout_glob = os.path.join(fsdb.holdout_dir(),
                                    '%06d-*' % state.iter_num, '*')

        # Calculate the window size from which we'll select training chunks.
        window = 1 + state.iter_num
        if window >= FLAGS.slow_window_size:
            window = (
                FLAGS.slow_window_size +
                (window - FLAGS.slow_window_size) // FLAGS.slow_window_speed)
        window = min(window, FLAGS.max_window_size)

        # Train on shuffled game data from recent selfplay rounds.
        tf_records = get_golden_chunk_records(window)
        state.iter_num += 1
        train(state, tf_records)

        # These could all run in parallel.
        validate(state, holdout_glob)
        model_win_rate = evaluate(state)
        selfplay(state)

        # TODO(tommadams): if a model doesn't get promoted after N iterations,
        # consider deleting the most recent N training checkpoints because training
        # might have gotten stuck in a local minimum.
        if model_win_rate >= FLAGS.gating_win_rate:
            # Promote the trained model to the best model and increment the generation
            # number.
            state.best_model_name = state.train_model_name
            state.gen_num += 1
Example no. 16
async def selfplay(state, flagfile='selfplay', seed_factor=0):
  """Run selfplay and write a training chunk to the fsdb golden_chunk_dir.

  Args:
    state: the RL loop State instance.
    flagfile: the name of the flagfile to use for selfplay, either 'selfplay'
        (the default) or 'bootstrap'.
    seed_factor: Factor to increase seed.
  """
  output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
  holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)

  lines = await run(
      'bazel-bin/cc/selfplay',
      '--flagfile={}.flags'.format(os.path.join(FLAGS.flags_dir, flagfile)),
      '--model={}'.format(get_ckpt_path(state.best_model_path)),
      '--output_dir={}'.format(output_dir),
      '--holdout_dir={}'.format(holdout_dir),
      '--seed={}'.format(state.seed+100*seed_factor))
  result = '\n'.join(lines[-6:])
  logging.info(result)
  result = '\n'.join(lines[-50:])
  try:
      stats = parse_win_stats_table(result, 1)[0]
      num_games = stats.total_wins
      logging.info('Black won %0.3f, white won %0.3f',
                   stats.black_wins.total / num_games,
                   stats.white_wins.total / num_games)
  except AssertionError:
    # Poplar logging might screw up lines extraction approach.
    logging.error("No results to parse: \n %s" % lines[-50:])

  if not MULTI_SP:
    # Write examples to a single record.
    pattern = os.path.join(output_dir, '*', '*.zz')
    random.seed(state.seed)
    tf.set_random_seed(state.seed)
    np.random.seed(state.seed)
    # TODO(tommadams): This method of generating one golden chunk per generation
    # is sub-optimal because each chunk gets reused multiple times for training,
    # introducing bias. Instead, a fresh dataset should be uniformly sampled out
    # of *all* games in the training window before the start of each training run.
    buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)

    # TODO(tommadams): parallel_fill is currently non-deterministic. Make it not
    # so.
    logging.info('Writing golden chunk from "{}"'.format(pattern))
    buffer.parallel_fill(tf.gfile.Glob(pattern))
    buffer.flush(os.path.join(fsdb.golden_chunk_dir(),
                              state.output_model_name + '.tfrecord.zz'))
Example no. 17
def main(unused_argv):
    """Run the reinforcement learning loop."""
    logging.getLogger('mlperf_compliance').propagate = False

    ##-->multi-node setup
    if FLAGS.use_multinode:
        mpi_comm = MPI.COMM_WORLD
        mpi_rank = mpi_comm.Get_rank()
        mpi_size = mpi_comm.Get_size()
        print('[MPI Init] MPI rank {}, mpi size is {} host is {}'.format(
            mpi_rank, mpi_size, socket.gethostname()))
    else:
        mpi_comm = None
        mpi_rank = 0
        mpi_size = 1

    print('Wiping dir %s' % FLAGS.base_dir, flush=True)
    shutil.rmtree(FLAGS.base_dir, ignore_errors=True)
    dirs = [
        fsdb.models_dir(),
        fsdb.selfplay_dir(),
        fsdb.holdout_dir(),
        fsdb.eval_dir(),
        fsdb.golden_chunk_dir(),
        fsdb.working_dir()
    ]

    ##--> Shared filesystem for data exchange (temporary solution, 5/6/2019).
    if FLAGS.use_multinode:
        ensure_dir_exists(FLAGS.shared_dir_exchange)
    for d in dirs:
        ensure_dir_exists(d)

    # Copy the flag files so there's no chance of them getting accidentally
    # overwritten while the RL loop is running.
    flags_dir = os.path.join(FLAGS.base_dir, 'flags')
    shutil.copytree(FLAGS.flags_dir, flags_dir)
    FLAGS.flags_dir = flags_dir

    # Copy the target model to the models directory so we can find it easily.
    shutil.copy(FLAGS.target_path, os.path.join(fsdb.models_dir(),
                                                'target.pb'))
    shutil.copy(FLAGS.target_path + '.og',
                os.path.join(fsdb.models_dir(), 'target.pb.og'))

    with logged_timer('Total time from mpi_rank={}'.format(mpi_rank)):
        try:
            rl_loop(mpi_comm, mpi_rank, mpi_size)
        finally:
            asyncio.get_event_loop().close()
Example no. 18
async def bootstrap_selfplay(state):
    output_dir = os.path.join(fsdb.selfplay_dir(), state.train_model_name)
    holdout_dir = os.path.join(fsdb.holdout_dir(), state.train_model_name)

    features = 'extra' if FLAGS.use_extra_features else 'agz'
    lines = await run(
        'bazel-bin/cc/concurrent_selfplay',
        '--flagfile={}'.format(os.path.join(FLAGS.flags_dir,
                                            'bootstrap.flags')),
        '--model={}:0.4:0.4'.format(features),
        '--num_games={}'.format(FLAGS.min_games_per_iteration),
        '--output_dir={}/0'.format(output_dir),
        '--holdout_dir={}/0'.format(holdout_dir))
    logging.info('\n'.join(lines[-6:]))
Example no. 19
async def bootstrap_selfplay(state):
    output_name = '000000-000000'
    output_dir = os.path.join(fsdb.selfplay_dir(), output_name)
    holdout_dir = os.path.join(fsdb.holdout_dir(), output_name)
    sgf_dir = os.path.join(fsdb.sgf_dir(), output_name)

    lines = await run(
        'bazel-bin/cc/selfplay',
        '--flagfile={}'.format(os.path.join(FLAGS.flags_dir,
                                            'bootstrap.flags')),
        '--num_games={}'.format(FLAGS.selfplay_num_games),
        '--parallel_games=32', '--model=random:0,0.4:0.4',
        '--output_dir={}/0'.format(output_dir),
        '--holdout_dir={}/0'.format(holdout_dir),
        '--sgf_dir={}'.format(sgf_dir))
    logging.info('\n'.join(lines[-6:]))
Example no. 20
async def selfplay(state, flagfile='selfplay'):
    """Run selfplay and write a training chunk to the fsdb golden_chunk_dir.

  Args:
    state: the RL loop State instance.
    flagfile: the name of the flagfile to use for selfplay, either 'selfplay'
        (the default) or 'bootstrap'.
  """

    output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
    holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)

    # instead of 2 workers in 1 process per device, we do 2 processes with 1 worker
    all_tasks = []
    loop = asyncio.get_event_loop()
    for i in range(FLAGS.num_gpus_selfplay * 2):  # 2 workers per device
        all_tasks.append(
            loop.create_task(
                selfplay_sub(state, output_dir, holdout_dir, flagfile, i)))
    all_lines = await asyncio.gather(*all_tasks, return_exceptions=True)

    black_wins_total = white_wins_total = num_games = 0
    for lines in all_lines:
        if type(lines) == RuntimeError or type(lines) == OSError:
            raise lines
        result = '\n'.join(lines[-6:])
        logging.info(result)
        stats = parse_win_stats_table(result, 1)[0]
        num_games += stats.total_wins
        black_wins_total += stats.black_wins.total
        white_wins_total += stats.white_wins.total

    logging.info('Black won %0.3f, white won %0.3f',
                 black_wins_total / num_games, white_wins_total / num_games)

    # Write examples to a single record.
    pattern = os.path.join(output_dir, '*', '*.zz')
    random.seed(state.seed)
    tf.set_random_seed(state.seed)
    np.random.seed(state.seed)
    logging.info('Writing golden chunk from "{}"'.format(pattern))
    if FLAGS.use_multinode:
        mpi_rank = MPI.COMM_WORLD.Get_rank()
        divide_record(state, pattern, FLAGS.num_gpus_train, mpi_rank)
    else:
        divide_record(state, pattern, FLAGS.num_gpus_train, -1)
Example no. 21
async def selfplay_multi(state, num_ipus):
  """ Start *num_ipu* selfplay processes """
  output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
  holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)
  flagfile = 'selfplay'

  all_tasks = []
  loop = asyncio.get_event_loop()
  for i in range(num_ipus):
    all_tasks.append(loop.create_task(selfplay_sub(state, output_dir, holdout_dir, flagfile, i)))
  all_lines = await asyncio.gather(*all_tasks, return_exceptions=True)

  black_wins_total = white_wins_total = num_games = 0
  for lines in all_lines:
    if type(lines) == RuntimeError or type(lines) == OSError:
      raise lines
    result = '\n'.join(lines[-6:])
    logging.info(result)
    stats = parse_win_stats_table(result, 1)[0]
    num_games += stats.total_wins
    black_wins_total += stats.black_wins.total
    white_wins_total += stats.white_wins.total

  logging.info('Black won %0.3f, white won %0.3f',
               black_wins_total / num_games,
               white_wins_total / num_games)

  # Copied from selfplay() to aggregate results; this could potentially be
  # parallelized with training.

  # Write examples to a single record.
  pattern = os.path.join(output_dir, '*', '*.zz')
  random.seed(state.seed)
  tf.set_random_seed(state.seed)
  np.random.seed(state.seed)
  # TODO(tommadams): This method of generating one golden chunk per generation
  # is sub-optimal because each chunk gets reused multiple times for training,
  # introducing bias. Instead, a fresh dataset should be uniformly sampled out
  # of *all* games in the training window before the start of each training run.
  buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)

  # TODO(tommadams): parallel_fill is currently non-deterministic. Make it not
  # so.
  logging.info('Writing golden chunk from "{}"'.format(pattern))
  buffer.parallel_fill(tf.gfile.Glob(pattern))
  buffer.flush(os.path.join(fsdb.golden_chunk_dir(),
                            state.output_model_name + '.tfrecord.zz'))
Example no. 22
def run_cc():
    _, model_name = fsdb.get_latest_model()
    num_games_finished = len(fsdb.get_games(model_name))
    if num_games_finished > 25000:
        print("{} has enough games! ({})".format(model_name,
                                                 num_games_finished))
        time.sleep(10 * 60)
        sys.exit()

    mask_flags.checked_run([
        'bazel-bin/cc/selfplay', '--model=tf,{}'.format(model_name),
        '--mode=selfplay', '--output_dir={}/{}'.format(fsdb.selfplay_dir(),
                                                       model_name),
        '--holdout_dir={}/{}'.format(fsdb.holdout_dir(), model_name),
        '--sgf_dir={}/{}'.format(fsdb.sgf_dir(), model_name),
        '--flagfile=rl_loop/distributed_flags'
    ])
Example no. 23
def main(unused_argv):
  """Run the reinforcement learning loop."""

  mll.init_start()
  print('Wiping dir %s' % FLAGS.base_dir, flush=True)
  shutil.rmtree(FLAGS.base_dir, ignore_errors=True)
  dirs = [fsdb.models_dir(), fsdb.selfplay_dir(), fsdb.holdout_dir(),
          fsdb.eval_dir(), fsdb.golden_chunk_dir(), fsdb.working_dir(),
          fsdb.mpi_log_dir()]
  for d in dirs:
    ensure_dir_exists(d)

  # Copy the flag files so there's no chance of them getting accidentally
  # overwritten while the RL loop is running.
  flags_dir = os.path.join(FLAGS.base_dir, 'flags')
  shutil.copytree(FLAGS.flags_dir, flags_dir)
  FLAGS.flags_dir = flags_dir

  # Copy the target model to the models directory so we can find it easily.
  shutil.copy(FLAGS.target_path, os.path.join(fsdb.models_dir(), 'target.pb'))

  logging.getLogger().addHandler(
      logging.FileHandler(os.path.join(FLAGS.base_dir, 'rl_loop.log')))
  formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                '%Y-%m-%d %H:%M:%S')
  for handler in logging.getLogger().handlers:
    handler.setFormatter(formatter)

  logging.info('Selfplay nodes = {}'.format(FLAGS.selfplay_node))
  logging.info('Train nodes = {}'.format(FLAGS.train_node))
  logging.info('Eval nodes = {}'.format(FLAGS.eval_node))

  with logged_timer('Total time'):
    try:
      mll.init_stop()
      mll.run_start()
      rl_loop()
    finally:
      asyncio.get_event_loop().close()
Example no. 24
def selfplay(state):
  play_output_name = state.play_output_name
  play_output_dir = os.path.join(fsdb.selfplay_dir(), play_output_name)
  play_holdout_dir = os.path.join(fsdb.holdout_dir(), play_output_name)

  result = checked_run([
      'external/minigo/cc/main', '--mode=selfplay', '--parallel_games=2048',
      '--num_readouts=100', '--model={}'.format(
          state.play_model_path), '--output_dir={}'.format(play_output_dir),
      '--holdout_dir={}'.format(play_holdout_dir)
  ] + cc_flags(state), 'selfplay')
  logging.info(get_lines(result, make_slice[-2:]))

  # Write examples to a single record.
  logging.info('Extracting examples')
  random.seed(state.seed)
  tensorflow.set_random_seed(state.seed)
  numpy.random.seed(state.seed)
  buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)
  buffer.parallel_fill(
      tensorflow.gfile.Glob(os.path.join(play_output_dir, '*.zz')))
  buffer.flush(
      os.path.join(fsdb.golden_chunk_dir(), play_output_name + '.tfrecord.zz'))
Example no. 25
async def selfplay(state, flagfile='selfplay'):
  """Run selfplay and write a training chunk to the fsdb golden_chunk_dir.

  Args:
    state: the RL loop State instance.
    flagfile: the name of the flagfile to use for selfplay, either 'selfplay'
        (the default) or 'bootstrap'.
  """

  output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
  holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)

  multi_instance, num_instance, flag_list = extract_multi_instance(
      ['--flagfile={}_mi.flags'.format(os.path.join(FLAGS.flags_dir, flagfile))])
  sp_cmd = ['bazel-bin/cc/selfplay',
            '--flagfile={}.flags'.format(os.path.join(FLAGS.flags_dir, flagfile)),
            '--model={}'.format(state.best_model_path),
            '--output_dir={}'.format(output_dir),
            '--holdout_dir={}'.format(holdout_dir)]
  if not multi_instance:
    lines = await run(
        *sp_cmd,
        '--seed={}'.format(state.seed))
  else:
    if FLAGS.selfplay_node == []:
      # run selfplay locally
      lines = await run(
          'python3', 'ml_perf/execute.py',
          '--num_instance={}'.format(num_instance),
          '--',
          *sp_cmd,
          '--seed={}'.format(state.seed))
    else:
      with logged_timer('selfplay mn'):
        # run one selfplay instance per host
        lines = await run_distributed(
            ['LD_LIBRARY_PATH=$LD_LIBRARY_PATH:cc/tensorflow'],
            num_instance, FLAGS.selfplay_node, None, None, state.seed,
            *sp_cmd)

  result = '\n'.join(lines)
  with logged_timer('parse win stats'):
    stats = parse_win_stats_table(result, 1)[0]
    num_games = stats.total_wins
    black_total = stats.black_wins.total
    white_total = stats.white_wins.total

    logging.info('Black won %0.3f, white won %0.3f',
                 black_total / num_games,
                 white_total / num_games)
    bias = abs(white_total - black_total)/num_games
    logging.info('Black total %d, white total %d, total games %d, bias %0.3f.',
                 black_total, white_total, num_games, bias)

  with logged_timer('generate golden chunk'):
    # Write examples to a single record.
    pattern = os.path.join(output_dir, '*', '*.zz')
    files = tf.gfile.Glob(pattern)

    random.seed(state.seed)
    tf.set_random_seed(state.seed)
    np.random.seed(state.seed)

    # TODO(tommadams): This method of generating one golden chunk per generation
    # is sub-optimal because each chunk gets reused multiple times for training,
    # introducing bias. Instead, a fresh dataset should be uniformly sampled out
    # of *all* games in the training window before the start of each training run.

    # TODO(tommadams): parallel_fill is currently non-deterministic. Make it not
    # so.
    logging.info('Writing golden chunk from "{}"'.format(pattern))
    threads = FLAGS.golden_chunk_split
    file_list = []
    files_number = len(files)
    chunk_size = files_number // threads

    # Split the files into N separate parts (a sketch of gen_golden_chunk
    # follows this example).
    for i in range(threads):
      if i == threads - 1:
        file_list += [[i, files[chunk_size * i :]]]
      else:
        file_list += [[i, files[chunk_size * i : chunk_size * (i + 1)]]]
    pool = mp.Pool(threads)
    pool.map(functools.partial(gen_golden_chunk, state=state), file_list)

  return bias
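
gen_golden_chunk(), which the mp.Pool call above maps over the file slices, is not shown on this page. A hypothetical sketch consistent with how ExampleBuffer is used in the other examples (example_buffer and fsdb are assumed to be the same project modules used above); the per-shard output naming scheme is an assumption:

import os

def gen_golden_chunk(item, state):
    # `item` is an [index, files] pair produced by the splitting loop above.
    i, files = item
    buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)
    buffer.parallel_fill(files)
    buffer.flush(os.path.join(fsdb.golden_chunk_dir(),
                              '{}-{}.tfrecord.zz'.format(state.output_model_name, i)))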
Example no. 26
def main(unused_argv):
    """Run the reinforcement learning loop."""
    utils.ensure_dir_exists(fsdb.models_dir())
    utils.ensure_dir_exists(fsdb.selfplay_dir())
    utils.ensure_dir_exists(fsdb.holdout_dir())
    utils.ensure_dir_exists(fsdb.sgf_dir())
    utils.ensure_dir_exists(fsdb.eval_dir())
    utils.ensure_dir_exists(fsdb.golden_chunk_dir())
    utils.ensure_dir_exists(fsdb.working_dir())

    bootstrap_name = shipname.generate(0)
    bootstrap_model_path = os.path.join(fsdb.models_dir(), bootstrap_name)
    mask_flags.checked_run([
        'python3', 'bootstrap.py',
        '--export_path={}'.format(bootstrap_model_path),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--flagfile=rl_loop/local_flags'
    ])

    selfplay_cmd = [
        'python3', 'selfplay.py',
        '--load_file={}'.format(bootstrap_model_path),
        '--selfplay_dir={}'.format(
            os.path.join(fsdb.selfplay_dir(),
                         bootstrap_name)), '--holdout_dir={}'.format(
                             os.path.join(fsdb.holdout_dir(), bootstrap_name)),
        '--sgf_dir={}'.format(fsdb.sgf_dir()), '--holdout_pct=0',
        '--flagfile=rl_loop/local_flags'
    ]

    # Selfplay twice
    mask_flags.checked_run(selfplay_cmd)
    mask_flags.checked_run(selfplay_cmd)
    # and once more to generate a held-out game for validation. This exploits
    # the flags behavior where, if a flag is passed twice, the second one wins.
    mask_flags.checked_run(selfplay_cmd + ['--holdout_pct=100'])

    # Double check that at least one sgf has been generated.
    assert os.listdir(os.path.join(fsdb.sgf_dir(), 'full'))

    print("Making shuffled golden chunk from selfplay data...")
    # TODO(amj): refactor example_buffer so it can be called the same way
    # as everything else.
    eb.make_chunk_for(output_dir=fsdb.golden_chunk_dir(),
                      local_dir=fsdb.working_dir(),
                      game_dir=fsdb.selfplay_dir(),
                      model_num=1,
                      positions=64,
                      threads=8,
                      sampling_frac=1)

    tf_records = sorted(
        gfile.Glob(os.path.join(fsdb.golden_chunk_dir(), '*.tfrecord.zz')))

    trained_model_name = shipname.generate(1)
    trained_model_path = os.path.join(fsdb.models_dir(), trained_model_name)

    # Train on shuffled game data
    mask_flags.checked_run([
        'python3', 'train.py', *tf_records,
        '--work_dir={}'.format(fsdb.working_dir()),
        '--export_path={}'.format(trained_model_path),
        '--flagfile=rl_loop/local_flags'
    ])

    # Validate the trained model on held out game
    mask_flags.checked_run([
        'python3', 'validate.py',
        os.path.join(fsdb.holdout_dir(), bootstrap_name),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--flagfile=rl_loop/local_flags'
    ])

    # Verify that the trained model works for selfplay. This exploits the flags
    # behavior where, if a flag is passed twice, the second one wins.
    mask_flags.checked_run(selfplay_cmd +
                           ['--load_file={}'.format(trained_model_path)])

    mask_flags.checked_run([
        'python3', 'evaluate.py', bootstrap_model_path, trained_model_path,
        '--games=1', '--eval_sgf_dir={}'.format(fsdb.eval_dir()),
        '--flagfile=rl_loop/local_flags'
    ])
    print("Completed integration test!")
Example no. 27
def rl_loop():
    """The main reinforcement learning (RL) loop."""

    # The 'window_size' here reflects the split of golden chunks after selfplay:
    # each selfplay round generates N golden chunks instead of one in order to
    # speed up writing them (N is determined by FLAGS.golden_chunk_split). This
    # makes the effective window size dynamic: it should grow by N-1 per round
    # to keep the effective window size unchanged, then by N once no big
    # (unsplit) chunk is left, until it reaches
    # FLAGS.window_size * FLAGS.golden_chunk_split.

    window_size = 0

    state = State()
    numa_per_node = FLAGS.physical_cores // FLAGS.numa_cores
    train_instance_num = FLAGS.train_instance_per_numa * len(
        FLAGS.train_node) * numa_per_node
    selfplay_node_num = max(len(FLAGS.selfplay_node), 1)
    selfplay_num = selfplay_node_num
    out_files_number = int(train_instance_num /
                           gcd(train_instance_num, selfplay_num) *
                           selfplay_node_num)
    FLAGS.golden_chunk_split = out_files_number

    window_size = out_files_number * FLAGS.window_size

    if FLAGS.checkpoint_dir is not None:
        # Start from a partially trained model.
        initialize_from_checkpoint(state, out_files_number)
        window_size = len(get_golden_chunk_records(window_size))
        mll.init_stop()
        mll.run_start()
        state.start_time = time.time()
    else:
        # Play the first round of selfplay games with a fake model that returns
        # random noise. We do this instead of playing multiple games using a single
        # model bootstrapped with random noise to avoid any initial bias.
        mll.init_stop()
        mll.run_start()
        state.start_time = time.time()
        mll.epoch_start(state.iter_num)
        wait(selfplay(state, 'bootstrap'))
        window_size += FLAGS.golden_chunk_split

        # Train a real model from the random selfplay games.
        state.iter_num += 1
        wait(train(state, window_size))
        post_train(state)

        # Select the newly trained model as the best.
        state.best_model_name = state.train_model_name
        state.gen_num += 1

        # Run selfplay using the new model.
        wait(selfplay(state))
        window_size += FLAGS.golden_chunk_split
        mll.epoch_stop(state.iter_num - 1)

    first_iter = True
    state_copy = None
    model_win_rate = -1.0
    # Now start the full training loop.
    while state.iter_num <= FLAGS.iterations:
        with logged_timer('iteration time {}'.format(state.iter_num)):
            mll.epoch_start(state.iter_num)
            # Build holdout glob before incrementing the iteration number because we
            # want to run validation on the previous generation.
            holdout_glob = os.path.join(fsdb.holdout_dir(),
                                        '%06d-*' % state.iter_num, '*')

            if FLAGS.parallel_post_train == 0:
                state.iter_num += 1
                wait(train(state, window_size))
                post_train(state)
                # Run eval, validation & selfplay sequentially.
                wait(selfplay(state))
                model_win_rate = wait(evaluate_trained_model(state))
                if model_win_rate >= FLAGS.gating_win_rate:
                    # Promote the trained model to the best model and increment the generation
                    # number.
                    state.best_model_name = state.train_model_name
                    state.gen_num += 1
                mll.epoch_stop(state.iter_num - 1)
                #                               ^ compensate iter_num += 1 above

            if FLAGS.parallel_post_train == 1:
                state.iter_num += 1
                wait([train(state, window_size), selfplay(state)])
                post_train(state)
                # Run eval, validation & selfplay in parallel.
                model_win_rate = wait(evaluate_trained_model(state))
                if model_win_rate >= FLAGS.gating_win_rate:
                    # Promote the trained model to the best model and increment the generation
                    # number.
                    state.best_model_name = state.train_model_name
                    state.gen_num += 1
                mll.epoch_stop(state.iter_num - 1)
                #                               ^ compensate iter_num += 1 above

            if FLAGS.parallel_post_train == 2:
                state_copy = copy.copy(state)
                state.iter_num += 1
                # run training and evaluation/validation/selfplay in parallel
                # this is software pipeline-ish parallelism
                # start train[iter]
                # |   start valiation[iter-1]
                # |   wait for validation
                # |   if not first time start evaluation[iter-1]
                # |   if not first time wait for evaluation
                # |   if not first time check for promotion
                # |   start selfplay[iter]
                # |   wait selfplay
                # wait train
                train_handle = asyncio.gather(train(state, window_size),
                                              return_exceptions=True)
                if not first_iter:
                    post_train(state_copy)
                    model_win_rate = wait(evaluate_trained_model(state_copy))
                    if model_win_rate >= FLAGS.gating_win_rate:
                        # Promote the trained model to the best model
                        state.best_model_name = state_copy.train_model_name
                    mll.epoch_stop(state.iter_num - 1 - 1)
                    #                               ^---^-- compensate iter_num += 1 above
                    #                                   +-- it is actually last iteration
                else:
                    first_iter = False
                wait(selfplay(state))
                asyncio.get_event_loop().run_until_complete(train_handle)
                if not first_iter:
                    if model_win_rate >= FLAGS.gating_win_rate:
                        # Increment the generation number.
                        train_model_name_before = state.train_model_name
                        state.gen_num += 1

                        # Output dependency:
                        # In parallel post-train mode 1 there is an output dependence between
                        # the evaluation of iteration i (which increments gen_num) and the
                        # training of iteration i+1 (which uses gen_num for the exported
                        # model path). In parallel post-train mode 2 (this mode), the
                        # evaluation of iteration i is postponed until iteration i+1, after
                        # training has already started, so training at iteration i+1 won't
                        # generate the correct model name when a promotion needs to happen.
                        # This block fixes up the model name when the evaluation decides
                        # there is a promotion.
                        train_model_name_after = state.train_model_name
                        model_paths = glob.glob(
                            os.path.join(
                                fsdb.models_dir(),
                                '{}.*'.format(train_model_name_before)))
                        for model in model_paths:
                            logging.info('moving {} --> {}'.format(
                                model,
                                train_model_name_after.join(
                                    model.rsplit(train_model_name_before, 1))))
                            shutil.copy(
                                model,
                                train_model_name_after.join(
                                    model.rsplit(train_model_name_before, 1)))

    # After the main loop, if parallel_post_train == 2, we still need to emit
    # epoch_stop for the last epoch.
    if FLAGS.parallel_post_train == 2:
        mll.epoch_stop(state.iter_num - 1)
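
The out_files_number arithmetic near the top of this example is easier to see with concrete numbers. The values below are purely illustrative and not taken from any real configuration:

from math import gcd

train_instance_num = 32   # e.g. 2 train nodes * 4 NUMA domains * 4 instances per NUMA
selfplay_node_num = 2     # e.g. 2 selfplay hosts
selfplay_num = selfplay_node_num

out_files_number = int(train_instance_num /
                       gcd(train_instance_num, selfplay_num) * selfplay_node_num)
print(out_files_number)   # -> 32 golden-chunk shards written per selfplay round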
Example no. 28
def main(unused_argv):

    for i in range(0, NUM_LOOP):
        if i == 0:
            src_model_name = shipname.generate(0)
            fsdb.switch_base(os.path.join(base_dir, src_model_name))
            src_model_path = os.path.join(fsdb.models_dir(), src_model_name)
            bootstrap_model_path = os.path.join(fsdb.models_dir(),
                                                src_model_name)
            mask_flags.checked_run([
                'python3', 'bootstrap.py',
                '--export_path={}'.format(bootstrap_model_path),
                '--work_dir={}'.format(fsdb.working_dir()),
                '--flagfile=rl_loop/local_flags'
            ])
            dst_model_name = shipname.generate(1)
            fsdb.switch_base(os.path.join(base_dir, dst_model_name))
        else:
            src_model_name = dst_model_name
            src_model_path = os.path.join(fsdb.models_dir(), src_model_name)
            dst_model_name = shipname.generate(i + 1)
            fsdb.switch_base(os.path.join(base_dir, dst_model_name))

        utils.ensure_dir_exists(fsdb.models_dir())
        utils.ensure_dir_exists(fsdb.selfplay_dir())
        utils.ensure_dir_exists(fsdb.holdout_dir())
        utils.ensure_dir_exists(fsdb.sgf_dir())
        utils.ensure_dir_exists(fsdb.eval_dir())
        utils.ensure_dir_exists(fsdb.golden_chunk_dir())
        utils.ensure_dir_exists(fsdb.working_dir())

        #bootstrap_name = shipname.generate(0)
        #bootstrap_model_path = os.path.join(fsdb.models_dir(), bootstrap_name)

        print(src_model_name)
        print(src_model_path)
        selfplay_cmd = [
            'python3', 'selfplay.py', '--load_file={}'.format(src_model_path),
            '--selfplay_dir={}'.format(
                os.path.join(fsdb.selfplay_dir(),
                             dst_model_name)), '--holdout_dir={}'.format(
                                 os.path.join(fsdb.holdout_dir(),
                                              dst_model_name)),
            '--sgf_dir={}'.format(fsdb.sgf_dir()), '--holdout_pct=0',
            '--flagfile=rl_loop/local_flags'
        ]

        # Selfplay twice
        mask_flags.checked_run(selfplay_cmd)
        mask_flags.checked_run(selfplay_cmd)

        # and once more to generate a held-out game for validation. This
        # exploits the flags behavior where, if a flag is passed twice, the
        # second one wins.
        mask_flags.checked_run(selfplay_cmd + ['--holdout_pct=100'])

        # Double check that at least one sgf has been generated.
        assert os.listdir(os.path.join(fsdb.sgf_dir(), 'full'))

        print("Making shuffled golden chunk from selfplay data...")
        # TODO(amj): refactor example_buffer so it can be called the same way
        # as everything else.
        eb.make_chunk_for(output_dir=fsdb.golden_chunk_dir(),
                          local_dir=fsdb.working_dir(),
                          game_dir=fsdb.selfplay_dir(),
                          model_num=1,
                          positions=64,
                          threads=8,
                          sampling_frac=1)

        tf_records = sorted(
            gfile.Glob(os.path.join(fsdb.golden_chunk_dir(), '*.tfrecord.zz')))

        #trained_model_name = shipname.generate(1)
        trained_model_name = dst_model_name
        trained_model_path = os.path.join(fsdb.models_dir(),
                                          trained_model_name)

        # Train on shuffled game data
        mask_flags.checked_run([
            'python3', 'train.py', *tf_records,
            '--work_dir={}'.format(fsdb.working_dir()),
            '--export_path={}'.format(trained_model_path),
            '--flagfile=rl_loop/local_flags'
        ])

    print("Finished!")
Example no. 29
def selfplay_noasync(state, flagfile='selfplay'):
    """Run selfplay and write a training chunk to the fsdb golden_chunk_dir.

  Args:
    state: the RL loop State instance.
    flagfile: the name of the flagfile to use for selfplay, either 'selfplay'
        (the default) or 'bootstrap'.
  """

    output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
    holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)
    base_seed = state.seed * FLAGS.num_gpus_selfplay * 2

    if FLAGS.use_multinode:
        mpi_rank = MPI.COMM_WORLD.Get_rank()
        base_seed = base_seed + (mpi_rank * 1433)

    mpi_info = MPI.Info.Create()
    num_workers = 2 * FLAGS.num_gpus_selfplay
    cores_per_worker = (FLAGS.cores_per_socket *
                        FLAGS.num_socket) // num_workers

    # TODO: set hosts to self play nodes here.
    mpi_info.Set("host", socket.gethostname())
    mpi_info.Set("bind_to", "none")
    icomm = MPI.COMM_SELF.Spawn("ompi_bind_DGX1.sh",
                                maxprocs=num_workers,
                                args=[
                                    'bazel-bin/cc/selfplay_mpi',
                                    '--flagfile={}.flags'.format(
                                        os.path.join(FLAGS.flags_dir,
                                                     flagfile)),
                                    '--model={}'.format(state.best_model_path),
                                    '--output_dir={}'.format(output_dir),
                                    '--holdout_dir={}'.format(holdout_dir),
                                    '--seed={}'.format(base_seed)
                                ],
                                info=mpi_info)

    icomm.barrier()
    icomm.Disconnect()

    black_wins_total = white_wins_total = num_games = 0

    #for lines in all_lines:
    #  if type(lines) == RuntimeError or type(lines) == OSError:
    #    raise lines
    #    continue
    #  result = '\n'.join(lines[-6:])
    #  logging.info(result)
    #  stats = parse_win_stats_table(result, 1)[0]
    #  num_games += stats.total_wins
    #  black_wins_total += stats.black_wins.total
    #  white_wins_total += stats.white_wins.total

    #logging.info('Black won %0.3f, white won %0.3f',
    #             black_wins_total / num_games,
    #             white_wins_total / num_games)

    # Write examples to a single record.
    pattern = os.path.join(output_dir, '*', '*.zz')
    random.seed(state.seed)
    tf.set_random_seed(state.seed)
    np.random.seed(state.seed)

    logging.info('Writing golden chunk from "{}"'.format(pattern))
    if FLAGS.use_multinode:
        mpi_rank = MPI.COMM_WORLD.Get_rank()
        divide_record(state, pattern, FLAGS.num_gpus_train, mpi_rank)
    else:
        divide_record(state, pattern, FLAGS.num_gpus_train, -1)
Example no. 30
async def selfplay(state, flagfile='selfplay'):
    """Run selfplay and write a training chunk to the fsdb golden_chunk_dir.

  Args:
    state: the RL loop State instance.
    flagfile: the name of the flagfile to use for selfplay, either 'selfplay'
        (the default) or 'bootstrap'.
  """
    output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
    holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)
    output_dir = '/tmp/minigo' + output_dir

    multi_instance, num_instance, flag_list = extract_multi_instance([
        '--flagfile={}_mi.flags'.format(os.path.join(FLAGS.flags_dir,
                                                     flagfile))
    ])
    sp_cmd = [
        'bazel-bin/cc/selfplay',
        '--flagfile={}.flags'.format(os.path.join(FLAGS.flags_dir, flagfile)),
        '--model={}'.format(state.best_model_path),
        '--output_dir={}'.format(output_dir),
        '--holdout_dir={}'.format(holdout_dir)
    ]
    if not multi_instance:
        lines = await run(*sp_cmd, '--seed={}'.format(state.seed))
    else:
        if FLAGS.selfplay_node == []:
            # run selfplay locally
            lines = await run('python3', 'ml_perf/execute.py',
                              '--num_instance={}'.format(num_instance), '--',
                              *sp_cmd, '--seed={}'.format(state.seed))
        else:
            with logged_timer('selfplay mn'):
                # run one selfplay instance per host
                lines = await run_distributed(
                    ['LD_LIBRARY_PATH=$LD_LIBRARY_PATH:cc/tensorflow'],
                    num_instance, FLAGS.selfplay_node, None, None, state.seed,
                    *sp_cmd)

    #result = '\n'.join(lines)
    #with logged_timer('parse win stats'):
    #  stats = parse_win_stats_table(result, 1)[0]
    #  num_games = stats.total_wins
    #  black_total = stats.black_wins.total
    #  white_total = stats.white_wins.total

    #  logging.info('Black won %0.3f, white won %0.3f',
    #               black_total / num_games,
    #               white_total / num_games)
    #  bias = abs(white_total - black_total)/num_games
    #  logging.info('Black total %d, white total %d, total games %d, bias %0.3f.',
    #               black_total, white_total, num_games, bias)

    with logged_timer('generate golden chunk'):
        # Write examples to a single record.
        hosts = FLAGS.selfplay_node
        if hosts == []:
            hosts = ['localhost']
        num_instance = len(hosts)
        numa_per_node = FLAGS.physical_cores // FLAGS.numa_cores
        train_instance_num = FLAGS.train_instance_per_numa * len(
            FLAGS.train_node) * numa_per_node
        selfplay_node_num = len(hosts)
        selfplay_num = selfplay_node_num
        out_files_number = int(train_instance_num /
                               gcd(train_instance_num, selfplay_num))

        cmd = [
            'python3', 'ml_perf/divide_golden_chunk.py',
            '--read_path={}'.format(output_dir + "/*"),
            '--write_path={}'.format(
                os.path.join(fsdb.golden_chunk_dir(),
                             state.output_model_name + '.tfrecord.zz')),
            '--out_files_number={}'.format(out_files_number),
            '--physical_cores={}'.format(FLAGS.physical_cores),
            '--base_dir={}'.format(FLAGS.base_dir)
        ]
        lines = await run_distributed([], 1, hosts, None, None, state.seed,
                                      *cmd)