Exemplo n.º 1
0
def run_tpu(no_resign=False):
    """Start the C++ ``main`` binary in selfplay mode with the TPU engine.

    Sets ``GRPC_DEFAULT_SSL_ROOTS_FILE_PATH`` in this process's environment
    to ``/etc/ssl/certs/ca-certificates.crt``, assembles the command line and
    passes it to ``mask_flags.checked_run``.

    Args:
        no_resign: If true, only the ``distributed_flags_nr`` flagfile is
            appended. Otherwise ``--flags_path`` and the regular
            ``distributed_flags`` flagfile are appended.
    """
    os.environ['GRPC_DEFAULT_SSL_ROOTS_FILE_PATH'] = (
        '/etc/ssl/certs/ca-certificates.crt')

    # The '%d' placeholder is passed through unexpanded; presumably the
    # binary fills in the checkpoint number itself -- TODO confirm.
    model_pattern = os.path.join(fsdb.working_dir(), 'model.ckpt-%d.pb')

    command = [
        'bazel-bin/cc/main',
        '--mode=selfplay',
        '--engine=tpu',
        '--model={}'.format(model_pattern),
        '--output_dir={}'.format(fsdb.selfplay_dir()),
        '--holdout_dir={}'.format(fsdb.holdout_dir()),
        '--sgf_dir={}'.format(fsdb.sgf_dir()),
        '--run_forever=true',
        '--output_bigtable={}'.format(FLAGS.output_bigtable),
    ]

    # Only name a TPU when the environment advertises an endpoint.
    tpu_endpoints = os.environ.get('KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS')
    if tpu_endpoints is not None:
        command.append('--tpu_name={}'.format(tpu_endpoints))

    if no_resign:
        command.append('--flagfile=rl_loop/distributed_flags_nr')
    else:
        command.append('--flags_path={}'.format(fsdb.flags_path()))
        command.append('--flagfile=rl_loop/distributed_flags')

    mask_flags.checked_run(command)
Exemplo n.º 2
0
def run_tpu():
    """Start the C++ ``main`` binary in selfplay mode with the TPU engine.

    Every directory flag is taken from ``fsdb``; the assembled command is
    passed to ``mask_flags.checked_run``.
    """
    checkpoint_flag = '--checkpoint_dir={}'.format(fsdb.working_dir())
    output_flag = '--output_dir={}'.format(fsdb.selfplay_dir())
    holdout_flag = '--holdout_dir={}'.format(fsdb.holdout_dir())
    sgf_flag = '--sgf_dir={}'.format(fsdb.sgf_dir())
    flags_path_flag = '--flags_path={}'.format(fsdb.flags_path())

    command = [
        'bazel-bin/cc/main',
        '--mode=selfplay',
        '--engine=tpu',
        checkpoint_flag,
        output_flag,
        holdout_flag,
        sgf_flag,
        flags_path_flag,
        '--run_forever=true',
        '--flagfile=rl_loop/distributed_flags',
    ]
    mask_flags.checked_run(command)
async def bootstrap_selfplay(state):
    """Run the C++ selfplay binary with ``--model=random:0,0.4:0.4``.

    Output goes under the ``000000-000000`` subdirectory of the selfplay,
    holdout and SGF directories reported by ``fsdb``. The last six lines
    returned by ``run`` are logged at INFO level.

    Args:
        state: Not used by this function.
    """
    generation = '000000-000000'
    selfplay_root = os.path.join(fsdb.selfplay_dir(), generation)
    holdout_root = os.path.join(fsdb.holdout_dir(), generation)
    sgf_root = os.path.join(fsdb.sgf_dir(), generation)

    flagfile = os.path.join(FLAGS.flags_dir, 'bootstrap.flags')
    command = [
        'bazel-bin/cc/selfplay',
        '--flagfile={}'.format(flagfile),
        '--num_games={}'.format(FLAGS.selfplay_num_games),
        '--parallel_games=32',
        '--model=random:0,0.4:0.4',
        '--output_dir={}/0'.format(selfplay_root),
        '--holdout_dir={}/0'.format(holdout_root),
        '--sgf_dir={}'.format(sgf_root),
    ]

    output = await run(*command)
    # Only the tail of the output is logged.
    logging.info('\n'.join(output[-6:]))
Exemplo n.º 4
0
def run_cc():
    """Run the C++ selfplay binary against the latest model.

    If ``fsdb.get_games`` already reports more than 25000 games for that
    model, a message is printed, the process sleeps for ten minutes and then
    exits instead of playing.
    """
    _, latest_model = fsdb.get_latest_model()
    games_played = len(fsdb.get_games(latest_model))

    if games_played > 25000:
        print("{} has enough games! ({})".format(latest_model, games_played))
        time.sleep(10 * 60)
        sys.exit()

    command = [
        'bazel-bin/cc/selfplay',
        '--model=tf,{}'.format(latest_model),
        '--mode=selfplay',
        '--output_dir={}/{}'.format(fsdb.selfplay_dir(), latest_model),
        '--holdout_dir={}/{}'.format(fsdb.holdout_dir(), latest_model),
        '--sgf_dir={}/{}'.format(fsdb.sgf_dir(), latest_model),
        '--flagfile=rl_loop/distributed_flags',
    ]
    mask_flags.checked_run(command)
Exemplo n.º 5
0
def main(unused_argv):
    """Run ``NUM_LOOP`` rounds of selfplay, chunk building and training.

    Round ``i`` plays games with model ``shipname.generate(i)`` and trains
    model ``shipname.generate(i + 1)``. Before the first round a bootstrap
    model is created with ``bootstrap.py``. Each round switches the ``fsdb``
    base to ``base_dir/<destination model name>`` before any of that round's
    directories are created.

    Args:
        unused_argv: Ignored.
    """
    for i in range(NUM_LOOP):
        if i == 0:
            # First round: there is no previous model, so bootstrap one
            # under its own base directory.
            src_model_name = shipname.generate(0)
            fsdb.switch_base(os.path.join(base_dir, src_model_name))
            src_model_path = os.path.join(fsdb.models_dir(), src_model_name)
            bootstrap_export_path = os.path.join(fsdb.models_dir(),
                                                 src_model_name)
            bootstrap_cmd = [
                'python3',
                'bootstrap.py',
                '--export_path={}'.format(bootstrap_export_path),
                '--work_dir={}'.format(fsdb.working_dir()),
                '--flagfile=rl_loop/local_flags',
            ]
            mask_flags.checked_run(bootstrap_cmd)
        else:
            # Later rounds start from the model trained in the previous
            # round. The path is resolved before the base is switched below.
            src_model_name = dst_model_name
            src_model_path = os.path.join(fsdb.models_dir(), src_model_name)

        dst_model_name = shipname.generate(i + 1)
        fsdb.switch_base(os.path.join(base_dir, dst_model_name))

        for dir_getter in (fsdb.models_dir, fsdb.selfplay_dir,
                           fsdb.holdout_dir, fsdb.sgf_dir, fsdb.eval_dir,
                           fsdb.golden_chunk_dir, fsdb.working_dir):
            utils.ensure_dir_exists(dir_getter())

        print(src_model_name)
        print(src_model_path)

        selfplay_out = os.path.join(fsdb.selfplay_dir(), dst_model_name)
        holdout_out = os.path.join(fsdb.holdout_dir(), dst_model_name)
        selfplay_cmd = [
            'python3',
            'selfplay.py',
            '--load_file={}'.format(src_model_path),
            '--selfplay_dir={}'.format(selfplay_out),
            '--holdout_dir={}'.format(holdout_out),
            '--sgf_dir={}'.format(fsdb.sgf_dir()),
            '--holdout_pct=0',
            '--flagfile=rl_loop/local_flags',
        ]

        # Selfplay twice
        for _ in range(2):
            mask_flags.checked_run(selfplay_cmd)

        # and once more to generate a held out game for validation
        # exploits flags behavior where if you pass flag twice, second one
        # wins.
        mask_flags.checked_run(selfplay_cmd + ['--holdout_pct=100'])

        # Double check that at least one sgf has been generated.
        assert os.listdir(os.path.join(fsdb.sgf_dir(), 'full'))

        print("Making shuffled golden chunk from selfplay data...")
        # TODO(amj): refactor example_buffer so it can be called the same way
        # as everything else.
        eb.make_chunk_for(
            output_dir=fsdb.golden_chunk_dir(),
            local_dir=fsdb.working_dir(),
            game_dir=fsdb.selfplay_dir(),
            model_num=1,
            positions=64,
            threads=8,
            sampling_frac=1,
        )

        chunk_glob = os.path.join(fsdb.golden_chunk_dir(), '*.tfrecord.zz')
        tf_records = sorted(gfile.Glob(chunk_glob))

        # The model trained this round is the destination model.
        trained_model_path = os.path.join(fsdb.models_dir(), dst_model_name)

        # Train on shuffled game data
        train_cmd = ['python3', 'train.py'] + tf_records + [
            '--work_dir={}'.format(fsdb.working_dir()),
            '--export_path={}'.format(trained_model_path),
            '--flagfile=rl_loop/local_flags',
        ]
        mask_flags.checked_run(train_cmd)

    print("Finished!")
Exemplo n.º 6
0
def main(unused_argv):
    """Run one pass of the reinforcement learning loop as an integration test.

    Steps, in order: create the ``fsdb`` directories, bootstrap a model,
    run selfplay three times (the third with ``--holdout_pct=100``), build a
    golden chunk, train a new model, validate it on the held-out games, run
    selfplay with the trained model, and evaluate the bootstrap model against
    the trained one.

    Args:
        unused_argv: Ignored.
    """
    for dir_getter in (fsdb.models_dir, fsdb.selfplay_dir, fsdb.holdout_dir,
                       fsdb.sgf_dir, fsdb.eval_dir, fsdb.golden_chunk_dir,
                       fsdb.working_dir):
        utils.ensure_dir_exists(dir_getter())

    bootstrap_name = shipname.generate(0)
    bootstrap_model_path = os.path.join(fsdb.models_dir(), bootstrap_name)
    bootstrap_cmd = [
        'python3',
        'bootstrap.py',
        '--export_path={}'.format(bootstrap_model_path),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--flagfile=rl_loop/local_flags',
    ]
    mask_flags.checked_run(bootstrap_cmd)

    selfplay_out = os.path.join(fsdb.selfplay_dir(), bootstrap_name)
    holdout_out = os.path.join(fsdb.holdout_dir(), bootstrap_name)
    selfplay_cmd = [
        'python3',
        'selfplay.py',
        '--load_file={}'.format(bootstrap_model_path),
        '--selfplay_dir={}'.format(selfplay_out),
        '--holdout_dir={}'.format(holdout_out),
        '--sgf_dir={}'.format(fsdb.sgf_dir()),
        '--holdout_pct=0',
        '--flagfile=rl_loop/local_flags',
    ]

    # Selfplay twice
    for _ in range(2):
        mask_flags.checked_run(selfplay_cmd)
    # and once more to generate a held out game for validation
    # exploits flags behavior where if you pass flag twice, second one wins.
    mask_flags.checked_run(selfplay_cmd + ['--holdout_pct=100'])

    # Double check that at least one sgf has been generated.
    assert os.listdir(os.path.join(fsdb.sgf_dir(), 'full'))

    print("Making shuffled golden chunk from selfplay data...")
    # TODO(amj): refactor example_buffer so it can be called the same way
    # as everything else.
    eb.make_chunk_for(
        output_dir=fsdb.golden_chunk_dir(),
        local_dir=fsdb.working_dir(),
        game_dir=fsdb.selfplay_dir(),
        model_num=1,
        positions=64,
        threads=8,
        sampling_frac=1,
    )

    chunk_glob = os.path.join(fsdb.golden_chunk_dir(), '*.tfrecord.zz')
    tf_records = sorted(gfile.Glob(chunk_glob))

    trained_model_name = shipname.generate(1)
    trained_model_path = os.path.join(fsdb.models_dir(), trained_model_name)

    # Train on shuffled game data
    train_cmd = ['python3', 'train.py'] + tf_records + [
        '--work_dir={}'.format(fsdb.working_dir()),
        '--export_path={}'.format(trained_model_path),
        '--flagfile=rl_loop/local_flags',
    ]
    mask_flags.checked_run(train_cmd)

    # Validate the trained model on held out game
    validate_cmd = [
        'python3',
        'validate.py',
        os.path.join(fsdb.holdout_dir(), bootstrap_name),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--flagfile=rl_loop/local_flags',
    ]
    mask_flags.checked_run(validate_cmd)

    # Verify that trained model works for selfplay
    # exploits flags behavior where if you pass flag twice, second one wins.
    trained_selfplay_cmd = selfplay_cmd + [
        '--load_file={}'.format(trained_model_path)
    ]
    mask_flags.checked_run(trained_selfplay_cmd)

    evaluate_cmd = [
        'python3',
        'evaluate.py',
        bootstrap_model_path,
        trained_model_path,
        '--games=1',
        '--eval_sgf_dir={}'.format(fsdb.eval_dir()),
        '--flagfile=rl_loop/local_flags',
    ]
    mask_flags.checked_run(evaluate_cmd)
    print("Completed integration test!")