def run_tpu(no_resign=False):
    """Run ``bazel-bin/cc/main`` in selfplay mode with the tpu engine.

    Sets ``GRPC_DEFAULT_SSL_ROOTS_FILE_PATH`` in the process environment,
    assembles the command line from the ``fsdb`` directories and
    ``FLAGS.output_bigtable``, and passes it to ``mask_flags.checked_run``.

    Args:
        no_resign: If true, only the ``distributed_flags_nr`` flagfile is
            appended. Otherwise ``--flags_path`` and the regular
            ``distributed_flags`` flagfile are appended.
    """
    os.environ['GRPC_DEFAULT_SSL_ROOTS_FILE_PATH'] = (
        '/etc/ssl/certs/ca-certificates.crt')

    # The '%d' placeholder is passed through literally to the binary.
    model_pattern = os.path.join(fsdb.working_dir(), 'model.ckpt-%d.pb')
    command = [
        'bazel-bin/cc/main',
        '--mode=selfplay',
        '--engine=tpu',
        '--model={}'.format(model_pattern),
        '--output_dir={}'.format(fsdb.selfplay_dir()),
        '--holdout_dir={}'.format(fsdb.holdout_dir()),
        '--sgf_dir={}'.format(fsdb.sgf_dir()),
        '--run_forever=true',
        '--output_bigtable={}'.format(FLAGS.output_bigtable),
    ]

    # Only name the TPU when the endpoint variable is present.
    tpu_endpoints = os.environ.get('KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS')
    if tpu_endpoints is not None:
        command.append('--tpu_name={}'.format(tpu_endpoints))

    if no_resign:
        command.append('--flagfile=rl_loop/distributed_flags_nr')
    else:
        command.append('--flags_path={}'.format(fsdb.flags_path()))
        command.append('--flagfile=rl_loop/distributed_flags')

    mask_flags.checked_run(command)
def run_tpu():
    """Run ``bazel-bin/cc/main`` in selfplay mode with the tpu engine.

    The checkpoint, output, holdout, SGF and flags locations all come from
    ``fsdb``; the assembled command is executed through
    ``mask_flags.checked_run``.
    """
    command = ['bazel-bin/cc/main', '--mode=selfplay', '--engine=tpu']
    command.append('--checkpoint_dir={}'.format(fsdb.working_dir()))
    command.append('--output_dir={}'.format(fsdb.selfplay_dir()))
    command.append('--holdout_dir={}'.format(fsdb.holdout_dir()))
    command.append('--sgf_dir={}'.format(fsdb.sgf_dir()))
    command.append('--flags_path={}'.format(fsdb.flags_path()))
    command.append('--run_forever=true')
    command.append('--flagfile=rl_loop/distributed_flags')
    mask_flags.checked_run(command)
async def bootstrap_selfplay(state):
    """Run ``bazel-bin/cc/selfplay`` with the ``random:0,0.4:0.4`` model.

    Output, holdout and SGF data go under the ``000000-000000`` subdirectory
    of the corresponding ``fsdb`` directories. The last six lines returned by
    ``run`` are logged at INFO level.

    Args:
        state: Not used by this function.
    """
    generation = '000000-000000'
    selfplay_root = os.path.join(fsdb.selfplay_dir(), generation)
    holdout_root = os.path.join(fsdb.holdout_dir(), generation)
    sgf_root = os.path.join(fsdb.sgf_dir(), generation)

    bootstrap_flagfile = os.path.join(FLAGS.flags_dir, 'bootstrap.flags')
    command = [
        'bazel-bin/cc/selfplay',
        '--flagfile={}'.format(bootstrap_flagfile),
        '--num_games={}'.format(FLAGS.selfplay_num_games),
        '--parallel_games=32',
        '--model=random:0,0.4:0.4',
        '--output_dir={}/0'.format(selfplay_root),
        '--holdout_dir={}/0'.format(holdout_root),
        '--sgf_dir={}'.format(sgf_root),
    ]

    output_lines = await run(*command)
    tail = output_lines[-6:]
    logging.info('\n'.join(tail))
def run_cc():
    """Run ``bazel-bin/cc/selfplay`` for the latest model.

    If ``fsdb`` already reports more than 25000 games for the latest model,
    print a notice, sleep for ten minutes and exit the process instead of
    launching selfplay.
    """
    _, latest_model = fsdb.get_latest_model()
    games_done = len(fsdb.get_games(latest_model))

    if games_done > 25000:
        print("{} has enough games! ({})".format(latest_model, games_done))
        pause_seconds = 10 * 60
        time.sleep(pause_seconds)
        sys.exit()

    command = [
        'bazel-bin/cc/selfplay',
        '--model=tf,{}'.format(latest_model),
        '--mode=selfplay',
    ]
    # Each output location gets a per-model subdirectory.
    command.append(
        '--output_dir={}/{}'.format(fsdb.selfplay_dir(), latest_model))
    command.append(
        '--holdout_dir={}/{}'.format(fsdb.holdout_dir(), latest_model))
    command.append(
        '--sgf_dir={}/{}'.format(fsdb.sgf_dir(), latest_model))
    command.append('--flagfile=rl_loop/distributed_flags')

    mask_flags.checked_run(command)
def main(unused_argv):
    """Run NUM_LOOP rounds of selfplay followed by training.

    Each round reads a source model, switches ``fsdb`` to a base directory
    named after the destination model, plays selfplay games with the source
    model, builds a golden chunk from them and trains the destination model.
    Round 0 first creates the source model with ``bootstrap.py``; later
    rounds use the previous round's destination model as their source.

    Args:
        unused_argv: Ignored.

    Raises:
        AssertionError: If the ``full`` subdirectory of the SGF directory is
            empty after selfplay.
    """
    for i in range(NUM_LOOP):
        if i == 0:
            src_model_name = shipname.generate(0)
            fsdb.switch_base(os.path.join(base_dir, src_model_name))
            src_model_path = os.path.join(fsdb.models_dir(), src_model_name)
            mask_flags.checked_run([
                'python3', 'bootstrap.py',
                '--export_path={}'.format(src_model_path),
                '--work_dir={}'.format(fsdb.working_dir()),
                '--flagfile=rl_loop/local_flags',
            ])
            dst_model_name = shipname.generate(1)
            fsdb.switch_base(os.path.join(base_dir, dst_model_name))
        else:
            src_model_name = dst_model_name
            # Resolve the source path BEFORE switching base: the model lives
            # under the previous round's base directory.
            src_model_path = os.path.join(fsdb.models_dir(), src_model_name)
            dst_model_name = shipname.generate(i + 1)
            fsdb.switch_base(os.path.join(base_dir, dst_model_name))

        # Create the directory layout under the newly selected base.
        for dir_fn in (fsdb.models_dir, fsdb.selfplay_dir, fsdb.holdout_dir,
                       fsdb.sgf_dir, fsdb.eval_dir, fsdb.golden_chunk_dir,
                       fsdb.working_dir):
            utils.ensure_dir_exists(dir_fn())

        print(src_model_name)
        print(src_model_path)

        selfplay_cmd = [
            'python3', 'selfplay.py',
            '--load_file={}'.format(src_model_path),
            '--selfplay_dir={}'.format(
                os.path.join(fsdb.selfplay_dir(), dst_model_name)),
            '--holdout_dir={}'.format(
                os.path.join(fsdb.holdout_dir(), dst_model_name)),
            '--sgf_dir={}'.format(fsdb.sgf_dir()),
            '--holdout_pct=0',
            '--flagfile=rl_loop/local_flags',
        ]

        # Selfplay twice
        mask_flags.checked_run(selfplay_cmd)
        mask_flags.checked_run(selfplay_cmd)
        # and once more to generate a held out game for validation
        # exploits flags behavior where if you pass flag twice, second one
        # wins.
        mask_flags.checked_run(selfplay_cmd + ['--holdout_pct=100'])

        # Double check that at least one sgf has been generated. Raised
        # explicitly so the check is not stripped when running with -O.
        full_sgf_dir = os.path.join(fsdb.sgf_dir(), 'full')
        if not os.listdir(full_sgf_dir):
            raise AssertionError(
                'no sgf files found in {}'.format(full_sgf_dir))

        print("Making shuffled golden chunk from selfplay data...")
        # TODO(amj): refactor example_buffer so it can be called the same way
        # as everything else.
        eb.make_chunk_for(output_dir=fsdb.golden_chunk_dir(),
                          local_dir=fsdb.working_dir(),
                          game_dir=fsdb.selfplay_dir(),
                          model_num=1,
                          positions=64,
                          threads=8,
                          sampling_frac=1)

        tf_records = sorted(
            gfile.Glob(os.path.join(fsdb.golden_chunk_dir(),
                                    '*.tfrecord.zz')))

        trained_model_name = dst_model_name
        trained_model_path = os.path.join(fsdb.models_dir(),
                                          trained_model_name)

        # Train on shuffled game data
        mask_flags.checked_run([
            'python3', 'train.py', *tf_records,
            '--work_dir={}'.format(fsdb.working_dir()),
            '--export_path={}'.format(trained_model_path),
            '--flagfile=rl_loop/local_flags',
        ])

    print("Finished!")
def main(unused_argv):
    """Run the reinforcement learning loop.

    Steps, each executed as a checked subprocess unless noted:
      1. Create the ``fsdb`` directory layout (in-process).
      2. Bootstrap model 0.
      3. Selfplay three times with the bootstrap model; the third run passes
         ``--holdout_pct=100``.
      4. Build a golden chunk from the selfplay data (in-process).
      5. Train model 1 on the resulting records.
      6. Validate on the holdout directory, run selfplay with the trained
         model, and evaluate bootstrap vs. trained for one game.

    Args:
        unused_argv: Ignored.

    Raises:
        AssertionError: If the ``full`` subdirectory of the SGF directory is
            empty after selfplay.
    """
    for dir_fn in (fsdb.models_dir, fsdb.selfplay_dir, fsdb.holdout_dir,
                   fsdb.sgf_dir, fsdb.eval_dir, fsdb.golden_chunk_dir,
                   fsdb.working_dir):
        utils.ensure_dir_exists(dir_fn())

    bootstrap_name = shipname.generate(0)
    bootstrap_model_path = os.path.join(fsdb.models_dir(), bootstrap_name)
    mask_flags.checked_run([
        'python3', 'bootstrap.py',
        '--export_path={}'.format(bootstrap_model_path),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--flagfile=rl_loop/local_flags',
    ])

    selfplay_cmd = [
        'python3', 'selfplay.py',
        '--load_file={}'.format(bootstrap_model_path),
        '--selfplay_dir={}'.format(
            os.path.join(fsdb.selfplay_dir(), bootstrap_name)),
        '--holdout_dir={}'.format(
            os.path.join(fsdb.holdout_dir(), bootstrap_name)),
        '--sgf_dir={}'.format(fsdb.sgf_dir()),
        '--holdout_pct=0',
        '--flagfile=rl_loop/local_flags',
    ]

    # Selfplay twice
    mask_flags.checked_run(selfplay_cmd)
    mask_flags.checked_run(selfplay_cmd)
    # and once more to generate a held out game for validation
    # exploits flags behavior where if you pass flag twice, second one wins.
    mask_flags.checked_run(selfplay_cmd + ['--holdout_pct=100'])

    # Double check that at least one sgf has been generated. Raised
    # explicitly so the check is not stripped when running with -O.
    full_sgf_dir = os.path.join(fsdb.sgf_dir(), 'full')
    if not os.listdir(full_sgf_dir):
        raise AssertionError('no sgf files found in {}'.format(full_sgf_dir))

    print("Making shuffled golden chunk from selfplay data...")
    # TODO(amj): refactor example_buffer so it can be called the same way
    # as everything else.
    eb.make_chunk_for(output_dir=fsdb.golden_chunk_dir(),
                      local_dir=fsdb.working_dir(),
                      game_dir=fsdb.selfplay_dir(),
                      model_num=1,
                      positions=64,
                      threads=8,
                      sampling_frac=1)

    tf_records = sorted(
        gfile.Glob(os.path.join(fsdb.golden_chunk_dir(), '*.tfrecord.zz')))

    trained_model_name = shipname.generate(1)
    trained_model_path = os.path.join(fsdb.models_dir(), trained_model_name)

    # Train on shuffled game data
    mask_flags.checked_run([
        'python3', 'train.py', *tf_records,
        '--work_dir={}'.format(fsdb.working_dir()),
        '--export_path={}'.format(trained_model_path),
        '--flagfile=rl_loop/local_flags',
    ])

    # Validate the trained model on held out game
    mask_flags.checked_run([
        'python3', 'validate.py',
        os.path.join(fsdb.holdout_dir(), bootstrap_name),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--flagfile=rl_loop/local_flags',
    ])

    # Verify that trained model works for selfplay
    # exploits flags behavior where if you pass flag twice, second one wins.
    mask_flags.checked_run(
        selfplay_cmd + ['--load_file={}'.format(trained_model_path)])

    mask_flags.checked_run([
        'python3', 'evaluate.py',
        bootstrap_model_path, trained_model_path,
        '--games=1',
        '--eval_sgf_dir={}'.format(fsdb.eval_dir()),
        '--flagfile=rl_loop/local_flags',
    ])
    print("Completed integration test!")