async def sample_training_examples(state):
    """Sample training examples from recent selfplay games.

    Args:
        state: the RL loop State instance.

    Returns:
        A list of paths to the golden chunks that were written, sorted by
        path.
    """
    dirs = [x.path for x in os.scandir(fsdb.selfplay_dir()) if x.is_dir()]
    src_patterns = []
    for d in sorted(dirs, reverse=True)[:FLAGS.window_size]:
        src_patterns.append(os.path.join(d, '*', '*', '*.tfrecord.zz'))

    dst_path = os.path.join(fsdb.golden_chunk_dir(),
                            '{}.tfrecord.zz'.format(state.train_model_name))

    logging.info('Writing training chunks to %s', dst_path)
    lines = await sample_records(src_patterns, dst_path,
                                 num_read_threads=8, num_write_threads=8,
                                 sample_frac=FLAGS.train_filter)
    logging.info('\n'.join(lines))

    chunk_pattern = os.path.join(
        fsdb.golden_chunk_dir(),
        '{}-*-of-*.tfrecord.zz'.format(state.train_model_name))
    chunk_paths = sorted(tf.gfile.Glob(chunk_pattern))
    assert len(chunk_paths) == 8
    return chunk_paths
def initialize_from_checkpoint(state):
    """Initialize the reinforcement learning loop from a checkpoint."""
    # The checkpoint's work_dir should contain the most recently trained
    # model.
    model_paths = glob.glob(os.path.join(FLAGS.checkpoint_dir,
                                         'work_dir/model.ckpt-*.pb'))
    if len(model_paths) != 1:
        raise RuntimeError(
            'Expected exactly one model in the checkpoint work_dir, '
            'got [{}]'.format(', '.join(model_paths)))
    start_model_path = model_paths[0]

    # Copy the training chunks.
    golden_chunks_dir = os.path.join(FLAGS.checkpoint_dir, 'golden_chunks')
    for basename in os.listdir(golden_chunks_dir):
        path = os.path.join(golden_chunks_dir, basename)
        shutil.copy(path, fsdb.golden_chunk_dir())

    # Copy the latest trained model into the models directory and use it on
    # the first round of selfplay.
    state.best_model_name = 'checkpoint'
    best_model_path = os.path.join(fsdb.models_dir(), state.best_model_name)
    dual_net.optimize_graph(start_model_path, best_model_path,
                            FLAGS.quantization,
                            fsdb.golden_chunk_dir() + '/*.zz',
                            FLAGS.eval_min_max_every_epoch)

    # Copy the training files.
    work_dir = os.path.join(FLAGS.checkpoint_dir, 'work_dir')
    for basename in os.listdir(work_dir):
        path = os.path.join(work_dir, basename)
        shutil.copy(path, fsdb.working_dir())
def fill_and_wait_models(bufsize=EXAMPLES_PER_GENERATION, write_dir=None,
                         threads=8, model_window=100, skip_first_rsync=False):
    """Fills a ringbuffer with positions from the most recent games.

    Continually rsyncs and updates the buffer until a new model is promoted.
    Once it detects a new model, it dumps its contents so training can
    immediately begin on the next model.
    """
    write_dir = write_dir or fsdb.golden_chunk_dir()
    buf = ExampleBuffer(bufsize)
    models = fsdb.get_models()[-model_window:]

    if not skip_first_rsync:
        with timer("Rsync"):
            smart_rsync(models[-1][0] - 6)
    files = tqdm(map(files_for_model, models), total=len(models))
    buf.parallel_fill(list(itertools.chain(*files)), threads=threads)

    print("Filled buffer, watching for new games")
    while fsdb.get_latest_model()[0] == models[-1][0]:
        with timer("Rsync"):
            smart_rsync(models[-1][0] - 2)
        new_files = tqdm(map(files_for_model, models[-2:]),
                         total=len(models[-2:]))
        buf.update(list(itertools.chain(*new_files)))
        time.sleep(60)
    latest = fsdb.get_latest_model()

    print("New model!", latest[1], "!=", models[-1][1])
    print(buf)
    buf.flush(os.path.join(write_dir, str(latest[0] + 1) + '.tfrecord.zz'))
def get_files_exchange(state, mpi_rank):
    ##--> Train gets the selfplay data.
    ##--> Selfplay gets the eval model.
    if mpi_rank == FLAGS.train_rank:
        selfplay_files = glob.glob(
            os.path.join(FLAGS.shared_dir_exchange,
                         state.output_model_name + '-mpirank-*.zz*'))
        for filename in selfplay_files:
            print('Rank = {}, Getting file={} iter={} from SharedFS'.format(
                mpi_rank, filename, state.iter_num))
            shutil.copy(filename, fsdb.golden_chunk_dir())
    else:
        ## Selfplay needs to get the training eval model.
        dst_dir = fsdb.models_dir()
        src_file = os.path.join(FLAGS.shared_dir_exchange,
                                state.train_model_name + '.pb')
        print('Rank = {}, Getting file={} iter={} from SharedFS'.format(
            mpi_rank, src_file, state.iter_num))
        shutil.copy(src_file, dst_dir)

        src_file = os.path.join(FLAGS.shared_dir_exchange,
                                state.train_model_name + '.pb' + '.og')
        print('Rank = {}, Getting file={} iter={} from SharedFS'.format(
            mpi_rank, src_file, state.iter_num))
        shutil.copy(src_file, dst_dir)
def main(unused_argv):
    """Run the reinforcement learning loop."""
    print('Wiping dir %s' % FLAGS.base_dir, flush=True)
    shutil.rmtree(FLAGS.base_dir, ignore_errors=True)
    dirs = [fsdb.models_dir(), fsdb.selfplay_dir(), fsdb.holdout_dir(),
            fsdb.eval_dir(), fsdb.golden_chunk_dir(), fsdb.working_dir()]
    for d in dirs:
        ensure_dir_exists(d)

    # Copy the flag files so there's no chance of them getting accidentally
    # overwritten while the RL loop is running.
    flags_dir = os.path.join(FLAGS.base_dir, 'flags')
    shutil.copytree(FLAGS.flags_dir, flags_dir)
    FLAGS.flags_dir = flags_dir

    # Copy the target model to the models directory so we can find it easily.
    for file_name in ["target.pb",
                      "target_raw.ckpt.data-00000-of-00001",
                      "target_raw.ckpt.index",
                      "target_raw.ckpt.meta"]:
        shutil.copy(FLAGS.target_path[:-len("target.pb")] + file_name,
                    os.path.join(fsdb.models_dir(), file_name))

    logging.getLogger().addHandler(
        logging.FileHandler(os.path.join(FLAGS.base_dir, 'rl_loop.log')))
    formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                  '%Y-%m-%d %H:%M:%S')
    for handler in logging.getLogger().handlers:
        handler.setFormatter(formatter)

    with logged_timer('Total time'):
        try:
            rl_loop()
        finally:
            asyncio.get_event_loop().close()
def rl_loop():
    state = State()
    bootstrap(state)
    selfplay(state)

    while state.iter_num < 100:
        holdout_dir = os.path.join(fsdb.holdout_dir(),
                                   '%06d-*' % state.iter_num)
        tf_records = os.path.join(fsdb.golden_chunk_dir(), '*.zz')
        tf_records = sorted(tensorflow.gfile.Glob(tf_records),
                            reverse=True)[:5]

        state.iter_num += 1

        # Train on shuffled game data of the last 5 selfplay rounds.
        train(state, tf_records)

        # These could run in parallel.
        validate(state, holdout_dir)
        model_win_rate = evaluate_model(state)
        target_win_rate = evaluate_target(state)

        # This could run in parallel to the rest.
        selfplay(state)

        if model_win_rate >= 0.55:
            # Promote the trained model to the play model.
            state.play_model_num = state.train_model_num
            state.play_model_name = state.train_model_name
            state.train_model_num += 1
        elif model_win_rate < 0.4:
            # Bury the selfplay games which produced a significantly worse
            # model.
            logging.info('Burying %s.', tf_records[0])
            shutil.move(tf_records[0], tf_records[0] + '.bury')

        yield target_win_rate
def initialize_from_checkpoint(state):
    """Initialize the reinforcement learning loop from a checkpoint."""
    # The checkpoint's work_dir should contain the most recently trained
    # model.
    model_paths = glob.glob(
        os.path.join(FLAGS.checkpoint_dir, 'work_dir/model.ckpt-*.pb'))
    if len(model_paths) != 1:
        raise RuntimeError(
            'Expected exactly one model in the checkpoint work_dir, '
            'got [{}]'.format(', '.join(model_paths)))
    start_model_path = model_paths[0]

    # Copy the latest trained model into the models directory and use it on
    # the first round of selfplay.
    state.best_model_name = 'checkpoint'
    shutil.copy(start_model_path,
                os.path.join(fsdb.models_dir(),
                             state.best_model_name + '.pb'))
    shutil.copy(start_model_path + '.og',
                os.path.join(fsdb.models_dir(),
                             state.best_model_name + '.pb.og'))

    # Copy the training chunks.
    golden_chunks_dir = os.path.join(FLAGS.checkpoint_dir, 'golden_chunks')
    for basename in os.listdir(golden_chunks_dir):
        path = os.path.join(golden_chunks_dir, basename)
        out_path = os.path.join(fsdb.golden_chunk_dir(), basename)
        buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)
        buffer.parallel_fill(tf.gfile.Glob(path))
        buffer.flush(out_path, FLAGS.num_gpus_train)

    # Copy the training files.
    work_dir = os.path.join(FLAGS.checkpoint_dir, 'work_dir')
    for basename in os.listdir(work_dir):
        path = os.path.join(work_dir, basename)
        shutil.copy(path, fsdb.working_dir())
def selfplay(state):
    output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
    holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)
    model_path = os.path.join(fsdb.models_dir(), state.best_model_name)

    result = checked_run([
        'bazel-bin/cc/selfplay',
        '--parallel_games=2048',
        '--num_readouts=100',
        '--model={}.pb'.format(model_path),
        '--output_dir={}'.format(output_dir),
        '--holdout_dir={}'.format(holdout_dir)
    ] + cc_flags(state), 'selfplay')
    logging.info(get_lines(result, make_slice[-2:]))

    # Write examples to a single record.
    pattern = os.path.join(output_dir, '*', '*.zz')
    random.seed(state.seed)
    tf.set_random_seed(state.seed)
    np.random.seed(state.seed)
    # TODO(tommadams): This method of generating one golden chunk per
    # generation is sub-optimal because each chunk gets reused multiple times
    # for training, introducing bias. Instead, a fresh dataset should be
    # uniformly sampled out of *all* games in the training window before the
    # start of each training run.
    buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)

    # TODO(tommadams): parallel_fill is currently non-deterministic. Make it
    # not so.
    logging.info('Writing golden chunk from "{}"'.format(pattern))
    buffer.parallel_fill(tf.gfile.Glob(pattern))
    buffer.flush(os.path.join(fsdb.golden_chunk_dir(),
                              state.output_model_name + '.tfrecord.zz'))
def main(unused_argv):
    """Run the reinforcement learning loop."""
    print('Wiping dir %s' % FLAGS.base_dir, flush=True)
    shutil.rmtree(FLAGS.base_dir, ignore_errors=True)

    utils.ensure_dir_exists(fsdb.models_dir())
    utils.ensure_dir_exists(fsdb.selfplay_dir())
    utils.ensure_dir_exists(fsdb.holdout_dir())
    utils.ensure_dir_exists(fsdb.eval_dir())
    utils.ensure_dir_exists(fsdb.golden_chunk_dir())
    utils.ensure_dir_exists(fsdb.working_dir())

    # Copy the flag files so there's no chance of them getting accidentally
    # overwritten while the RL loop is running.
    flags_dir = os.path.join(FLAGS.base_dir, 'flags')
    shutil.copytree(FLAGS.flags_dir, flags_dir)
    FLAGS.flags_dir = flags_dir

    # Copy the target model to the models directory so we can find it easily.
    shutil.copy('ml_perf/target.pb', fsdb.models_dir())

    logging.getLogger().addHandler(
        logging.FileHandler(os.path.join(FLAGS.base_dir, 'rl_loop.log')))
    formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                  '%Y-%m-%d %H:%M:%S')
    for handler in logging.getLogger().handlers:
        handler.setFormatter(formatter)

    with utils.logged_timer('Total time'):
        try:
            rl_loop()
        finally:
            asyncio.get_event_loop().close()
def main(unused_argv):
    """Run the reinforcement learning loop."""
    print('Wiping dir %s' % FLAGS.base_dir, flush=True)
    shutil.rmtree(FLAGS.base_dir, ignore_errors=True)

    utils.ensure_dir_exists(fsdb.models_dir())
    utils.ensure_dir_exists(fsdb.selfplay_dir())
    utils.ensure_dir_exists(fsdb.holdout_dir())
    utils.ensure_dir_exists(fsdb.eval_dir())
    utils.ensure_dir_exists(fsdb.golden_chunk_dir())
    utils.ensure_dir_exists(fsdb.working_dir())

    # Copy the target model to the models directory so we can find it easily.
    shutil.copy('ml_perf/target.pb', fsdb.models_dir())

    logging.getLogger().addHandler(
        logging.FileHandler(
            os.path.join(FLAGS.base_dir, 'reinforcement.log')))
    formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                  '%Y-%m-%d %H:%M:%S')
    for handler in logging.getLogger().handlers:
        handler.setFormatter(formatter)

    with utils.logged_timer('Total time'):
        for target_win_rate in rl_loop():
            if target_win_rate > 0.5:
                return logging.info('Passed exit criteria.')
        logging.info('Failed to converge.')
def gen_golden_chunk(files, state):
    buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)
    buffer.parallel_fill(files[1], threads=1)
    buffer.flush(os.path.join(
        fsdb.golden_chunk_dir(),
        state.output_model_name + '-{}.tfrecord.zz'.format(files[0])))
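# Editor's sketch (not in the original source): gen_golden_chunk above
# expects a (chunk_index, file_list) pair, i.e. files[0] names the chunk and
# files[1] holds the record paths to fill the buffer from. A hypothetical
# helper showing how a caller might build such pairs with enumerate():
def shard_selfplay_files(paths, num_shards):
    """Round-robin `paths` into `num_shards` (index, file_list) pairs."""
    return list(enumerate(paths[i::num_shards] for i in range(num_shards)))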
def get_golden_chunk_records():
    """Return up to FLAGS.window_size golden chunks to train on.

    Returns:
        A list of golden chunk paths, most recent first.
    """
    pattern = os.path.join(fsdb.golden_chunk_dir(), '*.zz')
    return sorted(tf.gfile.Glob(pattern), reverse=True)[:FLAGS.window_size]
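# A minimal, self-contained illustration (filenames assumed) of the windowing
# idiom used by get_golden_chunk_records: a reverse lexicographic sort puts
# the most recently numbered chunks first, and the slice keeps only the most
# recent window.
def _demo_chunk_window():
    chunks = ['000001.tfrecord.zz', '000003.tfrecord.zz',
              '000002.tfrecord.zz']
    window_size = 2
    assert sorted(chunks, reverse=True)[:window_size] == [
        '000003.tfrecord.zz', '000002.tfrecord.zz']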
async def sample_training_examples(state):
    """Sample training examples from recent selfplay games.

    Args:
        state: the RL loop State instance.

    Returns:
        A list of paths to the golden chunks that were written, sorted by
        path.
    """
    # Training examples are written out to the following directory hierarchy:
    #   selfplay_dir/device_id/model_name/timestamp/
    # Read examples from the most recent `window_size` models.
    device_dirs = [x.path for x in os.scandir(fsdb.selfplay_dir())
                   if x.is_dir()]
    models = set()
    for d in device_dirs:
        models.update([x.name for x in os.scandir(d) if x.is_dir()])
    models = sorted(models, reverse=True)[:FLAGS.window_size]

    src_patterns = []
    for d in device_dirs:
        for model in models:
            src_patterns.append(os.path.join(d, model, '*', '*.tfrecord.zz'))

    dst_path = os.path.join(fsdb.golden_chunk_dir(),
                            '{}.tfrecord.zz'.format(state.train_model_name))

    logging.info('Writing training chunks to %s', dst_path)
    lines = await sample_records(src_patterns, dst_path,
                                 num_read_threads=8, num_write_threads=8,
                                 sample_frac=FLAGS.train_filter)
    logging.info('\n'.join(lines))

    chunk_pattern = os.path.join(
        fsdb.golden_chunk_dir(),
        '{}-*-of-*.tfrecord.zz'.format(state.train_model_name))
    chunk_paths = sorted(tf.gfile.Glob(chunk_pattern))
    assert len(chunk_paths) == 8
    return chunk_paths
def post_train(state):
    model_path = os.path.join(fsdb.models_dir(), state.train_model_name)
    dual_net.optimize_graph(model_path + '.pb', model_path,
                            FLAGS.quantization,
                            fsdb.golden_chunk_dir() + '/*.zz',
                            FLAGS.eval_min_max_every_epoch)
    mll.save_model(state.iter_num - 1)

    # Append the time elapsed from when the RL was started to when this model
    # was trained.
    elapsed = time.time() - state.start_time
    timestamps_path = os.path.join(fsdb.models_dir(), 'train_times.txt')
    with gfile.Open(timestamps_path, 'a') as f:
        print('{:.3f} {}'.format(elapsed, state.train_model_name), file=f)
def get_golden_chunk_records(num_records):
    """Return up to num_records of golden chunks to train on.

    Args:
        num_records: maximum number of records to return.

    Returns:
        A list of golden chunks up to num_records in length, sorted by path.
    """
    pattern = os.path.join(fsdb.golden_chunk_dir(), '*.zz')
    return sorted(tf.gfile.Glob(pattern), reverse=True)[:num_records]
def main(unused_argv):
    """Run the reinforcement learning loop."""
    logging.getLogger('mlperf_compliance').propagate = False

    ##--> Multi-node setup.
    if FLAGS.use_multinode:
        mpi_comm = MPI.COMM_WORLD
        mpi_rank = mpi_comm.Get_rank()
        mpi_size = mpi_comm.Get_size()
        print('[MPI Init] MPI rank {}, mpi size is {} host is {}'.format(
            mpi_rank, mpi_size, socket.gethostname()))
    else:
        mpi_comm = None
        mpi_rank = 0
        mpi_size = 1

    print('Wiping dir %s' % FLAGS.base_dir, flush=True)
    shutil.rmtree(FLAGS.base_dir, ignore_errors=True)
    dirs = [fsdb.models_dir(), fsdb.selfplay_dir(), fsdb.holdout_dir(),
            fsdb.eval_dir(), fsdb.golden_chunk_dir(), fsdb.working_dir()]

    ##--> Shared FS for data exchange; temporary solution (5/6/2019).
    if FLAGS.use_multinode:
        ensure_dir_exists(FLAGS.shared_dir_exchange)
    for d in dirs:
        ensure_dir_exists(d)

    # Copy the flag files so there's no chance of them getting accidentally
    # overwritten while the RL loop is running.
    flags_dir = os.path.join(FLAGS.base_dir, 'flags')
    shutil.copytree(FLAGS.flags_dir, flags_dir)
    FLAGS.flags_dir = flags_dir

    # Copy the target model to the models directory so we can find it easily.
    shutil.copy(FLAGS.target_path,
                os.path.join(fsdb.models_dir(), 'target.pb'))
    shutil.copy(FLAGS.target_path + '.og',
                os.path.join(fsdb.models_dir(), 'target.pb.og'))

    with logged_timer('Total time from mpi_rank={}'.format(mpi_rank)):
        try:
            rl_loop(mpi_comm, mpi_rank, mpi_size)
        finally:
            asyncio.get_event_loop().close()
async def selfplay(state, flagfile='selfplay', seed_factor=0):
    """Run selfplay and write a training chunk to the fsdb golden_chunk_dir.

    Args:
        state: the RL loop State instance.
        flagfile: the name of the flagfile to use for selfplay, either
            'selfplay' (the default) or 'bootstrap'.
        seed_factor: factor to increase the seed by.
    """
    output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
    holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)

    lines = await run(
        'bazel-bin/cc/selfplay',
        '--flagfile={}.flags'.format(os.path.join(FLAGS.flags_dir, flagfile)),
        '--model={}'.format(get_ckpt_path(state.best_model_path)),
        '--output_dir={}'.format(output_dir),
        '--holdout_dir={}'.format(holdout_dir),
        '--seed={}'.format(state.seed + 100 * seed_factor))
    result = '\n'.join(lines[-6:])
    logging.info(result)

    result = '\n'.join(lines[-50:])
    try:
        stats = parse_win_stats_table(result, 1)[0]
        num_games = stats.total_wins
        logging.info('Black won %0.3f, white won %0.3f',
                     stats.black_wins.total / num_games,
                     stats.white_wins.total / num_games)
    except AssertionError:
        # Poplar logging might break the line extraction approach.
        logging.error("No results to parse: \n %s" % lines[-50:])

    if not MULTI_SP:
        # Write examples to a single record.
        pattern = os.path.join(output_dir, '*', '*.zz')
        random.seed(state.seed)
        tf.set_random_seed(state.seed)
        np.random.seed(state.seed)
        # TODO(tommadams): This method of generating one golden chunk per
        # generation is sub-optimal because each chunk gets reused multiple
        # times for training, introducing bias. Instead, a fresh dataset
        # should be uniformly sampled out of *all* games in the training
        # window before the start of each training run.
        buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)

        # TODO(tommadams): parallel_fill is currently non-deterministic.
        # Make it not so.
        logging.info('Writing golden chunk from "{}"'.format(pattern))
        buffer.parallel_fill(tf.gfile.Glob(pattern))
        buffer.flush(os.path.join(fsdb.golden_chunk_dir(),
                                  state.output_model_name + '.tfrecord.zz'))
def divide_record(state, pattern, num_out, rank):
    if rank < 0:
        rank_str = ''
    else:
        rank_str = '-mpirank-' + str(rank)

    buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)
    buffer.parallel_fill(tf.gfile.Glob(pattern))
    output = os.path.join(fsdb.golden_chunk_dir(),
                          state.output_model_name + rank_str +
                          '.tfrecord.zz')
    buffer.flush(output, num_out)

    if rank >= 0:
        ## Put the files on the exchange.
        output = output + '*'
        put_files_exchange(state, rank, fileout=output)
async def selfplay_multi(state, num_ipus):
    """Start `num_ipus` selfplay processes."""
    output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
    holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)
    flagfile = 'selfplay'

    all_tasks = []
    loop = asyncio.get_event_loop()
    for i in range(num_ipus):
        all_tasks.append(loop.create_task(
            selfplay_sub(state, output_dir, holdout_dir, flagfile, i)))
    all_lines = await asyncio.gather(*all_tasks, return_exceptions=True)

    black_wins_total = white_wins_total = num_games = 0
    for lines in all_lines:
        if isinstance(lines, (RuntimeError, OSError)):
            raise lines
        result = '\n'.join(lines[-6:])
        logging.info(result)
        stats = parse_win_stats_table(result, 1)[0]
        num_games += stats.total_wins
        black_wins_total += stats.black_wins.total
        white_wins_total += stats.white_wins.total

    logging.info('Black won %0.3f, white won %0.3f',
                 black_wins_total / num_games,
                 white_wins_total / num_games)

    # Copied from selfplay() to aggregate results; could potentially be
    # parallelized with training.

    # Write examples to a single record.
    pattern = os.path.join(output_dir, '*', '*.zz')
    random.seed(state.seed)
    tf.set_random_seed(state.seed)
    np.random.seed(state.seed)
    # TODO(tommadams): This method of generating one golden chunk per
    # generation is sub-optimal because each chunk gets reused multiple times
    # for training, introducing bias. Instead, a fresh dataset should be
    # uniformly sampled out of *all* games in the training window before the
    # start of each training run.
    buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)

    # TODO(tommadams): parallel_fill is currently non-deterministic. Make it
    # not so.
    logging.info('Writing golden chunk from "{}"'.format(pattern))
    buffer.parallel_fill(tf.gfile.Glob(pattern))
    buffer.flush(os.path.join(fsdb.golden_chunk_dir(),
                              state.output_model_name + '.tfrecord.zz'))
def fill_and_wait_time(bufsize=EXAMPLES_PER_GENERATION, write_dir=None,
                       threads=32, start_from=None):
    start_from = start_from or dt.datetime.utcnow()
    write_dir = write_dir or fsdb.golden_chunk_dir()
    buf = ExampleBuffer(bufsize)
    chunk_to_make, fast_write = _determine_chunk_to_make(write_dir)

    hours = fsdb.get_hour_dirs()
    with timer("Rsync"):
        time_rsync(min(
            dt.datetime.strptime(hours[-1], "%Y-%m-%d-%H/"), start_from))
    start_from = dt.datetime.utcnow()

    hours = fsdb.get_hour_dirs()
    files = (tf.gfile.Glob(os.path.join(LOCAL_DIR, d, "*.zz"))
             for d in reversed(hours)
             if tf.gfile.Exists(os.path.join(LOCAL_DIR, d)))
    files = itertools.islice(files, get_window_size(chunk_to_make))

    models = fsdb.get_models()
    buf.parallel_fill(
        list(itertools.chain.from_iterable(files)), threads=threads)

    print("Filled buffer, watching for new games")
    while (fsdb.get_latest_model() == models[-1] or
           buf.total_updates < MINIMUM_NEW_GAMES):
        with timer("Rsync"):
            time_rsync(start_from - dt.timedelta(minutes=60))
        start_from = dt.datetime.utcnow()
        hours = sorted(fsdb.get_hour_dirs(LOCAL_DIR))
        new_files = list(
            map(lambda d: tf.gfile.Glob(os.path.join(LOCAL_DIR, d, '*.zz')),
                hours[-2:]))
        buf.update(list(itertools.chain.from_iterable(new_files)))
        if fast_write:
            break
        time.sleep(30)
        if fsdb.get_latest_model() != models[-1]:
            print("New model! Waiting for games. Got",
                  buf.total_updates, "new games so far")

    latest = fsdb.get_latest_model()
    print("New model!", latest[1], "!=", models[-1][1])
    print(buf)
    buf.flush(chunk_to_make)
def get_golden_chunk_records(state, mpi_size=1):
    """Return the golden chunks to train on for the current window.

    Returns:
        A list of golden chunk paths, most recent first.
    """
    ## How many selfplay nodes do we fetch data from?
    num_selfplays = 1 if mpi_size == 1 else (mpi_size - 1)

    # Until the window is full, the remaining slots are filled by the
    # checkpoint chunks; afterwards use a full window from every selfplay
    # node.
    if state.iter_num <= FLAGS.window_size:
        win_size = (state.iter_num * num_selfplays +
                    (FLAGS.window_size - state.iter_num))
    else:
        win_size = FLAGS.window_size * num_selfplays

    print('Train get_golden_chunks at iter = {} has win_size = {}'.format(
        state.iter_num, win_size))

    pattern = os.path.join(fsdb.golden_chunk_dir(), '*.zz*')
    return sorted(tf.gfile.Glob(pattern),
                  reverse=True)[:win_size * FLAGS.num_gpus_train]
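# Worked example (values assumed, not from the original): with
# FLAGS.window_size = 5 and mpi_size = 4, so num_selfplays = 3, the window
# still mixes in checkpoint chunks while the loop warms up:
#   iter_num = 2: win_size = 2 * 3 + (5 - 2) = 9
#   iter_num = 7: win_size = 5 * 3 = 15
def _demo_win_size(iter_num, window_size=5, mpi_size=4):
    num_selfplays = 1 if mpi_size == 1 else (mpi_size - 1)
    if iter_num <= window_size:
        return iter_num * num_selfplays + (window_size - iter_num)
    return window_size * num_selfplays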
def initialize_from_checkpoint(state):
    """Initialize the reinforcement learning loop from a checkpoint."""
    # The checkpoint's work_dir should contain the most recently trained
    # model.
    model_paths = glob.glob(os.path.join(FLAGS.checkpoint_dir,
                                         'work_dir/model.ckpt-*.pb'))
    print(os.path.join(FLAGS.checkpoint_dir, 'work_dir/model.ckpt-*.pb'))
    print(os.getcwd())
    if len(model_paths) != 1:
        raise RuntimeError(
            'Expected exactly one model in the checkpoint work_dir '
            '({}), got [{}]'.format(
                os.path.join(FLAGS.checkpoint_dir, 'work_dir'),
                ', '.join(model_paths)))
    start_model_path = model_paths[0]

    # Copy the latest trained model into the models directory and use it on
    # the first round of selfplay.
    state.best_model_name = 'checkpoint'
    shutil.copy(start_model_path,
                os.path.join(fsdb.models_dir(),
                             state.best_model_name + '.pb'))

    start_model_files = glob.glob(os.path.join(
        FLAGS.checkpoint_dir, 'work_dir/model.ckpt-9383_raw.ckpt*'))
    for file_name in start_model_files:
        suffix = os.path.basename(file_name)[len("model.ckpt-9383"):]
        shutil.copy(file_name,
                    os.path.join(fsdb.models_dir(),
                                 state.best_model_name + suffix))

    # Copy the training chunks.
    golden_chunks_dir = os.path.join(FLAGS.checkpoint_dir, '..',
                                     'golden_chunks')
    for basename in os.listdir(golden_chunks_dir):
        path = os.path.join(golden_chunks_dir, basename)
        shutil.copy(path, fsdb.golden_chunk_dir())

    # Copy the training files.
    work_dir = os.path.join(FLAGS.checkpoint_dir, 'work_dir')
    for basename in os.listdir(work_dir):
        path = os.path.join(work_dir, basename)
        shutil.copy(path, fsdb.working_dir())
def main(unused_argv):
    """Run the reinforcement learning loop."""
    mll.init_start()
    print('Wiping dir %s' % FLAGS.base_dir, flush=True)
    shutil.rmtree(FLAGS.base_dir, ignore_errors=True)
    dirs = [fsdb.models_dir(), fsdb.selfplay_dir(), fsdb.holdout_dir(),
            fsdb.eval_dir(), fsdb.golden_chunk_dir(), fsdb.working_dir(),
            fsdb.mpi_log_dir()]
    for d in dirs:
        ensure_dir_exists(d)

    # Copy the flag files so there's no chance of them getting accidentally
    # overwritten while the RL loop is running.
    flags_dir = os.path.join(FLAGS.base_dir, 'flags')
    shutil.copytree(FLAGS.flags_dir, flags_dir)
    FLAGS.flags_dir = flags_dir

    # Copy the target model to the models directory so we can find it easily.
    shutil.copy(FLAGS.target_path,
                os.path.join(fsdb.models_dir(), 'target.pb'))

    logging.getLogger().addHandler(
        logging.FileHandler(os.path.join(FLAGS.base_dir, 'rl_loop.log')))
    formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                  '%Y-%m-%d %H:%M:%S')
    for handler in logging.getLogger().handlers:
        handler.setFormatter(formatter)

    logging.info('Selfplay nodes = {}'.format(FLAGS.selfplay_node))
    logging.info('Train nodes = {}'.format(FLAGS.train_node))
    logging.info('Eval nodes = {}'.format(FLAGS.eval_node))

    with logged_timer('Total time'):
        try:
            mll.init_stop()
            mll.run_start()
            rl_loop()
        finally:
            asyncio.get_event_loop().close()
def selfplay(state):
    play_output_name = state.play_output_name
    play_output_dir = os.path.join(fsdb.selfplay_dir(), play_output_name)
    play_holdout_dir = os.path.join(fsdb.holdout_dir(), play_output_name)

    result = checked_run([
        'external/minigo/cc/main',
        '--mode=selfplay',
        '--parallel_games=2048',
        '--num_readouts=100',
        '--model={}'.format(state.play_model_path),
        '--output_dir={}'.format(play_output_dir),
        '--holdout_dir={}'.format(play_holdout_dir)
    ] + cc_flags(state), 'selfplay')
    logging.info(get_lines(result, make_slice[-2:]))

    # Write examples to a single record.
    logging.info('Extracting examples')
    random.seed(state.seed)
    tensorflow.set_random_seed(state.seed)
    numpy.random.seed(state.seed)
    buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)
    buffer.parallel_fill(
        tensorflow.gfile.Glob(os.path.join(play_output_dir, '*.zz')))
    buffer.flush(os.path.join(fsdb.golden_chunk_dir(),
                              play_output_name + '.tfrecord.zz'))
def spawn_train_workers(state):
    # TODO: needs to be removed.
    tf_records = get_golden_chunk_records(state)
    comm_world = MPI.COMM_WORLD

    # Spawn one worker process per training GPU.
    print("Spawning worker processes on {}".format(socket.gethostname()))
    mpi_info = MPI.Info.Create()
    num_workers = FLAGS.num_gpus_train
    # Subtract 1 core from this value; oversubscription might not work.
    cores_per_worker = (FLAGS.cores_per_socket *
                        FLAGS.num_socket) // num_workers - 1
    mpi_info.Set("host", socket.gethostname())
    mpi_info.Set("map_by", "ppr:{}:socket,PE={}".format(
        num_workers // FLAGS.num_socket, cores_per_worker))
    icomm = MPI.COMM_SELF.Spawn(
        "python3", maxprocs=num_workers,
        args=['train.py', *tf_records,
              '--flagfile={}'.format(
                  os.path.join(FLAGS.flags_dir, 'train.flags')),
              '--work_dir={}'.format(fsdb.working_dir()),
              '--export_path={}'.format(
                  os.path.join(fsdb.models_dir(), 'new_model')),
              '--training_seed=13337',
              '--num_selfplays={}'.format(comm_world.size - 1),
              '--window_iters={}'.format(FLAGS.window_size),
              '--total_iters={}'.format(FLAGS.iterations),
              '--golden_chunk_pattern={}'.format(
                  os.path.join(fsdb.golden_chunk_dir(), '*.zz*')),
              '--freeze=true',
              '--use_multinode=true',
              '--use_mgpu_horovod=true'],
        info=mpi_info)

    return icomm
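# Worked example (hardware shape assumed, not from the original): the map_by
# string built in spawn_train_workers places workers evenly across sockets.
# With cores_per_socket = 28, num_socket = 2 and num_gpus_train = 8 workers,
# cores_per_worker = (28 * 2) // 8 - 1 = 6 and the mapping string becomes
# 'ppr:4:socket,PE=6' (4 workers per socket, 6 cores each).
def _demo_map_by(cores_per_socket=28, num_socket=2, num_workers=8):
    cores_per_worker = (cores_per_socket * num_socket) // num_workers - 1
    return 'ppr:{}:socket,PE={}'.format(num_workers // num_socket,
                                        cores_per_worker)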
def main(unused_argv):
    for i in range(0, NUM_LOOP):
        if i == 0:
            src_model_name = shipname.generate(0)
            fsdb.switch_base(os.path.join(base_dir, src_model_name))
            src_model_path = os.path.join(fsdb.models_dir(), src_model_name)
            bootstrap_model_path = os.path.join(fsdb.models_dir(),
                                                src_model_name)
            mask_flags.checked_run([
                'python3', 'bootstrap.py',
                '--export_path={}'.format(bootstrap_model_path),
                '--work_dir={}'.format(fsdb.working_dir()),
                '--flagfile=rl_loop/local_flags'])
            dst_model_name = shipname.generate(1)
            fsdb.switch_base(os.path.join(base_dir, dst_model_name))
        else:
            src_model_name = dst_model_name
            src_model_path = os.path.join(fsdb.models_dir(), src_model_name)
            dst_model_name = shipname.generate(i + 1)
            fsdb.switch_base(os.path.join(base_dir, dst_model_name))

        utils.ensure_dir_exists(fsdb.models_dir())
        utils.ensure_dir_exists(fsdb.selfplay_dir())
        utils.ensure_dir_exists(fsdb.holdout_dir())
        utils.ensure_dir_exists(fsdb.sgf_dir())
        utils.ensure_dir_exists(fsdb.eval_dir())
        utils.ensure_dir_exists(fsdb.golden_chunk_dir())
        utils.ensure_dir_exists(fsdb.working_dir())

        # bootstrap_name = shipname.generate(0)
        # bootstrap_model_path = os.path.join(fsdb.models_dir(),
        #                                     bootstrap_name)
        print(src_model_name)
        print(src_model_path)

        selfplay_cmd = [
            'python3', 'selfplay.py',
            '--load_file={}'.format(src_model_path),
            '--selfplay_dir={}'.format(
                os.path.join(fsdb.selfplay_dir(), dst_model_name)),
            '--holdout_dir={}'.format(
                os.path.join(fsdb.holdout_dir(), dst_model_name)),
            '--sgf_dir={}'.format(fsdb.sgf_dir()),
            '--holdout_pct=0',
            '--flagfile=rl_loop/local_flags']

        # Selfplay twice...
        mask_flags.checked_run(selfplay_cmd)
        mask_flags.checked_run(selfplay_cmd)
        # ...and once more to generate a held-out game for validation.
        # Exploits flags behavior: if you pass a flag twice, the second one
        # wins.
        mask_flags.checked_run(selfplay_cmd + ['--holdout_pct=100'])

        # Double check that at least one sgf has been generated.
        assert os.listdir(os.path.join(fsdb.sgf_dir(), 'full'))

        print("Making shuffled golden chunk from selfplay data...")
        # TODO(amj): refactor example_buffer so it can be called the same way
        # as everything else.
        eb.make_chunk_for(output_dir=fsdb.golden_chunk_dir(),
                          local_dir=fsdb.working_dir(),
                          game_dir=fsdb.selfplay_dir(),
                          model_num=1,
                          positions=64,
                          threads=8,
                          sampling_frac=1)

        tf_records = sorted(gfile.Glob(
            os.path.join(fsdb.golden_chunk_dir(), '*.tfrecord.zz')))

        # trained_model_name = shipname.generate(1)
        trained_model_name = dst_model_name
        trained_model_path = os.path.join(fsdb.models_dir(),
                                          trained_model_name)

        # Train on shuffled game data.
        mask_flags.checked_run([
            'python3', 'train.py', *tf_records,
            '--work_dir={}'.format(fsdb.working_dir()),
            '--export_path={}'.format(trained_model_path),
            '--flagfile=rl_loop/local_flags'])

    print("Finished!")
def get_golden_chunk_records(num_records):
    # Sort the list of chunks so that the most recent ones are first and
    # return the requested prefix.
    pattern = os.path.join(fsdb.golden_chunk_dir(), '*.zz')
    return sorted(tf.gfile.Glob(pattern), reverse=True)[:num_records]
async def selfplay(state, flagfile='selfplay'):
    """Run selfplay and write a training chunk to the fsdb golden_chunk_dir.

    Args:
        state: the RL loop State instance.
        flagfile: the name of the flagfile to use for selfplay, either
            'selfplay' (the default) or 'bootstrap'.
    """
    output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
    holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)
    output_dir = '/tmp/minigo' + output_dir

    multi_instance, num_instance, flag_list = extract_multi_instance(
        ['--flagfile={}_mi.flags'.format(
            os.path.join(FLAGS.flags_dir, flagfile))])
    sp_cmd = ['bazel-bin/cc/selfplay',
              '--flagfile={}.flags'.format(
                  os.path.join(FLAGS.flags_dir, flagfile)),
              '--model={}'.format(state.best_model_path),
              '--output_dir={}'.format(output_dir),
              '--holdout_dir={}'.format(holdout_dir)]

    if not multi_instance:
        lines = await run(*sp_cmd, '--seed={}'.format(state.seed))
    else:
        if FLAGS.selfplay_node == []:
            # Run selfplay locally.
            lines = await run('python3', 'ml_perf/execute.py',
                              '--num_instance={}'.format(num_instance), '--',
                              *sp_cmd, '--seed={}'.format(state.seed))
        else:
            with logged_timer('selfplay mn'):
                # Run one selfplay instance per host.
                lines = await run_distributed(
                    ['LD_LIBRARY_PATH=$LD_LIBRARY_PATH:cc/tensorflow'],
                    num_instance, FLAGS.selfplay_node, None, None,
                    state.seed, *sp_cmd)

    # result = '\n'.join(lines)
    # with logged_timer('parse win stats'):
    #     stats = parse_win_stats_table(result, 1)[0]
    #     num_games = stats.total_wins
    #     black_total = stats.black_wins.total
    #     white_total = stats.white_wins.total
    #     logging.info('Black won %0.3f, white won %0.3f',
    #                  black_total / num_games,
    #                  white_total / num_games)
    #     bias = abs(white_total - black_total) / num_games
    #     logging.info('Black total %d, white total %d, total games %d, '
    #                  'bias %0.3f.', black_total, white_total, num_games,
    #                  bias)

    with logged_timer('generate golden chunk'):
        # Write examples to a single record.
        hosts = FLAGS.selfplay_node
        if hosts == []:
            hosts = ['localhost']
        num_instance = len(hosts)

        numa_per_node = FLAGS.physical_cores // FLAGS.numa_cores
        train_instance_num = (FLAGS.train_instance_per_numa *
                              len(FLAGS.train_node) * numa_per_node)
        selfplay_node_num = len(hosts)
        selfplay_num = selfplay_node_num
        out_files_number = int(train_instance_num /
                               gcd(train_instance_num, selfplay_num))

        cmd = ['python3', 'ml_perf/divide_golden_chunk.py',
               '--read_path={}'.format(output_dir + "/*"),
               '--write_path={}'.format(
                   os.path.join(fsdb.golden_chunk_dir(),
                                state.output_model_name + '.tfrecord.zz')),
               '--out_files_number={}'.format(out_files_number),
               '--physical_cores={}'.format(FLAGS.physical_cores),
               '--base_dir={}'.format(FLAGS.base_dir)]
        lines = await run_distributed([], 1, hosts, None, None, state.seed,
                                      *cmd)
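# Worked example (cluster shape assumed, not from the original): the
# gcd-based arithmetic in selfplay above picks the smallest number of output
# files that still divides evenly among the training instances. With
# train_instance_num = 8 and selfplay_num = 6, gcd(8, 6) = 2, so each
# selfplay host writes 8 // 2 = 4 files.
from math import gcd

def _demo_out_files_number(train_instance_num=8, selfplay_num=6):
    return train_instance_num // gcd(train_instance_num, selfplay_num)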
async def train(state, window_size):
    """Run training and write a new model to the fsdb models_dir.

    Args:
        state: the RL loop State instance.
        window_size: the number of golden chunks to train on.
    """
    train_node = FLAGS.train_node
    num_node = len(train_node)
    dist_train = num_node > 0

    if dist_train:
        intra_threads = FLAGS.numa_cores // FLAGS.train_instance_per_numa - 1
        numa_per_node = FLAGS.physical_cores // FLAGS.numa_cores
        instance_per_node = numa_per_node * FLAGS.train_instance_per_numa

        mpi_async_progress = ''
        for i in range(numa_per_node):
            for j in range(FLAGS.train_instance_per_numa):
                if (not i == 0) or (not j == 0):
                    mpi_async_progress += ','
                mpi_async_progress += '{}'.format(i * FLAGS.numa_cores + j)
    else:
        intra_threads = FLAGS.physical_cores

    model_path = os.path.join(fsdb.models_dir(), state.train_model_name)
    cmd = ['python3', 'train.py',
           '--flagfile={}'.format(
               os.path.join(FLAGS.flags_dir, 'train.flags')),
           '--work_dir={}'.format(fsdb.working_dir()),
           '--export_path={}'.format(model_path),
           '--window_size={}'.format(window_size),
           '--data_path={}'.format(fsdb.golden_chunk_dir()),
           '--training_seed={}'.format(state.seed),
           '--freeze=True',
           '--num_inter_threads=1',
           '--num_intra_threads={}'.format(intra_threads)]

    if dist_train:
        genvs = ['HOROVOD_FUSION_THRESHOLD=134217728',
                 'KMP_BLOCKTIME=0',
                 'KMP_HW_SUBSET=1T',
                 'OMP_BIND_PROC=true',
                 'I_MPI_ASYNC_PROGRESS_PIN=' + mpi_async_progress,
                 'OMP_NUM_THREADS={}'.format(intra_threads)]
        hosts = []
        proclists = []
        numa_nodes = []
        for node in range(num_node):
            # Add all instances to the list.
            for numa in range(numa_per_node):
                for instance in range(FLAGS.train_instance_per_numa):
                    hosts += [train_node[node]]
                    proclist = (numa * FLAGS.numa_cores +
                                FLAGS.train_instance_per_numa +
                                instance * intra_threads)
                    proclists += ['{}'.format(proclist)]
                    numa_nodes += ['{}'.format(numa)]

        lines = await run_distributed(genvs, 1, hosts, proclists, numa_nodes,
                                      None, *cmd, '--dist_train=True')
    else:
        lines = await run(*cmd)

    print('\n'.join(lines), file=sys.stderr)
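# Worked example (topology assumed, not from the original): the proclist pin
# computed in train() offsets each instance past the async-progress cores at
# the start of its NUMA node. With numa_cores = 28 and
# train_instance_per_numa = 2 (so intra_threads = 28 // 2 - 1 = 13),
# instance 1 on NUMA node 1 pins at 1 * 28 + 2 + 1 * 13 = 43.
def _demo_proclist(numa, instance, numa_cores=28, train_instance_per_numa=2):
    intra_threads = numa_cores // train_instance_per_numa - 1
    return (numa * numa_cores + train_instance_per_numa +
            instance * intra_threads)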
def main(unused_argv):
    """Run the reinforcement learning loop."""
    utils.ensure_dir_exists(fsdb.models_dir())
    utils.ensure_dir_exists(fsdb.selfplay_dir())
    utils.ensure_dir_exists(fsdb.holdout_dir())
    utils.ensure_dir_exists(fsdb.sgf_dir())
    utils.ensure_dir_exists(fsdb.eval_dir())
    utils.ensure_dir_exists(fsdb.golden_chunk_dir())
    utils.ensure_dir_exists(fsdb.working_dir())

    bootstrap_name = shipname.generate(0)
    bootstrap_model_path = os.path.join(fsdb.models_dir(), bootstrap_name)
    mask_flags.checked_run([
        'python3', 'bootstrap.py',
        '--export_path={}'.format(bootstrap_model_path),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--flagfile=rl_loop/local_flags'])

    selfplay_cmd = [
        'python3', 'selfplay.py',
        '--load_file={}'.format(bootstrap_model_path),
        '--selfplay_dir={}'.format(
            os.path.join(fsdb.selfplay_dir(), bootstrap_name)),
        '--holdout_dir={}'.format(
            os.path.join(fsdb.holdout_dir(), bootstrap_name)),
        '--sgf_dir={}'.format(fsdb.sgf_dir()),
        '--holdout_pct=0',
        '--flagfile=rl_loop/local_flags']

    # Selfplay twice...
    mask_flags.checked_run(selfplay_cmd)
    mask_flags.checked_run(selfplay_cmd)
    # ...and once more to generate a held-out game for validation.
    # Exploits flags behavior: if you pass a flag twice, the second one wins.
    mask_flags.checked_run(selfplay_cmd + ['--holdout_pct=100'])

    # Double check that at least one sgf has been generated.
    assert os.listdir(os.path.join(fsdb.sgf_dir(), 'full'))

    print("Making shuffled golden chunk from selfplay data...")
    # TODO(amj): refactor example_buffer so it can be called the same way as
    # everything else.
    eb.make_chunk_for(output_dir=fsdb.golden_chunk_dir(),
                      local_dir=fsdb.working_dir(),
                      game_dir=fsdb.selfplay_dir(),
                      model_num=1,
                      positions=64,
                      threads=8,
                      sampling_frac=1)

    tf_records = sorted(
        gfile.Glob(os.path.join(fsdb.golden_chunk_dir(), '*.tfrecord.zz')))

    trained_model_name = shipname.generate(1)
    trained_model_path = os.path.join(fsdb.models_dir(), trained_model_name)

    # Train on shuffled game data.
    mask_flags.checked_run([
        'python3', 'train.py', *tf_records,
        '--work_dir={}'.format(fsdb.working_dir()),
        '--export_path={}'.format(trained_model_path),
        '--flagfile=rl_loop/local_flags'])

    # Validate the trained model on held-out games.
    mask_flags.checked_run([
        'python3', 'validate.py',
        os.path.join(fsdb.holdout_dir(), bootstrap_name),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--flagfile=rl_loop/local_flags'])

    # Verify that the trained model works for selfplay.
    # Exploits flags behavior: if you pass a flag twice, the second one wins.
    mask_flags.checked_run(
        selfplay_cmd + ['--load_file={}'.format(trained_model_path)])

    mask_flags.checked_run([
        'python3', 'evaluate.py',
        bootstrap_model_path, trained_model_path,
        '--games=1',
        '--eval_sgf_dir={}'.format(fsdb.eval_dir()),
        '--flagfile=rl_loop/local_flags'])

    print("Completed integration test!")