def gen_golden_chunk(files, state):
    """Write one golden chunk from a (chunk_index, file_list) pair."""
    buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)
    buffer.parallel_fill(files[1], threads=1)
    buffer.flush(
        os.path.join(
            fsdb.golden_chunk_dir(),
            state.output_model_name + '-{}.tfrecord.zz'.format(files[0])))
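# The helper below is an illustrative sketch, not part of the original module:
# write_chunks_parallel, file_groups and num_workers are hypothetical names.
# It shows one way gen_golden_chunk could be fanned out over a process pool,
# with each worker receiving an (index, file_list) pair that matches the
# files[0] / files[1] accesses above.
import functools
import multiprocessing as mp


def write_chunks_parallel(state, file_groups, num_workers=4):
    """Write one golden chunk per group of selfplay .zz record paths."""
    with mp.Pool(num_workers) as pool:
        pool.map(functools.partial(gen_golden_chunk, state=state),
                 enumerate(file_groups))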
def selfplay(state):
    output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
    holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)
    model_path = os.path.join(fsdb.models_dir(), state.best_model_name)

    result = checked_run([
        'bazel-bin/cc/selfplay',
        '--parallel_games=2048',
        '--num_readouts=100',
        '--model={}.pb'.format(model_path),
        '--output_dir={}'.format(output_dir),
        '--holdout_dir={}'.format(holdout_dir)
    ] + cc_flags(state), 'selfplay')
    logging.info(get_lines(result, make_slice[-2:]))

    # Write examples to a single record.
    pattern = os.path.join(output_dir, '*', '*.zz')
    random.seed(state.seed)
    tf.set_random_seed(state.seed)
    np.random.seed(state.seed)
    # TODO(tommadams): This method of generating one golden chunk per
    # generation is sub-optimal because each chunk gets reused multiple times
    # for training, introducing bias. Instead, a fresh dataset should be
    # uniformly sampled out of *all* games in the training window before the
    # start of each training run.
    buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)

    # TODO(tommadams): parallel_fill is currently non-deterministic. Make it
    # not so.
    logging.info('Writing golden chunk from "{}"'.format(pattern))
    buffer.parallel_fill(tf.gfile.Glob(pattern))
    buffer.flush(
        os.path.join(fsdb.golden_chunk_dir(),
                     state.output_model_name + '.tfrecord.zz'))
def initialize_from_checkpoint(state, out_files_number):
    """Initialize the reinforcement learning loop from a checkpoint."""
    # The checkpoint's work_dir should contain the most recently trained model.
    model_paths = glob.glob(
        os.path.join(FLAGS.checkpoint_dir, 'work_dir/model.ckpt-*.pb'))
    if len(model_paths) != 1:
        raise RuntimeError(
            'Expected exactly one model in the checkpoint work_dir, '
            'got [{}]'.format(', '.join(model_paths)))
    start_model_path = model_paths[0]

    golden_chunks_dir = os.path.join(FLAGS.checkpoint_dir, 'golden_chunks')
    for basename in os.listdir(golden_chunks_dir):
        path = os.path.join(golden_chunks_dir, basename)
        out_path = os.path.join(fsdb.golden_chunk_dir(), basename)
        buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)
        example_num = buffer.parallel_fill(
            tf.gfile.Glob(path), FLAGS.physical_cores)
        buffer.flush_new(out_path, example_num, out_files_number, 1)

    # Copy the latest trained model into the models directory and use it on
    # the first round of selfplay.
    state.best_model_name = 'checkpoint'
    best_model_path = os.path.join(fsdb.models_dir(), state.best_model_name)

    dual_net.optimize_graph(start_model_path, best_model_path,
                            FLAGS.quantization,
                            fsdb.golden_chunk_dir() + '/*.zz*',
                            FLAGS.eval_min_max_every_epoch)

    # Copy the training files.
    work_dir = os.path.join(FLAGS.checkpoint_dir, 'work_dir')
    for basename in os.listdir(work_dir):
        path = os.path.join(work_dir, basename)
        shutil.copy(path, fsdb.working_dir())
def initialize_from_checkpoint(state):
    """Initialize the reinforcement learning loop from a checkpoint."""
    # The checkpoint's work_dir should contain the most recently trained model.
    model_paths = glob.glob(
        os.path.join(FLAGS.checkpoint_dir, 'work_dir/model.ckpt-*.pb'))
    if len(model_paths) != 1:
        raise RuntimeError(
            'Expected exactly one model in the checkpoint work_dir, '
            'got [{}]'.format(', '.join(model_paths)))
    start_model_path = model_paths[0]

    # Copy the latest trained model into the models directory and use it on
    # the first round of selfplay.
    state.best_model_name = 'checkpoint'
    shutil.copy(start_model_path,
                os.path.join(fsdb.models_dir(),
                             state.best_model_name + '.pb'))
    shutil.copy(
        start_model_path + '.og',
        os.path.join(fsdb.models_dir(), state.best_model_name + '.pb.og'))

    # Copy the training chunks.
    golden_chunks_dir = os.path.join(FLAGS.checkpoint_dir, 'golden_chunks')
    for basename in os.listdir(golden_chunks_dir):
        path = os.path.join(golden_chunks_dir, basename)
        out_path = os.path.join(fsdb.golden_chunk_dir(), basename)
        buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)
        buffer.parallel_fill(tf.gfile.Glob(path))
        buffer.flush(out_path, FLAGS.num_gpus_train)

    # Copy the training files.
    work_dir = os.path.join(FLAGS.checkpoint_dir, 'work_dir')
    for basename in os.listdir(work_dir):
        path = os.path.join(work_dir, basename)
        shutil.copy(path, fsdb.working_dir())
async def selfplay(state, flagfile='selfplay', seed_factor=0):
    """Run selfplay and write a training chunk to the fsdb golden_chunk_dir.

    Args:
        state: the RL loop State instance.
        flagfile: the name of the flagfile to use for selfplay, either
            'selfplay' (the default) or 'bootstrap'.
        seed_factor: factor by which to offset the selfplay seed so that
            concurrent selfplay jobs use distinct seeds.
    """
    output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
    holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)

    lines = await run(
        'bazel-bin/cc/selfplay',
        '--flagfile={}.flags'.format(os.path.join(FLAGS.flags_dir, flagfile)),
        '--model={}'.format(get_ckpt_path(state.best_model_path)),
        '--output_dir={}'.format(output_dir),
        '--holdout_dir={}'.format(holdout_dir),
        '--seed={}'.format(state.seed + 100 * seed_factor))
    result = '\n'.join(lines[-6:])
    logging.info(result)

    result = '\n'.join(lines[-50:])
    try:
        stats = parse_win_stats_table(result, 1)[0]
        num_games = stats.total_wins
        logging.info('Black won %0.3f, white won %0.3f',
                     stats.black_wins.total / num_games,
                     stats.white_wins.total / num_games)
    except AssertionError:
        # Poplar logging might interfere with the line-extraction approach.
        logging.error('No results to parse: \n %s' % lines[-50:])

    if not MULTI_SP:
        # Write examples to a single record.
        pattern = os.path.join(output_dir, '*', '*.zz')
        random.seed(state.seed)
        tf.set_random_seed(state.seed)
        np.random.seed(state.seed)
        # TODO(tommadams): This method of generating one golden chunk per
        # generation is sub-optimal because each chunk gets reused multiple
        # times for training, introducing bias. Instead, a fresh dataset
        # should be uniformly sampled out of *all* games in the training
        # window before the start of each training run.
        buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)

        # TODO(tommadams): parallel_fill is currently non-deterministic. Make
        # it not so.
        logging.info('Writing golden chunk from "{}"'.format(pattern))
        buffer.parallel_fill(tf.gfile.Glob(pattern))
        buffer.flush(os.path.join(fsdb.golden_chunk_dir(),
                                  state.output_model_name + '.tfrecord.zz'))
def divide_record(state, pattern, num_out, rank):
    if rank < 0:
        rank_str = ''
    else:
        rank_str = '-mpirank-' + str(rank)

    buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)
    buffer.parallel_fill(tf.gfile.Glob(pattern))
    output = os.path.join(fsdb.golden_chunk_dir(),
                          state.output_model_name + rank_str + '.tfrecord.zz')
    buffer.flush(output, num_out)
    if rank >= 0:
        # Put the output files on the exchange.
        output = output + '*'
        put_files_exchange(state, rank, fileout=output)
    return
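# The helper below is a hypothetical usage sketch (shard_selfplay_records is
# not part of the original module; mpi_rank and FLAGS.out_files_number are
# assumed to be available from the surrounding MPI setup). It shows how a rank
# might shard its own selfplay records into per-rank golden-chunk files after
# a round of selfplay.
def shard_selfplay_records(state, mpi_rank):
    """Shard this rank's selfplay records into per-rank golden chunks."""
    pattern = os.path.join(fsdb.selfplay_dir(), state.output_model_name,
                           '*', '*.zz')
    divide_record(state, pattern, FLAGS.out_files_number, mpi_rank)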
async def selfplay_multi(state, num_ipus):
    """Start `num_ipus` selfplay processes."""
    output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
    holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)
    flagfile = 'selfplay'

    all_tasks = []
    loop = asyncio.get_event_loop()
    for i in range(num_ipus):
        all_tasks.append(loop.create_task(
            selfplay_sub(state, output_dir, holdout_dir, flagfile, i)))
    all_lines = await asyncio.gather(*all_tasks, return_exceptions=True)

    black_wins_total = white_wins_total = num_games = 0
    for lines in all_lines:
        if isinstance(lines, (RuntimeError, OSError)):
            raise lines
        result = '\n'.join(lines[-6:])
        logging.info(result)
        stats = parse_win_stats_table(result, 1)[0]
        num_games += stats.total_wins
        black_wins_total += stats.black_wins.total
        white_wins_total += stats.white_wins.total

    logging.info('Black won %0.3f, white won %0.3f',
                 black_wins_total / num_games,
                 white_wins_total / num_games)

    # Copied from selfplay() to aggregate results; could this potentially be
    # parallelized with training?
    # Write examples to a single record.
    pattern = os.path.join(output_dir, '*', '*.zz')
    random.seed(state.seed)
    tf.set_random_seed(state.seed)
    np.random.seed(state.seed)
    # TODO(tommadams): This method of generating one golden chunk per
    # generation is sub-optimal because each chunk gets reused multiple times
    # for training, introducing bias. Instead, a fresh dataset should be
    # uniformly sampled out of *all* games in the training window before the
    # start of each training run.
    buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)

    # TODO(tommadams): parallel_fill is currently non-deterministic. Make it
    # not so.
    logging.info('Writing golden chunk from "{}"'.format(pattern))
    buffer.parallel_fill(tf.gfile.Glob(pattern))
    buffer.flush(os.path.join(fsdb.golden_chunk_dir(),
                              state.output_model_name + '.tfrecord.zz'))
def main(unused_argv):
    mpi_comm = MPI.COMM_WORLD
    mpi_rank = mpi_comm.Get_rank()
    mpi_size = mpi_comm.Get_size()

    # Avoid a seed that is out of range.
    random.seed(FLAGS.seed % 1048576)
    tf.set_random_seed(FLAGS.seed % 1048576)
    np.random.seed(FLAGS.seed % 1048576)

    pattern = os.path.join(FLAGS.read_path, '*.zz')
    files = tf.gfile.Glob(pattern)

    buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)
    example_num = buffer.parallel_fill(files, threads=FLAGS.physical_cores)
    # Make sure all nodes generate the same number of examples.
    example_num = int(mpi_comm.allreduce(example_num, op=MPI.MIN))

    buffer.flush_new(FLAGS.write_path + '_{}'.format(mpi_rank), example_num,
                     FLAGS.out_files_number, threads=1)
    shutil.rmtree('/tmp/minigo/home', ignore_errors=True)
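# A possible launch line for the MPI entry point above. The script name and
# every flag value are illustrative assumptions; only the flag names
# (--read_path, --write_path, --out_files_number, --physical_cores, --seed)
# are taken from the code in main(). Each rank reads the same selfplay
# records, and the MPI.MIN allreduce keeps the flushed example counts equal
# across ranks.
#
#   mpirun -np 4 python3 produce_golden_chunks.py \
#       --read_path=$BASE_DIR/selfplay/000001 \
#       --write_path=$BASE_DIR/golden_chunks/000001.tfrecord.zz \
#       --out_files_number=8 \
#       --physical_cores=16 \
#       --seed=42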
def selfplay(state):
    play_output_name = state.play_output_name
    play_output_dir = os.path.join(fsdb.selfplay_dir(), play_output_name)
    play_holdout_dir = os.path.join(fsdb.holdout_dir(), play_output_name)

    result = checked_run([
        'external/minigo/cc/main',
        '--mode=selfplay',
        '--parallel_games=2048',
        '--num_readouts=100',
        '--model={}'.format(state.play_model_path),
        '--output_dir={}'.format(play_output_dir),
        '--holdout_dir={}'.format(play_holdout_dir)
    ] + cc_flags(state), 'selfplay')
    logging.info(get_lines(result, make_slice[-2:]))

    # Write examples to a single record.
    logging.info('Extracting examples')
    random.seed(state.seed)
    tensorflow.set_random_seed(state.seed)
    numpy.random.seed(state.seed)
    buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)
    buffer.parallel_fill(
        tensorflow.gfile.Glob(os.path.join(play_output_dir, '*.zz')))
    buffer.flush(
        os.path.join(fsdb.golden_chunk_dir(),
                     play_output_name + '.tfrecord.zz'))