def action(self):
    """Compute the next move and return its coordinates."""
    # 1. Simulate complete-information board states.
    with utils.logged_timer("simulation"):
        self.board_sims = []
        self.simOppLatest()
        print('num_sims: ', len(self.board_sims))
        #print('one of sim:\n', self.board_sims[-1])
        if len(self.board_sims) == 0:
            # If simulating the opponent's board failed, fall back to using
            # only our own board information.
            tmpGo = Position(n=9, board=self.board_selfNow, to_play=self.color)
            self.board_sims.append(tmpGo)

    # 2. Compute the total score for every playable position.
    with utils.logged_timer("calculation"):
        pbs, vs = self.scoreNet.run_many(self.board_sims)
        scoreBoard = np.sum(pbs, axis=0)
        # Zero out the scores of our own stones.
        selfPlaces = np.transpose(np.nonzero(self.board_selfNow))
        for sp in selfPlaces:
            scoreBoard[sp[0] * 9 + sp[1]] = 0
        # Never play in our own internal liberties.
        board_innerQi = self.findInnerQi()
        scoreBoard.flat[[
            i for (i, x) in enumerate(board_innerQi.flat) if x == 1
        ]] = 0
        # Zero out the scores of illegal positions.
        scoreBoard.flat[[
            i for (i, x) in enumerate(self.illegalBoard.flat) if x == 1
        ]] = 0
        # Never pass voluntarily: drop the pass move from the score board.
        scoreBoard = scoreBoard[:81]
        #print('scoreBoard:\n', scoreBoard)

    # Pass only if no position scores above zero.
    if scoreBoard.sum() == 0:
        action = [-1, -1]
        self.tryAction = action
    else:
        flatMaxIdx = np.argmax(scoreBoard)
        action = [int(flatMaxIdx / 9), int(flatMaxIdx % 9)]
        self.tryAction = action

    with closing(shelve.open('buffer', 'c')) as shelf:
        shelf['color'] = self.color
        shelf['board_selfNow'] = self.board_selfNow
        shelf['board_opp_known'] = self.board_opp_known
        shelf['num_oppStones'] = self.num_oppStones

    return action

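# Every snippet in this collection wraps its expensive phase in
# utils.logged_timer. A minimal sketch of what such a timer is assumed to
# look like: a plain contextmanager that times the enclosed block and both
# prints and logs the elapsed wall-clock time. The exact message format is
# an assumption, not necessarily the project's own implementation.
from contextlib import contextmanager
import logging
import time

@contextmanager
def logged_timer(message):
    """Times the enclosed block and reports the elapsed wall-clock time."""
    tick = time.time()
    yield
    tock = time.time()
    print('%s: %.3f seconds' % (message, tock - tick))
    logging.info('%s: %.3f seconds', message, tock - tick)
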
def validate(
        *tf_record_dirs: 'Directories where holdout data are',
        validate_name: 'Name for validation set (i.e., selfplay or human)'=None):
    tf_records = []
    with utils.logged_timer("Building lists of holdout files"):
        for record_dir in tf_record_dirs:
            tf_records.extend(gfile.Glob(os.path.join(record_dir, '*.zz')))

    first_record = os.path.basename(tf_records[0])
    last_record = os.path.basename(tf_records[-1])
    with utils.logged_timer("Validating from {} to {}".format(
            first_record, last_record)):
        dual_net.validate(tf_records, validate_name=validate_name)

def evaluate(
        black_model: 'The path to the model to play black',
        white_model: 'The path to the model to play white',
        output_dir: 'Where to write the evaluation results' = 'sgf/evaluate',
        games: 'the number of games to play' = 16,
        verbose: 'How verbose the players should be (see selfplay)' = 1):
    utils.ensure_dir_exists(output_dir)

    with utils.logged_timer("Loading weights"):
        black_net = dual_net.DualNetwork(black_model)
        white_net = dual_net.DualNetwork(white_model)

    with utils.logged_timer("Playing game"):
        evaluation.play_match(
            black_net, white_net, games, output_dir, verbose)

def main(unused_argv):
    """Run the reinforcement learning loop."""
    print('Wiping dir %s' % FLAGS.base_dir, flush=True)
    shutil.rmtree(FLAGS.base_dir, ignore_errors=True)

    utils.ensure_dir_exists(fsdb.models_dir())
    utils.ensure_dir_exists(fsdb.selfplay_dir())
    utils.ensure_dir_exists(fsdb.holdout_dir())
    utils.ensure_dir_exists(fsdb.eval_dir())
    utils.ensure_dir_exists(fsdb.golden_chunk_dir())
    utils.ensure_dir_exists(fsdb.working_dir())

    # Copy the flag files so there's no chance of them getting accidentally
    # overwritten while the RL loop is running.
    flags_dir = os.path.join(FLAGS.base_dir, 'flags')
    shutil.copytree(FLAGS.flags_dir, flags_dir)
    FLAGS.flags_dir = flags_dir

    # Copy the target model to the models directory so we can find it easily.
    shutil.copy('ml_perf/target.pb', fsdb.models_dir())

    logging.getLogger().addHandler(
        logging.FileHandler(os.path.join(FLAGS.base_dir, 'rl_loop.log')))
    formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                  '%Y-%m-%d %H:%M:%S')
    for handler in logging.getLogger().handlers:
        handler.setFormatter(formatter)

    with utils.logged_timer('Total time'):
        try:
            rl_loop()
        finally:
            asyncio.get_event_loop().close()

def main(argv):
    """Train on examples and export the updated model weights."""
    tf_records = argv[1:]
    logging.info("Training on %s records: %s to %s",
                 len(tf_records), tf_records[0], tf_records[-1])

    if FLAGS.dist_train:
        hvd.init()
    mllogger = mllog.get_mllogger()
    mllog.config(filename="train.log")
    mllog.config(
        default_namespace="worker1",
        default_stack_offset=1,
        default_clear_line=False)

    with utils.logged_timer("Training"):
        train(*tf_records)

    if (not FLAGS.dist_train) or hvd.rank() == 0:
        if FLAGS.export_path:
            dual_net.export_model(FLAGS.export_path)
            epoch = int(os.path.basename(FLAGS.export_path))
            mllogger.event(key="save_model", value={"Iteration": epoch})
        if FLAGS.freeze:
            dual_net.freeze_graph(FLAGS.export_path, FLAGS.use_trt,
                                  FLAGS.trt_max_batch_size,
                                  FLAGS.trt_precision,
                                  FLAGS.selfplay_precision)

def validate(*tf_records):
    """Validate a model's performance on a set of holdout data."""
    if FLAGS.use_tpu:
        def _input_fn(params):
            return preprocessing.get_tpu_input_tensors(
                params['train_batch_size'], params['input_layout'],
                tf_records, filter_amount=1.0)
    else:
        def _input_fn():
            return preprocessing.get_input_tensors(
                FLAGS.train_batch_size, FLAGS.input_layout, tf_records,
                filter_amount=1.0, shuffle_examples=False)

    steps = FLAGS.examples_to_validate // FLAGS.train_batch_size
    if FLAGS.use_tpu:
        steps //= FLAGS.num_tpu_cores

    estimator = dual_net.get_estimator()
    with utils.logged_timer("Validating"):
        estimator.evaluate(_input_fn, steps=steps, name=FLAGS.validate_name)

def validate(
        working_dir: 'tf.estimator working directory',
        *tf_record_dirs: 'Directories where holdout data are',
        checkpoint_name: 'Which checkpoint to evaluate (None=latest)'=None,
        validate_name: 'Name for validation set (i.e., selfplay or human)'=None):
    tf_records = []
    with utils.logged_timer("Building lists of holdout files"):
        for record_dir in tf_record_dirs:
            tf_records.extend(gfile.Glob(os.path.join(record_dir, '*.zz')))

    first_record = os.path.basename(tf_records[0])
    last_record = os.path.basename(tf_records[-1])
    with utils.logged_timer("Validating from {} to {}".format(
            first_record, last_record)):
        dual_net.validate(
            working_dir, tf_records, checkpoint_name=checkpoint_name,
            validate_name=validate_name)

def main(unused_argv):
    """Run the reinforcement learning loop."""
    print('Wiping dir %s' % FLAGS.base_dir, flush=True)
    shutil.rmtree(FLAGS.base_dir, ignore_errors=True)

    utils.ensure_dir_exists(fsdb.models_dir())
    utils.ensure_dir_exists(fsdb.selfplay_dir())
    utils.ensure_dir_exists(fsdb.holdout_dir())
    utils.ensure_dir_exists(fsdb.eval_dir())
    utils.ensure_dir_exists(fsdb.golden_chunk_dir())
    utils.ensure_dir_exists(fsdb.working_dir())

    # Copy the target model to the models directory so we can find it easily.
    shutil.copy('ml_perf/target.pb', fsdb.models_dir())

    logging.getLogger().addHandler(
        logging.FileHandler(os.path.join(FLAGS.base_dir, 'reinforcement.log')))
    formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                  '%Y-%m-%d %H:%M:%S')
    for handler in logging.getLogger().handlers:
        handler.setFormatter(formatter)

    with utils.logged_timer('Total time'):
        for target_win_rate in rl_loop():
            if target_win_rate > 0.5:
                return logging.info('Passed exit criteria.')
    logging.info('Failed to converge.')

def train(trained_models_dir, estimator_model_dir, training_chunk_dir, params):
    """Train the latest model from gathered data.

    Args:
        trained_models_dir: Where to export the completed generation.
        estimator_model_dir: tf.estimator model directory.
        training_chunk_dir: Directory where gathered training chunks are.
        params: An object of hyperparameters for the model.
    """
    model_num, model_name = utils.get_latest_model(trained_models_dir)
    print('Initializing from model {}'.format(model_name))

    new_model_name = utils.generate_model_name(model_num + 1)
    print('New model will be {}'.format(new_model_name))
    save_file = os.path.join(trained_models_dir, new_model_name)

    tf_records = sorted(
        tf.gfile.Glob(os.path.join(training_chunk_dir,
                                   '*' + _TF_RECORD_SUFFIX)))
    tf_records = tf_records[
        -(params.train_window_size // params.examples_per_chunk):]

    print('Training from: {} to {}'.format(tf_records[0], tf_records[-1]))
    with utils.logged_timer('Training'):
        dualnet.train(estimator_model_dir, tf_records, model_num + 1, params)
    dualnet.export_model(estimator_model_dir, save_file)

def train(trained_models_dir, estimator_model_dir, training_chunk_dir,
          generation, params):
    """Train the latest model from gathered data.

    Args:
        trained_models_dir: Where to export the completed generation.
        estimator_model_dir: tf.estimator model directory.
        training_chunk_dir: Directory where gathered training chunks are.
        generation: Which generation you are training.
        params: A MiniGoParams instance of hyperparameters for the model.
    """
    new_model_name = utils.generate_model_name(generation)
    print('New model will be {}'.format(new_model_name))
    new_model = os.path.join(trained_models_dir, new_model_name)

    print('Training on gathered game data...')
    tf_records = sorted(
        tf.gfile.Glob(os.path.join(training_chunk_dir,
                                   '*' + _TF_RECORD_SUFFIX)))
    tf_records = tf_records[
        -(params.train_window_size // params.examples_per_chunk):]

    print('Training from: {} to {}'.format(tf_records[0], tf_records[-1]))
    with utils.logged_timer('Training'):
        dualnet.train(estimator_model_dir, tf_records, generation, params)
    dualnet.export_model(estimator_model_dir, new_model)

def load_player(model_path):
    print("Loading weights from %s ... " % model_path)
    with logged_timer("Loading weights from %s ... " % model_path):
        network = dual_net.DualNetwork(model_path)
        network.name = os.path.basename(model_path)
    player = MCTSPlayer(network, verbosity=2)
    return player

def evaluate(black_model_name, black_net, white_model_name, white_net,
             evaluate_dir, params):
    """Evaluate two models against each other.

    Uses two DualNetRunners to play as black and white in a Go match. The
    two models play several games, and the model that wins by a margin of
    55% is the winner.

    Args:
        black_model_name: The name of the model playing black.
        black_net: The DualNetRunner model for black.
        white_model_name: The name of the model playing white.
        white_net: The DualNetRunner model for white.
        evaluate_dir: Where to write the evaluation results. Set as
            'base_dir/sgf/evaluate/'.
        params: A MiniGoParams instance of hyperparameters for the model.

    Returns:
        The model name of the winner.

    Raises:
        ValueError: if neither `WHITE` nor `BLACK` is returned.
    """
    with utils.logged_timer('{} games'.format(params.eval_games)):
        winner = evaluation.play_match(
            params, black_net, white_net, params.eval_games,
            params.eval_readouts, evaluate_dir, params.eval_verbose)

    if winner != go.WHITE_NAME and winner != go.BLACK_NAME:
        raise ValueError('Winner should be either White or Black!')

    return black_model_name if winner == go.BLACK_NAME else white_model_name

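# The 55% gating margin mentioned in the docstring above presumably reduces
# to a simple win-count check inside evaluation.play_match. A hedged sketch
# of that criterion; gate_new_model, new_model_wins and total_games are
# illustrative names, not taken from the project:
def gate_new_model(new_model_wins, total_games, margin=0.55):
    """Return True if the challenger won at least `margin` of the games."""
    return new_model_wins / total_games >= margin
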
def train(tf_records: 'list of files of tf_records to train on',
          model_save_path: 'Where to export the completed generation.'):
    print("Training on:", tf_records[0], "to", tf_records[-1])
    with utils.logged_timer("Training"):
        dual_net.train(*tf_records)
    print("== Training done. Exporting model to ", model_save_path)
    dual_net.export_model(flags.FLAGS.model_dir, model_save_path)
    freeze_graph(model_save_path)

def run_game(load_file, selfplay_dir=None, holdout_dir=None,
             sgf_dir=None, holdout_pct=0.05):
    '''Plays a game and records the results and game data.'''
    if sgf_dir is not None:
        minimal_sgf_dir = os.path.join(sgf_dir, 'clean')
        full_sgf_dir = os.path.join(sgf_dir, 'full')
        utils.ensure_dir_exists(minimal_sgf_dir)
        utils.ensure_dir_exists(full_sgf_dir)
    if selfplay_dir is not None:
        utils.ensure_dir_exists(selfplay_dir)
        utils.ensure_dir_exists(holdout_dir)

    with utils.logged_timer("Loading weights from %s ... " % load_file):
        network = dual_net.DualNetwork(load_file)

    with utils.logged_timer("Playing game"):
        player = play(network)

    output_name = '{}-{}'.format(int(time.time()), socket.gethostname())
    game_data = player.extract_data()
    if sgf_dir is not None:
        with gfile.GFile(
                os.path.join(minimal_sgf_dir, '{}.sgf'.format(output_name)),
                'w') as f:
            f.write(player.to_sgf(use_comments=False))
        with gfile.GFile(
                os.path.join(full_sgf_dir, '{}.sgf'.format(output_name)),
                'w') as f:
            f.write(player.to_sgf())

    tf_examples = preprocessing.make_dataset_from_selfplay(game_data)
    if selfplay_dir is not None:
        # Hold out holdout_pct (5% by default) of games for validation.
        if random.random() < holdout_pct:
            fname = os.path.join(
                holdout_dir, "{}.tfrecord.zz".format(output_name))
        else:
            fname = os.path.join(
                selfplay_dir, "{}.tfrecord.zz".format(output_name))
        preprocessing.write_tf_examples(fname, tf_examples)

def train(working_dir: 'tf.estimator working directory.',
          tf_records: 'list of files of tf_records to train on',
          model_save_path: 'Where to export the completed generation.'):
    print("Training on:", tf_records[0], "to", tf_records[-1])
    with utils.logged_timer("Training"):
        dual_net.train(working_dir, tf_records)
    print("== Training done. Exporting model to ", model_save_path)
    dual_net.export_model(working_dir, model_save_path)
    freeze_graph(model_save_path)

def main(argv):
    """Train on examples and export the updated model weights."""
    tf_records = argv[1:]
    logging.info("Training on %s records: %s to %s",
                 len(tf_records), tf_records[0], tf_records[-1])
    with utils.logged_timer("Training"):
        train(*tf_records)
    if FLAGS.export_path:
        dual_net.export_model(FLAGS.export_path)

def gather(selfplay_dir, training_chunk_dir, params):
    """Gather selfplay data into large training chunks.

    Args:
        selfplay_dir: Where to look for games. Set as
            'base_dir/data/selfplay/'.
        training_chunk_dir: Where to put collected games. Set as
            'base_dir/data/training_chunks/'.
        params: An object of hyperparameters for the model.
    """
    # Check the selfplay data from the most recent models.
    _ensure_dir_exists(training_chunk_dir)
    sorted_model_dirs = sorted(tf.gfile.ListDirectory(selfplay_dir))
    models = [
        model_dir.strip('/')
        for model_dir in sorted_model_dirs[-params.gather_generation:]
    ]

    with utils.logged_timer('Finding existing tfrecords...'):
        model_gamedata = {
            model: tf.gfile.Glob(
                os.path.join(selfplay_dir, model, '*' + _TF_RECORD_SUFFIX))
            for model in models
        }
    print('Found {} models'.format(len(models)))
    for model_name, record_files in sorted(model_gamedata.items()):
        print('    {}: {} files'.format(model_name, len(record_files)))

    meta_file = os.path.join(training_chunk_dir, 'meta.txt')
    try:
        with tf.gfile.GFile(meta_file, 'r') as f:
            already_processed = set(f.read().split())
    except tf.errors.NotFoundError:
        already_processed = set()

    num_already_processed = len(already_processed)

    for model_name, record_files in sorted(model_gamedata.items()):
        if set(record_files) <= already_processed:
            continue
        print('Gathering files from {}:'.format(model_name))
        tf_examples = preprocessing.shuffle_tf_examples(
            params.shuffle_buffer_size, params.examples_per_chunk,
            record_files)
        # tqdm to make the loops show a smart progress meter
        for i, example_batch in enumerate(tf_examples):
            output_record = os.path.join(
                training_chunk_dir,
                ('{}-{}' + _TF_RECORD_SUFFIX).format(model_name, str(i)))
            preprocessing.write_tf_examples(
                output_record, example_batch, serialize=False)
        already_processed.update(record_files)

    print('Processed {} new files'.format(
        len(already_processed) - num_already_processed))
    with tf.gfile.GFile(meta_file, 'w') as f:
        f.write('\n'.join(sorted(already_processed)))

def evaluate(trained_models_dir, black_model_name, white_model_name,
             evaluate_dir, params):
    """Evaluate two models against each other.

    With the model names, construct two DualNetRunners to play as black and
    white in a Go match. The two models play several games, and the model
    that wins by a margin of 55% is the winner.

    Args:
        trained_models_dir: Directories where the completed
            generations/models are.
        black_model_name: The name of the model playing black.
        white_model_name: The name of the model playing white.
        evaluate_dir: Where to write the evaluation results. Set as
            'base_dir/sgf/evaluate/'.
        params: An object of hyperparameters for the model.

    Returns:
        The model name of the winner.

    Raises:
        ValueError: if neither `WHITE` nor `BLACK` is returned.
    """
    black_model = os.path.join(trained_models_dir, black_model_name)
    white_model = os.path.join(trained_models_dir, white_model_name)

    print('Evaluate models between {} and {}'.format(
        black_model_name, white_model_name))

    _ensure_dir_exists(evaluate_dir)

    with utils.logged_timer('Loading weights'):
        black_net = dualnet.DualNetRunner(black_model, params)
        white_net = dualnet.DualNetRunner(white_model, params)

    with utils.logged_timer('{} games'.format(params.eval_games)):
        winner = evaluation.play_match(
            params, black_net, white_net, params.eval_games,
            params.eval_readouts, evaluate_dir, params.eval_verbose)

    if winner != go.WHITE_NAME and winner != go.BLACK_NAME:
        raise ValueError('Winner should be either White or Black!')

    return black_model_name if winner == go.BLACK_NAME else white_model_name

def train(working_dir: 'tf.estimator working directory.',
          tf_records: 'list of files of tf_records to train on',
          model_save_path: 'Where to export the completed generation.',
          generation_num: 'Which generation you are training.' = 0):
    print("Training on:", tf_records[0], "to", tf_records[-1])
    with utils.logged_timer("Training"):
        dual_net.train(working_dir, tf_records, generation_num)
    print("Saving to", model_save_path)
    dual_net.export_model(working_dir, model_save_path)
    freeze_graph(model_save_path)

def checked_run(cmd, name):
    logging.info('Running %s:\n  %s', name, '\n  '.join(cmd))
    with utils.logged_timer('%s finished' % name.capitalize()):
        completed_process = subprocess.run(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        if completed_process.returncode:
            logging.error('Error running %s: %s', name,
                          completed_process.stdout.decode())
            raise RuntimeError(
                'Non-zero return code executing %s' % ' '.join(cmd))
    return completed_process

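# A hedged usage sketch for checked_run above; the command list and name are
# illustrative, not taken from the project. checked_run captures stdout (with
# stderr folded in), so the caller can decode it afterwards.
result = checked_run(['echo', 'hello'], 'echo test')
print(result.stdout.decode())
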
def validate(trained_models_dir, holdout_dir, estimator_model_dir, params):
    """Validate the latest model on the holdout dataset.

    Args:
        trained_models_dir: Directories where the completed
            generations/models are.
        holdout_dir: Directories where holdout data are.
        estimator_model_dir: tf.estimator model directory.
        params: A MiniGoParams instance of hyperparameters for the model.
    """
    model_num, _ = utils.get_latest_model(trained_models_dir)

    # Get the holdout game data
    nums_names = utils.get_models(trained_models_dir)

    # Model N was trained on games up through model N-1, so the validation
    # set should only be for models through N-1 as well, thus the
    # (model_num) term.
    models = [num_name for num_name in nums_names if num_name[0] < model_num]

    # pair is a tuple of (model_num, model_name), like (13, 000013-modelname)
    holdout_dirs = [
        os.path.join(holdout_dir, pair[1])
        for pair in models[-params.holdout_generation:]
    ]

    tf_records = []
    with utils.logged_timer('Building lists of holdout files'):
        for record_dir in holdout_dirs:
            if os.path.exists(record_dir):  # make sure holdout dir exists
                tf_records.extend(
                    tf.gfile.Glob(
                        os.path.join(record_dir, '*' + _TF_RECORD_SUFFIX)))

    if not tf_records:
        print('No holdout dataset for validation! '
              'Please check your holdout directory: {}'.format(holdout_dir))
        return

    print('The length of tf_records is {}.'.format(len(tf_records)))
    first_tf_record = os.path.basename(tf_records[0])
    last_tf_record = os.path.basename(tf_records[-1])
    with utils.logged_timer('Validating from {} to {}'.format(
            first_tf_record, last_tf_record)):
        dualnet.validate(estimator_model_dir, tf_records, params)

def gather(selfplay_dir, training_chunk_dir, params):
    """Gather selfplay data into large training chunks.

    Args:
        selfplay_dir: Where to look for games. Set as
            'base_dir/data/selfplay/'.
        training_chunk_dir: Where to put collected games. Set as
            'base_dir/data/training_chunks/'.
        params: A MiniGoParams instance of hyperparameters for the model.
    """
    # Check the selfplay data from the most recent models.
    _ensure_dir_exists(training_chunk_dir)
    sorted_model_dirs = sorted(tf.gfile.ListDirectory(selfplay_dir))
    models = [model_dir.strip('/')
              for model_dir in sorted_model_dirs[-params.gather_generation:]]

    with utils.logged_timer('Finding existing tfrecords...'):
        model_gamedata = {
            model: tf.gfile.Glob(
                os.path.join(selfplay_dir, model, '*' + _TF_RECORD_SUFFIX))
            for model in models
        }
    print('Found {} models'.format(len(models)))
    for model_name, record_files in sorted(model_gamedata.items()):
        print('    {}: {} files'.format(model_name, len(record_files)))

    meta_file = os.path.join(training_chunk_dir, 'meta.txt')
    try:
        with tf.gfile.GFile(meta_file, 'r') as f:
            already_processed = set(f.read().split())
    except tf.errors.NotFoundError:
        already_processed = set()

    num_already_processed = len(already_processed)

    for model_name, record_files in sorted(model_gamedata.items()):
        if set(record_files) <= already_processed:
            continue
        print('Gathering files from {}:'.format(model_name))
        tf_examples = preprocessing.shuffle_tf_examples(
            params.shuffle_buffer_size, params.examples_per_chunk,
            record_files)
        # tqdm to make the loops show a smart progress meter
        for i, example_batch in enumerate(tf_examples):
            output_record = os.path.join(
                training_chunk_dir,
                ('{}-{}' + _TF_RECORD_SUFFIX).format(model_name, str(i)))
            preprocessing.write_tf_examples(
                output_record, example_batch, serialize=False)
        already_processed.update(record_files)

    print('Processed {} new files'.format(
        len(already_processed) - num_already_processed))
    with tf.gfile.GFile(meta_file, 'w') as f:
        f.write('\n'.join(sorted(already_processed)))

def validate(trained_models_dir, holdout_dir, estimator_model_dir, params):
    """Validate the latest model on the holdout dataset.

    Args:
        trained_models_dir: Directories where the completed
            generations/models are.
        holdout_dir: Directories where holdout data are.
        estimator_model_dir: tf.estimator model directory.
        params: A MiniGoParams instance of hyperparameters for the model.
    """
    model_num, _ = utils.get_latest_model(trained_models_dir)

    # Get the holdout game data
    nums_names = utils.get_models(trained_models_dir)

    # Model N was trained on games up through model N-1, so the validation
    # set should only be for models through N-1 as well, thus the
    # (model_num) term.
    models = [num_name for num_name in nums_names if num_name[0] < model_num]

    # pair is a tuple of (model_num, model_name), like (13, 000013-modelname)
    holdout_dirs = [os.path.join(holdout_dir, pair[1])
                    for pair in models[-params.holdout_generation:]]

    tf_records = []
    with utils.logged_timer('Building lists of holdout files'):
        for record_dir in holdout_dirs:
            if os.path.exists(record_dir):  # make sure holdout dir exists
                tf_records.extend(
                    tf.gfile.Glob(
                        os.path.join(record_dir, '*' + _TF_RECORD_SUFFIX)))

    if not tf_records:
        print('No holdout dataset for validation! '
              'Please check your holdout directory: {}'.format(holdout_dir))
        return

    print('The length of tf_records is {}.'.format(len(tf_records)))
    first_tf_record = os.path.basename(tf_records[0])
    last_tf_record = os.path.basename(tf_records[-1])
    with utils.logged_timer('Validating from {} to {}'.format(
            first_tf_record, last_tf_record)):
        dualnet.validate(estimator_model_dir, tf_records, params)

def main(argv):
    """Validate a model's performance on a set of holdout data."""
    _, *validation_paths = argv
    if FLAGS.expand_validation_dirs:
        tf_records = []
        with utils.logged_timer("Building lists of holdout files"):
            for record_dir in validation_paths:
                tf_records.extend(
                    gfile.Glob(os.path.join(record_dir, '*.zz')))
    else:
        tf_records = validation_paths

    if not tf_records:
        raise RuntimeError("Did not find any holdout files for validating!")
    validate(*tf_records)

def selfplay(load_file: "The path to the network model files",
             output_dir: "Where to write the games" = "data/selfplay",
             holdout_dir: "Where to write the held-out games" = "data/holdout",
             output_sgf: "Where to write the sgfs" = "sgf/",
             verbose: '>=2 will print debug info, >=3 will print boards' = 1,
             holdout_pct: 'how many games to hold out for validation' = 0.05):
    clean_sgf = os.path.join(output_sgf, 'clean')
    full_sgf = os.path.join(output_sgf, 'full')

    utils.ensure_dir_exists(clean_sgf)
    utils.ensure_dir_exists(full_sgf)
    utils.ensure_dir_exists(output_dir)
    utils.ensure_dir_exists(holdout_dir)

    with utils.logged_timer("Loading weights from %s ... " % load_file):
        network = dual_net.DualNetwork(load_file)

    with utils.logged_timer("Playing game"):
        player = selfplay_mcts.play(network, verbose)

    output_name = '{}-{}'.format(int(time.time()), socket.gethostname())
    game_data = player.extract_data()
    with gfile.GFile(
            os.path.join(clean_sgf, '{}.sgf'.format(output_name)), 'w') as f:
        f.write(player.to_sgf(use_comments=False))
    with gfile.GFile(
            os.path.join(full_sgf, '{}.sgf'.format(output_name)), 'w') as f:
        f.write(player.to_sgf())

    tf_examples = preprocessing.make_dataset_from_selfplay(game_data)

    # Hold out holdout_pct (5% by default) of games for evaluation.
    if random.random() < holdout_pct:
        fname = os.path.join(holdout_dir, "{}.tfrecord.zz".format(output_name))
    else:
        fname = os.path.join(output_dir, "{}.tfrecord.zz".format(output_name))

    preprocessing.write_tf_examples(fname, tf_examples)

def main(argv):
    """Train on examples and export the updated model weights."""
    tf_records = argv[1:]
    logging.info("Training on %s records: %s to %s",
                 len(tf_records), tf_records[0], tf_records[-1])
    with utils.logged_timer("Training"):
        train(*tf_records)
    if FLAGS.export_path:
        dual_net.export_model(FLAGS.export_path)
    if FLAGS.freeze:
        if FLAGS.use_tpu:
            dual_net.freeze_graph_tpu(FLAGS.export_path)
        else:
            dual_net.freeze_graph(FLAGS.export_path, FLAGS.use_trt,
                                  FLAGS.trt_max_batch_size,
                                  FLAGS.trt_precision)

def checked_run(name, *cmd):
    # Read & expand any flagfiles specified on the commandline so we can know
    # exactly what's going on.
    expanded = flags.FlagValues().read_flags_from_files(cmd)
    logging.info('Running %s:\n  %s', name, ' '.join(expanded))

    with utils.logged_timer('%s finished' % name.capitalize()):
        completed_process = subprocess.run(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        if completed_process.returncode:
            logging.error('Error running %s: %s', name,
                          completed_process.stdout.decode())
            raise RuntimeError(
                'Non-zero return code executing %s' % ' '.join(cmd))
    return completed_process

def checked_run(name, *cmd):
    # Log the expanded & deduped list of command line arguments, so we can
    # know exactly what's going on. Note that we don't pass the expanded list
    # of arguments to the actual subprocess because of a quirk in how unknown
    # flags are handled: unknown flags in flagfiles are silently ignored,
    # while unknown flags on the command line will cause the subprocess to
    # abort.
    logging.info(
        'Running %s:\n  %s %s', name, cmd[0], ' '.join(expand_flags(*cmd)))

    with utils.logged_timer('%s finished' % name.capitalize()):
        completed_process = subprocess.run(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        if completed_process.returncode:
            logging.error('Error running %s: %s', name,
                          completed_process.stdout.decode())
            raise RuntimeError(
                'Non-zero return code executing %s' % ' '.join(cmd))
    return completed_process

async def checked_run(*cmd):
    """Run the given subprocess command in a coroutine.

    Args:
        *cmd: the command to run and its arguments.

    Returns:
        The output that the command wrote to stdout as a list of strings,
        one line per element (stderr output is piped to stdout).

    Raises:
        RuntimeError: if the command returns a non-zero result.
    """
    # Start the subprocess.
    logging.info('Running: %s', expand_cmd_str(cmd))
    with utils.logged_timer('{} finished'.format(get_cmd_name(cmd))):
        p = await asyncio.create_subprocess_exec(
            *cmd, stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.STDOUT)

        # Stream output from the process stdout.
        chunks = []
        while True:
            chunk = await p.stdout.read(16 * 1024)
            if not chunk:
                break
            chunks.append(chunk)

        # Wait for the process to finish, check it was successful & build
        # stdout.
        await p.wait()
        stdout = b''.join(chunks).decode()[:-1]
        if p.returncode:
            raise RuntimeError('Return code {} from process: {}\n{}'.format(
                p.returncode, expand_cmd_str(cmd), stdout))

    log_path = os.path.join(FLAGS.base_dir, get_cmd_name(cmd) + '.log')
    with gfile.Open(log_path, 'a') as f:
        f.write(expand_cmd_str(cmd))
        f.write('\n')
        f.write(stdout)
        f.write('\n')

    # Split stdout into lines.
    return stdout.split('\n')

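# A hedged usage sketch for the coroutine variant of checked_run above; the
# command is illustrative and the coroutine's own dependencies (logging,
# utils, gfile, FLAGS) are assumed to be in scope. asyncio.run (Python 3.7+)
# drives the coroutine to completion on a fresh event loop.
async def _demo():
    for line in await checked_run('echo', 'hello'):
        print(line)

asyncio.run(_demo())
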
def selfplay(selfplay_dirs, selfplay_model, params):
    """Perform selfplay with a specific model.

    Args:
        selfplay_dirs: A dict to specify the directories used in selfplay.
            selfplay_dirs = {
                'output_dir': output_dir,
                'holdout_dir': holdout_dir,
                'clean_sgf': clean_sgf,
                'full_sgf': full_sgf
            }
        selfplay_model: The actual Dualnet runner for selfplay.
        params: A MiniGoParams instance of hyperparameters for the model.
    """
    with utils.logged_timer('Playing game'):
        player = selfplay_mcts.play(
            params.board_size, selfplay_model, params.selfplay_readouts,
            params.selfplay_resign_threshold, params.simultaneous_leaves,
            params.selfplay_verbose)
    output_name = '{}-{}'.format(int(time.time()), socket.gethostname())

    def _write_sgf_data(dir_sgf, use_comments):
        with tf.gfile.GFile(
                os.path.join(dir_sgf, '{}.sgf'.format(output_name)),
                'w') as f:
            f.write(player.to_sgf(use_comments=use_comments))

    _write_sgf_data(selfplay_dirs['clean_sgf'], use_comments=False)
    _write_sgf_data(selfplay_dirs['full_sgf'], use_comments=True)

    game_data = player.extract_data()
    tf_examples = preprocessing.make_dataset_from_selfplay(game_data, params)

    # Hold out 5% of games for evaluation.
    if random.random() < params.holdout_pct:
        fname = os.path.join(
            selfplay_dirs['holdout_dir'], output_name + _TF_RECORD_SUFFIX)
    else:
        fname = os.path.join(
            selfplay_dirs['output_dir'], output_name + _TF_RECORD_SUFFIX)
    preprocessing.write_tf_examples(fname, tf_examples)

def main(argv):
    """Train on examples and export the updated model weights."""
    tf_records = argv[1:]
    logging.info("Training on %s records: %s to %s",
                 len(tf_records), tf_records[0], tf_records[-1])
    with utils.logged_timer("Training"):
        estimator = train(*tf_records)
    if FLAGS.export_path:
        dual_net.export_model(FLAGS.export_path)
        estimator.export_saved_model(FLAGS.export_path,
                                     serving_input_receiver_fn())
    else:
        estimator.export_saved_model('saved_model',
                                     serving_input_receiver_fn())
    if FLAGS.freeze:
        if FLAGS.use_tpu:
            dual_net.freeze_graph_tpu(FLAGS.export_path)
        else:
            dual_net.freeze_graph(FLAGS.export_path)

def run_game(network, args, device=None, sgf_dir=None, holdout_pct=0.05):
    '''Plays a game and records the results and game data.'''
    selfplay_dir = os.path.join(args.selfplay_dir, args.model_name)
    utils.ensure_dir_exists(selfplay_dir)
    holdout_dir = os.path.join(args.holdout_dir, args.model_name)
    utils.ensure_dir_exists(holdout_dir)
    if args.sgf_dir:
        sgf_dir = os.path.join(args.sgf_dir, args.model_name)
        utils.ensure_dir_exists(sgf_dir)

    if sgf_dir is not None:
        minimal_sgf_dir = os.path.join(sgf_dir, 'clean')
        full_sgf_dir = os.path.join(sgf_dir, 'full')
        utils.ensure_dir_exists(minimal_sgf_dir)
        utils.ensure_dir_exists(full_sgf_dir)
    if selfplay_dir is not None:
        utils.ensure_dir_exists(selfplay_dir)
        utils.ensure_dir_exists(holdout_dir)

    with utils.logged_timer("Playing game"):
        player = play(network, args, device=device)

    features, pis, values = player.extract_data(return_features=True)
    features = np.array(features)
    pis = np.array(pis)
    values = np.array(values)
    assert features.shape[0] == pis.shape[0] == values.shape[0]

    output_name = '{}-{}'.format(int(time.time()), features.shape[0])
    if sgf_dir is not None:
        with open(os.path.join(
                minimal_sgf_dir, '{}.sgf'.format(output_name)), 'w') as f:
            f.write(player.to_sgf(use_comments=False))
        with open(os.path.join(
                full_sgf_dir, '{}.sgf'.format(output_name)), 'w') as f:
            f.write(player.to_sgf())

    if selfplay_dir is not None:
        # Hold out holdout_pct (5% by default) of games for validation.
        if random.random() < holdout_pct:
            fname = os.path.join(holdout_dir, "{}.hdf5".format(output_name))
        else:
            fname = os.path.join(selfplay_dir, "{}.hdf5".format(output_name))
        preprocessing.save_h5_examples(fname, features, pis, values)

def selfplay(selfplay_dirs, selfplay_model, params):
    """Perform selfplay with a specific model.

    Args:
        selfplay_dirs: A dict to specify the directories used in selfplay.
            selfplay_dirs = {
                'output_dir': output_dir,
                'holdout_dir': holdout_dir,
                'clean_sgf': clean_sgf,
                'full_sgf': full_sgf
            }
        selfplay_model: The actual Dualnet runner for selfplay.
        params: A MiniGoParams instance of hyperparameters for the model.
    """
    with utils.logged_timer('Playing game'):
        player = selfplay_mcts.play(
            params.board_size, selfplay_model, params.selfplay_readouts,
            params.selfplay_resign_threshold, params.simultaneous_leaves,
            params.selfplay_verbose)
    output_name = '{}-{}'.format(int(time.time()), socket.gethostname())

    def _write_sgf_data(dir_sgf, use_comments):
        with tf.gfile.GFile(
                os.path.join(dir_sgf, '{}.sgf'.format(output_name)),
                'w') as f:
            f.write(player.to_sgf(use_comments=use_comments))

    _write_sgf_data(selfplay_dirs['clean_sgf'], use_comments=False)
    _write_sgf_data(selfplay_dirs['full_sgf'], use_comments=True)

    game_data = player.extract_data()
    tf_examples = preprocessing.make_dataset_from_selfplay(game_data, params)

    # Hold out 5% of games for evaluation.
    if random.random() < params.holdout_pct:
        fname = os.path.join(
            selfplay_dirs['holdout_dir'], output_name + _TF_RECORD_SUFFIX)
    else:
        fname = os.path.join(
            selfplay_dirs['output_dir'], output_name + _TF_RECORD_SUFFIX)
    preprocessing.write_tf_examples(fname, tf_examples)

def _prepare_selfplay(
        model_name, trained_models_dir, selfplay_dir, holdout_dir, sgf_dir,
        params):
    """Set directories and load the network for selfplay.

    Args:
        model_name: The name of the model for self-play.
        trained_models_dir: Directories where the completed
            generations/models are.
        selfplay_dir: Where to write the games. Set as
            'base_dir/data/selfplay/'.
        holdout_dir: Where to write the holdout data. Set as
            'base_dir/data/holdout/'.
        sgf_dir: Where to write the sgf (Smart Game Format) files. Set as
            'base_dir/sgf/'.
        params: A MiniGoParams instance of hyperparameters for the model.

    Returns:
        The directories and network model for selfplay.
    """
    # Set paths for the model with 'model_name'
    model_path = os.path.join(trained_models_dir, model_name)
    output_dir = os.path.join(selfplay_dir, model_name)
    holdout_dir = os.path.join(holdout_dir, model_name)
    # clean_sgf is to write sgf file without comments.
    # full_sgf is to write sgf file with comments.
    clean_sgf = os.path.join(sgf_dir, model_name, 'clean')
    full_sgf = os.path.join(sgf_dir, model_name, 'full')

    _ensure_dir_exists(output_dir)
    _ensure_dir_exists(holdout_dir)
    _ensure_dir_exists(clean_sgf)
    _ensure_dir_exists(full_sgf)

    selfplay_dirs = {
        'output_dir': output_dir,
        'holdout_dir': holdout_dir,
        'clean_sgf': clean_sgf,
        'full_sgf': full_sgf
    }
    # cache the network model for self-play
    with utils.logged_timer('Loading weights from {} ... '.format(model_path)):
        network = dualnet.DualNetRunner(model_path, params)

    return selfplay_dirs, network

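# A hedged sketch of how _prepare_selfplay presumably connects to the
# selfplay(selfplay_dirs, selfplay_model, params) function shown earlier:
# the returned directory dict and cached network feed straight into it.
# run_selfplay_once is a hypothetical wrapper name, and dirs.sgf_dir is an
# assumed attribute alongside the dirs.* names used in main below.
def run_selfplay_once(model_name, dirs, params):
    selfplay_dirs, network = _prepare_selfplay(
        model_name, dirs.trained_models_dir, dirs.selfplay_dir,
        dirs.holdout_dir, dirs.sgf_dir, params)
    selfplay(selfplay_dirs, network, params)
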
def main(_):
    """Run the reinforcement learning loop."""
    tf.logging.set_verbosity(tf.logging.INFO)

    params = _set_params(FLAGS)

    # A dummy model for debug/testing purpose with fewer games and iterations
    if FLAGS.test:
        params = model_params.DummyMiniGoParams()
        base_dir = FLAGS.base_dir + str(FLAGS.board_size) + '_size_dummy/'
    else:
        # Set directories for models and datasets
        base_dir = FLAGS.base_dir + str(FLAGS.board_size) + '_size/'

    dirs = utils.MiniGoDirectory(base_dir)

    # Run selfplay only if user specifies the argument.
    if FLAGS.selfplay:
        selfplay_model_name = (
            FLAGS.selfplay_model_name
            or utils.get_latest_model(dirs.trained_models_dir)[1])
        max_games = FLAGS.selfplay_max_games or params.max_games_per_generation
        run_selfplay(selfplay_model_name, max_games, dirs, params)
        return

    # Run the RL pipeline.
    # If no models have been trained, start from the bootstrap model.
    if not os.path.isdir(dirs.trained_models_dir):
        print('No trained model exists! Starting from Bootstrap...')
        print('Creating random initial weights...')
        bootstrap(dirs.estimator_model_dir, dirs.trained_models_dir, params)
    else:
        print('A MiniGo base directory has been found!')
        print('Start from the last checkpoint...')

    _, best_model_so_far = utils.get_latest_model(dirs.trained_models_dir)
    for rl_iter in range(params.max_iters_per_pipeline):
        print('RL_iteration: {}'.format(rl_iter))

        # Self-play with the best model to generate training data
        run_selfplay(
            best_model_so_far, params.max_games_per_generation, dirs, params)

        # Gather selfplay data for training
        print('Gathering game output...')
        gather(dirs.selfplay_dir, dirs.training_chunk_dir, params)

        # Train the next generation model
        model_num, _ = utils.get_latest_model(dirs.trained_models_dir)
        print('Training on gathered game data...')
        train(dirs.trained_models_dir, dirs.estimator_model_dir,
              dirs.training_chunk_dir, model_num + 1, params)

        # Validate the latest model if needed
        if FLAGS.validation:
            print('Validating on the holdout game data...')
            validate(dirs.trained_models_dir, dirs.holdout_dir,
                     dirs.estimator_model_dir, params)

        _, current_model = utils.get_latest_model(dirs.trained_models_dir)

        if FLAGS.evaluation:  # Perform evaluation if needed
            print('Evaluate models between {} and {}'.format(
                best_model_so_far, current_model))
            black_model = os.path.join(
                dirs.trained_models_dir, best_model_so_far)
            white_model = os.path.join(dirs.trained_models_dir, current_model)
            _ensure_dir_exists(dirs.evaluate_dir)
            with utils.logged_timer('Loading weights'):
                black_net = dualnet.DualNetRunner(black_model, params)
                white_net = dualnet.DualNetRunner(white_model, params)

            best_model_so_far = evaluate(
                best_model_so_far, black_net, current_model, white_net,
                dirs.evaluate_dir, params)
            print('Winner of evaluation: {}!'.format(best_model_so_far))
        else:
            best_model_so_far = current_model