Example #1
def rl_loop():
    """Run the reinforcement learning loop

    This tries to create a realistic way to run the reinforcement learning with
    all default parameters.
    """

    if goparams.DUMMY_MODEL:
        # monkeypatch the hyperparams so that we get a quickly executing network.
        dual_net.get_default_hyperparams = lambda **kwargs: {
            'k': 8, 'fc_width': 16, 'num_shared_layers': 1, 'l2_strength': 1e-4, 'momentum': 0.9}

        dual_net.TRAIN_BATCH_SIZE = 16
        dual_net.EXAMPLES_PER_GENERATION = 64

        # monkeypatch the shuffle buffer size so we don't spin forever shuffling positions.
        preprocessing.SHUFFLE_BUFFER_SIZE = 1000

    qmeas.stop_time('selfplay_wait')
    print("Gathering game output...")
    gather()

    print("Training on gathered game data...")
    _, model_name = get_latest_model()
    new_model = train()

    if goparams.EVALUATE_PUZZLES:
        qmeas.start_time('puzzle')
        new_model_path = os.path.join(MODELS_DIR, new_model)
        sgf_files = [
            './benchmark_sgf/9x9_pro_YKSH.sgf',
            './benchmark_sgf/9x9_pro_IYMD.sgf',
            './benchmark_sgf/9x9_pro_YSIY.sgf',
            './benchmark_sgf/9x9_pro_IYHN.sgf',
        ]
        result, total_pct = predict_games.report_for_puzzles(
            new_model_path, sgf_files, 2, tries_per_move=1)
        print('accuracy = ', total_pct)
        qmeas.record('puzzle_total', total_pct)
        qmeas.record('puzzle_result', repr(result))
        qmeas.record('puzzle_summary', {
            'results': repr(result),
            'total_pct': total_pct,
            'model': new_model,
        })
        qmeas._flush()
        with open(os.path.join(BASE_DIR, new_model + '-puzzles.txt'), 'w') as f:
            f.write(repr(result))
            f.write('\n' + str(total_pct) + '\n')
        qmeas.stop_time('puzzle')
        if total_pct >= goparams.TERMINATION_ACCURACY:
            print('Reached termination accuracy: ', goparams.TERMINATION_ACCURACY)
            with open('TERMINATE_FLAG', 'w') as f:
                f.write(repr(result))
                f.write('\n' + str(total_pct) + '\n')

    if goparams.EVALUATE_MODELS:
        if not evaluate(model_name, new_model):
            bury_latest_model()


def rl_loop_train():
    """Run the reinforcement learning loop

    This tries to create a realistic way to run the reinforcement learning with
    all default parameters.
    """
    qmeas.stop_time('selfplay_wait')
    print("Gathering game output...")
    gather()

    print("Training on gathered game data...")
    _, model_name = get_latest_model()
    new_model = train()
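
The loop above is driven entirely by flags read from the project's goparams module. A minimal sketch of those flags is shown below; the names come from the code above, but the values are illustrative only and do not reflect the project's real configuration.

# Illustrative sketch only: these names are read by rl_loop() above; the real
# goparams module in the project defines the actual values.
DUMMY_MODEL = False          # if True, shrink the network for a quick smoke test
EVALUATE_PUZZLES = True      # score the new model on the benchmark 9x9 pro SGFs
EVALUATE_MODELS = True       # play the new model against the previous generation
TERMINATION_ACCURACY = 0.35  # puzzle accuracy at which TERMINATE_FLAG is written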
Example #3
def gather(
        input_directory: 'where to look for games' = 'data/selfplay/',
        output_directory: 'where to put collected games' = 'data/training_chunks/',
        examples_per_record: 'how many tf.examples to gather in each chunk' = EXAMPLES_PER_RECORD):
    qmeas.start_time('gather')
    _ensure_dir_exists(output_directory)
    models = [
        model_dir.strip('/')
        for model_dir in sorted(gfile.ListDirectory(input_directory))[-50:]
    ]
    with timer("Finding existing tfrecords..."):
        model_gamedata = {
            model:
            gfile.Glob(os.path.join(input_directory, model, '*.tfrecord.zz'))
            for model in models
        }
    print("Found %d models" % len(models))
    for model_name, record_files in sorted(model_gamedata.items()):
        print("    %s: %s files" % (model_name, len(record_files)))

    meta_file = os.path.join(output_directory, 'meta.txt')
    try:
        with gfile.GFile(meta_file, 'r') as f:
            already_processed = set(f.read().split())
    except tf.errors.NotFoundError:
        already_processed = set()

    num_already_processed = len(already_processed)

    for model_name, record_files in sorted(model_gamedata.items()):
        if set(record_files) <= already_processed:
            continue
        print("Gathering files for %s:" % model_name)
        for i, example_batch in enumerate(
                tqdm(
                    preprocessing.shuffle_tf_examples(examples_per_record,
                                                      record_files))):
            output_record = os.path.join(
                output_directory,
                '{}-{}.tfrecord.zz'.format(model_name, str(i)))
            preprocessing.write_tf_examples(output_record,
                                            example_batch,
                                            serialize=False)
        already_processed.update(record_files)

    print("Processed %s new files" %
          (len(already_processed) - num_already_processed))
    with gfile.GFile(meta_file, 'w') as f:
        f.write('\n'.join(sorted(already_processed)))
    qmeas.stop_time('gather')
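
A hedged usage sketch of gather(): the function is incremental, so re-running it with the same output_directory only processes record files that are not yet listed in meta.txt. The directory paths below are just the defaults from the signature above.

# Hedged usage sketch; assumes the gather() above is importable and the default
# directories exist. Already-processed tfrecords listed in meta.txt are skipped.
gather(input_directory='data/selfplay/',
       output_directory='data/training_chunks/')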
Example #4
def train(working_dir: 'tf.estimator working directory.',
          chunk_dir: 'Directory where gathered training chunks are.',
          model_save_path: 'Where to export the completed generation.',
          generation_num: 'Which generation you are training.' = 0):
    qmeas.start_time('train')
    tf_records = sorted(gfile.Glob(os.path.join(chunk_dir, '*.tfrecord.zz')))
    tf_records = tf_records[-1 * (WINDOW_SIZE // EXAMPLES_PER_RECORD):]

    print("Training from:", tf_records[0], "to", tf_records[-1])

    with timer("Training"):
        dual_net.train(working_dir, tf_records, generation_num)
        dual_net.export_model(working_dir, model_save_path)
    qmeas.stop_time('train')


def rl_loop_eval():
    """Run the reinforcement learning loop

    This tries to create a realistic way to run the reinforcement learning with
    all default parameters.
    """

    (_, new_model) = get_latest_model()

    qmeas.start_time('puzzle')
    new_model_path = os.path.join(MODELS_DIR, new_model)
    sgf_files = [
        './benchmark_sgf/9x9_pro_YKSH.sgf',
        './benchmark_sgf/9x9_pro_IYMD.sgf',
        './benchmark_sgf/9x9_pro_YSIY.sgf',
        './benchmark_sgf/9x9_pro_IYHN.sgf',
    ]
    result, total_pct = predict_games.report_for_puzzles_parallel(
        new_model_path, sgf_files, 2, tries_per_move=1)
    #result, total_pct = predict_games.report_for_puzzles(new_model_path, sgf_files, 2, tries_per_move=1)
    print('accuracy = ', total_pct)
    print('result = ', result)
    mlperf_log.minigo_print(key=mlperf_log.EVAL_ACCURACY,
                            value={
                                "epoch": iteration,
                                "value": total_pct
                            })
    mlperf_log.minigo_print(key=mlperf_log.EVAL_TARGET,
                            value=goparams.TERMINATION_ACCURACY)
    qmeas.record('puzzle_total', total_pct)
    qmeas.record('puzzle_result', repr(result))
    qmeas.record('puzzle_summary', {
        'results': repr(result),
        'total_pct': total_pct,
        'model': new_model
    })
    qmeas._flush()
    with open(os.path.join(BASE_DIR, new_model + '-puzzles.txt'), 'w') as f:
        f.write(repr(result))
        f.write('\n' + str(total_pct) + '\n')
    qmeas.stop_time('puzzle')
    if total_pct >= goparams.TERMINATION_ACCURACY:
        print('Reached termination accuracy: ', goparams.TERMINATION_ACCURACY)
        mlperf_log.minigo_print(key=mlperf_log.RUN_STOP,
                                value={"success": True})
        with open('TERMINATE_FLAG', 'w') as f:
            f.write(repr(result))
            f.write('\n' + str(total_pct) + '\n')
    qmeas.end()
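
rl_loop_eval() writes its puzzle report as repr(result) followed by the total accuracy on the last line. A small, hypothetical helper for reading that file back might look like the sketch below; the function name and arguments are not part of the original code.

import os

def read_puzzle_report(base_dir, model_name):
    # Hypothetical helper: parses the '<model>-puzzles.txt' file written above.
    path = os.path.join(base_dir, model_name + '-puzzles.txt')
    with open(path) as f:
        lines = f.read().splitlines()
    # The final line holds str(total_pct); everything before it is repr(result).
    return '\n'.join(lines[:-1]), float(lines[-1])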
Example #6
def train(
        working_dir: 'tf.estimator working directory.',
        chunk_dir: 'Directory where gathered training chunks are.',
        model_save_path: 'Where to export the completed generation.',
        generation_num: 'Which generation you are training.'=0):
    qmeas.start_time('train')
    tf_records = sorted(gfile.Glob(os.path.join(chunk_dir, '*.tfrecord.zz')))
    tf_records = tf_records[-1 * (WINDOW_SIZE // EXAMPLES_PER_RECORD):]

    print("Training from:", tf_records[0], "to", tf_records[-1])

    with timer("Training"):
        dual_net.train(working_dir, tf_records, generation_num)
        dual_net.export_model(working_dir, model_save_path)
    qmeas.stop_time('train')
Example #7
def bootstrap(
        working_dir: 'tf.estimator working directory. If not set, defaults to a random tmp dir'=None,
        model_save_path: 'Where to export the first bootstrapped generation'=None):
    qmeas.start_time('bootstrap')
    if working_dir is None:
        with tempfile.TemporaryDirectory() as working_dir:
            _ensure_dir_exists(working_dir)
            _ensure_dir_exists(os.path.dirname(model_save_path))
            dual_net.bootstrap(working_dir)
            dual_net.export_model(working_dir, model_save_path)
    else:
        _ensure_dir_exists(working_dir)
        _ensure_dir_exists(os.path.dirname(model_save_path))
        dual_net.bootstrap(working_dir)
        dual_net.export_model(working_dir, model_save_path)
    qmeas.stop_time('bootstrap')
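
A hedged usage sketch of bootstrap(), using the '000000-bootstrap' naming convention and directory layout that appear in the rl_loop examples later in this listing.

# Hedged usage sketch; paths follow the layout used in the rl_loop examples.
bootstrap(working_dir='models_in_training',
          model_save_path='models/000000-bootstrap')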
Example #9
def validate(
        working_dir: 'tf.estimator working directory',
        *tf_record_dirs: 'Directories where holdout data are',
        checkpoint_name: 'Which checkpoint to evaluate (None=latest)'=None,
        validate_name: 'Name for validation set (i.e., selfplay or human)'=None):
    qmeas.start_time('validate')
    tf_records = []
    with timer("Building lists of holdout files"):
        for record_dir in tf_record_dirs:
            tf_records.extend(gfile.Glob(os.path.join(record_dir, '*.zz')))

    first_record = os.path.basename(tf_records[0])
    last_record = os.path.basename(tf_records[-1])
    with timer("Validating from {} to {}".format(first_record, last_record)):
        dual_net.validate(
            working_dir, tf_records, checkpoint_name=checkpoint_name,
            name=validate_name)
    qmeas.stop_time('validate')
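
A hedged usage sketch of validate(): because tf_record_dirs is a *args parameter, any number of holdout directories can be passed. The directory names below follow the layout used elsewhere in these examples.

# Hedged usage sketch; directory names are illustrative.
validate('models_in_training',
         'data/holdout/000000-bootstrap',
         validate_name='selfplay')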
Example #11
def gather(
        input_directory: 'where to look for games'='data/selfplay/',
        output_directory: 'where to put collected games'='data/training_chunks/',
        examples_per_record: 'how many tf.examples to gather in each chunk'=EXAMPLES_PER_RECORD):
    qmeas.start_time('gather')
    _ensure_dir_exists(output_directory)
    models = [model_dir.strip('/')
              for model_dir in sorted(gfile.ListDirectory(input_directory))[-50:]]
    with timer("Finding existing tfrecords..."):
        model_gamedata = {
            model: gfile.Glob(
                os.path.join(input_directory, model, '*.tfrecord.zz'))
            for model in models
        }
    print("Found %d models" % len(models))
    for model_name, record_files in sorted(model_gamedata.items()):
        print("    %s: %s files" % (model_name, len(record_files)))

    meta_file = os.path.join(output_directory, 'meta.txt')
    try:
        with gfile.GFile(meta_file, 'r') as f:
            already_processed = set(f.read().split())
    except tf.errors.NotFoundError:
        already_processed = set()

    num_already_processed = len(already_processed)

    for model_name, record_files in sorted(model_gamedata.items()):
        if set(record_files) <= already_processed:
            continue
        print("Gathering files for %s:" % model_name)
        for i, example_batch in enumerate(
                tqdm(preprocessing.shuffle_tf_examples(examples_per_record, record_files))):
            output_record = os.path.join(output_directory,
                                         '{}-{}.tfrecord.zz'.format(model_name, str(i)))
            preprocessing.write_tf_examples(
                output_record, example_batch, serialize=False)
        already_processed.update(record_files)

    print("Processed %s new files" %
          (len(already_processed) - num_already_processed))
    with gfile.GFile(meta_file, 'w') as f:
        f.write('\n'.join(sorted(already_processed)))
    qmeas.stop_time('gather')
Example #12
def selfplay(
        load_file: "The path to the network model files",
        output_dir: "Where to write the games" = "data/selfplay",
        holdout_dir: "Where to write the games" = "data/holdout",
        output_sgf: "Where to write the sgfs" = "sgf/",
        readouts: 'How many simulations to run per move' = 100,
        verbose: '>=2 will print debug info, >=3 will print boards' = 1,
        resign_threshold: 'absolute value of threshold to resign at' = 0.95,
        holdout_pct: 'how many games to hold out for validation' = 0.05):
    qmeas.start_time('selfplay')
    clean_sgf = os.path.join(output_sgf, 'clean')
    full_sgf = os.path.join(output_sgf, 'full')
    _ensure_dir_exists(clean_sgf)
    _ensure_dir_exists(full_sgf)
    _ensure_dir_exists(output_dir)
    _ensure_dir_exists(holdout_dir)

    with timer("Loading weights from %s ... " % load_file):
        network = dual_net.DualNetwork(load_file)

    with timer("Playing game"):
        player = selfplay_mcts.play(network, readouts, resign_threshold,
                                    verbose)

    output_name = '{}-{}'.format(int(time.time() * 1000 * 1000),
                                 socket.gethostname())
    game_data = player.extract_data()
    with gfile.GFile(os.path.join(clean_sgf, '{}.sgf'.format(output_name)),
                     'w') as f:
        f.write(player.to_sgf(use_comments=False))
    with gfile.GFile(os.path.join(full_sgf, '{}.sgf'.format(output_name)),
                     'w') as f:
        f.write(player.to_sgf())

    tf_examples = preprocessing.make_dataset_from_selfplay(game_data)

    # Hold out 5% of games for evaluation.
    if random.random() < holdout_pct:
        fname = os.path.join(holdout_dir, "{}.tfrecord.zz".format(output_name))
    else:
        fname = os.path.join(output_dir, "{}.tfrecord.zz".format(output_name))

    preprocessing.write_tf_examples(fname, tf_examples)
    qmeas.stop_time('selfplay')
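
A hedged usage sketch of selfplay(): one call plays a single game, writes its clean and full SGFs, and routes the game's tf.examples to either the holdout or the training directory based on holdout_pct. The model path and directories below are illustrative, following the other examples.

# Hedged usage sketch; paths follow the layout used in the other examples.
selfplay(load_file='models/000000-bootstrap',
         output_dir='data/selfplay/000000-bootstrap',
         holdout_dir='data/holdout/000000-bootstrap',
         output_sgf='sgf/000000-bootstrap',
         readouts=100,
         holdout_pct=0.05)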
Example #13
def evaluate_both(
        prev_model: 'The path to previous model',
        cur_model: 'The path to current model',
        output_dir: 'Where to write the evaluation results'='sgf/evaluate',
        readouts: 'How many readouts to make per move.'=200,
        games: 'the number of games to play'=20,
        verbose: 'How verbose the players should be (see selfplay)' = 1):
    qmeas.start_time('evaluate')
    _ensure_dir_exists(output_dir)

    winners = []
    with timer("%d games" % games):
        winners = evaluation.play_match_many_instance_both(
            prev_model, cur_model, games, readouts, output_dir, verbose)
    qmeas.stop_time('evaluate')
    white_count = 0
    for win in winners:
        if 'W' in win or 'w' in win:
            white_count += 1
    return white_count * 1.0 / (games * 2)
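
A hedged usage sketch of evaluate_both(); note that the return value is the fraction of evaluation games won by White, with games * 2 in the denominator as in the code above. The model paths are illustrative.

# Hedged usage sketch; model paths are illustrative.
white_rate = evaluate_both(prev_model='models/000000-bootstrap',
                           cur_model='models/000001-nextmodel',
                           games=20, readouts=200)
print('White win rate: {:.2f}'.format(white_rate))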
Example #14
def selfplay(
        load_file: "The path to the network model files",
        output_dir: "Where to write the games"="data/selfplay",
        holdout_dir: "Where to write the games"="data/holdout",
        output_sgf: "Where to write the sgfs"="sgf/",
        readouts: 'How many simulations to run per move'=100,
        verbose: '>=2 will print debug info, >=3 will print boards' = 1,
        resign_threshold: 'absolute value of threshold to resign at' = 0.95,
        holdout_pct: 'how many games to hold out for validation' = 0.05):
    qmeas.start_time('selfplay')
    clean_sgf = os.path.join(output_sgf, 'clean')
    full_sgf = os.path.join(output_sgf, 'full')
    _ensure_dir_exists(clean_sgf)
    _ensure_dir_exists(full_sgf)
    _ensure_dir_exists(output_dir)
    _ensure_dir_exists(holdout_dir)

    with timer("Loading weights from %s ... " % load_file):
        network = dual_net.DualNetwork(load_file)

    with timer("Playing game"):
        player = selfplay_mcts.play(
            network, readouts, resign_threshold, verbose)

    output_name = '{}-{}'.format(int(time.time() * 1000 * 1000), socket.gethostname())
    game_data = player.extract_data()
    with gfile.GFile(os.path.join(clean_sgf, '{}.sgf'.format(output_name)), 'w') as f:
        f.write(player.to_sgf(use_comments=False))
    with gfile.GFile(os.path.join(full_sgf, '{}.sgf'.format(output_name)), 'w') as f:
        f.write(player.to_sgf())

    tf_examples = preprocessing.make_dataset_from_selfplay(game_data)

    # Hold out 5% of games for evaluation.
    if random.random() < holdout_pct:
        fname = os.path.join(holdout_dir, "{}.tfrecord.zz".format(output_name))
    else:
        fname = os.path.join(output_dir, "{}.tfrecord.zz".format(output_name))

    preprocessing.write_tf_examples(fname, tf_examples)
    qmeas.stop_time('selfplay')
Example #15
def evaluate(
        black_model: 'The path to the model to play black',
        white_model: 'The path to the model to play white',
        output_dir: 'Where to write the evaluation results'='sgf/evaluate',
        readouts: 'How many readouts to make per move.'=200,
        games: 'the number of games to play'=20,
        verbose: 'How verbose the players should be (see selfplay)' = 1):
    qmeas.start_time('evaluate')
    _ensure_dir_exists(output_dir)

    with timer("Loading weights"):
        black_net = dual_net.DualNetwork(black_model)
        white_net = dual_net.DualNetwork(white_model)

    winners = []
    with timer("%d games" % games):
        winners = evaluation.play_match(
            black_net, white_net, games, readouts, output_dir, verbose)
    qmeas.stop_time('evaluate')
    white_count = 0
    for win in winners:
        if 'W' in win or 'w' in win:
            white_count += 1
    return white_count * 1.0 / games
Example #17
def main_():
    """Run the reinforcement learning loop

    This tries to create a realistic way to run the reinforcement learning with
    all default parameters.
    """
    print('Starting self play loop.')

    qmeas.start_time('selfplay_wait')
    start_t = time.time()

    _, model_name = get_latest_model()
    num_workers = 0

    procs = []

    if sys.argv[3] == 'worker' or sys.argv[3] == 'driver':
        selfplay_dir = os.path.join(SELFPLAY_DIR, model_name)
    else:
        selfplay_dir = SELFPLAY_BACKUP_DIR

    def count_live_procs():
        return len(list(filter(lambda proc: proc.poll() is None, procs)))

    def start_worker(num_workers):
        worker_seed = hash(hash(SEED) + ITERATION) + num_workers
        cmd = 'GOPARAMS={} OMP_NUM_THREADS=1 KMP_HW_SUBSET={} KMP_AFFINITY=granularity=fine,proclist=[{}],explicit python3 selfplay_worker.py {} {} {}'.format(
            os.environ['GOPARAMS'], os.environ['KMP_HW_SUBSET'],
            num_workers % multiprocessing.cpu_count(), BASE_DIR, worker_seed,
            sys.argv[3])
        procs.append(subprocess.Popen(cmd, shell=True))

    def count_games():
        # returns number of games in the selfplay directory
        if not os.path.exists(selfplay_dir):
            # directory not existing implies no games have been played yet
            return 0
        return len(gfile.Glob(os.path.join(selfplay_dir, '*.zz')))

    # Generate selfplay games until the needed number of games is reached.
    if sys.argv[3] == 'worker':
        for i in range(goparams.NUM_PARALLEL_SELFPLAY):
            print('Starting Worker...')
            start_worker(num_workers)
            time.sleep(0.1)
            num_workers += 1
        sys.stdout.flush()

        while count_games() < MAX_GAMES_PER_GENERATION and not os.path.isfile(
                "PK_FLAG"):
            time.sleep(1)
            games = count_games()
            sys.stdout.flush()

        print('Done with selfplay loop.')

        for proc in procs:
            proc.kill()

        # Sometimes the workers need extra help...
        os.system('pkill -f selfplay_worker.py')

        sys.stdout.flush()

    # Check the generated games and remove any excess games.
    if sys.argv[3] == 'driver':
        # Because we use process-level parallelism for selfplay and we don't
        # sync or communicate between processes, there could be too many games
        # played (up to 1 extra game per worker process).
        # This is a rather brutish way to ensure we train on the correct number
        # of games...
        print('There are {} games in the selfplay directory at {}'.format(
            count_games(), selfplay_dir))
        sys.stdout.flush()
        while count_games() > MAX_GAMES_PER_GENERATION:
            games = count_games()
            print('Too many selfplay games ({}/{}) ... deleting extra'.format(
                games, MAX_GAMES_PER_GENERATION))
            # This removes the excess game files from the selfplay directory...
            # or so we hope :)
            sys.stdout.flush()
            os.system('ls {}/* -d | tail -n {} | xargs rm '.format(
                selfplay_dir, games - MAX_GAMES_PER_GENERATION))
        print(
            'After cleanup, there are {} games in the selfplay directory at {}'
            .format(count_games(), selfplay_dir))
        sys.stdout.flush()

    # Generate backup games in case the new model gets buried and more games from the previous model are needed for training.
    if sys.argv[3] == 'backup':
        for i in range(goparams.NUM_PARALLEL_SELFPLAY):
            print('Starting Worker...')
            start_worker(num_workers)
            num_workers += 1
        sys.stdout.flush()

        while count_games() < MAX_GAMES_PER_GENERATION:
            time.sleep(1)
            games = count_games()
            sys.stdout.flush()

        print('Done with selfplay loop.')

        for proc in procs:
            proc.kill()

        # Sometimes the workers need extra help...
        os.system('pkill -f selfplay_worker.py')

        sys.stdout.flush()

    if sys.argv[3] == 'clean_backup':
        print('cleaning up {}'.format(SELFPLAY_BACKUP_DIR))
        os.system('rm {}/*'.format(SELFPLAY_BACKUP_DIR))
    qmeas.stop_time('selfplay_wait')
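
The worker branch above also stops early if a PK_FLAG file appears next to the script. A hedged sketch of how another process could trigger that early stop; the file's contents are irrelevant, only its existence is checked.

# Hedged sketch: create the sentinel file that the worker loop above polls for.
with open('PK_FLAG', 'w') as f:
    f.write('stop selfplay\n')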
Example #18
def rl_loop():
    """Run the reinforcement learning loop

    This is meant to be more of an integration test than a realistic way to run
    the reinforcement learning.
    """
    # monkeypatch the hyperparams so that we get a quickly executing network.
    dual_net.get_default_hyperparams = lambda **kwargs: {
        'k': 8,
        'fc_width': 16,
        'num_shared_layers': 1,
        'l2_strength': 1e-4,
        'momentum': 0.9
    }

    dual_net.TRAIN_BATCH_SIZE = 16
    dual_net.EXAMPLES_PER_GENERATION = 64

    # monkeypatch the shuffle buffer size so we don't spin forever shuffling positions.
    preprocessing.SHUFFLE_BUFFER_SIZE = 1000

    # with tempfile.TemporaryDirectory() as base_dir:
    base_dir = "/tmp/minigo"
    with open('/tmp/foo', 'w') as fff:
        working_dir = os.path.join(base_dir, 'models_in_training')
        model_save_path = os.path.join(base_dir, 'models', '000000-bootstrap')
        next_model_save_file = os.path.join(base_dir, 'models',
                                            '000001-nextmodel')
        selfplay_dir = os.path.join(base_dir, 'data', 'selfplay')
        model_selfplay_dir = os.path.join(selfplay_dir, '000000-bootstrap')
        gather_dir = os.path.join(base_dir, 'data', 'training_chunks')
        holdout_dir = os.path.join(base_dir, 'data', 'holdout',
                                   '000000-bootstrap')
        sgf_dir = os.path.join(base_dir, 'sgf', '000000-bootstrap')
        os.makedirs(os.path.join(base_dir, 'data'), exist_ok=True)

        print("Creating random initial weights...")
        main.bootstrap(working_dir, model_save_path)
        for i in range(100):
            qmeas.start_time('main-loop')
            print("Playing some games...")
            # Do two selfplay runs to test gather functionality
            qmeas.start_time('main-loop-self-play')
            for j in range(2):
                main.selfplay(load_file=model_save_path,
                              output_dir=model_selfplay_dir,
                              output_sgf=sgf_dir,
                              holdout_pct=0,
                              readouts=10)
            qmeas.stop_time('main-loop-self-play')
            # Do one holdout run to test validation
            qmeas.start_time('main-loop-self-play-holdout')
            main.selfplay(load_file=model_save_path,
                          holdout_dir=holdout_dir,
                          output_dir=model_selfplay_dir,
                          output_sgf=sgf_dir,
                          holdout_pct=100,
                          readouts=10)
            qmeas.stop_time('main-loop-self-play-holdout')

            print("See sgf files here?")
            sgf_listing = subprocess.check_output(
                ["ls", "-l", sgf_dir + "/full"])
            print(sgf_listing.decode("utf-8"))

            print("Gathering game output...")
            qmeas.start_time('main-loop-gather')
            main.gather(input_directory=selfplay_dir,
                        output_directory=gather_dir)
            qmeas.stop_time('main-loop-gather')
            print("Training on gathered game data...")
            qmeas.start_time('main-loop-train')
            main.train(working_dir,
                       gather_dir,
                       next_model_save_file,
                       generation_num=1)
            qmeas.stop_time('main-loop-train')
            print("Trying validate on 'holdout' game...")
            qmeas.start_time('main-loop-validate')
            main.validate(working_dir, holdout_dir)
            qmeas.stop_time('main-loop-validate')
            print("Verifying that new checkpoint is playable...")
            main.selfplay(load_file=next_model_save_file,
                          holdout_dir=holdout_dir,
                          output_dir=model_selfplay_dir,
                          output_sgf=sgf_dir,
                          readouts=10)
            qmeas.stop_time('main-loop')
            qmeas._flush()
Example #19
def main_():
    """Run the reinforcement learning loop

    This tries to create a realistic way to run the reinforcement learning with
    all default parameters.
    """
    print('Starting self play loop.')

    qmeas.start_time('selfplay_wait')
    start_t = time.time()

    _, model_name = get_latest_model()

    num_workers = 0

    procs = []
    def count_live_procs():
      return len(list(filter(lambda proc: proc.poll() is None, procs)))
    def start_worker(num_workers):
      #procs.append(subprocess.Popen(cmd, shell=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE))
      worker_seed = hash(hash(SEED) + ITERATION) + num_workers
      cmd = 'GOPARAMS={} python3 selfplay_worker.py {} {}'.format(os.environ['GOPARAMS'], BASE_DIR, worker_seed)
      procs.append(subprocess.Popen(cmd, shell=True))

    selfplay_dir = os.path.join(SELFPLAY_DIR, model_name)
    def count_games():
      # returns number of games in the selfplay directory
      if not os.path.exists(os.path.join(SELFPLAY_DIR, model_name)):
        # directory not existing implies no games have been played yet
        return 0
      return len(gfile.Glob(os.path.join(SELFPLAY_DIR, model_name, '*.zz')))


    for i in range(goparams.NUM_PARALLEL_SELFPLAY):
      print('Starting Worker...')
      num_workers += 1
      start_worker(num_workers)
      time.sleep(1)
    sys.stdout.flush()

    while count_games() < MAX_GAMES_PER_GENERATION:
        time.sleep(10)
        games = count_games()
        print('Found Games: {}'.format(games))
        print('selfplaying: {:.2f} games/hour'.format(games / ((time.time() - start_t) / 60 / 60) ))
        print('Worker Processes: {}'.format(count_live_procs()))
        sys.stdout.flush()


    print('Done with selfplay loop.')

    time.sleep(10)

    for proc in procs:
      proc.kill()

    # Sometimes the workers need extra help...
    time.sleep(5)
    os.system('pkill -f selfplay_worker.py')

    # Let things settle after we kill processes.
    time.sleep(10)

    # Because we use process-level parallelism for selfplay and we don't
    # sync or communicate between processes, there could be too many games
    # played (up to 1 extra game per worker process).
    # This is a rather brutish way to ensure we train on the correct number
    # of games...
    print('There are {} games in the selfplay directory at {}'.format(count_games(), selfplay_dir))
    sys.stdout.flush()
    while count_games() > MAX_GAMES_PER_GENERATION:
      print('Too many selfplay games ({}/{}) ... deleting one'.format(count_games(), MAX_GAMES_PER_GENERATION))
      # This will remove exactly one game file from the selfplay directory... or
      # so we hope :)
      sys.stdout.flush()
      os.system('ls {}/* -d | tail -n 1 | xargs rm'.format(selfplay_dir))
      # unclear if this sleep is necessary...
      time.sleep(1)
    print('After cleanup, there are {} games in the selfplay directory at {}'.format(count_games(), selfplay_dir))
    sys.stdout.flush()

    qmeas.stop_time('selfplay_wait')


def main_():
    """Run the reinforcement learning loop

    This tries to create a realistic way to run the reinforcement learning with
    all default parameters.
    """
    print('Starting self play loop.')

    qmeas.start_time('selfplay_wait')
    start_t = time.time()

    _, model_name = get_latest_model()

    num_workers = 0

    procs = []

    def count_live_procs():
        return len(list(filter(lambda proc: proc.poll() is None, procs)))

    def start_worker(num_workers):
        #procs.append(subprocess.Popen(cmd, shell=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE))
        worker_seed = hash(hash(SEED) + ITERATION) + num_workers
        cmd = 'GOPARAMS={} python3 selfplay_worker.py {} {}'.format(
            os.environ['GOPARAMS'], BASE_DIR, worker_seed)
        procs.append(subprocess.Popen(cmd, shell=True))

    selfplay_dir = os.path.join(SELFPLAY_DIR, model_name)

    def count_games():
        # returns number of games in the selfplay directory
        if not os.path.exists(os.path.join(SELFPLAY_DIR, model_name)):
            # directory not existing implies no games have been played yet
            return 0
        return len(gfile.Glob(os.path.join(SELFPLAY_DIR, model_name, '*.zz')))

    print(
        'NUM_PARALLEL_SELFPLAY = {n}'.format(n=goparams.NUM_PARALLEL_SELFPLAY))
    for i in range(goparams.NUM_PARALLEL_SELFPLAY):
        print('Starting Worker...')
        num_workers += 1
        start_worker(num_workers)
        time.sleep(1)
    sys.stdout.flush()

    while count_games() < MAX_GAMES_PER_GENERATION:
        time.sleep(10)
        games = count_games()
        print('Found Games: {}'.format(games))
        print('selfplaying: {:.2f} games/hour'.format(
            games / ((time.time() - start_t) / 60 / 60)))
        print('Worker Processes: {}'.format(count_live_procs()))
        sys.stdout.flush()

    print('Done with selfplay loop.')

    time.sleep(10)

    for proc in procs:
        proc.kill()

    # Sometimes the workers need extra help...
    time.sleep(5)
    os.system('pkill -f selfplay_worker.py')

    # Let things settle after we kill processes.
    time.sleep(10)

    # Because we use process-level parallelism for selfplay and we don't
    # sync or communicate between processes, there could be too many games
    # played (up to 1 extra game per worker process).
    # This is a rather brutish way to ensure we train on the correct number
    # of games...
    print('There are {} games in the selfplay directory at {}'.format(
        count_games(), selfplay_dir))
    sys.stdout.flush()
    while count_games() > MAX_GAMES_PER_GENERATION:
        print('Too many selfplay games ({}/{}) ... deleting one'.format(
            count_games(), MAX_GAMES_PER_GENERATION))
        # This will remove exactly one game file from the selfplay directory... or
        # so we hope :)
        sys.stdout.flush()
        os.system('ls {}/* -d | tail -n 1 | xargs rm'.format(selfplay_dir))
        # unclear if this sleep is necessary...
        time.sleep(1)
    print('After cleanup, there are {} games in the selfplay directory at {}'.
          format(count_games(), selfplay_dir))
    sys.stdout.flush()

    qmeas.stop_time('selfplay_wait')
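
A side note on the worker seeding above: assuming SEED and ITERATION are integers, CPython's hash() of a small int is the int itself, so worker_seed is deterministic for a given SEED, ITERATION, and worker index. A tiny sketch with illustrative values:

# Illustrative values only; SEED and ITERATION come from the surrounding script.
SEED, ITERATION, num_workers = 42, 3, 1
worker_seed = hash(hash(SEED) + ITERATION) + num_workers
print(worker_seed)  # 46 on CPython (hash(n) == n for small ints)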
Example #21
def rl_loop():
    """Run the reinforcement learning loop

    This is meant to be more of an integration test than a realistic way to run
    the reinforcement learning.
    """
    # monkeypatch the hyperparams so that we get a quickly executing network.
    dual_net.get_default_hyperparams = lambda **kwargs: {
        'k': 8, 'fc_width': 16, 'num_shared_layers': 1, 'l2_strength': 1e-4, 'momentum': 0.9}

    dual_net.TRAIN_BATCH_SIZE = 16
    dual_net.EXAMPLES_PER_GENERATION = 64

    # monkeypatch the shuffle buffer size so we don't spin forever shuffling positions.
    preprocessing.SHUFFLE_BUFFER_SIZE = 1000

    # with tempfile.TemporaryDirectory() as base_dir:
    base_dir = "/tmp/minigo"
    with open('/tmp/foo', 'w') as fff:
        working_dir = os.path.join(base_dir, 'models_in_training')
        model_save_path = os.path.join(base_dir, 'models', '000000-bootstrap')
        next_model_save_file = os.path.join(base_dir, 'models', '000001-nextmodel')
        selfplay_dir = os.path.join(base_dir, 'data', 'selfplay')
        model_selfplay_dir = os.path.join(selfplay_dir, '000000-bootstrap')
        gather_dir = os.path.join(base_dir, 'data', 'training_chunks')
        holdout_dir = os.path.join(
            base_dir, 'data', 'holdout', '000000-bootstrap')
        sgf_dir = os.path.join(base_dir, 'sgf', '000000-bootstrap')
        os.makedirs(os.path.join(base_dir, 'data'), exist_ok=True)

        print("Creating random initial weights...")
        main.bootstrap(working_dir, model_save_path)
        for i in range(100):
          qmeas.start_time('main-loop')
          print("Playing some games...")
          # Do two selfplay runs to test gather functionality
          qmeas.start_time('main-loop-self-play')
          for j in range(2):
            main.selfplay(
                load_file=model_save_path,
                output_dir=model_selfplay_dir,
                output_sgf=sgf_dir,
                holdout_pct=0,
                readouts=10)
          qmeas.stop_time('main-loop-self-play')
          # Do one holdout run to test validation
          qmeas.start_time('main-loop-self-play-holdout')
          main.selfplay(
              load_file=model_save_path,
              holdout_dir=holdout_dir,
              output_dir=model_selfplay_dir,
              output_sgf=sgf_dir,
              holdout_pct=100,
              readouts=10)
          qmeas.stop_time('main-loop-self-play-holdout')

          print("See sgf files here?")
          sgf_listing = subprocess.check_output(["ls", "-l", sgf_dir + "/full"])
          print(sgf_listing.decode("utf-8"))

          print("Gathering game output...")
          qmeas.start_time('main-loop-gather')
          main.gather(input_directory=selfplay_dir, output_directory=gather_dir)
          qmeas.stop_time('main-loop-gather')
          print("Training on gathered game data...")
          qmeas.start_time('main-loop-train')
          main.train(working_dir, gather_dir, next_model_save_file, generation_num=1)
          qmeas.stop_time('main-loop-train')
          print("Trying validate on 'holdout' game...")
          qmeas.start_time('main-loop-validate')
          main.validate(working_dir, holdout_dir)
          qmeas.stop_time('main-loop-validate')
          print("Verifying that new checkpoint is playable...")
          main.selfplay(
              load_file=next_model_save_file,
              holdout_dir=holdout_dir,
              output_dir=model_selfplay_dir,
              output_sgf=sgf_dir,
              readouts=10)
          qmeas.stop_time('main-loop')
          qmeas._flush()
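
All of these examples share the same qmeas instrumentation pattern: bracket each phase with start_time/stop_time, record scalar results, and flush them to disk. A hedged sketch of that pattern, assuming the project's qmeas module is importable; the phase and metric names here are placeholders.

import qmeas  # project-specific measurement module used throughout the examples

qmeas.start_time('example-phase')
# ... phase body would go here ...
qmeas.stop_time('example-phase')
qmeas.record('example-metric', 0.5)  # placeholder metric name and value
qmeas._flush()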