def _determine_chunk_to_make(write_dir):
    """
    Returns the full path of the chunk to make (gs://...)
    and a boolean, indicating whether we should wait for a new model
    or if we're 'behind' and should just write out our current chunk immediately
    True == write immediately.
    """
    models = fsdb.get_models()
    # Last model is N.  N+1 (should be) training.  We should gather games for N+2.
    chunk_to_make = os.path.join(write_dir,
                                 str(models[-1][0] + 1) + '.tfrecord.zz')
    if not tf.gfile.Exists(chunk_to_make):
        # N+1 is missing.  Write it out ASAP
        print("Making chunk ASAP:", chunk_to_make)
        return chunk_to_make, True
    chunk_to_make = os.path.join(write_dir,
                                 str(models[-1][0] + 2) + '.tfrecord.zz')
    while tf.gfile.Exists(chunk_to_make):
        print("Chunk for next model ({}) already exists. Sleeping.".format(
            chunk_to_make))
        time.sleep(5 * 60)
        models = fsdb.get_models()
        chunk_to_make = os.path.join(write_dir,
                                     str(models[-1][0] + 2) + '.tfrecord.zz')
    print("Making chunk:", chunk_to_make)

    return chunk_to_make, False
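# Worked illustration of the naming convention in the docstring above.  The
# bucket path and model number are hypothetical, not taken from a real run:
# if the newest model is number 17, model 18 should currently be training,
# so we normally build the chunk for model 19, falling back to 18 only if
# it is missing.
def _chunk_naming_example(write_dir='gs://my-bucket/golden_chunks', latest=17):
    behind = os.path.join(write_dir, str(latest + 1) + '.tfrecord.zz')
    normal = os.path.join(write_dir, str(latest + 2) + '.tfrecord.zz')
    return behind, normal   # ('.../18.tfrecord.zz', '.../19.tfrecord.zz')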
def fill_and_wait_models(bufsize=EXAMPLES_PER_GENERATION,
                         write_dir=None,
                         threads=8,
                         model_window=100,
                         skip_first_rsync=False):
    """ Fills a ringbuffer with positions from the most recent games, then
    continually rsync's and updates the buffer until a new model is promoted.
    Once it detects a new model, iit then dumps its contents for training to
    immediately begin on the next model.
    """
    write_dir = write_dir or fsdb.golden_chunk_dir()
    buf = ExampleBuffer(bufsize)
    models = fsdb.get_models()[-model_window:]
    if not skip_first_rsync:
        with timer("Rsync"):
            smart_rsync(models[-1][0] - 6)
    files = tqdm(map(files_for_model, models), total=len(models))
    buf.parallel_fill(list(itertools.chain(*files)), threads=threads)

    print("Filled buffer, watching for new games")
    while fsdb.get_latest_model()[0] == models[-1][0]:
        with timer("Rsync"):
            smart_rsync(models[-1][0] - 2)
        new_files = tqdm(map(files_for_model, models[-2:]),
                         total=len(models[-2:]))
        buf.update(list(itertools.chain(*new_files)))
        time.sleep(60)
    latest = fsdb.get_latest_model()

    print("New model!", latest[1], "!=", models[-1][1])
    print(buf)
    buf.flush(os.path.join(write_dir, str(latest[0] + 1) + '.tfrecord.zz'))
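# fill_and_wait_models() above relies on a files_for_model() helper that is
# not part of this excerpt.  A minimal sketch of what it plausibly does,
# assuming the (model_num, model_name) tuples returned by fsdb.get_models()
# and the LOCAL_DIR mirror that smart_rsync() maintains:
def files_for_model(model):
    """Return the locally rsynced example files for one model (sketch)."""
    return tf.gfile.Glob(os.path.join(LOCAL_DIR, model[1], '*.zz'))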
def smart_rsync(from_model_num=0, source_dir=None, dest_dir=LOCAL_DIR):
    source_dir = source_dir or fsdb.selfplay_dir()
    from_model_num = 0 if from_model_num < 0 else from_model_num
    models = [m for m in fsdb.get_models() if m[0] >= from_model_num]
    for _, model in models:
        _rsync_dir(os.path.join(source_dir, model),
                   os.path.join(dest_dir, model))
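# smart_rsync() delegates to a _rsync_dir() helper that is not shown in this
# excerpt.  A minimal sketch, assuming the games live in GCS and the gsutil
# CLI is available on the path; the log file name is a placeholder:
import subprocess

def _rsync_dir(source_dir, dest_dir):
    """Mirror one model's selfplay directory into the local cache (sketch)."""
    ensure_dir_exists(dest_dir)
    # -m parallelizes the copy; stderr is appended to a log to keep the
    # console output readable.
    with open('.rsync_log', 'ab') as rsync_log:
        subprocess.call(['gsutil', '-m', 'rsync', source_dir, dest_dir],
                        stderr=rsync_log)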
def main():
    root = os.path.abspath(
        os.path.join("sgf", fsdb.FLAGS.bucket_name, "sgf/eval"))
    if FLAGS.sync_ratings:
        sync(root)

    for k, v in top_n(20):
        print("Top model {}: {}".format(k, v))

    # Ratings keyed by model id -> (rating, sigma); needed for the report below.
    r = compute_ratings(wins_subset(fsdb.models_dir()))

    db = sqlite3.connect("ratings.db")
    print("db has",
          db.execute("select count(*) from wins").fetchone()[0], "games")
    models = fsdb.get_models()
    for m in models[-10:]:
        m_id = model_id_of(m[0])
        if m_id in r:
            rat, sigma = r[m_id]
            print("{:>30}:  {:.2f} ({:.3f})".format(m[1], rat, sigma))
        else:
            print("{}, Model id not found({})".format(m[1], m_id))

    # Suggest some pairs
    random.seed(5)
    print()
    suggest_pairs(5, 2)
Example #5
def main():
    root = os.path.abspath(
        os.path.join("sgf", fsdb.FLAGS.bucket_name, "sgf/eval"))
    sync(root)
    models = fsdb.get_models()
    data = wins_subset(fsdb.models_dir())
    print(len(data))
    r = compute_ratings(data)
    for v, k in sorted([(v, k) for k, v in r.items()])[-20:][::-1]:
        print(models[model_num_for(k)][1], v)
    db = sqlite3.connect("ratings.db")
    print(db.execute("select count(*) from wins").fetchone()[0], "games")
    for m in models[-10:]:
        m_id = model_id(m[0])
        print(m[1], r.get(m_id, "model id not found({})".format(m_id)))

def fill_and_wait_time(bufsize=EXAMPLES_PER_GENERATION,
                       write_dir=None,
                       threads=32,
                       start_from=None):
    start_from = start_from or dt.datetime.utcnow()
    write_dir = write_dir or fsdb.golden_chunk_dir()
    buf = ExampleBuffer(bufsize)
    chunk_to_make, fast_write = _determine_chunk_to_make(write_dir)

    hours = fsdb.get_hour_dirs()
    with timer("Rsync"):
        time_rsync(
            min(dt.datetime.strptime(hours[-1], "%Y-%m-%d-%H/"), start_from))
        start_from = dt.datetime.utcnow()

    hours = fsdb.get_hour_dirs()
    files = (tf.gfile.Glob(os.path.join(LOCAL_DIR, d, "*.zz"))
             for d in reversed(hours)
             if tf.gfile.Exists(os.path.join(LOCAL_DIR, d)))
    files = itertools.islice(files, get_window_size(chunk_to_make))

    models = fsdb.get_models()
    buf.parallel_fill(list(itertools.chain.from_iterable(files)),
                      threads=threads)
    print("Filled buffer, watching for new games")

    while (fsdb.get_latest_model() == models[-1]
           or buf.total_updates < MINIMUM_NEW_GAMES):
        with timer("Rsync"):
            time_rsync(start_from - dt.timedelta(minutes=60))
        start_from = dt.datetime.utcnow()
        hours = sorted(fsdb.get_hour_dirs(LOCAL_DIR))
        new_files = list(
            map(lambda d: tf.gfile.Glob(os.path.join(LOCAL_DIR, d, '*.zz')),
                hours[-2:]))
        buf.update(list(itertools.chain.from_iterable(new_files)))
        if fast_write:
            break
        time.sleep(30)
        if fsdb.get_latest_model() != models[-1]:
            print("New model!  Waiting for games. Got", buf.total_updates,
                  "new games so far")

    latest = fsdb.get_latest_model()
    print("New model!", latest[1], "!=", models[-1][1])
    print(buf)
    buf.flush(chunk_to_make)
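# fill_and_wait_time() above relies on a time_rsync() helper that is not part
# of this excerpt.  A minimal sketch, assuming hourly selfplay directories
# named with a %Y-%m-%d-%H timestamp (as the strptime call above suggests)
# and the _rsync_dir() helper used by smart_rsync():
def time_rsync(from_date, source_dir=None, dest_dir=LOCAL_DIR):
    """Rsync every hourly selfplay directory from `from_date` to now (sketch)."""
    source_dir = source_dir or fsdb.selfplay_dir()
    while from_date < dt.datetime.utcnow():
        hour_dir = from_date.strftime("%Y-%m-%d-%H")
        src = os.path.join(source_dir, hour_dir)
        if tf.gfile.Exists(src):
            _rsync_dir(src, os.path.join(dest_dir, hour_dir))
        from_date += dt.timedelta(hours=1)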
Example #7
def loop(unused_argv):
    if len(fsdb.get_models()) == 0:
        # TODO(amj): Do bootstrap here.
        pass
    while True:
        print("=" * 40, flush=True)
        with utils.timer("Train"):
            completed_process = train()
        if completed_process.returncode > 0:
            print("Training failed, aborting.")
            sys.exit(1)

        with utils.timer("Validate"):
            if not FLAGS.pro_dataset:
                print("*** --pro_dataset not set, skipping pro validation ***")
            else:
                validate_pro()
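# loop() only requires that train() return an object with a .returncode, i.e.
# the result of running the training job as a subprocess.  A minimal sketch;
# the script name and flag file below are placeholders, not the project's
# actual entry point:
import subprocess
import sys

def train():
    """Run one generation of training as a subprocess (sketch)."""
    return subprocess.run(
        [sys.executable, 'train.py', '--flagfile=train_flags'],  # placeholder
        check=False)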
Example #8
def main():
    root = os.path.abspath(
        os.path.join("sgf", fsdb.FLAGS.bucket_name, "sgf/eval"))
    if FLAGS.sync_ratings:
        sync(root)

    models = fsdb.get_models()
    data = wins_subset(fsdb.models_dir())
    print("win subset", len(data), "games")
    r = compute_ratings(data)
    for v, k in sorted([(v, k) for k, v in r.items()])[-20:][::-1]:
        print("Top model({}) {}: {}".format(k, model_num_for(k), v))

    db = sqlite3.connect("ratings.db")
    print("db has",
          db.execute("select count(*) from wins").fetchone()[0], "games")
    for m in models[-10:]:
        m_id = model_id(m[0])
        if m_id in r:
            rat, sigma = r[m_id]
            print("{:>30}:  {:.2f} ({:.3f})".format(m[1], rat, sigma))
        else:
            print("{}, Model id not found({})".format(m[1], m_id))

def make_chunk_for(output_dir=LOCAL_DIR,
                   local_dir=LOCAL_DIR,
                   game_dir=None,
                   model_num=1,
                   positions=EXAMPLES_PER_GENERATION,
                   threads=8,
                   sampling_frac=0.02):
    """
    Explicitly make a golden chunk for a given model `model_num`
    (not necessarily the most recent one).

      While we haven't yet got enough samples (EXAMPLES_PER_GENERATION)
      Add samples from the games of previous model.
    """
    game_dir = game_dir or fsdb.selfplay_dir()
    ensure_dir_exists(output_dir)
    models = [model for model in fsdb.get_models() if model[0] < model_num]
    buf = ExampleBuffer(positions, sampling_frac=sampling_frac)
    files = []
    for _, model in sorted(models, reverse=True):
        local_model_dir = os.path.join(local_dir, model)
        if not tf.gfile.Exists(local_model_dir):
            print("Rsyncing", model)
            _rsync_dir(os.path.join(game_dir, model), local_model_dir)
        files.extend(tf.gfile.Glob(os.path.join(local_model_dir, '*.zz')))
        print("{}: {} games".format(model, len(files)))
        # Assume roughly 200 positions per game; stop collecting files once
        # the expected number of sampled positions covers the request.
        if len(files) * 200 * sampling_frac > positions:
            break

    print("Filling from {} files".format(len(files)))

    buf.parallel_fill(files, threads=threads)
    print(buf)
    output = os.path.join(output_dir, str(model_num) + '.tfrecord.zz')
    print("Writing to", output)
    buf.flush(output)
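# Hypothetical invocation (the output path and model number are illustrative
# only): rebuild the golden chunk for model 17 from the selfplay games of
# models 0..16, keeping roughly 2% of the positions from each game.
make_chunk_for(output_dir='/tmp/golden_chunks',
               local_dir=LOCAL_DIR,
               model_num=17,
               positions=EXAMPLES_PER_GENERATION,
               threads=8,
               sampling_frac=0.02)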